Gnash  0.8.10
Namespaces | Defines | Functions
utf8.cpp File Reference
#include "utf8.h"
#include <limits>
#include <boost/cstdint.hpp>
#include <string>
#include <vector>
#include <cstdlib>

Namespaces

namespace  gnash
 

Anonymous namespace for callbacks, local functions, event handlers etc.


namespace  gnash::utf8
 

Utilities to convert between std::string and std::wstring.


Defines

#define FIRST_BYTE(mask, shift)
#define NEXT_BYTE(shift)
#define ENC_DEFAULT   0
#define ENC_UTF8   1
#define ENC_UTF16BE   2
#define ENC_UTF16LE   3

Functions

std::wstring gnash::utf8::decodeCanonicalString (const std::string &str, int version)
 Converts a std::string with multibyte characters into a std::wstring.
std::string gnash::utf8::encodeCanonicalString (const std::wstring &wstr, int version)
 Converts a std::wstring into canonical std::string.
std::string gnash::utf8::encodeLatin1Character (boost::uint32_t ucsCharacter)
 Encodes the given wide character as a character that is at least 8 bits wide.
boost::uint32_t gnash::utf8::decodeNextUnicodeCharacter (std::string::const_iterator &it, const std::string::const_iterator &e)
 Return the next Unicode character in the UTF-8 encoded string.
std::string gnash::utf8::encodeUnicodeCharacter (boost::uint32_t ucs_character)
 Encodes the given wide character into a canonical string, theoretically up to 6 chars in length.
char * gnash::utf8::stripBOM (char *in, size_t &size, TextEncoding &encoding)
 Interpret (and skip) Byte Order Mark in input stream.
const char * gnash::utf8::textEncodingName (TextEncoding enc)
 Return name of a text encoding.
EncodingGuess gnash::utf8::guessEncoding (const std::string &s, int &length, std::vector< int > &offsets)
 Common code for guessing at the encoding of random text.

Define Documentation

#define ENC_DEFAULT   0
#define ENC_UTF16BE   2
#define ENC_UTF16LE   3
#define ENC_UTF8   1
#define FIRST_BYTE (   mask,
  shift 
)
Value:
/* Post-increment iterator */ \
    uc = (*it++ & (mask)) << (shift);

Referenced by gnash::utf8::decodeNextUnicodeCharacter().

#define NEXT_BYTE (   shift)
Value:
\
    if (it == e || *it == 0) return 0; /* end of buffer, do not advance */    \
    if ((*it & 0xC0) != 0x80) return invalid; /* standard check */    \
    /* Post-increment iterator: */        \
    uc |= (*it++ & 0x3F) << shift;

Referenced by gnash::utf8::decodeNextUnicodeCharacter().