Gnash  0.8.10
utf8.h
Go to the documentation of this file.
00001 // utf8.h: utilities for converting to and from UTF-8
00002 // 
00003 //   Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
00004 // 
00005 // This program is free software; you can redistribute it and/or modify
00006 // it under the terms of the GNU General Public License as published by
00007 // the Free Software Foundation; either version 3 of the License, or
00008 // (at your option) any later version.
00009 // 
00010 // This program is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013 // GNU General Public License for more details.
00014 // 
00015 // You should have received a copy of the GNU General Public License
00016 // along with this program; if not, write to the Free Software
00017 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00018 //
00019 // Based on the public domain work of Thatcher Ulrich <tu@tulrich.com> 2004
00020 
00021 #ifndef UTF8_H
00022 #define UTF8_H
00023 
00024 #include <string>
00025 #include <boost/cstdint.hpp> // for C99 int types
00026 #include <vector>
00027 
00028 #include "dsodefs.h" // For DSOEXPORT
00029 
00030 namespace gnash {
00031 
00033 //
00055 //
00059 namespace utf8 {
00060 
00062     // Decode the byte string `str` into a std::wstring, which is returned by value.
00066     // NOTE(review): `version` presumably selects version-dependent decoding rules (SWF version?); the original doc comment is missing from this listing -- confirm against the implementation.
00069     DSOEXPORT std::wstring decodeCanonicalString(const std::string& str, int version);
00070 
00072     // Encode the wide string `wstr` into a std::string, returned by value; by name and signature the counterpart of decodeCanonicalString(). NOTE(review): `version` presumably selects version-dependent encoding rules -- confirm against the implementation.
00082     DSOEXPORT std::string encodeCanonicalString(const std::wstring& wstr, int version);
00083 
00085     // Decode one character from the input starting at `it` and return it as a 32-bit unsigned value. `it` is taken by non-const reference, so the callee is able to move it. NOTE(review): presumably `it` is left just past the decoded character and `e` marks the end of the input; the value returned for invalid or truncated input is not visible here -- confirm.
00090     DSOEXPORT boost::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator& it,
00091                                                      const std::string::const_iterator& e);
00092 
00095     DSOEXPORT std::string encodeUnicodeCharacter(boost::uint32_t ucs_character); // Returns the encoded form of the single character `ucs_character` as a std::string. NOTE(review): presumably UTF-8, per this header's stated purpose -- confirm.
00096     
00098     // Returns a std::string encoding of the single character `ucsCharacter`. NOTE(review): the name suggests Latin-1 (ISO-8859-1) output; the handling of values that do not fit in one byte is not visible here -- confirm.
00101     DSOEXPORT std::string encodeLatin1Character(boost::uint32_t ucsCharacter);
00102 
00103     enum TextEncoding { // Text-encoding identifiers; passed by reference to stripBOM() and by value to textEncodingName().
00104         encUNSPECIFIED, // No explicit values are given, so enumerators number 0..9 in declaration order.
00105         encUTF8,
00106         encUTF16BE, // NOTE(review): the BE/LE suffixes presumably denote big-/little-endian byte order -- confirm.
00107         encUTF16LE,
00108         encUTF32BE,
00109         encUTF32LE,
00110         encSCSU,
00111         encUTF7,
00112         encUTFEBCDIC,
00113         encBOCU1
00114     };
00115 
00117     // Takes a character buffer `in` together with `size` and `encoding`, both non-const references that the callee is able to update. NOTE(review): presumably detects a leading byte-order mark, stores the detected encoding in `encoding`, reduces `size` by the BOM length, and returns a pointer into `in` just past the BOM (no new allocation) -- confirm against the implementation.
00140     DSOEXPORT char* stripBOM(char* in, size_t& size, TextEncoding& encoding);
00141 
00143     DSOEXPORT const char* textEncodingName(TextEncoding enc); // Returns a C string naming `enc`. NOTE(review): presumably points to static storage that the caller must not free -- confirm.
00144 
00145     enum EncodingGuess { // Result type of guessEncoding().
00146         ENCGUESS_UNICODE = 0, // NOTE(review): presumably the "UTF8" case named in the guessEncoding() comment -- confirm.
00147         ENCGUESS_JIS = 1, // NOTE(review): presumably the "Shift-Jis" case named in the guessEncoding() comment -- confirm.
00148         ENCGUESS_OTHER = 2 // guessEncoding()'s comment says offsets are not accurate when this value is returned.
00149     };
00150 
00152     // Guess the encoding of the string s: Shift-JIS, UTF-8, or other. Puts the character count in length,
00153     // and the offsets to the characters in offsets.
00154     // offsets is a reference and so can never be NULL (earlier wording described a nullable pointer). NOTE(review): that wording also said offsets should hold at least s.length() entries; whether the caller must pre-size the vector is not visible here -- confirm.
00155     // offsets are not accurate if the return value is ENCGUESS_OTHER
00156     //
00159     DSOEXPORT EncodingGuess guessEncoding(const std::string& s, int& length,
00160             std::vector<int>& offsets);
00161 
00162 
00163 } // namespace utf8
00164 } // namespace gnash
00165 
00166 #endif // UTF8_H
00167 
00168 
00169 // Local Variables:
00170 // mode: C++
00171 // c-basic-offset: 8 
00172 // tab-width: 8
00173 // indent-tabs-mode: t
00174 // End: