#include <Utf8String.h>
Public Member Functions | |
UTF8String () | |
UTF8String (const std::string &s) | |
UTF8String (const UTF8String &s) | |
unsigned int | unicodeValueCount () const |
UTF8String | unicodeSubString (unsigned int stt, unsigned int howManyCharacters=0) const |
UTF8String | operator[] (unsigned int pos) const |
UTF32 | unicodeValueAtPosition (unsigned int pos=0) const |
bool | isRTL (void) const |
bool | isIndic (void) const |
bool | isArabic (void) const |
SCRIPTCODE | getScriptCode (void) |
std::basic_string< UTF32 > | UTF32String () const |
UTF8String & | append (const std::basic_string< UTF32 > &UTF32String) |
UTF8String & | append (const std::basic_string< UTF16 > &UTF16String) |
UTF8String & | operator+= (const std::basic_string< UTF32 > &UTF32String) |
UTF8String & | operator+= (const std::basic_string< UTF16 > &UTF16String) |
UTF8String & | operator= (const std::basic_string< UTF32 > &UTF32String) |
UTF8String & | operator= (const std::basic_string< UTF16 > &UTF16String) |
UTF8String (const std::basic_string< UTF32 > &UTF32String) | |
UTF8String (const std::basic_string< UTF16 > &UTF16String) | |
Private Member Functions | |
const char * | _UTF32ValueToUTF8 (UTF32 UTF32Value) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
getScriptCode(): Returns a script code based on the first character in the string: |
|
Returns true if the string starts with a Unicode character from the Arabic script. |
|
Returns true if the string starts with a Unicode character from an Indic or Indic-derived script |
|
Returns true if the string starts with a Unicode character from a script that is written right-to-left. |
|
|
|
|
|
|
|
|
|
|
|
|
|
Note: The code in this method was inspired by a UTF8ToUTF32 method in the GPL'ed swunicod.cpp (c) 1998 by the CrossWire Bible Society (http://www.crosswire.org). It differs a bit from the method presented by the Unicode Consortium, which relies on some static lookup tables. |
|
This function tells you the "length" of a UTF-8 string in terms of the number of Unicode code points (values) encoded in the UTF-8 string. Since all UTF-8 continuation bytes have the bit pattern "10xxxxxx" (hence the mask 0x00C0), counting only those bytes that are not continuation bytes (hence the comparison "!= 0x0080" on the masked byte) directly provides the number of encoded code points: |
|
UTF32String(): Converts a UTF8String to a UTF32String |