Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Class Members | File Members

Utf8String.h

Go to the documentation of this file.
00001 
00002 //
00003 // This file is part of the MADELINE 2 program 
00004 // written by Edward H. Trager and Ritu Khanna
00005 // Copyright (c) 2005 by the
00006 // Regents of the University of Michigan.
00007 // All Rights Reserved.
00008 // 
00009 // The latest version of this program is available from:
00010 // 
00011 //   http://eyegene.ophthy.med.umich.edu/madeline/
00012 //   
00013 // Released under the GNU General Public License.
00014 // A copy of the GPL is included in the distribution
00015 // package of this software, or see:
00016 // 
00017 //   http://www.gnu.org/copyleft/
00018 //   
00019 // ... for licensing details.
00020 // 
00022 //
00023 // utf8String.h
00024 //
00025 // (c) 2006 by Edward H. Trager
00026 // released under the GNU General Public License
00027 // 
00028 // This file was originally written for inclusion
00029 // in "Font Playground" .
00030 //
00031 // 2006.04.30.et.
00032 // LAST UPDATE: 2007.01.08
00033 // 
00034 
00035 #ifndef UTF8STRING_INCLUDED
00036 #define UTF8STRING_INCLUDED
00037 
00038 #include "ScriptCodes.h"
00039 #include <string>
00040 
00041 typedef unsigned long  UTF32; // at least 32 bits (unsigned long may be wider on some platforms)
00042 typedef unsigned short UTF16; // at least 16 bits
00043 typedef unsigned char  UTF8;  // at least 8 bits
00044 // Named UTF32 constants: the replacement character (0xFFFD) and UNI_MAX_UTF32 (0x7FFFFFFF):
00045 #define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
00046 #define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
00047 // NOTE(review): the macro bodies above and below are casts without enclosing parentheses.
00048 //
00049 // UTF-16 surrogate range bounds (high: 0xD800-0xDBFF, low: 0xDC00-0xDFFF), needed for UTF-16 conversion:
00050 // 
00051 #define UNI_SUR_HIGH_START  (UTF32)0xD800
00052 #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
00053 #define UNI_SUR_LOW_START   (UTF32)0xDC00
00054 #define UNI_SUR_LOW_END     (UTF32)0xDFFF
00055 
00056 class UTF8String : public std::string {
00057         // A public subclass of std::string that declares Unicode-aware operations:
00058         // counting Unicode values, 1-based substrings/indexing, script queries, and UTF-16/UTF-32 conversion.
00059 private:
00060         // Internal helper: takes one UTF32 value and returns a C string -- judging by its name, the UTF-8 form of that value (definition not in this header):
00061         const char *_UTF32ValueToUTF8( UTF32 UTF32Value );
00062         
00063 public:
00064         
00065         // Default constructor just calls base class std::string():
00066         UTF8String();
00067         // Construct from a plain std::string, or copy another UTF8String:
00068         UTF8String(const std::string &s);
00069         UTF8String(const UTF8String &s);
00070         // Returns how many Unicode values are stored in the string:
00071         unsigned int unicodeValueCount() const;
00072         // Get the Unicode substring starting at the "stt" unicode value --
00073         // Note that stt=1 (*not* zero) returns the entire string when howManyCharacters is left at its default of 0:
00074         UTF8String unicodeSubString(unsigned int stt,unsigned int howManyCharacters=0) const;
00075         // Read-only bracket operator retrieves the nth unicode character (returned by value as a UTF8String) --
00076         // Note that pos=1 (*not* zero) specifies the first character:
00077         UTF8String operator[](unsigned int pos) const;
00078         // Return the Unicode code value (as a UTF32) of the Unicode character at position pos:
00079         UTF32 unicodeValueAtPosition(unsigned int pos=0) const;
00080         // NOTE(review) for unicodeValueAtPosition(): pos defaults to 0 although the methods above document 1-based positions -- confirm the indexing convention.
00081         // Returns boolean TRUE if the string begins with a character
00082         // from a right-to-left script:
00083         bool isRTL(void) const;
00084         // Returns a boolean TRUE if the string begins with a character
00085         // from an Indic or Indic-derived script.  Such scripts have
00086         // special complex text layout requirements:
00087         bool isIndic(void) const;
00088         
00089         // Returns a boolean TRUE if the string begins with a character
00090         // from the Arabic script.  This script has
00091         // special complex text layout requirements:
00092         bool isArabic(void) const;
00093         
00094         //
00095         // Returns a script code based on the Unicode range of the first
00096         // character in the string: Currently only handles the Arabic and
00097         // Indic cases relevant for complex text layout
00098         // NOTE(review): unlike the is*() queries above, this method is not declared const.
00099         SCRIPTCODE getScriptCode(void);
00100         
00101         // Returns a UTF32 string (std::basic_string<UTF32>):
00102         std::basic_string<UTF32> UTF32String() const;
00103         
00104         //
00105         // append() overloads and the += / = operators derived from them; each takes a UTF32 or UTF16 string and returns a UTF8String reference:
00106         //
00107         UTF8String& append( const std::basic_string<UTF32> &UTF32String );
00108         UTF8String& append( const std::basic_string<UTF16> &UTF16String );
00109         
00110         UTF8String& operator+=( const std::basic_string<UTF32> &UTF32String );
00111         UTF8String& operator+=( const std::basic_string<UTF16> &UTF16String );
00112         
00113         UTF8String& operator=( const std::basic_string<UTF32> &UTF32String );
00114         UTF8String& operator=( const std::basic_string<UTF16> &UTF16String );
00115         
00116         // 
00117         // Specialized constructors:
00118         // 
00119         // Construct a UTF8String from a UTF32 or UTF16 string:
00120         // 
00121         // These also ultimately use the append() methods from above:
00122         // 
00123         UTF8String( const std::basic_string<UTF32> &UTF32String );
00124         UTF8String( const std::basic_string<UTF16> &UTF16String );
00125         // NOTE(review): these single-argument constructors are not marked explicit, so they also act as implicit conversions.
00126 };
00127 
00128 #endif
00129 

Generated on Fri Nov 18 16:24:39 2011 for MADELINE by  doxygen 1.4.4