//
// This file is part of the MADELINE 2 program
// written by Edward H. Trager and Ritu Khanna
// Copyright (c) 2005 by the
// Regents of the University of Michigan.
// All Rights Reserved.
//
// The latest version of this program is available from:
//
//   http://eyegene.ophthy.med.umich.edu/madeline/
//
// Released under the GNU General Public License.
// A copy of the GPL is included in the distribution
// package of this software, or see:
//
//   http://www.gnu.org/copyleft/
//
// ... for licensing details.
//
//
// 2005.02.02.ET. (eleven o'clock at night)
//

#ifndef FILETYPECLASSIFIER_INCLUDED
// Define the guard macro so that a second inclusion of this header is
// skipped instead of redefining the class.
#define FILETYPECLASSIFIER_INCLUDED

//
// FileTypeClassifier
//
// Declares a classifier that reports a file's type as one of the
// FILE_TYPE enumerators, either as the enumerated value or as a string.
// Only declarations live here; the member functions are defined elsewhere.
//
class FileTypeClassifier {

public:

	// File types that classify() can report.
	enum FILE_TYPE { BZIP2, GZIP, XML, PKZIP, UTF8, UNKNOWN_TYPE };

	// XML sub-types reported by the (obsolete) getFileSubType().
	enum FILE_SUBTYPE { XML_MS, XML_OO, XML_HTML, XML_MADELINE, UNKNOWN_SUBTYPE };

private:

	// Capacity of _buffer, excluding the one extra trailing byte.
	// Original author's open question: what buffer size?
	static const int BUFFER_SIZE=1024;

	// Capacity of _xmlBuffer, excluding the one extra trailing byte.
	// Original author's note: last word of the XML file.
	static const int XML_BUFFER_SIZE=26;

	// Mask/pattern pair for UTF-8 continuation bytes (bit layout 10xxxxxx):
	// the mask keeps the top two bits, the pattern is the value they must have.
	// NOTE(review): 0xC0 and 0x80 exceed the range of a signed char, so the
	// stored values are negative on platforms where plain char is signed.
	// The types are left unchanged because the code using them is not visible.
	static const char UTF8_CONTINUATION_BYTE_MASK=0xC0;
	static const char UTF8_CONTINUATION_BYTE_PATTERN=0x80;

	// Byte buffers, each sized one larger than its *_SIZE constant
	// (presumably room for a terminating NUL -- confirm in the implementation).
	char _buffer[BUFFER_SIZE+1];
	char _xmlBuffer[XML_BUFFER_SIZE+1];

	// Presumably the number of bytes in _buffer that are valid for testing
	// -- TODO confirm against the implementation.
	int _bytesToTest;

	// File type held by this object.
	FILE_TYPE _type;

	// Reads the header of the named file into the buffer; returns a bool
	// (presumably true on success -- confirm in the implementation).
	bool _readHeaderIntoBuffer(const char *fileName);

	// UTF-8 test helper; returns a FILE_TYPE value.
	enum FILE_TYPE testUTF8();

public:

	//
	// Constructors:
	//
	// fileName defaults to a null pointer, i.e. no file is given.
	//
	FileTypeClassifier( const char *fileName=0 );

	//
	// classify(): returns an enumerated value for the file type:
	//
	enum FILE_TYPE classify( void );

	//
	// classifyByName(): returns a string representation of the file type:
	//
	const char *classifyByName( void );

	//
	// setFileToTest(): selects the file to be tested; returns a bool
	// (presumably true on success -- confirm in the implementation).
	//
	bool setFileToTest( const char *fileName );

	//
	// OBSOLETE: DON'T USE
	//
	enum FILE_SUBTYPE getFileSubType();

	//
	// OBSOLETE: DON'T USE
	//
	bool checkFileSubType( const char *flag );

};

#endif