Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Class Members | File Members

XMLTableParser.h

Go to the documentation of this file.
00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 //
00024 // XMLTableParser.h
00025 //
00026 
00027 #ifndef XMLTABLEPARSER_INCLUDED
00028 #define XMLTABLEPARSER_INCLUDED
00029 
00030 #include <libxml/parser.h>
00031 #include <libxml/tree.h>
00032 #include <libxml/debugXML.h>
00033 #include <fstream>
00034 #include <iostream>
00035 //#include <vector>
00036 //#include <string>
00037 #include "Exception.h"
00038 #include "ColumnClassifier.h"
00039 #include "../3rdParty/unzip/unzip.h"
00040 #include "XMLTagManager.h"
00041 
00042 #include "TableParser.h"
00043 
00048 class XMLTableParser : public TableParser {
00049         // TableParser subclass that reads table data from XML using libxml2 (xmlDoc / xmlNode).
00050         private:
00051                 // Tag manager; NOTE(review): presumably the pointer given to the constructor - confirm in the .cpp.
00052                 const XMLTagManager* _pXMLTagManager;
00053                 // Disabled members below. NOTE(review): presumably now supplied by TableParser - confirm.
00054                 //unsigned long _rows;                     // Number of rows in the table
00055                 //unsigned _columns;                       // Number of columns in the table
00056                 //std::vector<std::string> _element;       // Table cell values
00057                 //std::vector<std::string> _title; 
00058                 //std::vector<char> _columnType;
00059                 
00060                 // libxml2 handles. NOTE(review): ownership/cleanup is not visible in this header (no destructor declared).
00061                 xmlDoc *_document;                       // XML document
00062                 xmlNode *_rootElement;                   // XML document root Element
00063                 //
00064                 // Constants (only WRITEBUFFERSIZE is constant; the two members after it are mutable state)
00065                 //
00066                 const static int WRITEBUFFERSIZE = 8192; // Buffer size to extract the XML file
00067                 int _tableCount;   // NOTE(review): presumably the number of tables encountered - confirm
00068                 bool _readTitles;  // NOTE(review): presumably tracks whether column titles have been read - confirm
00069                 // String members named after table parts. NOTE(review): presumably XML tag names from the tag manager - confirm.
00070                 std::string CELL, TABLE, ROW, COL, TEXT, REPEAT;
00071                 //
00072                 // Functions (private helpers; bodies are defined outside this header)
00073                 //
00074                 bool _readDocument(void);
00075                 void _parse(xmlNode *node ); //Parse the XML file
00076                 // Takes str by non-const reference, so the argument itself may be modified.
00077                 void _trimWhiteSpace(std::string &str);
00078                 // Wrapper to check for string equality:
00079                 bool _tagNameMatches(const xmlChar *xmlTagName,std::string referenceTagName);
00080                 unsigned _getRepeatCount( xmlNode* node );
00081                 std::string _getNodeContents( xmlNode* node);
00082                 void _storeColumnTitle(const std::string & title);
00083                 
00084         public:
00085                 //
00086                 // Constructors :
00087                 //
00088                 XMLTableParser(const XMLTagManager *p);
00089                 
00090                 //
00091                 // Read in the XML string starting with the given node. 
00092                 // If node == NULL, start from the rootnode.
00093                 // NOTE(review): this comment used to say xmlTagManagerVectorIndex selects the
00094                 // type of XML file to read; no such parameter exists in the signature below.
00095                 // 
00096                 bool readString(const std::string &inString, xmlNode *node);
00097                 
00098                 void display(void);
00099                 
00100                 // reset all values for a clean read
00101                 void reset();
00102                 
00103                 //bool isXMLFormat(const std::string &inString);
00104                 
00105                 //unsigned getColumnCount( void ){ return _columns; }
00106                 //unsigned long getNumberOfRows( void ){ return _rows; }
00107                 
00108                 //std::vector<std::string> getContents() {return _element;}
00109                 //std::vector<std::string> getTitles() {return _title;}
00110                 //std::vector<char> getTypes() {return _columnType;}
00111                 
00112 };
00113 #endif

Generated on Fri Nov 18 16:24:39 2011 for MADELINE by doxygen 1.4.4