00001 00002 // 00003 // This file is part of the MADELINE 2 program 00004 // written by Edward H. Trager and Ritu Khanna 00005 // Copyright (c) 2005 by the 00006 // Regents of the University of Michigan. 00007 // All Rights Reserved. 00008 // 00009 // The latest version of this program is available from: 00010 // 00011 // http://eyegene.ophthy.med.umich.edu/madeline/ 00012 // 00013 // Released under the GNU General Public License. 00014 // A copy of the GPL is included in the distribution 00015 // package of this software, or see: 00016 // 00017 // http://www.gnu.org/copyleft/ 00018 // 00019 // ... for licensing details. 00020 // 00022 00023 // 00024 // MadelineTable.h 00025 // 00026 00027 #ifndef MADELINETABLE_INCLUDED 00028 #define MADELINETABLE_INCLUDED 00029 00030 #include <iostream> 00031 #include <fstream> 00032 00033 //#include <vector> 00034 //#include <string> 00035 #include "TableParser.h" 00036 00037 #include "LineInformation.h" 00038 #include "ColumnOffset.h" 00039 #include "Exception.h" 00040 #include "RandomGenerator.h" 00041 #include "RandomSampleSizes.h" 00042 00043 #include "Warning.h" 00044 00045 class MadelineTable : public TableParser{ 00046 00047 private: 00048 00049 char *_header; 00050 00051 // RAW BUFFER, LINES, and BLOCKS: 00052 char *_buffer; // Pointer to the beginning of the data buffer 00053 LineInformation *_lineInfo; // vector of LineInformation objects -- one object per line of file 00054 unsigned long _lines; // Total number of lines in the file (HEADER, DATA, and EMPTY lines) 00055 int _blocks; // Number of blocks should be 2, i.e., one HEADER and one DATA block 00056 00057 // DATA BLOCK: 00058 const char *_data ; // Pointer to the beginning of the rectangular DATA block 00059 unsigned long _firstDataRow; // first row of DATA block 00060 unsigned long _dataRows; // Number of rows of data 00061 unsigned long _lastDataRow; // last row of DATA block 00062 int _length; // Length of one row of data in the DATA block 00063 00064 // HEADER BLOCK: 00065 unsigned long _firstHeaderRow; 00066 unsigned long _headerRows; 00067 unsigned long _lastHeaderRow; 00068 00069 unsigned _dataColumns; 00070 00071 unsigned long *_fill; // Vector of column fill rates: vector length is DATA block line length 00072 ColumnOffset *_columnOffset; // Vector of ColumnOffsets objects -- one object per column 00073 00074 void _readFile(const char *fileName); 00075 void _readString(const std::string inString); 00076 void _getLineCount( void ); 00077 void _allocateLineInformationVector( void ); 00078 void _assignLinePointers( void ); 00079 void _determineNumberOfBlocks( void ); 00080 void _assignRecordTypes( void ); 00081 void _checkDataBlockRectangularity( void ); 00082 void _calculateColumnFillRates( void ); 00083 void _determineNumberOfColumns( void ); 00084 void _determineFieldBoundaries( void ); 00085 char *_readAndNullTerminateColumnName(char *beginning,const char *headerEnd, ColumnOffset &columnOffset); 00086 // 00087 // _readColumnType() 00088 // 00089 // Allowed field types are: C -- character 00090 // N -- numeric 00091 // D -- date 00092 // G -- genotype 00093 // A -- allele 00094 // X -- character-type gender field 00095 // 00096 char *_readColumnType(char *beginning,const char *headerEnd, ColumnOffset &columnOffset ); 00097 void _parseHeader( void ); 00098 //void _classifyColumns( void ); 00099 //void _resolveDeclaredVersusDiscoveredColumnTypes( void ); 00100 void _fillVectors(); 00101 00102 public: 00103 00104 // debug: 00105 void debug( void ); 00106 00107 // getters: 00108 int getBlockCount( void ){ return _blocks; } 00109 00110 // setters: 00111 void setFile(const char *fileName); 00112 void setString(std::string inString); 00113 00114 // Constructor: 00115 MadelineTable(); 00116 00117 // Destructor: 00118 ~MadelineTable(); 00119 00120 // Get Methods: 00121 const char *getColumnName( unsigned columnIndex ){ return _columnOffset[ columnIndex ].getName(); } 00122 DATATYPE getColumnType( unsigned columnIndex ){ return _columnOffset[ columnIndex ].getDiscoveredType(); } 00123 00124 std::string getData(unsigned columnIndex, unsigned long rowIndex); 00125 00126 // 00127 // TableParser base class now has these methods: 00128 // 00129 //unsigned getColumnCount( void ){ return _columns; }; 00130 //unsigned long getNumberOfRows( void ){ return _dataRows; } 00131 00132 void display(void); 00133 00134 }; 00135 00136 #endif