ISTable.h

Go to the documentation of this file.
00001 //$$FILE$$
00002 //$$VERSION$$
00003 //$$DATE$$
00004 //$$LICENSE$$
00005 
00006 
00007 #ifndef ISTABLE_H
00008 #define ISTABLE_H
00009 
00010 
00011 #include <float.h>
00012 
00013 #include <string>
00014 #include <vector>
00015 #include <map>
00016 
00017 #include "GenString.h"
00018 #include "STable.h"
00019 #include "FileNavigator.h"
00020 
00021 
00022 using namespace std;
00023 
00024 
00025 typedef multimap<string, unsigned int, StringCompare> tIndex;
00026 
00027 
00028 class ISTable : public STable
00029 {
00030   public:
00031     enum eTableDiff
00032     {
00033         eNONE = 0,
00034         eCASE_SENSE,
00035         eMORE_ROWS,
00036         eLESS_ROWS,
00037         eMORE_COLS,
00038         eLESS_COLS,
00039         eCOL_NAMES,
00040         eCELLS,
00041         // Used only in block diff to indicate missing table in first block
00042         eMISSING,
00043         // Used only in block diff to indicate extra table in first block
00044         eEXTRA
00045     };
00046 
00047     enum eSearchType
00048     {
00049         eEQUAL = 0,
00050         eLESS_THAN,
00051         eLESS_THAN_OR_EQUAL,
00052         eGREATER_THAN,
00053         eGREATER_THAN_OR_EQUAL
00054     };
00055 
00056     enum eSearchDir
00057     {
00058         eFORWARD = 0,
00059         eBACKWARD
00060     };
00061 
00062   private:
00063     static const string _version;
00064     FileNavigator* _fnav;
00065 
00066     void InsertColumn(const string& colName, const unsigned int colIndex,
00067       const unsigned char opts = DEFAULT_OPTIONS,
00068       const vector<string>& col = vector<string> (0));
00069     void FillColumn(const vector<string>& col, const unsigned int colIndex);
00070     int AppendToColumn(const vector<string>& col, const unsigned int colIndex);
00071     int UpdateCell(const string& cell, const unsigned int colIndex,
00072       const unsigned int rowIndex);
00073     int GetCell(string& cell, const unsigned int colIndex,
00074       const unsigned int rowIndex);
00075     int SetFlags(const unsigned char newOpts, const unsigned int colIndex);
00076     void FindRedundantRows(const vector<unsigned int>& colIndices,
00077       vector<pair<unsigned int, unsigned int> >& duplRows,
00078       const unsigned int keep, const eSearchDir searchDir = eFORWARD);
00079     bool AreListsOfColumnsValid(const vector<unsigned int>& colIndices);
00080     void CreateIndex(const string& indexName,
00081       const vector<unsigned int>& colIndices, const unsigned int unique = 0);
00082     void CreateKey(const vector<unsigned int>& colIndices);
00083     unsigned int FindFirst(const vector<string>& targets,
00084       const vector<unsigned int>& colIndices, const string& indexName =
00085       String::Empty);
00086     void Search(vector<unsigned int>& res, const string& target,
00087       const unsigned int colIndex);
00088     void Search(vector<unsigned int>& res, const vector<string>& targets,
00089       const vector<unsigned int>& colIndices,
00090       const eSearchType searchType = eEQUAL,
00091       const string& indexName = String::Empty);
00092 
00093   protected:
00094     // number of digit DBL_MIN_10_EXP, letter e is not included in size
00095     static const unsigned int EXPONENT      =  4;
00096     static const unsigned int MAX_PRECISION = DBL_DIG;
00097     //???DBL_MANT_DIG;
00098     static const unsigned int MANTISSA       =  MAX_PRECISION + 2;
00099     static const unsigned int INT_LIMIT      = 11;
00100 
00101     static const unsigned char DT_STRING_VAL = 1; 
00102     static const unsigned char DT_INTEGER_VAL = 2;
00103     static const unsigned char DT_DOUBLE_VAL = 3;
00104     // datatype mask
00105     static const unsigned char DT_MASK        = 15 << 4;
00106     // string comparison sensitivity mask
00107     static const unsigned char SC_MASK        = 0x01;
00108     // white space sensitivity mask
00109     static const unsigned char WS_MASK        = 0x02;
00110     static const unsigned char LAST_DT_VALUE  = 3;
00111     static const unsigned int  DEFAULT_PRECISION = MAX_PRECISION;
00112     static const unsigned char DEFAULT_OPTIONS = DT_STRING_VAL << 4;
00113  
00114     bool _modified; // Indicates whether table has been modified
00115 
00116     vector<unsigned int> _rowMap; 
00117     unsigned int _numDels;
00118 
00119     vector<unsigned int> _precision;
00120     vector<unsigned char> _compare_opts;
00121 
00122     vector<string> _indexNames;
00123     vector<vector<unsigned int> > _listsOfColumns;
00124     vector<tIndex> _indices;
00125     vector<unsigned int> _unique;
00126 
00127     void Init();
00128     void Clear();
00129 
00130     inline unsigned int IntRowIndex(const unsigned int rowIndex) const;
00131 
00132     inline void ClearRowMap();
00133     void EnlargeRowMap(const unsigned int numRows);
00134     void ReduceRowMap(const unsigned int numRows);
00135     void MarkRowDeleted(const unsigned int rowIndex);
00136     void UnMarkRowDeleted(const unsigned int rowIndex);
00137 
00138     bool IsDelete(const unsigned int rowIndex);
00139 
00140     StringCompare::eCompareType
00141       GetCompareType(const vector<unsigned int>& colIndices);
00142 
00143     string CellValue(const unsigned int colIndex,
00144       const unsigned int rowIndex);
00145     string ConvertString(const string& value, const unsigned int colIndex);
00146     string MultiStringsValue(const vector<string>& values,
00147       const vector<unsigned int>& colIndices);
00148     string SubRowValue(const vector<unsigned int>& colIndices,
00149       const unsigned int rowIndex);
00150     string AggregateRow(const vector<unsigned int>& colIndices,
00151       const unsigned int rowIndex);
00152 
00153     inline void AppendToAndDelimit(string& to, const string& appending);
00154 
00155     void ValidateOptions(unsigned int colIndex);
00156 
00157     string CreateInternalIndexName(const unsigned int indexIndex);
00158     void UpdateIndex(const unsigned int indexIndex,
00159       const unsigned int rowIndex);
00160     void RebuildIndex(const unsigned int indexIndex);
00161     void ClearIndex(const unsigned int indexIndex);
00162     void DeleteIndex(const unsigned int indexIndex);
00163 
00164     int FindIndex(const string& indexName);
00165     int FindIndex(const vector<unsigned int>& colIndices);
00166 
00167     void UpdateIndices(const unsigned int rowIndex);
00168     void ClearIndices();
00169 
00170     bool IsColumnInIndex(const unsigned int indexIndex,
00171       const unsigned int colIndex);
00172 
00173     int FindKeyIndex();
00174 
00175     void UpdateColListOnColInsert(const unsigned int colIndex);
00176     void UpdateColListOnColDelete(const unsigned int colIndex);
00177     void UpdateColListOnCellUpdate(const unsigned int rowIndex,
00178       const unsigned int colIndex);
00179 
00180     unsigned int FindFirst(const vector<string>& targets,
00181       const vector<unsigned int>& colIndices, const unsigned int indexIndex);
00182 
00183     void Search(vector<unsigned int>& res, const vector<string>& targets,
00184       const vector<unsigned int>& colIndices, const unsigned int indexIndex,
00185       const eSearchType searchType = eEQUAL);
00186     void Search(vector<unsigned int>& res, const vector<string>& targets,
00187       const vector<string>& colNames, const unsigned int indexIndex,
00188       const eSearchType searchType = eEQUAL);
00189 
00190     int GetObjectV6(Word index, FileNavigator*, int& size);
00191     int GetObjectV3(Word index, FileNavigator*, int& size);
00192     int GetObjectV2(Word index, FileNavigator*, int& size);
00193     int GetObjectV1(Word index, FileNavigator*, int& size);
00194     int GetObjectV1_1(Word index, FileNavigator*, int& size);
00195 
00196 #ifdef VLAD_DECIDE
00197     void CompressTable();
00198 #endif
00199     void MakeTableRectangular();
00200 
00201     void Print(unsigned int indexIndex);
00202 
00203     void ConvertToInt(const string& a, string& ret);
00204     void ConvertDouble(const string& a, string& ret);
00205     void ConvertToLowerNoWhiteSpace(const string& a, string& ret);
00206 
00207   public:
00208     // Sets string comparison case sensitive
00209     static const unsigned char CASE_SENSE = 0x00;
00210     // Sets string comparison case insensitive
00211     static const unsigned char CASE_INSENSE = 0x01;
00212     // Sets string comparison to be sensitive to whitespace
00213     static const unsigned char W_SPACE_SENSE = 0x00;
00214     // Sets string comparison to ignore repeating whitspace.  
00215     // Also ignores leading and trailing whitespace
00216     static const unsigned char W_SPACE_INSENSE = 0x02;
00217     // string datatype
00218     static const unsigned char DT_STRING  = DT_STRING_VAL  << 4;
00219     // integer datatype
00220     static const unsigned char DT_INTEGER = DT_INTEGER_VAL << 4;
00221     // VLAD FEATURE NOT WORKING double is not working, maybe integer. check it      // double datatype
00222     static const unsigned char DT_DOUBLE  = DT_DOUBLE_VAL  << 4;
00223 
00224     ISTable(const StringCompare::eCompareType colCaseSense =
00225       StringCompare::eCASE_SENSITIVE);
00226 
00227     ISTable(const string& name,
00228       const StringCompare::eCompareType colCaseSense =
00229       StringCompare::eCASE_SENSITIVE);
00230 
00231     ISTable(const ISTable& inTable);
00232 
00233     ~ISTable();
00234  
00235     ISTable& operator=(const ISTable& inTable);
00236 
00237     eTableDiff operator==(ISTable& inTable);
00238 
00239     void AddColumn(const string& colName, const unsigned char opts =
00240       DEFAULT_OPTIONS, const vector<string>& col = vector<string> (0));
00241 
00242     void InsertColumn(const string& colName,
00243       const string& afterColName, const unsigned char opts = DEFAULT_OPTIONS,
00244       const vector<string>& col = vector<string> (0));
00245 
00246     void FillColumn(const string& colName, const vector<string>& col);
00247     void AppendToColumn(const string& colName, const vector<string>& col);
00248     void AppendToColumn(const string& colName, const string& cell);
00249 
00250     void ClearColumn(const string& colName);
00251     void DeleteColumn(const string& colName);
00252 
00253     void GetColumn(vector<string>& col, const string& colName);
00254     void GetColumn(vector<string>& col, const string& colName,
00255       const string& indexName);
00256 
00257     void GetColumn(vector<string>& subCol, const string& colName,
00258       const unsigned int fromRowIndex, unsigned int toRowIndex);
00259     void GetColumn(vector<string>& subCol,
00260       const string& colName, const vector<unsigned int>& rowIndex);
00261 
00262     unsigned int AddRow();
00263 
00264     unsigned int InsertRow(const unsigned int rowIndex,
00265       const vector<string>& row = vector<string> (0));
00266 
00267     void FillRow(const unsigned int rowIndex, const vector<string>& row);
00268 
00269     void ClearRow(const unsigned int rowIndex);
00270     void DeleteRow(const unsigned int rowIndex);
00271 
00272     void DeleteRows(const vector<unsigned int>& rows);
00273 
00274     inline unsigned int GetNumRows() const;
00275     inline unsigned int GetLastRowIndex();
00276 
00277     void GetRow(vector<string>& subRow, const unsigned int rowIndex,
00278       const string& fromColName = String::Empty, const string& toColName =
00279       String::Empty);
00280 
00281     void FindRedundantRows(const vector<string>& colNames,
00282       vector<pair<unsigned int, unsigned int> >& duplRows,
00283       const unsigned int keep, const eSearchDir searchDir = eFORWARD);
00284 
00285     void UpdateCell(const unsigned int rowIndex, const string& colName,
00286       const string& cell);
00287 
00288     const string& operator()(const unsigned int rowIndex,
00289       const string& colName) const;
00290 
00291     inline bool IndexExists(const string& indexName);
00292     void CreateIndex(const string& indexName, const vector<string>& colNames,
00293       const unsigned int unique = 0);
00294     void UpdateIndex(const string& indexName, const unsigned int rowIndex);
00295     void RebuildIndex(const string& indexName);
00296     void RebuildIndices();
00297     void DeleteIndex(const string& indexName);
00298     inline unsigned int GetNumIndices();
00299 
00300     void CreateKey(const vector<string>& colNames);
00301 
00302     void SetFlags(const string& colName, const unsigned char newOpts);
00303     unsigned char GetDataType(const string& colName);
00304 
00305     unsigned int FindFirst(const vector<string>& targets,
00306       const vector<string>& colNames = vector<string> (0),
00307       const string& indexName = String::Empty);
00308 
00309     void Search(vector<unsigned int>& res, const string& target,
00310       const string& colName);
00311     void Search(vector<unsigned int>& res, const vector<string>& targets,
00312       const vector<string>& colNames = vector<string> (0),
00313       const eSearchType searchType = eEQUAL,
00314       const string& indexName = String::Empty);
00315 
00316     inline void SetModified(const bool modified);
00317     inline bool GetModified();
00318 
00319     void SetFileNavigator(FileNavigator* fileNavigator);
00320 
00321     int WriteObject(FileNavigator*, int& size);
00322     int GetObject(Word index, FileNavigator*, int& size);
00323 
00324     void Read(unsigned int indexInFile);
00325     int Write();
00326 
00327     // typeOfMerge is 0 for overwrite, 1 for overlap
00328     void Merge(ISTable& inTable, unsigned int typeOfMerge = 0); 
00329 
00330     bool PrintDiff(ISTable& inTable);
00331     void Print(const string& indexName);
00332 
00333     // VLAD - HOW TO HANDLE THIS
00334     static void SetUnion(const vector<unsigned int>& a,
00335       const vector<unsigned int>& b, vector<unsigned int>& ret);
00336     static void SetIntersect(const vector<unsigned int>& a,
00337       const vector<unsigned int>& b, vector<unsigned int>& ret);
00338 
00339 };
00340 
00341 
00342 ostream& operator<<(ostream& out, const ISTable& isTable);
00343 
00344 
00345 inline unsigned int ISTable::GetNumRows() const
00346 {
00347 
00348     return(_numRows - _numDels);
00349 
00350 }
00351 
00352 
00353 inline unsigned int ISTable::GetLastRowIndex()
00354 {
00355 
00356     return(GetNumRows() - 1);
00357 
00358 }
00359 
00360 
00361 inline unsigned int ISTable::GetNumIndices()
00362 {
00363 
00364     return(_indexNames.size());
00365 
00366 }
00367 
00368 
00369 inline bool ISTable::IndexExists(const string& indexName)
00370 {
00371 
00372     int ret = FindIndex(indexName);
00373 
00374     if (ret == -1)
00375     {
00376         return(false);
00377     }
00378     else
00379     {
00380         return(true);
00381     }
00382 
00383 }
00384 
00385 
00386 inline unsigned int ISTable::IntRowIndex(const unsigned int rowIndex) const
00387 {
00388 
00389     // Returns the STable internal row index
00390     return(_rowMap[rowIndex]);
00391 
00392 }
00393 
00394 
00395 inline void ISTable::ClearRowMap()
00396 {
00397 
00398     _rowMap.clear(); 
00399 
00400 }
00401 
00402 
00403 inline void ISTable::AppendToAndDelimit(string& to, const string& appending)
00404 {
00405 
00406     to += appending;
00407     // VLAD HARDCODED CONST
00408     to += " ";
00409 
00410 }
00411 
00412 
00413 inline void ISTable::SetModified(const bool modified)
00414 {
00415     _modified = modified;
00416 }
00417 
00418 
00419 inline bool ISTable::GetModified()
00420 {
00421     return _modified;
00422 }
00423 
00424 
00425 #endif // ISTABLE_H

Generated on Mon Apr 2 09:28:50 2007 for cif-table-obj-v7.0 by  doxygen 1.5.1