00001
00002
00003
00004
00005
00006
00007 #ifndef ISTABLE_H
00008 #define ISTABLE_H
00009
00010
00011 #include <float.h>
00012
00013 #include <string>
00014 #include <vector>
00015 #include <map>
00016
00017 #include "GenString.h"
00018 #include "STable.h"
00019 #include "FileNavigator.h"
00020
00021
// NOTE(review): a using-directive at header scope leaks every std name into
// each translation unit that includes this file. The declarations below
// depend on it (unqualified string, vector, pair, multimap, ostream), so it
// cannot be dropped without qualifying all of them.
00022 using namespace std;
00023
00024
// Storage type of a single table index: a multimap from a string key to an
// unsigned int value, ordered by the project-defined StringCompare functor.
// NOTE(review): presumably the key is the concatenated column values of a row
// and the mapped value is that row's index -- confirm in the implementation.
00025 typedef multimap<string, unsigned int, StringCompare> tIndex;
00026
00027
// ISTable: a table class publicly derived from STable. Judging by the members
// declared below it adds a row map with deleted-row bookkeeping (_rowMap,
// _numDels), per-column options and precision (_compare_opts, _precision),
// named multi-column indices (_indexNames, _listsOfColumns, _indices,
// _unique), search functions, and reading/writing through a FileNavigator.
//
// Only declarations are visible in this header. Notes that cannot be derived
// from the declarations themselves are marked NOTE(review) and must be
// confirmed against the implementation file.
00028 class ISTable : public STable
00029 {
00030 public:
// Result codes returned by operator==(ISTable&).
// NOTE(review): the meaning of each enumerator is only suggested by its name
// (eNONE presumably means "no difference found") -- confirm.
00031 enum eTableDiff
00032 {
00033 eNONE = 0,
00034 eCASE_SENSE,
00035 eMORE_ROWS,
00036 eLESS_ROWS,
00037 eMORE_COLS,
00038 eLESS_COLS,
00039 eCOL_NAMES,
00040 eCELLS,
00041
00042 eMISSING,
00043
00044 eEXTRA
00045 };
00046
// Comparison selector accepted by the multi-target Search() overloads;
// eEQUAL is their default.
00047 enum eSearchType
00048 {
00049 eEQUAL = 0,
00050 eLESS_THAN,
00051 eLESS_THAN_OR_EQUAL,
00052 eGREATER_THAN,
00053 eGREATER_THAN_OR_EQUAL
00054 };
00055
// Direction selector accepted by FindRedundantRows(); eFORWARD is the default.
00056 enum eSearchDir
00057 {
00058 eFORWARD = 0,
00059 eBACKWARD
00060 };
00061
00062 private:
// Declared without an initializer, so it is defined in another file.
// NOTE(review): presumably identifies the stored object format version.
00063 static const string _version;
// NOTE(review): presumably assigned by SetFileNavigator(); whether this class
// owns the pointee cannot be told from the header -- confirm.
00064 FileNavigator* _fnav;
00065
// Private overloads that address columns by position (colIndex/colIndices)
// where the public overloads of the same names take column names.
// NOTE(review): presumably the public overloads resolve names to positions
// and forward to these -- confirm. The int-returning ones report a status
// whose values are not visible here.
00066 void InsertColumn(const string& colName, const unsigned int colIndex,
00067 const unsigned char opts = DEFAULT_OPTIONS,
00068 const vector<string>& col = vector<string> (0));
00069 void FillColumn(const vector<string>& col, const unsigned int colIndex);
00070 int AppendToColumn(const vector<string>& col, const unsigned int colIndex);
00071 int UpdateCell(const string& cell, const unsigned int colIndex,
00072 const unsigned int rowIndex);
00073 int GetCell(string& cell, const unsigned int colIndex,
00074 const unsigned int rowIndex);
00075 int SetFlags(const unsigned char newOpts, const unsigned int colIndex);
00076 void FindRedundantRows(const vector<unsigned int>& colIndices,
00077 vector<pair<unsigned int, unsigned int> >& duplRows,
00078 const unsigned int keep, const eSearchDir searchDir = eFORWARD);
00079 bool AreListsOfColumnsValid(const vector<unsigned int>& colIndices);
00080 void CreateIndex(const string& indexName,
00081 const vector<unsigned int>& colIndices, const unsigned int unique = 0);
00082 void CreateKey(const vector<unsigned int>& colIndices);
00083 unsigned int FindFirst(const vector<string>& targets,
00084 const vector<unsigned int>& colIndices, const string& indexName =
00085 String::Empty);
00086 void Search(vector<unsigned int>& res, const string& target,
00087 const unsigned int colIndex);
00088 void Search(vector<unsigned int>& res, const vector<string>& targets,
00089 const vector<unsigned int>& colIndices,
00090 const eSearchType searchType = eEQUAL,
00091 const string& indexName = String::Empty);
00092
00093 protected:
00094
// Numeric limits. MAX_PRECISION is DBL_DIG from <float.h> and MANTISSA is
// MAX_PRECISION + 2.
// NOTE(review): EXPONENT, MANTISSA and INT_LIMIT look like field widths used
// when numbers are converted to comparable strings (see ConvertToInt /
// ConvertDouble) -- confirm.
00095 static const unsigned int EXPONENT = 4;
00096 static const unsigned int MAX_PRECISION = DBL_DIG;
00097
00098 static const unsigned int MANTISSA = MAX_PRECISION + 2;
00099 static const unsigned int INT_LIMIT = 11;
00100
// Data type codes. The public DT_STRING / DT_INTEGER / DT_DOUBLE values are
// these codes shifted left by 4, so the code sits in the upper four bits of
// an unsigned char option value.
00101 static const unsigned char DT_STRING_VAL = 1;
00102 static const unsigned char DT_INTEGER_VAL = 2;
00103 static const unsigned char DT_DOUBLE_VAL = 3;
00104
// 15 << 4 == 0xF0: selects the upper four bits (the data type code).
00105 static const unsigned char DT_MASK = 15 << 4;
00106
// Bit 0; same bit as the public CASE_INSENSE flag.
00107 static const unsigned char SC_MASK = 0x01;
00108
// Bit 1; same bit as the public W_SPACE_INSENSE flag.
00109 static const unsigned char WS_MASK = 0x02;
// Equal to the largest type code above (DT_DOUBLE_VAL).
00110 static const unsigned char LAST_DT_VALUE = 3;
00111 static const unsigned int DEFAULT_PRECISION = MAX_PRECISION;
// String type code with bits 0 and 1 clear; numerically the same as
// DT_STRING | CASE_SENSE | W_SPACE_SENSE.
00112 static const unsigned char DEFAULT_OPTIONS = DT_STRING_VAL << 4;
00113
// Flag written by SetModified() and returned by GetModified().
00114 bool _modified;
00115
// Lookup table read by IntRowIndex(): _rowMap[rowIndex] is returned as-is.
00116 vector<unsigned int> _rowMap;
// Subtracted from _numRows in GetNumRows().
// NOTE(review): presumably the number of rows currently marked deleted.
00117 unsigned int _numDels;
00118
// NOTE(review): presumably one entry per column, holding the column's
// numeric precision and its option flags (see the public flag constants).
00119 vector<unsigned int> _precision;
00120 vector<unsigned char> _compare_opts;
00121
// Index bookkeeping. GetNumIndices() reports _indexNames.size().
// NOTE(review): these four vectors look like parallel arrays addressed by
// the same "indexIndex" used throughout the protected API -- confirm.
00122 vector<string> _indexNames;
00123 vector<vector<unsigned int> > _listsOfColumns;
00124 vector<tIndex> _indices;
00125 vector<unsigned int> _unique;
00126
00127 void Init();
00128 void Clear();
00129
// Returns _rowMap[rowIndex] (defined inline after the class).
00130 inline unsigned int IntRowIndex(const unsigned int rowIndex) const;
00131
// Row map maintenance. ClearRowMap() empties _rowMap (defined inline after
// the class); the others are defined in another file.
00132 inline void ClearRowMap();
00133 void EnlargeRowMap(const unsigned int numRows);
00134 void ReduceRowMap(const unsigned int numRows);
00135 void MarkRowDeleted(const unsigned int rowIndex);
00136 void UnMarkRowDeleted(const unsigned int rowIndex);
00137
00138 bool IsDelete(const unsigned int rowIndex);
00139
00140 StringCompare::eCompareType
00141 GetCompareType(const vector<unsigned int>& colIndices);
00142
// Helpers that produce string values from cells or groups of cells.
// NOTE(review): presumably used to build index keys -- confirm.
00143 string CellValue(const unsigned int colIndex,
00144 const unsigned int rowIndex);
00145 string ConvertString(const string& value, const unsigned int colIndex);
00146 string MultiStringsValue(const vector<string>& values,
00147 const vector<unsigned int>& colIndices);
00148 string SubRowValue(const vector<unsigned int>& colIndices,
00149 const unsigned int rowIndex);
00150 string AggregateRow(const vector<unsigned int>& colIndices,
00151 const unsigned int rowIndex);
00152
// Appends `appending` and then a single space to `to` (defined inline after
// the class).
00153 inline void AppendToAndDelimit(string& to, const string& appending);
00154
00155 void ValidateOptions(unsigned int colIndex);
00156
// Index maintenance addressed by position (indexIndex); the public section
// has overloads of UpdateIndex/RebuildIndex/DeleteIndex that take an index
// name instead.
00157 string CreateInternalIndexName(const unsigned int indexIndex);
00158 void UpdateIndex(const unsigned int indexIndex,
00159 const unsigned int rowIndex);
00160 void RebuildIndex(const unsigned int indexIndex);
00161 void ClearIndex(const unsigned int indexIndex);
00162 void DeleteIndex(const unsigned int indexIndex);
00163
// IndexExists() treats a return value of -1 from FindIndex(const string&)
// as "no such index".
00164 int FindIndex(const string& indexName);
00165 int FindIndex(const vector<unsigned int>& colIndices);
00166
00167 void UpdateIndices(const unsigned int rowIndex);
00168 void ClearIndices();
00169
00170 bool IsColumnInIndex(const unsigned int indexIndex,
00171 const unsigned int colIndex);
00172
00173 int FindKeyIndex();
00174
00175 void UpdateColListOnColInsert(const unsigned int colIndex);
00176 void UpdateColListOnColDelete(const unsigned int colIndex);
00177 void UpdateColListOnCellUpdate(const unsigned int rowIndex,
00178 const unsigned int colIndex);
00179
// Lookup overloads that take the position of the index to use (indexIndex)
// rather than an index name.
00180 unsigned int FindFirst(const vector<string>& targets,
00181 const vector<unsigned int>& colIndices, const unsigned int indexIndex);
00182
00183 void Search(vector<unsigned int>& res, const vector<string>& targets,
00184 const vector<unsigned int>& colIndices, const unsigned int indexIndex,
00185 const eSearchType searchType = eEQUAL);
00186 void Search(vector<unsigned int>& res, const vector<string>& targets,
00187 const vector<string>& colNames, const unsigned int indexIndex,
00188 const eSearchType searchType = eEQUAL);
00189
// NOTE(review): presumably version-specific readers selected by GetObject()
// according to the stored format version -- confirm.
00190 int GetObjectV6(Word index, FileNavigator*, int& size);
00191 int GetObjectV3(Word index, FileNavigator*, int& size);
00192 int GetObjectV2(Word index, FileNavigator*, int& size);
00193 int GetObjectV1(Word index, FileNavigator*, int& size);
00194 int GetObjectV1_1(Word index, FileNavigator*, int& size);
00195
// CompressTable() is declared only when VLAD_DECIDE is defined.
00196 #ifdef VLAD_DECIDE
00197 void CompressTable();
00198 #endif
00199 void MakeTableRectangular();
00200
// Print overload taking the position of an index; the public overload takes
// the index name.
00201 void Print(unsigned int indexIndex);
00202
// Conversion helpers writing their result into `ret`.
00203 void ConvertToInt(const string& a, string& ret);
00204 void ConvertDouble(const string& a, string& ret);
00205 void ConvertToLowerNoWhiteSpace(const string& a, string& ret);
00206
00207 public:
00208
// Column option flags passed as the unsigned char `opts` / `newOpts`
// arguments. Bit 0 is the case flag, bit 1 the white-space flag, and the
// upper four bits carry the data type code. The *_SENSE values are 0, i.e.
// the corresponding bit is simply left clear.
00209 static const unsigned char CASE_SENSE = 0x00;
00210
00211 static const unsigned char CASE_INSENSE = 0x01;
00212
00213 static const unsigned char W_SPACE_SENSE = 0x00;
00214
00215
00216 static const unsigned char W_SPACE_INSENSE = 0x02;
00217
00218 static const unsigned char DT_STRING = DT_STRING_VAL << 4;
00219
00220 static const unsigned char DT_INTEGER = DT_INTEGER_VAL << 4;
00221
00222 static const unsigned char DT_DOUBLE = DT_DOUBLE_VAL << 4;
00223
// Constructors. colCaseSense defaults to StringCompare::eCASE_SENSITIVE.
// NOTE(review): presumably it controls how column names are compared.
// NOTE(review): the first constructor is callable with one argument and is
// not explicit, so a StringCompare::eCompareType converts implicitly to an
// ISTable.
00224 ISTable(const StringCompare::eCompareType colCaseSense =
00225 StringCompare::eCASE_SENSITIVE);
00226
00227 ISTable(const string& name,
00228 const StringCompare::eCompareType colCaseSense =
00229 StringCompare::eCASE_SENSITIVE);
00230
00231 ISTable(const ISTable& inTable);
00232
// NOTE(review): not declared virtual here; whether it is implicitly virtual
// depends on STable's destructor, which is not visible in this header.
00233 ~ISTable();
00234
00235 ISTable& operator=(const ISTable& inTable);
00236
// NOTE(review): returns an eTableDiff code rather than bool, and eNONE is 0.
// If eNONE means "no difference", then `if (a == b)` is true exactly when the
// tables differ -- callers should compare the result against eNONE. Confirm.
00237 eTableDiff operator==(ISTable& inTable);
00238
// Column operations addressed by column name. `opts` takes the flag
// constants above and defaults to DEFAULT_OPTIONS; `col` defaults to an
// empty vector.
00239 void AddColumn(const string& colName, const unsigned char opts =
00240 DEFAULT_OPTIONS, const vector<string>& col = vector<string> (0));
00241
00242 void InsertColumn(const string& colName,
00243 const string& afterColName, const unsigned char opts = DEFAULT_OPTIONS,
00244 const vector<string>& col = vector<string> (0));
00245
00246 void FillColumn(const string& colName, const vector<string>& col);
00247 void AppendToColumn(const string& colName, const vector<string>& col);
00248 void AppendToColumn(const string& colName, const string& cell);
00249
00250 void ClearColumn(const string& colName);
00251 void DeleteColumn(const string& colName);
00252
// Column retrieval into an output vector: the whole column, the column in
// the order of a named index (assumed -- confirm), a row range, or an
// explicit list of row indices.
00253 void GetColumn(vector<string>& col, const string& colName);
00254 void GetColumn(vector<string>& col, const string& colName,
00255 const string& indexName);
00256
00257 void GetColumn(vector<string>& subCol, const string& colName,
00258 const unsigned int fromRowIndex, unsigned int toRowIndex);
00259 void GetColumn(vector<string>& subCol,
00260 const string& colName, const vector<unsigned int>& rowIndex);
00261
// Row operations addressed by row index.
// NOTE(review): the meaning of the unsigned int returned by AddRow() and
// InsertRow() is not visible here (presumably the new row's index).
00262 unsigned int AddRow();
00263
00264 unsigned int InsertRow(const unsigned int rowIndex,
00265 const vector<string>& row = vector<string> (0));
00266
00267 void FillRow(const unsigned int rowIndex, const vector<string>& row);
00268
00269 void ClearRow(const unsigned int rowIndex);
00270 void DeleteRow(const unsigned int rowIndex);
00271
00272 void DeleteRows(const vector<unsigned int>& rows);
00273
// GetNumRows() returns _numRows - _numDels. GetLastRowIndex() returns
// GetNumRows() - 1 in unsigned arithmetic, so it wraps to the largest
// unsigned int when GetNumRows() is 0. Both are defined inline after the
// class.
// NOTE(review): GetLastRowIndex() is not const-qualified although its
// definition only calls the const GetNumRows().
00274 inline unsigned int GetNumRows() const;
00275 inline unsigned int GetLastRowIndex();
00276
00277 void GetRow(vector<string>& subRow, const unsigned int rowIndex,
00278 const string& fromColName = String::Empty, const string& toColName =
00279 String::Empty);
00280
// NOTE(review): the contents of the pairs written to duplRows and the
// meaning of `keep` are not visible in this header -- confirm.
00281 void FindRedundantRows(const vector<string>& colNames,
00282 vector<pair<unsigned int, unsigned int> >& duplRows,
00283 const unsigned int keep, const eSearchDir searchDir = eFORWARD);
00284
00285 void UpdateCell(const unsigned int rowIndex, const string& colName,
00286 const string& cell);
00287
// Read-only cell access by row index and column name; returns a reference.
00288 const string& operator()(const unsigned int rowIndex,
00289 const string& colName) const;
00290
// Index management addressed by index name. IndexExists() returns false
// exactly when FindIndex(indexName) returns -1; GetNumIndices() returns
// _indexNames.size(). Both are defined inline after the class.
// NOTE(review): `unique` is an unsigned int defaulting to 0; its accepted
// values are not visible here.
00291 inline bool IndexExists(const string& indexName);
00292 void CreateIndex(const string& indexName, const vector<string>& colNames,
00293 const unsigned int unique = 0);
00294 void UpdateIndex(const string& indexName, const unsigned int rowIndex);
00295 void RebuildIndex(const string& indexName);
00296 void RebuildIndices();
00297 void DeleteIndex(const string& indexName);
00298 inline unsigned int GetNumIndices();
00299
00300 void CreateKey(const vector<string>& colNames);
00301
// Sets / queries column options by column name; newOpts takes the flag
// constants declared at the top of this section.
00302 void SetFlags(const string& colName, const unsigned char newOpts);
00303 unsigned char GetDataType(const string& colName);
00304
// Lookup by column names. colNames defaults to an empty vector and
// indexName to String::Empty.
// NOTE(review): what FindFirst() returns when nothing matches, and how empty
// colNames / indexName are interpreted, is not visible here -- confirm.
00305 unsigned int FindFirst(const vector<string>& targets,
00306 const vector<string>& colNames = vector<string> (0),
00307 const string& indexName = String::Empty);
00308
// Search results are written to `res`.
00309 void Search(vector<unsigned int>& res, const string& target,
00310 const string& colName);
00311 void Search(vector<unsigned int>& res, const vector<string>& targets,
00312 const vector<string>& colNames = vector<string> (0),
00313 const eSearchType searchType = eEQUAL,
00314 const string& indexName = String::Empty);
00315
// Accessors for _modified (defined inline after the class).
// NOTE(review): GetModified() is not const-qualified although it only reads.
00316 inline void SetModified(const bool modified);
00317 inline bool GetModified();
00318
00319 void SetFileNavigator(FileNavigator* fileNavigator);
00320
// Serialization entry points. NOTE(review): the meaning of the int return
// values and of `size` is not visible in this header -- confirm.
00321 int WriteObject(FileNavigator*, int& size);
00322 int GetObject(Word index, FileNavigator*, int& size);
00323
00324 void Read(unsigned int indexInFile);
00325 int Write();
00326
00327
// NOTE(review): the accepted typeOfMerge values are not visible here.
00328 void Merge(ISTable& inTable, unsigned int typeOfMerge = 0);
00329
00330 bool PrintDiff(ISTable& inTable);
00331 void Print(const string& indexName);
00332
00333
// Static set helpers over vectors of unsigned int; the result goes to `ret`.
// NOTE(review): whether the inputs must be sorted, and whether `ret` is
// cleared first, is not visible here -- confirm.
00334 static void SetUnion(const vector<unsigned int>& a,
00335 const vector<unsigned int>& b, vector<unsigned int>& ret);
00336 static void SetIntersect(const vector<unsigned int>& a,
00337 const vector<unsigned int>& b, vector<unsigned int>& ret);
00338
00339 };
00340
00341
// Stream insertion for an ISTable; declared here and defined in another file.
// NOTE(review): this header does not include <ostream> or <iosfwd> itself, so
// `ostream` must come from one of the headers included above -- confirm.
00342 ostream& operator<<(ostream& out, const ISTable& isTable);
00343
00344
00345 inline unsigned int ISTable::GetNumRows() const
00346 {
00347
00348 return(_numRows - _numDels);
00349
00350 }
00351
00352
00353 inline unsigned int ISTable::GetLastRowIndex()
00354 {
00355
00356 return(GetNumRows() - 1);
00357
00358 }
00359
00360
00361 inline unsigned int ISTable::GetNumIndices()
00362 {
00363
00364 return(_indexNames.size());
00365
00366 }
00367
00368
00369 inline bool ISTable::IndexExists(const string& indexName)
00370 {
00371
00372 int ret = FindIndex(indexName);
00373
00374 if (ret == -1)
00375 {
00376 return(false);
00377 }
00378 else
00379 {
00380 return(true);
00381 }
00382
00383 }
00384
00385
00386 inline unsigned int ISTable::IntRowIndex(const unsigned int rowIndex) const
00387 {
00388
00389
00390 return(_rowMap[rowIndex]);
00391
00392 }
00393
00394
00395 inline void ISTable::ClearRowMap()
00396 {
00397
00398 _rowMap.clear();
00399
00400 }
00401
00402
00403 inline void ISTable::AppendToAndDelimit(string& to, const string& appending)
00404 {
00405
00406 to += appending;
00407
00408 to += " ";
00409
00410 }
00411
00412
00413 inline void ISTable::SetModified(const bool modified)
00414 {
00415 _modified = modified;
00416 }
00417
00418
00419 inline bool ISTable::GetModified()
00420 {
00421 return _modified;
00422 }
00423
00424
00425 #endif // ISTABLE_H