00001
00002
00003
00004
00005
00006
00014 #ifndef ISTABLE_H
00015 #define ISTABLE_H
00016
00017
00018 #include <float.h>
00019
00020 #include <string>
00021 #include <vector>
00022 #include <map>
00023
00024 #include "mapped_vector.h"
00025 #include "mapped_vector.C"
00026 #include "GenString.h"
00027 #include "ITTable.h"
00028 #include "Serializer.h"
00029
00030
00031 using namespace std;
00032
00033
// Multimap from a string key to an unsigned int value, ordered by StringCompare.
// NOTE(review): not used elsewhere in this header; presumably maps index key
// strings to row indices -- confirm against the implementation file.
00034 typedef multimap<string, unsigned int, StringCompare> tIndex;
00035
00036
00056 class ISTable
00057 {
00058 public:
00059 typedef ITTable::eOrientation eOrientation;
00060
00061 static const eOrientation eCOLUMN_WISE = ITTable::eCOLUMN_WISE;
00062 static const eOrientation eROW_WISE = ITTable::eROW_WISE;
00063
00064 enum eTableDiff
00065 {
00066 eNONE = 0,
00067 eCASE_SENSE,
00068 eMORE_COLS,
00069 eLESS_COLS,
00070 eCOL_NAMES,
00071 eMORE_ROWS,
00072 eLESS_ROWS,
00073 eCELLS,
00074
00075 eMISSING,
00076
00077 eEXTRA
00078 };
00079
00080 typedef ITTable::eSearchType eSearchType;
00081
00082 static const eSearchType eEQUAL = ITTable::eEQUAL;
00083 static const eSearchType eLESS_THAN = ITTable::eLESS_THAN;
00084 static const eSearchType eLESS_THAN_OR_EQUAL = ITTable::eLESS_THAN_OR_EQUAL;
00085 static const eSearchType eGREATER_THAN = ITTable::eGREATER_THAN;
00086 static const eSearchType eGREATER_THAN_OR_EQUAL = ITTable::eGREATER_THAN_OR_EQUAL;
00087
00088 #ifdef VLAD_SECOND_ITTABLE
00089 enum eSearchType
00090 {
00091 eEQUAL = 0,
00092 eLESS_THAN,
00093 eLESS_THAN_OR_EQUAL,
00094 eGREATER_THAN,
00095 eGREATER_THAN_OR_EQUAL
00096 };
00097 #endif
00098
00099 typedef ITTable::eSearchDir eSearchDir;
00100
00101 static const eSearchDir eFORWARD = ITTable::eFORWARD;
00102 static const eSearchDir eBACKWARD = ITTable::eBACKWARD;
00103
00104 #ifdef VLAD_SECOND_ITTABLE
00105 enum eSearchDir
00106 {
00107 eFORWARD = 0,
00108 eBACKWARD
00109 };
00110 #endif
00111
00112 static const unsigned char DT_STRING_VAL = 1;
00113 static const unsigned char DT_INTEGER_VAL = 2;
00114
00115
00116
00117 static const unsigned char CASE_SENSE = 0x00;
00118
00119 static const unsigned char CASE_INSENSE = 0x01;
00120
00121 static const unsigned char W_SPACE_SENSE = 0x00;
00122
00123
00124 static const unsigned char W_SPACE_INSENSE = 0x02;
00125
00126 static const unsigned char DT_STRING = DT_STRING_VAL << 4;
00127
00128 static const unsigned char DT_INTEGER = DT_INTEGER_VAL << 4;
00129
00130
00131
00149 ISTable(const StringCompare::eCompareType colCaseSense =
00150 StringCompare::eCASE_SENSITIVE);
00151
00172 ISTable(eOrientation orient, const StringCompare::eCompareType
00173 colCaseSense = StringCompare::eCASE_SENSITIVE);
00174
00192 ISTable(const string& name,
00193 const StringCompare::eCompareType colCaseSense =
00194 StringCompare::eCASE_SENSITIVE);
00195
00216 ISTable(const string& name, eOrientation orient,
00217 const StringCompare::eCompareType colCaseSense =
00218 StringCompare::eCASE_SENSITIVE);
00219
00236 ISTable(const ISTable& inTable);
00237
00251 ~ISTable();
00252
00268 ISTable& operator=(const ISTable& inTable);
00269
00291 eTableDiff operator==(ISTable& inTable);
00292
00306 inline const string& GetName() const;
00307
00321 void SetName(const string& name);
00322
00336 inline unsigned int GetNumColumns() const;
00337
00351 const vector<string>& GetColumnNames() const;
00352
00367 bool IsColumnPresent(const string& colName);
00368
00397 void AddColumn(const string& colName, const vector<string>& col =
00398 vector<string> (0));
00399
00442 void InsertColumn(const string& colName,
00443 const string& afColName, const vector<string>& col =
00444 vector<string> (0));
00445
00474 void FillColumn(const string& colName, const vector<string>& col);
00475
00494 void GetColumn(vector<string>& col, const string& colName);
00495
00527 void GetColumn(vector<string>& col, const string& colName,
00528 const unsigned int fromRowIndex, unsigned int toRowIndex);
00529
00553 void GetColumn(vector<string>& col, const string& colName,
00554 const vector<unsigned int>& rowIndex);
00555
00578 void RenameColumn(const string& oldColName, const string& newColName);
00579
00596 void ClearColumn(const string& colName);
00597
00614 void DeleteColumn(const string& colName);
00615
00629 inline unsigned int GetNumRows() const;
00630
00667 unsigned int AddRow(const vector<string>& row = vector<string> (0));
00668
00715 unsigned int InsertRow(const unsigned int atRowIndex,
00716 const vector<string>& row = vector<string> (0));
00717
00742 void FillRow(const unsigned int rowIndex, const vector<string>& row);
00743
00783 void GetRow(vector<string>& row, const unsigned int rowIndex,
00784 const string& fromColName = String::Empty, const string& toColName =
00785 String::Empty);
00786
00803 const vector<string>& GetRow(const unsigned int rowIndex);
00804
00820 void ClearRow(const unsigned int rowIndex);
00821
00840 void DeleteRow(const unsigned int rowIndex);
00841
00859 void DeleteRows(const vector<unsigned int>& rows);
00860
00874 inline unsigned int GetLastRowIndex();
00875
00899 void UpdateCell(const unsigned int rowIndex, const string& colName,
00900 const string& value);
00901
00923 const string& operator()(const unsigned int rowIndex,
00924 const string& colName) const;
00925
00950 void SetFlags(const string& colName, const unsigned char flags);
00951
00969 unsigned char GetDataType(const string& colName);
00970
00999 unsigned int FindFirst(const vector<string>& targets,
01000 const vector<string>& colNames,
01001 const string& indexName = String::Empty);
01002
01026 void Search(vector<unsigned int>& res, const string& target,
01027 const string& colName, const unsigned int fromRowIndex = 0,
01028 const eSearchDir searchDir = eFORWARD,
01029 const eSearchType searchType = eEQUAL);
01030
01060 void Search(vector<unsigned int>& res, const vector<string>& targets,
01061 const vector<string>& colNames, const unsigned int fromRowIndex = 0,
01062 const eSearchDir searchDir = eFORWARD,
01063 const eSearchType searchType = eEQUAL,
01064 const string& indexName = String::Empty);
01065
01097 void FindDuplicateRows(vector<pair<unsigned int, unsigned int> >& duplRows,
01098 const vector<string>& colNames, const bool keepDuplRows,
01099 const eSearchDir searchDir = eFORWARD);
01100
01115 inline StringCompare::eCompareType GetColCaseSense() const;
01116
01120 inline void SetModified(const bool modified);
01121
01125 inline bool GetModified();
01126
01130 void SetSerializer(Serializer* ser);
01131
01135 int WriteObject(Serializer* ser, int& size);
01136
01140 int GetObject(UInt32 index, Serializer* ser);
01141
01145 void Read(unsigned int indexInFile);
01146
01150 int Write();
01151
01155
01156 static ISTable* Merge(ISTable& firstTable, ISTable& secondTable,
01157 unsigned int typeOfMerge = 0);
01158
01162 bool PrintDiff(ISTable& inTable);
01163
01167 inline bool IndexExists(const string& indexName);
01168
01172 void CreateIndex(const string& indexName, const vector<string>& colNames,
01173 const unsigned int unique = 0);
01174
01178 void UpdateIndex(const string& indexName, const unsigned int rowIndex);
01179
01183 void RebuildIndex(const string& indexName);
01184
01188 void RebuildIndices();
01189
01193 void DeleteIndex(const string& indexName);
01194
01198 inline unsigned int GetNumIndices();
01199
01203 void CreateKey(const vector<string>& colNames);
01204
01208 void DeleteKey();
01209
01213 static void SetUnion(const vector<unsigned int>& a,
01214 const vector<unsigned int>& b, vector<unsigned int>& ret);
01215
01219 static void SetIntersect(const vector<unsigned int>& a,
01220 const vector<unsigned int>& b, vector<unsigned int>& ret);
01221
01225 void GetColumnsIndices(vector<unsigned int>& colIndices,
01226 const vector<string>& colNames);
01227
01231 void GetColumn(vector<string>& col, const string& colName,
01232 const string& indexName);
01233
01234 private:
01235 static const unsigned int MAX_NUM_ITTABLE_ROWS = 1000;
01236
01237
01238 static const unsigned int EXPONENT = 4;
01239 static const unsigned int MAX_PRECISION = DBL_DIG;
01240
01241 static const unsigned int MANTISSA = MAX_PRECISION + 2;
01242 static const unsigned int INT_LIMIT = 11;
01243
01244
01245 static const unsigned char DT_MASK = 15 << 4;
01246
01247 static const unsigned char SC_MASK = 0x01;
01248
01249 static const unsigned char WS_MASK = 0x02;
01250 static const unsigned char LAST_DT_VALUE = 3;
01251 static const unsigned int DEFAULT_PRECISION = MAX_PRECISION;
01252 static const unsigned char DEFAULT_OPTIONS;
01253
01254 static const string _version;
01255
01256 string _name;
01257
01258 vector<ITTable> _ittables;
01259
01260 ITTable::eOrientation _orient;
01261
01262 StringCompare::eCompareType _colCaseSense;
01263
01264 mapped_vector<string, StringCompare> _colNames;
01265
01266 vector<unsigned int> _precision;
01267 vector<unsigned char> _compare_opts;
01268
01269 vector<string> _indexNames;
01270 vector<vector<unsigned int> > _listsOfColumns;
01271 vector<unsigned int> _unique;
01272
01273 Serializer* _ser;
01274
01275 bool _modified;
01276
01277 unsigned int _numRows;
01278
01279 mutable unsigned int _rowIndexCache;
01280 mutable pair<unsigned int, unsigned int> _rowLocCache;
01281
01282 void InsertColumn(const string& colName, const unsigned int atColIndex,
01283 const vector<string>& col = vector<string> (0));
01284 void CreateColumn(const string& colName, const unsigned int atColIndex,
01285 const vector<string>& col = vector<string> (0));
01286 int UpdateCell(const string& cell, const unsigned int colIndex,
01287 const unsigned int rowIndex);
01288 const string& operator()(const unsigned int rowIndex,
01289 const unsigned int colIndex) const;
01290 int SetFlags(const unsigned char newOpts, const unsigned int colIndex);
01291 void FindDuplicateRows(const vector<unsigned int>& colIndices,
01292 vector<pair<unsigned int, unsigned int> >& duplRows,
01293 const unsigned int keep, const eSearchDir searchDir = eFORWARD);
01294 void VerifyColumnsIndices(const vector<unsigned int>& colIndices);
01295 bool AreListsOfColumnsValid(const vector<unsigned int>& colIndices);
01296 void CreateIndex(const string& indexName,
01297 const vector<unsigned int>& colIndices, const unsigned int unique = 0);
01298 void CreateKey(const vector<unsigned int>& colIndices);
01299 unsigned int FindFirst(const vector<string>& targets,
01300 const vector<unsigned int>& colIndices,
01301 const string& indexName = String::Empty);
01302 void Search(vector<unsigned int>& res, const vector<string>& targets,
01303 const vector<unsigned int>& colIndices,
01304 const unsigned int fromRowIndex = 0,
01305 const eSearchDir searchDir = eFORWARD,
01306 const eSearchType searchType = eEQUAL,
01307 const string& indexName = String::Empty);
01308
01309 void Init();
01310 void Clear();
01311
01312 StringCompare::eCompareType
01313 GetCompareType(const vector<unsigned int>& colIndices);
01314
01315 string CellValue(const unsigned int colIndex,
01316 const unsigned int rowIndex);
01317 string ConvertString(const string& value, const unsigned int colIndex);
01318 string MultiStringsValue(const vector<string>& values,
01319 const vector<unsigned int>& colIndices);
01320 string SubRowValue(const vector<unsigned int>& colIndices,
01321 const unsigned int rowIndex);
01322 string AggregateRow(const vector<unsigned int>& colIndices,
01323 const unsigned int rowIndex);
01324
01325 inline void AppendToAndDelimit(string& to, const string& appending);
01326
01327 void ValidateOptions(unsigned int colIndex);
01328
01329 string CreateInternalIndexName(const unsigned int indexIndex);
01330 void UpdateIndex(const unsigned int indexIndex,
01331 const unsigned int rowIndex);
01332 void RebuildIndex(const unsigned int indexIndex);
01333 void ClearIndex(const unsigned int indexIndex);
01334 void DeleteIndex(const unsigned int indexIndex);
01335
01336 int FindIndex(const string& indexName);
01337 int FindIndex(const vector<unsigned int>& colIndices);
01338
01339 void UpdateIndices(const unsigned int rowIndex);
01340 void ClearIndices();
01341
01342 bool IsColumnInIndex(const unsigned int indexIndex,
01343 const unsigned int colIndex);
01344
01345 int FindKeyIndex();
01346
01347 void UpdateColListOnColInsert(const unsigned int colIndex);
01348 void UpdateColListOnColDelete(const unsigned int colIndex);
01349 void UpdateColListOnCellUpdate(const unsigned int rowIndex,
01350 const unsigned int colIndex);
01351
01352 unsigned int FindFirst(const vector<string>& targets,
01353 const vector<unsigned int>& colIndices, const unsigned int indexIndex);
01354
01355 int WriteObjectV9(Serializer*, int& size);
01356
01357 int GetObjectV9(UInt32 index, Serializer*);
01358 int GetObjectV8(UInt32 index, Serializer*);
01359 int GetObjectV7(UInt32 index, Serializer*);
01360 int GetObjectV6(UInt32 index, Serializer*);
01361 int GetObjectV3(UInt32 index, Serializer*);
01362 int GetObjectV2(UInt32 index, Serializer*);
01363 int GetObjectV1(UInt32 index, Serializer*);
01364 int GetObjectV1_1(UInt32 index, Serializer*);
01365
01366 void ConvertToInt(const string& a, string& ret);
01367 void ConvertDouble(const string& a, string& ret);
01368 void ConvertToLowerNoWhiteSpace(const string& a, string& ret);
01369
01370 void GetRowLocation(pair<unsigned int, unsigned int>& rowLoc,
01371 const unsigned int rowIndex) const;
01372 void CacheRowLocation(const unsigned int rowIndex) const;
01373
01374 void CreateSubtables(const unsigned int numRows);
01375 void CreateSubtableColumns(const unsigned int colIndex,
01376 const vector<string>& col);
01377 void CreateColumn(const unsigned int atColIndex,
01378 const vector<string>& col);
01379
01380 void Print(const string& indexName);
01381
01382 unsigned int GetColumnIndex(const string& colName) const;
01383
01384 };
01385
01386
// Stream-insertion operator for ISTable. Only declared here; the definition
// and the output format are not visible in this header.
01387 ostream& operator<<(ostream& out, const ISTable& isTable);
01388
01389
01390 inline unsigned int ISTable::GetLastRowIndex()
01391 {
01392
01393 return(GetNumRows() - 1);
01394
01395 }
01396
01397
01398 inline unsigned int ISTable::GetNumIndices()
01399 {
01400
01401 return(_indexNames.size());
01402
01403 }
01404
01405
01406 inline bool ISTable::IndexExists(const string& indexName)
01407 {
01408
01409 int ret = FindIndex(indexName);
01410
01411 if (ret == -1)
01412 {
01413 return(false);
01414 }
01415 else
01416 {
01417 return(true);
01418 }
01419
01420 }
01421
01422
01423 inline void ISTable::AppendToAndDelimit(string& to, const string& appending)
01424 {
01425
01426 to += appending;
01427
01428 to += " ";
01429
01430 }
01431
01432
01433 inline void ISTable::SetModified(const bool modified)
01434 {
01435 _modified = modified;
01436 }
01437
01438
01439 inline bool ISTable::GetModified()
01440 {
01441 return _modified;
01442 }
01443
01444
01445 inline const string& ISTable::GetName() const
01446 {
01447 return(_name);
01448 }
01449
01450
01451 inline unsigned int ISTable::GetNumRows() const
01452 {
01453 return(_numRows);
01454 }
01455
01456
01457 inline unsigned int ISTable::GetNumColumns() const
01458 {
01459 return(_colNames.size());
01460 }
01461
01462
01463 inline StringCompare::eCompareType ISTable::GetColCaseSense() const
01464 {
01465 return(_colCaseSense);
01466 }
01467
01468
01469 #endif // ISTABLE_H