00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifndef __CDBPP_H__
00034 #define __CDBPP_H__
00035
00036 #include <cstring>
00037 #include <fstream>
00038 #include <functional>
00039 #include <iostream>
00040 #include <vector>
00041 #include <stdint.h>
00042 #include <stdexcept>
00043
00044 namespace cdbpp
00045 {
00046
00054
00055 enum {
00056
00057 VERSION = 1,
00058
00059 NUM_TABLES = 256,
00060
00061 BYTEORDER_CHECK = 0x62445371,
00062 };
00063
00064
00065
00066
00080 class murmurhash2 :
00081 public std::binary_function<const void *, size_t, uint32_t>
00082 {
00083 protected:
00084 inline static uint32_t get32bits(const char *d)
00085 {
00086 return *reinterpret_cast<const uint32_t*>(d);
00087 }
00088
00089 public:
00090 inline uint32_t operator() (const void *key, size_t size) const
00091 {
00092
00093
00094
00095 const uint32_t m = 0x5bd1e995;
00096 const int32_t r = 24;
00097
00098
00099
00100 const uint32_t seed = 0x87654321;
00101 uint32_t h = seed ^ size;
00102
00103
00104
00105 const char * data = (const char *)key;
00106
00107 while (size >= 4)
00108 {
00109 uint32_t k = get32bits(data);
00110
00111 k *= m;
00112 k ^= k >> r;
00113 k *= m;
00114
00115 h *= m;
00116 h ^= k;
00117
00118 data += 4;
00119 size -= 4;
00120 }
00121
00122
00123
00124 switch (size)
00125 {
00126 case 3: h ^= data[2] << 16;
00127 case 2: h ^= data[1] << 8;
00128 case 1: h ^= data[0];
00129 h *= m;
00130 };
00131
00132
00133
00134
00135 h ^= h >> 13;
00136 h *= m;
00137 h ^= h >> 15;
00138
00139 return h;
00140 }
00141 };
00142
00143
00144
00145
00146 struct tableref_t
00147 {
00148 uint32_t offset;
00149 uint32_t num;
00150 };
00151
00152
00153 static uint32_t get_data_begin()
00154 {
00155 return (16 + sizeof(tableref_t) * NUM_TABLES);
00156 }
00157
00158
00159
00163 class builder_exception : public std::invalid_argument
00164 {
00165 public:
00166 builder_exception(const std::string& msg)
00167 : std::invalid_argument(msg)
00168 {
00169 }
00170 };
00171
00172
00173
00177 template <typename hash_function>
00178 class builder_base
00179 {
00180 protected:
00181
00182 struct bucket
00183 {
00184 uint32_t hash;
00185 uint32_t offset;
00186
00187 bucket() : hash(0), offset(0)
00188 {
00189 }
00190
00191 bucket(uint32_t h, uint32_t o) : hash(h), offset(o)
00192 {
00193 }
00194 };
00195
00196
00197 typedef std::vector<bucket> hashtable;
00198
00199 protected:
00200 std::ofstream& m_os;
00201 uint32_t m_begin;
00202 uint32_t m_cur;
00203 hashtable m_ht[NUM_TABLES];
00204
00205 public:
00212 builder_base(std::ofstream& os) : m_os(os)
00213 {
00214 m_begin = m_os.tellp();
00215 m_cur = get_data_begin();
00216 m_os.seekp(m_begin + m_cur);
00217 }
00218
00222 virtual ~builder_base()
00223 {
00224 this->close();
00225 }
00226
00236 template <class key_t, class value_t>
00237 void put(const key_t *key, size_t ksize, const value_t *value, size_t vsize)
00238 {
00239
00240 write_uint32((uint32_t)ksize);
00241 m_os.write(reinterpret_cast<const char *>(key), ksize);
00242 write_uint32((uint32_t)vsize);
00243 m_os.write(reinterpret_cast<const char *>(value), vsize);
00244
00245
00246 uint32_t hv = hash_function()(static_cast<const void *>(key), ksize);
00247 hashtable& ht = m_ht[hv % NUM_TABLES];
00248
00249
00250 ht.push_back(bucket(hv, m_cur));
00251
00252
00253 m_cur += sizeof(uint32_t) + ksize + sizeof(uint32_t) + vsize;
00254 }
00255
00256 protected:
00257 void close()
00258 {
00259
00260 if (m_begin + m_cur != (uint32_t)m_os.tellp()) {
00261 throw builder_exception("Inconsistent stream offset");
00262 }
00263
00264
00265
00266 for (size_t i = 0;i < NUM_TABLES;++i) {
00267 hashtable& ht = m_ht[i];
00268
00269
00270 if (!ht.empty()) {
00271
00272
00273 int n = ht.size() * 2;
00274
00275
00276 bucket* dst = new bucket[n];
00277
00278
00279 typename hashtable::const_iterator it;
00280 for (it = ht.begin();it != ht.end();++it) {
00281 int k = (it->hash >> 8) % n;
00282
00283
00284 while (dst[k].offset != 0) {
00285 k = (k+1) % n;
00286 }
00287
00288
00289 dst[k].hash = it->hash;
00290 dst[k].offset = it->offset;
00291 }
00292
00293
00294 for (int k = 0;k < n;++k) {
00295 write_uint32(dst[k].hash);
00296 write_uint32(dst[k].offset);
00297 }
00298
00299
00300 delete[] dst;
00301 }
00302 }
00303
00304
00305 uint32_t offset = (uint32_t)m_os.tellp();
00306
00307
00308 m_os.seekp(m_begin);
00309
00310
00311 char chunkid[4] = {'C','D','B','+'};
00312 m_os.write(chunkid, 4);
00313 write_uint32(offset - m_begin);
00314 write_uint32(VERSION);
00315 write_uint32(BYTEORDER_CHECK);
00316
00317
00318
00319 for (size_t i = 0;i < NUM_TABLES;++i) {
00320
00321 write_uint32(m_ht[i].empty() ? 0 : m_cur);
00322
00323 write_uint32(m_ht[i].size() * 2);
00324
00325 m_cur += sizeof(uint32_t) * 2 * m_ht[i].size() * 2;
00326 }
00327
00328
00329 m_os.seekp(offset);
00330 }
00331
00332 inline void write_uint32(uint32_t value)
00333 {
00334 m_os.write(reinterpret_cast<const char *>(&value), sizeof(value));
00335 }
00336 };
00337
00338
00339
00343 class cdbpp_exception : public std::invalid_argument
00344 {
00345 public:
00346 cdbpp_exception(const std::string& msg)
00347 : std::invalid_argument(msg)
00348 {
00349 }
00350 };
00351
00355 template <typename hash_function>
00356 class cdbpp_base
00357 {
00358 protected:
00359 struct bucket_t
00360 {
00361 uint32_t hash;
00362 uint32_t offset;
00363 };
00364
00365
00366 struct hashtable_t
00367 {
00368 uint32_t num;
00369 const bucket_t* buckets;
00370 };
00371
00372
00373 protected:
00374 const uint8_t* m_buffer;
00375 size_t m_size;
00376 bool m_own;
00377
00378 hashtable_t m_ht[NUM_TABLES];
00379 size_t m_n;
00380
00381 public:
00385 cdbpp_base()
00386 : m_buffer(NULL), m_size(0), m_own(false), m_n(0)
00387 {
00388 }
00389
00397 cdbpp_base(const void *buffer, size_t size, bool own)
00398 : m_buffer(NULL), m_size(0), m_own(false), m_n(0)
00399 {
00400 this->open(buffer, size, own);
00401 }
00402
00408 cdbpp_base(std::ifstream& ifs)
00409 : m_buffer(NULL), m_size(0), m_own(false), m_n(0)
00410 {
00411 this->open(ifs);
00412 }
00413
00417 virtual ~cdbpp_base()
00418 {
00419 close();
00420 }
00421
00427 bool is_open() const
00428 {
00429 return (m_buffer != NULL);
00430 }
00431
00436 size_t size() const
00437 {
00438 return m_n;
00439 }
00440
00446 bool empty() const
00447 {
00448 return (m_n == 0);
00449 }
00450
00456 size_t open(std::ifstream& ifs)
00457 {
00458 char chunk[4], size[4];
00459 std::istream::pos_type offset = ifs.tellg();
00460
00461 do {
00462
00463 ifs.read(chunk, 4);
00464 if (ifs.fail()) {
00465 break;
00466 }
00467
00468
00469 if (std::strncmp(chunk, "CDB+", 4) != 0) {
00470 break;
00471 }
00472
00473
00474 ifs.read(size, 4);
00475 if (ifs.fail()) {
00476 break;
00477 }
00478
00479
00480 uint32_t chunk_size = read_uint32(reinterpret_cast<uint8_t*>(size));
00481 uint8_t* block = new uint8_t[chunk_size];
00482
00483
00484 ifs.seekg(0, std::ios_base::beg);
00485 if (ifs.fail()) {
00486 break;
00487 }
00488 ifs.read(reinterpret_cast<char*>(block), chunk_size);
00489 if (ifs.fail()) {
00490 break;
00491 }
00492
00493 return this->open(block, chunk_size, true);
00494
00495 } while (0);
00496
00497 ifs.seekg(offset, std::ios::beg);
00498 return 0;
00499 }
00500
00508 size_t open(const void *buffer, size_t size, bool own = false)
00509 {
00510 const uint8_t *p = reinterpret_cast<const uint8_t*>(buffer);
00511
00512
00513 if (size < get_data_begin()) {
00514 throw cdbpp_exception("The memory image is smaller than a chunk header.");
00515 }
00516
00517
00518 if (memcmp(p, "CDB+", 4) != 0) {
00519 throw cdbpp_exception("Incorrect chunk header");
00520 }
00521 p += 4;
00522
00523
00524 uint32_t csize = read_uint32(p);
00525 p += sizeof(uint32_t);
00526 uint32_t version = read_uint32(p);
00527 p += sizeof(uint32_t);
00528 uint32_t byteorder = read_uint32(p);
00529 p += sizeof(uint32_t);
00530
00531
00532 if (byteorder != BYTEORDER_CHECK) {
00533 throw cdbpp_exception("Inconsistent byte order");
00534 }
00535
00536 if (size < csize) {
00537 throw cdbpp_exception("The memory image is smaller than a chunk size.");
00538 }
00539
00540
00541 m_buffer = reinterpret_cast<const uint8_t*>(buffer);
00542 m_size = size;
00543 m_own = own;
00544
00545
00546 m_n = 0;
00547 const tableref_t* ref = reinterpret_cast<const tableref_t*>(p);
00548 for (size_t i = 0;i < NUM_TABLES;++i) {
00549 if (ref[i].offset) {
00550
00551 m_ht[i].buckets = reinterpret_cast<const bucket_t*>(m_buffer + ref[i].offset);
00552 m_ht[i].num = ref[i].num;
00553 } else {
00554
00555 m_ht[i].buckets = NULL;
00556 m_ht[i].num = 0;
00557 }
00558
00559
00560 m_n += (ref[i].num / 2);
00561 }
00562
00563 return (size_t)csize;
00564 }
00565
00569 void close()
00570 {
00571 if (m_own && m_buffer != NULL) {
00572 delete[] m_buffer;
00573 }
00574 m_buffer = NULL;
00575 m_size = 0;
00576 m_n = 0;
00577 }
00578
00587 const void* get(const void *key, size_t ksize, size_t* vsize) const
00588 {
00589 uint32_t hv = hash_function()(key, ksize);
00590 const hashtable_t* ht = &m_ht[hv % NUM_TABLES];
00591
00592 if (ht->num && ht->buckets != NULL) {
00593 int n = ht->num;
00594 int k = (hv >> 8) % n;
00595 const bucket_t* p = NULL;
00596
00597 while (p = &ht->buckets[k], p->offset) {
00598 if (p->hash == hv) {
00599 const uint8_t *q = m_buffer + p->offset;
00600 if (read_uint32(q) == ksize &&
00601 memcmp(key, q + sizeof(uint32_t), ksize) == 0) {
00602 q += sizeof(uint32_t) + ksize;
00603 if (vsize != NULL) {
00604 *vsize = read_uint32(q);
00605 }
00606 return q + sizeof(uint32_t);
00607 }
00608 }
00609 k = (k+1) % n;
00610 }
00611 }
00612
00613 if (vsize != NULL) {
00614 *vsize = 0;
00615 }
00616 return NULL;
00617 }
00618
00619 protected:
00620 inline uint32_t read_uint32(const uint8_t* p) const
00621 {
00622 return *reinterpret_cast<const uint32_t*>(p);
00623 }
00624 };
00625
00627 typedef builder_base<murmurhash2> builder;
00629 typedef cdbpp_base<murmurhash2> cdbpp;
00630
00631
00632 };
00633
00732 #endif