Constant Quark Database (CQDB) is a database library specialized for serialization and retrieval of static associations between strings and integer identifiers. The database provides several features:
CQDB is suitable for implementing dictionaries in which fast look-ups of strings and identifiers are essential while a dictionary update rarely occurs. The data structure is a specialization (and extension) of the Constant Database proposed by Daniel J. Bernstein.
CQDB does not support assigning a unique integer identifier for a given string, modify associations, nor check collisions in strings and identifiers; thus, it may be necessary to use an existing Quark implementation to manage proper associations between strings and identifiers on memory.
This library is used by the CRFsuite project.
CQDB is distributed under the term of the modified BSD license.
#include <stdio.h> #include <stdlib.h> #include <string.h> #include "cqdb.h" #define DBNAME "test.cqdb" #define NUMELEMS 1000000 int main(int argc, char *argv[]) { int i, ret; char str[10]; FILE *fp = NULL; cqdb_writer_t* dbw = NULL; // Open a file for writing. fp = fopen(DBNAME, "wb"); if (fp == NULL) { fprintf(stderr, "ERROR: failed to open the file.\n"); return 1; } // Create a CQDB on the file stream. dbw = cqdb_writer(fp, 0); if (dbw == NULL) { fprintf(stderr, "ERROR: failed to create a CQDB on the file.\n"); goto error_exit; } // Put string/integer associations, "00000001"/1, ..., "01000000"/1000000. for (i = 0;i < NUMELEMS;++i) { sprintf(str, "%08d", i); if (ret = cqdb_writer_put(dbw, str, i)) { fprintf(stderr, "ERROR: failed to put a pair '%s'/%d.\n", str, i); goto error_exit; } } // Close the CQDB. if (ret = cqdb_writer_close(dbw)) { fprintf(stderr, "ERROR: failed to close the CQDB.\n"); goto error_exit; } // Close the file. fclose(fp); return 0; error_exit: if (dbw != NULL) cqdb_writer_close(dbw); if (fp != NULL) fclose(fp); return 1; }
#include <stdio.h> #include <stdlib.h> #include <string.h> #include "cqdb.h" #define DBNAME "test.cqdb" #define NUMELEMS 1000000 int main(int argc, char *argv[]) { int i, j, ret; long size = 0; char str[10], *value = NULL, *buffer = NULL; FILE *fp = NULL; cqdb_t* db = NULL; // Open the database. fp = fopen(DBNAME, "rb"); if (fp == NULL) { fprintf(stderr, "ERROR: failed to open the file\n"); return 1; } // Obtain the file size. fseek(fp, 0, SEEK_END); size = ftell(fp); fseek(fp, 0, SEEK_SET); // Read the content of the file at a time. buffer = (char *)malloc(size); if (buffer == NULL) { fprintf(stderr, "ERROR: out of memory.\n"); goto error_exit; } fread(buffer, 1, size, fp); fclose(fp); fp = NULL; // Open the database on the memory. db = cqdb_reader(buffer, size); if (db == NULL) { fprintf(stderr, "ERROR: failed to open a CQDB on the file.\n"); goto error_exit; } // Forward lookups: strings to integer identifiers. for (i = 0;i < NUMELEMS;++i) { sprintf(str, "%08d", i); j = cqdb_to_id(db, str); // Validity check. if (j < 0 || i != j) { fprintf(stderr, "ERROR: inconsistency error '%s'/%d.\n", str, i); goto error_exit; } } // Reverse lookups: integer identifiers to strings. for (i = 0;i < NUMELEMS;++i) { sprintf(str, "%08d", i); value = cqdb_to_string(db, i); // Validity check. if (value == NULL || strcmp(str, value) != 0) { fprintf(stderr, "ERROR: inconsistency error '%s'/%d.\n", str, i); goto error_exit; } } // Delete the instance of the CQDB. cqdb_delete(db); free(buffer); return 0; error_exit: if (fp != NULL) fclose(fp); if (buffer != NULL) free(buffer); return 1; }
This table shows the elapsed time for constructing the database (write time), the elapsed time for processing with the queries (read time), and the size of the database generated by each database library. The read/write access was extremely faster than those of other database libraries. The database was smaller than half the size of those generated by other libraries. This results suggest that the CQDB has the substantial advantage over the existing database libraries for implementing a static quark database.
Database | Parameters | Write time [sec] | Read time [sec] | Database size [MB] |
---|---|---|---|---|
Constant Quark Database (CQDB) 1.0 | Default (none) | 1.48 | 0.65 | 35.2 |
Berkeley DB (BDB) 4.5.20 | Default | 91.8 | 37.5 | 79.7 |
Berkeley DB (BDB) 4.5.20 | table_size=4000000; cache_size=200MB | 57.8 | 37.5 | 79.7 |
Quick Database Manager (QDBM) 1.8.75 | Default | 95.4 | 80.6 | 76.3 |
Quick Database Manager (QDBM) 1.8.75 | table_size=4000000 | 15.7 | 12.0 | 92.2 |