WordDBCache.h

Go to the documentation of this file.
00001 //
00002 // WordDBCache.h
00003 //
00004 // NAME
00005 // intermediate cache for WordList objects. 
00006 //
00007 // SYNOPSIS
00008 //
00009 // Internal helper for the WordListOne object.
00010 //
00011 // DESCRIPTION
00012 //
00013 // To speed up bulk insertions, the WordDBCache allows them to remain in
00014 // memory as long as a given limit is not reached. The inserted entries
00015 // are then sorted and dumped into a file. When a given number of files
00016 // have been produced, they are merged into one. Eventually the resulting
00017 // list of entries is inserted into the WordList index.
00018 //
00019 // 
00020 // END
00021 //
00022 // Part of the ht://Dig package   <http://www.htdig.org/>
00023 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group
00024 // For copyright details, see the file COPYING in your distribution
00025 // or the GNU General Public License version 2 or later
00026 // <http://www.gnu.org/copyleft/gpl.html>
00027 //
00028 // $Id: WordDBCache_8h-source.html,v 1.1 2008/06/08 10:13:02 sebdiaz Exp $
00029 //
00030 
00031 #ifndef _WordDBCache_h_
00032 #define _WordDBCache_h_
00033 
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "htString.h"
#include "List.h"
#include "db.h"
#include "lib.h"
#include "myqsort.h"
#include "WordList.h"
00043 
00044 class WordDB;
00045 class WordLock;
00046 
00047 //
00048 // Minimum (and initial) size, in bytes, of the cache memory pool
00049 //
00050 #define WORD_DB_CACHE_MINIMUM   (500 * 1024)
00051 
//
// Descriptor of one cached record: a key and its data, each given as a
// pointer plus a byte count.
//
// A DBT could carry the same information, but it is more than twice as
// big; saving time through the most compact use of memory is the whole
// point of the cache, so this smaller structure is used instead.
//
class WordDBCacheEntry {
public:
  char*        key;        // start of the key bytes
  unsigned int key_size;   // number of key bytes
  char*        data;       // start of the data bytes
  unsigned int data_size;  // number of data bytes
};
00064 
00065 class WordDBCache {
00066 public:
00067   inline WordDBCache(WordContext* ncontext) {
00068     context = ncontext;
00069 
00070     entries = (WordDBCacheEntry*)malloc(1000 * sizeof(WordDBCacheEntry));
00071     entries_length = 0;
00072     entries_size = 1000;
00073 
00074     pool = (char*)malloc(WORD_DB_CACHE_MINIMUM);
00075     pool_length = 0;
00076     pool_size = pool_max = WORD_DB_CACHE_MINIMUM;
00077   }
00078 
00079   inline ~WordDBCache() {
00080     if(pool_length > 0) {
00081       fprintf(stderr, "WordDBCache::~WordDBCache: destructor called and cache not empty\n");
00082     }
00083     free(entries);
00084     free(pool);
00085   }
00086 
00087   inline int ResizeEntries() {
00088     entries_size *= 2;
00089     entries = (WordDBCacheEntry*)realloc(entries, entries_size * sizeof(WordDBCacheEntry));
00090     return entries ? 0 : DB_RUNRECOVERY;
00091   }
00092 
00093   inline int ResizePool(int wanted) {
00094     if(pool_size * 2 > pool_max) {
00095       if(pool_max > pool_size && pool_max > wanted)
00096         pool_size = pool_max;
00097       else
00098         return ENOMEM;
00099     } else {
00100       pool_size *= 2;
00101     }
00102     pool = (char*)realloc(pool, pool_size);
00103     return pool ? 0 : DB_RUNRECOVERY;
00104   }
00105 
00106   inline int Allocate(int size) {
00107     int ret;
00108     if(entries_length >= entries_size)
00109       if((ret = ResizeEntries()) != 0)
00110         return ret;
00111     if(pool_length + size >= pool_size) {
00112       if((ret = ResizePool(pool_length + size)) != 0)
00113         return ret;
00114     }
00115     return 0;
00116   }
00117 
00118   inline int GetMax() const { return pool_max; }
00119 
00120   inline int SetMax(int max) {
00121     if(max > pool_max)
00122       pool_max = max;
00123     return 0;
00124   }
00125 
00126   inline int SetCompare(int (*ncompare)(WordContext *, const WordDBCacheEntry *, const WordDBCacheEntry *)) {
00127     compare = ncompare;
00128     return 0;
00129   }
00130 
00131   inline int Sort() {
00132     if(Absolute() != OK) return NOTOK;
00133     //
00134     // Reorder entries in increasing order
00135     //
00136     myqsort((void*)entries, entries_length, sizeof(WordDBCacheEntry), (myqsort_cmp)compare, (void*)context);
00137     return 0;
00138   }
00139 
00140   inline int Relative() {
00141     int i;
00142     for(i = 0; i < entries_length; i++) {
00143       entries[i].key = (char*)(entries[i].key - pool);
00144       entries[i].data = (char*)(entries[i].data - pool);
00145     }
00146     return OK;
00147   }
00148   
00149   inline int Absolute() {
00150     int i;
00151     for(i = 0; i < entries_length; i++) {
00152       entries[i].key = pool + (int)(entries[i].key);
00153       entries[i].data = pool + (int)(entries[i].data);
00154     }
00155     return OK;
00156   }
00157 
00158   inline int Entries(WordDBCacheEntry*& nentries, int& nentries_length) {
00159     nentries = entries;
00160     nentries_length = entries_length;
00161     return 0;
00162   }
00163 
00164   inline int Pool(char*& npool, int& npool_length) {
00165     npool = pool;
00166     npool_length = pool_length;
00167     return OK;
00168   }
00169   
00170   inline int Add(char* key, int key_size, char* data, int data_size) {
00171     int ret;
00172     if((ret = Allocate(key_size + data_size)) != 0)
00173       return ret;
00174 
00175     entries[entries_length].key = (char*)pool_length;
00176     entries[entries_length].key_size = key_size;
00177     entries[entries_length].data = (char*)(pool_length + key_size);
00178     entries[entries_length].data_size = data_size;
00179     entries_length++;
00180     memcpy(pool + pool_length, key, key_size);
00181     memcpy(pool + pool_length + key_size, data, data_size);
00182     pool_length += key_size + data_size;
00183 
00184     return 0;
00185   }
00186 
00187   inline int Flush() {
00188     entries_length = 0;
00189     pool_length = 0;
00190     return 0;
00191   }
00192 
00193   inline int Empty() {
00194     return entries_length <= 0;
00195   }
00196   
00197 private:
00198   WordDBCacheEntry* entries;
00199   int entries_length;
00200   int entries_size;
00201 
00202   char* pool;
00203   int pool_length;
00204   int pool_size;
00205   int pool_max;
00206 
00207   int (*compare)(WordContext *, const WordDBCacheEntry *, const WordDBCacheEntry *);
00208   WordContext *context;
00209 };
00210 
00211 class WordDBCacheFile : public Object 
00212 {
00213 public:
00214   WordDBCacheFile() { size = 0; }
00215 
00216   String filename;
00217   unsigned int size;
00218 };
00219 
00220 class WordDBCaches {
00221  public:
00222   inline WordDBCaches(WordList* nwords, int nfile_max, int size_hint, int nsize_max) : cache(nwords->GetContext()) {
00223     words = nwords;
00224 
00225     files = new WordDB(words->GetContext()->GetDBInfo());
00226     files->Open(words->Filename(), "tmp", DB_BTREE, words->Flags(), 0666, WORD_DB_FILES);
00227     file_max = nfile_max;
00228     size_max = nsize_max;
00229     lock = 0;
00230 
00231     cache.SetMax(size_hint / 2);
00232   }
00233 
00234   ~WordDBCaches() {
00235     delete files;
00236   }
00237 
00238   int Full() const { return size_max > 0 ? size >= size_max : 0; }
00239 
00240   int Add(char* key, int key_size, char* data, int data_size);
00241   int AddFile(String& filename);
00242 
00243   int CacheFlush();
00244 
00245   int Merge();
00246   int Merge(const String& filea, const String& fileb, const String& tmpname);
00247   int Merge(WordDB& db);
00248 
00249   int CacheWrite(const String& filename);
00250   int CacheCompare(int (*compare)(WordContext *, const WordDBCacheEntry *, const WordDBCacheEntry *)) { cache.SetCompare(compare); return OK; }
00251 
00252   int WriteEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size);
00253   int ReadEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size);
00254 
00255  private:
00256   WordList*             words;
00257 
00258   WordDB*               files;
00259   off_t                 file_max;
00260   off_t                 size_max;
00261   off_t                 size;
00262   
00263   WordLock*             lock;
00264   WordDBCache           cache;
00265 };
00266 
00267 #endif /* _WordDBCache_h_ */

Generated on Sun Jun 8 10:56:40 2008 for GNUmifluz by  doxygen 1.5.5