WordCursorOne.cc

Go to the documentation of this file.
00001 //
00002 // WordCursorOne.cc
00003 //
00004 // Part of the ht://Dig package   <http://www.htdig.org/>
00005 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group
00006 // For copyright details, see the file COPYING in your distribution
00007 // or the GNU General Public License version 2 or later
00008 // <http://www.gnu.org/copyleft/gpl.html>
00009 //
00010 // $Id: WordCursorOne_8cc-source.html,v 1.1 2008/06/08 10:12:59 sebdiaz Exp $
00011 //
00012 
00013 #ifdef HAVE_CONFIG_H
00014 #include "config.h"
00015 #endif /* HAVE_CONFIG_H */
00016 
00017 #include <stdlib.h>
00018 
00019 #include "WordCursorOne.h"
00020 #include "WordListOne.h"
00021 #include "WordDead.h"
00022 
00023 #include <stdio.h>
00024 
00025 //
00026 // WordCursorOne implementation
00027 // 
00028 
00029 // *****************************************************************************
00030 WordCursorOne::WordCursorOne(WordList *words) :
00031   WordCursor(words->GetContext()),
00032   prefixKey(words->GetContext())
00033 {
00034   Clear();
00035 }
00036 
00037 // *****************************************************************************
00038 WordCursorOne::WordCursorOne(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) :
00039   WordCursor(words->GetContext()),
00040   prefixKey(words->GetContext())
00041 {
00042   Clear();
00043   Initialize(words, WordKey(words->GetContext()), callback, callback_data, HTDIG_WORDLIST_WALKER);
00044 }
00045 
00046 // *****************************************************************************
00047 WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, int action) :
00048   WordCursor(words->GetContext()),
00049   prefixKey(words->GetContext())
00050 {
00051   Clear();
00052   Initialize(words, searchKey, 0, 0, action);
00053 }
00054 
00055 // *****************************************************************************
00056 WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) :
00057   WordCursor(words->GetContext()),
00058   prefixKey(words->GetContext())
00059 {
00060   Clear();
00061   Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER);
00062 }
00063 
00064 // *****************************************************************************
00065 //
00066 int WordCursorOne::Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object *ncallback_data, int naction)
00067 {
00068   action = naction;
00069   searchKey = nsearchKey;
00070   callback = ncallback;
00071   callback_data = ncallback_data;
00072   words = nwords;
00073   cursor = ((WordListOne*)nwords)->db->Cursor();
00074   return OK;
00075 }
00076 
00077 // *****************************************************************************
00078 //
00079 void 
00080 WordCursorOne::Clear()
00081 {
00082   searchKey.Clear();
00083   action = 0;
00084   callback = 0;
00085   callback_data = 0;
00086   ClearResult();
00087   ClearInternal();
00088   words = 0;
00089 
00090   //
00091   // Debugging section. 
00092   //
00093   traceRes = 0;
00094 }
00095 
00096 // *****************************************************************************
00097 //
00098 void
00099 WordCursorOne::ClearInternal()
00100 {
00101   key.trunc();
00102   data.trunc();
00103   prefixKey.Clear();
00104   cursor_get_flags = DB_SET_RANGE;
00105   searchKeyIsSameAsPrefix = 0;
00106 }
00107 
00108 // *****************************************************************************
00109 //
00110 void
00111 WordCursorOne::ClearResult()
00112 {
00113   collectRes = 0;
00114   found.Clear();
00115   status = OK;
00116 }
00117 
00118 int
00119 WordCursorOne::ContextRestore(const String& buffer)
00120 {
00121   int ret = OK;
00122   if(!buffer.empty()) {
00123     WordKey key(words->GetContext(), buffer);
00124     if((ret = Seek(key)) != OK)
00125       return ret;
00126     //
00127     // Move to restored position so that next call to
00128     // WalkNext will go above the restored position.
00129     //
00130     if((ret = WalkNext()) != OK)
00131       return ret;
00132   }
00133   return ret;
00134 }
00135 
00136 // *****************************************************************************
00137 //
00138 // Walk and collect data from the word database.
00139 //
00140 // If action bit HTDIG_WORDLIST_COLLECTOR is set WordReferences are
00141 //    stored in a list and the list is returned.
00142 // If action bit HTDIG_WORDLIST_WALKER is set the <callback> function
00143 //    is called for each WordReference found. No list is built and the
00144 //    function returns a null pointer.
00145 //
00146 // The <searchKey> argument may be a fully qualified key, containing precise values for each
00147 // field of the key. It may also contain only some fields of the key. In both cases
00148 // all the word occurrences matching the fields set in the key are retrieved. It may
00149 // be fast if key is a prefix (see WordKey::Prefix for a definition). It may
00150 // be *slow* if key is not a prefix because it forces a complete walk of the
00151 // index. 
00152 //
00153 int 
00154 WordCursorOne::Walk()
00155 {
00156   int ret;
00157   if((ret = WalkInit()) != OK) return ret;
00158   while((ret = WalkNext()) == OK)
00159     ;
00160   int ret1;
00161   if((ret1 = WalkFinish()) != OK) return ret1;
00162 
00163   return (ret & WORD_WALK_RESULT_MASK) == WORD_WALK_ATEND ? OK : NOTOK;
00164 }
00165 
00166 int 
00167 WordCursorOne::WalkInit()
00168 {
00169   ClearResult();
00170   ClearInternal();
00171 
00172   WordReference wordRef(words->GetContext());
00173 
00174   {
00175     int ret;
00176     if((ret = cursor->Open()) != 0)
00177       return ret;
00178   }
00179 
00180   if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: action = %d, SearchKey = %s\n", action, (char*)searchKey.Get());
00181 
00182   if(action & HTDIG_WORDLIST_COLLECTOR) {
00183     collectRes = new List;
00184   }
00185 
00186   WordKey first_key(words->GetContext());
00187   //
00188   // Move the cursor to start walking and do some sanity checks.
00189   //
00190   if(searchKey.Empty()) {
00191     //
00192     // Move past the stat data
00193     //
00194     if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is empty\n");
00195 
00196   } else {
00197     prefixKey = searchKey;
00198     //
00199     // If the key is a prefix, the start key is
00200     // the longest possible prefix contained in the key. If the
00201     // key does not contain any prefix, start from the beginning
00202     // of the file.
00203     //
00204     if(prefixKey.PrefixOnly() == NOTOK) {
00205       if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is not a prefix\n");
00206       prefixKey.Clear();
00207     } else {
00208       if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: go to %s \n", (char*)prefixKey.Get());
00209       first_key = prefixKey;
00210     }
00211   }
00212 
00213   first_key.Pack(key);
00214   //
00215   // Allow Seek immediately after Init
00216   //
00217   found.Key() = first_key;
00218 
00219   status = OK;
00220   searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
00221   cursor_get_flags = DB_SET_RANGE;
00222 
00223   return OK;
00224 }
00225 
00226 int 
00227 WordCursorOne::WalkRewind()
00228 {
00229   WordKey first_key(words->GetContext());
00230   //
00231   // Move the cursor to start walking and do some sanity checks.
00232   //
00233   if(searchKey.Empty()) {
00234     first_key.Clear();
00235   } else {
00236     prefixKey = searchKey;
00237     //
00238     // If the key is a prefix, the start key is
00239     // the longest possible prefix contained in the key. If the
00240     // key does not contain any prefix, start from the beginning
00241     // of the file.
00242     //
00243     if(prefixKey.PrefixOnly() == NOTOK) {
00244       prefixKey.Clear();
00245       first_key.Clear();
00246     } else {
00247       first_key = prefixKey;
00248     }
00249   }
00250 
00251   first_key.Pack(key);
00252   //
00253   // Allow Seek immediately after Rewind
00254   //
00255   found.Key() = first_key;
00256 
00257   status = OK;
00258   searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
00259   cursor_get_flags = DB_SET_RANGE;
00260 
00261   return OK;
00262 }
00263 
00264 int 
00265 WordCursorOne::WalkNext()
00266 {
00267   int ret;
00268   while((ret = WalkNextStep()) == WORD_WALK_NOMATCH_FAILED)
00269     if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNext: got false match, retry\n");
00270 
00271   return ret;
00272 }
00273 
00274 int 
00275 WordCursorOne::WalkNextStep()
00276 {
00277   status = OK;
00278 
00279   {
00280     int error;
00281     if((error = cursor->Get(key, data, cursor_get_flags)) != 0) {
00282       if(error == DB_NOTFOUND) {
00283         if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, no more matches\n", (char*)searchKey.Get());
00284         found.Clear();
00285         return (status = WORD_WALK_ATEND);
00286       } else {
00287         return WORD_WALK_GET_FAILED;
00288       }
00289     }
00290   }
00291 
00292   //
00293   // Next step operation is always sequential walk
00294   //
00295   cursor_get_flags = DB_NEXT;
00296 
00297   found.Unpack(key, data);
00298 
00299   if(words->Dead()->Exists(found.Key()))
00300     return WORD_WALK_NOMATCH_FAILED;
00301   if(WalkNextExclude(found.Key()))
00302     return WORD_WALK_NOMATCH_FAILED;
00303 
00304   if(traceRes) traceRes->Add(new WordReference(found));
00305 
00306   if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, candidate is %s\n", (char*)searchKey.Get(), (char*)found.Get());
00307 
00308   //
00309   // Don't bother to compare keys if we want to walk all the entries
00310   //
00311   if(!(searchKey.Empty()))     {
00312     // examples
00313     // searchKey: aabc 1 ? ? ?
00314     // prefixKey: aabc 1 ? ? ?
00315 
00316     //
00317     // Stop loop if we reach a record whose key does not
00318     // match prefix key requirement, provided we have a valid
00319     // prefix key.
00320     // (ie. stop loop if we're past last possible match...)
00321     //
00322     if(!prefixKey.Empty() &&
00323        !prefixKey.Equal(found.Key()))   {
00324       if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, no more matches because found a key that is greater than searchKey\n", (char*)searchKey.Get());
00325       return (status = WORD_WALK_ATEND|WORD_WALK_ATEND_NOMATCH);
00326     }
00327 
00328     //
00329     // Skip entries that do not exactly match the specified key.
00330     // 
00331     if(!searchKeyIsSameAsPrefix && 
00332        !searchKey.Equal(found.Key()))   {
00333       int ret;
00334       switch((ret = SkipUselessSequentialWalking()) & WORD_WALK_RESULT_MASK) {
00335       case OK:
00336         if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, false match jump to %s\n", (char*)searchKey.Get(), (char*)found.Get());
00337         return WORD_WALK_NOMATCH_FAILED;
00338         break;
00339       case WORD_WALK_ATEND:
00340         if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, no more matches according to SkipUselessSequentialWalking\n", (char*)searchKey.Get());
00341         return status = ret;
00342         break;
00343       default:
00344         fprintf(stderr, "WordCursorOne::WalkNextStep: SkipUselessSequentialWalking failed %d\n", ret);
00345         return NOTOK;
00346         break;
00347       }
00348     }
00349   }
00350 
00351   if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, found %s\n", (char*)searchKey.Get(), (char*)found.Get());
00352 
00353   if(collectRes) {
00354     if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: collect\n");
00355     collectRes->Add(new WordReference(found));
00356   } else if(callback) {
00357     if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: calling callback\n");
00358     int ret = (*callback)(words, *cursor, &found, *(callback_data) );
00359     //
00360     // The callback function tells us that something went wrong, might
00361     // as well stop walking.
00362     //
00363     if(ret != OK) {
00364       if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: callback returned NOTOK");
00365       return (status = WORD_WALK_CALLBACK_FAILED|WORD_WALK_ATEND);
00366     }
00367   }
00368 
00369   return OK;
00370 }
00371 
00372 int 
00373 WordCursorOne::WalkFinish()
00374 {
00375   if(words->verbose) fprintf(stderr, "WordCursorOne::WalkFinish\n");
00376 
00377   return cursor->Close() == 0 ? OK : NOTOK;
00378 }
00379 
00380 // *****************************************************************************
00381 //
00382 // Helper for SkipUselessSequentialWalking. 
00383 // Undefine in foundKey all fields defined in searchKey
00384 // so that they are not considered by SetToFollowing.
00385 // It could become a method of WordKey but lacks generalisation and
00386 // from what I see it is a rather specific operation.
00387 //
00388 static inline void complement(WordContext* context, WordKey& key, const WordKey& mask)
00389 {
00390   int nfields = context->GetKeyInfo().nfields;
00391   int i;
00392   //
00393   // Undefine in 'key' all fields defined in 'mask'
00394   //
00395   for(i = 0; i < nfields; i++) {
00396     if(mask.IsDefined(i))
00397       key.Undefined(i);
00398     else
00399       key.SetDefined(i);
00400   }
00401 }
00402 
00403 // *****************************************************************************
00404 //
00405 // Find out if we should better jump to the next possible key (DB_SET_RANGE) instead of 
00406 // sequential iterating (DB_NEXT). 
00407 // If it is decided that jump is a better move :
00408 //   cursor_set_flags = DB_SET_RANGE
00409 //   key = calculated next possible key
00410 // Else
00411 //   do nothing
00412 // Return values
00413 // OK: skipping successfull.
00414 // WORD_WALK_ATEND : no more possible match, reached the maximum
00415 // WORD_WALK_FAILED: general failure, occurs if called and no skipping
00416 //                   necessary.
00417 // 
00418 // Sequential searching can waste time by searching all keys, for example:
00419 // If searching for Key: argh <DEF> <UNDEF> 10
00420 // Under normal circonstances we would do the following
00421 // 
00422 //    DATA            STATUS   ACTION
00423 // 1: argh 1 10       match    DB_NEXT
00424 // 2: argh 2 11       nomatch  DB_NEXT
00425 // 3: argh 2 15       nomatch  DB_NEXT
00426 // 4: argh 2 20       nomatch  DB_NEXT
00427 // 5: argh 2 30       nomatch  DB_NEXT
00428 // 6: argh 5 1        nomatch  DB_NEXT
00429 // 7: argh 5 8        nomatch  DB_NEXT
00430 // 8: argh 8 6        nomatch  DB_NEXT
00431 //
00432 // But the optimal would be
00433 //
00434 //    DATA            STATUS   ACTION
00435 // 1: argh 1 10       match    DB_NEXT
00436 // 2: argh 2 11       nomatch  DB_SET_RANGE argh 3 10
00437 // 3: argh 2 15       
00438 // 4: argh 2 20       
00439 // 5: argh 2 30
00440 // 6: argh 5 1        nomatch  DB_SET_RANGE argh 5 10
00441 // 7: argh 5 8
00442 // 8: argh 8 6        nomatch  DB_SET_RANGE argh 8 10
00443 //
00444 // That saves a lot of unecessary hit. The underlying logic is a bit 
00445 // more complex but you have the idea.
00446 //
00447 int
00448 WordCursorOne::SkipUselessSequentialWalking()
00449 {
00450   WordKey&      foundKey         = found.Key();
00451 
00452   int nfields = words->GetContext()->GetKeyInfo().nfields;
00453   int i;
00454 
00455   //
00456   // Find out how the searchKey and the foundKey differ.
00457   //
00458   int diff_field = 0;
00459   int lower = 0;
00460   if(!foundKey.Diff(searchKey, diff_field, lower)) {
00461     //
00462     // foundKey matches searchKey (no difference), don't
00463     // skip, everything is fine. The caller of SkipUselessSequentialWalking
00464     // is expected to avoid this case for efficiency.
00465     //
00466     return WORD_WALK_FAILED;
00467   }
00468 
00469   if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for next %s, candidate is %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
00470 
00471   //
00472   // Undefine in foundKey all fields defined in searchKey
00473   // so that they are not considered by SetToFollowing.
00474   //
00475   complement(words->GetContext(), foundKey, searchKey);
00476 
00477   //
00478   // If the key found is lower than the searched key when
00479   // considering only the fields defined in the search key,
00480   // we only need to enforce the key to get the match.
00481   // Otherwise we need to increment the found key to jump
00482   // properly.
00483   //
00484   if(lower) {
00485     if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: enforcing the search constraint is enough to jump forward\n");
00486     for(i = diff_field + 1; i < nfields; i++)
00487       if(foundKey.IsDefined(i)) foundKey.Set(i, 0);
00488   } else {
00489     if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: increment the key to jump forward\n");
00490     //
00491     // diff_field - 1 is not really necessary because diff_field is undefined
00492     // in foundKey and would therefore be ignored by SetToFollowing. We write
00493     // diff_field - 1 to clearly state that incrementing begins just before the
00494     // field for which a difference was found.
00495     //
00496     int ret;
00497     if((ret = foundKey.SetToFollowing(diff_field - 1)) != OK)
00498       return ret;
00499   }
00500 
00501   //
00502   // Copy all fields defined in searchKey into foundKey. This will copy
00503   // searchKey in foundKey because all these fields have been
00504   // previously undefined in foundKey.
00505   //
00506   foundKey.Merge(searchKey);
00507 
00508   if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for next %s, jump to %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
00509 
00510   //
00511   // Instruct Next function to jump to the calculated key
00512   //
00513   if(foundKey.Pack(key) == NOTOK) {
00514     return WORD_WALK_FAILED;
00515   }
00516   cursor_get_flags = DB_SET_RANGE;
00517 
00518   return OK;
00519 }
00520 
00521 // *****************************************************************************
00522 //
00523 // Copy defined fields in patch into foundKey and 
00524 // initialize internal state so that WalkNext jumps to
00525 // this key next time it's called.
00526 //
00527 // Technically this means : Override latest key found (found data member)
00528 // with patch fields values, starting from the first field set in
00529 // patch up to the last.  Pack the result in the key field and set
00530 // cursor_get_flags to DB_SET_RANGE.
00531 //
00532 int
00533 WordCursorOne::Seek(const WordKey& patch)
00534 {
00535   int nfields = words->GetContext()->GetKeyInfo().nfields;
00536   WordKey pos = searchKey;
00537 
00538   if(patch.Empty()) {
00539     fprintf(stderr, "WordCursorOne::Seek: empty patch is useless\n");
00540     return NOTOK;
00541   }
00542   
00543   int i;
00544   //
00545   // Leave the most significant fields untouched if they are
00546   // already set, or set them to zero if they are not.
00547   //
00548   for(i = WORD_KEY_WORD + 1; i < nfields; i++) {
00549     if(patch.IsDefined(i))
00550       break;
00551     else if(!pos.IsDefined(i))
00552       pos.Set(i, 0);
00553   }
00554   //
00555   // From the first value set in the patch to the end
00556   // override.
00557   //
00558   for(; i < nfields; i++) {
00559     if(patch.IsDefined(i))
00560       pos.Set(i, patch.Get(i));
00561     else
00562       pos.Set(i, 0);
00563   }
00564 
00565   if(!pos.Filled()) {
00566     fprintf(stderr, "WordCursorOne::Seek: only makes sense if the resulting key is fully defined\n");
00567     return NOTOK;
00568   }
00569 
00570   if(words->verbose > 2) fprintf(stderr, "WordCursorOne::Seek: seek to %s\n", (char*)pos.Get());
00571 
00572   //
00573   // Next move will jump to the patched key
00574   //
00575   pos.Pack(key);
00576   cursor_get_flags = DB_SET_RANGE;
00577   
00578   return OK;
00579 }
00580 
00581 //
00582 // Convert the whole structure to an ascii string description
00583 //
00584 int WordCursorOne::Get(String& bufferout) const
00585 {
00586   String tmp;
00587   bufferout.trunc();
00588 
00589   searchKey.Get(tmp);
00590   bufferout << "Input: searchKey = " << tmp << ", action = " <<  action << "; Output: collectRes " << (collectRes ? "set" : "not set");
00591   found.Get(tmp);
00592   bufferout << ", found = " << tmp << ", status = " << status;
00593   prefixKey.Get(tmp);
00594   bufferout << "; Internal State: prefixKey = " << tmp << ", cursor_get_flags = " << cursor_get_flags;
00595 
00596   return OK;
00597 }

Generated on Sun Jun 8 10:56:40 2008 for GNUmifluz by  doxygen 1.5.5