WordSearch.cc

Go to the documentation of this file.
00001 //
00002 // Part of the ht://Dig package   <http://www.htdig.org/>
00003 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group
00004 // For copyright details, see the file COPYING in your distribution
00005 // or the GNU General Public License version 2 or later
00006 // <http://www.gnu.org/copyleft/gpl.html>
00007 //
00008 // $Id: WordSearch_8cc-source.html,v 1.1 2008/06/08 10:13:23 sebdiaz Exp $
00009 //
00010 #ifdef HAVE_CONFIG_H
00011 #include <config.h>
00012 #endif /* HAVE_CONFIG_H */
00013 
00014 #ifdef HAVE_UNISTD_H
00015 #include <unistd.h>
00016 #endif /* HAVE_UNISTD_H */
00017 
00018 #include <WordSearch.h>
00019 #include <WordResults.h>
00020 
00021 WordSearch::WordSearch(WordList* nwords)
00022 {
00023   //
00024   // Internal
00025   //
00026   words = nwords;
00027   verbose = 0;
00028 
00029   //
00030   // Input/Output
00031   //
00032   limit_base = 0;
00033 
00034   //
00035   // Input
00036   //
00037   limit_count = 0;
00038   expr = 0;
00039 
00040   //
00041   // Output
00042   //
00043   matches = 0;
00044 }
00045 
00046 int WordSearch::ContextRestore()
00047 {
00048   String context_in;
00049   if(results->GetContext(context_in) != OK)
00050     return NOTOK;
00051 
00052   return expr->ContextRestore(context_in);
00053 }
00054 
00055 int WordSearch::ContextSave(int status)
00056 {
00057   String tmp;
00058 
00059   if(status != WORD_WALK_ATEND) {
00060     if(expr->ContextSave(tmp) != OK)
00061       return NOTOK;
00062   }
00063 
00064   results->PutContext(tmp);
00065 
00066   return OK;
00067 }
00068 
00069 WordMatches *WordSearch::Search()
00070 {
00071   //
00072   // Build space for results
00073   //
00074   matches = new WordMatches(words->GetContext());
00075   matches->Allocate(limit_count + 1);
00076 
00077   int ret;
00078   //
00079   // Call SearchFromIndex if returned value is neither OK (got them all)
00080   // or WORD_WALK_ATEND (did not get them all but at end of search anyway).
00081   //
00082   if((ret = SearchFromCache()) == WORD_WALK_END_CACHE) {
00083     unsigned int count;
00084     if(results->Count(count) == NOTOK)
00085       return 0;
00086     ret = SearchFromIndex(limit_base - count + limit_count);
00087   }
00088 
00089   //
00090   // Discard results if nothing was found or error occured
00091   //
00092   if(ret == NOTOK || matches->length <= 0) {
00093     delete matches;
00094     matches = 0;
00095   }
00096 
00097   return matches;
00098 }
00099 
00100 int WordSearch::SearchFromCache()
00101 {
00102   int filled = results->Filled();
00103   unsigned int available;
00104   unsigned int base = limit_base;
00105 
00106   if(results->Count(available) != OK)
00107     return NOTOK;
00108 
00109   if(available <= limit_base) {
00110     base = (available / limit_count) * limit_count;
00111     //
00112     // If the cache is not filled, it is the responsibility of
00113     // SearchFromIndex to set the limit_base according to what is
00114     // found when searching.
00115     //
00116     if(filled) {
00117       limit_base = base;
00118     } 
00119   }
00120 
00121   if(results->GetMatchesTotal(matches_total) != OK)
00122     return NOTOK;
00123 
00124   return results->Get(matches, limit_count, base);
00125 }
00126 
00127 int WordSearch::SearchFromIndex(unsigned int length)
00128 {
00129   int ret = 0;
00130 
00131   if(WordTree::TopLevelOptimize(expr) != OK)
00132     return NOTOK;
00133 
00134   /*
00135    * This happens when the optimization decided that the
00136    * expression was meaningless.
00137    */
00138   if(this==0 || expr == 0)
00139     return NOTOK;
00140 
00141   if(expr->Count(matches_total) != OK)
00142     return NOTOK;
00143   
00144   //
00145   // Move before first possible position. 
00146   //
00147   if((ret = expr->WalkInit()) != OK)
00148     goto end;
00149 
00150   if((ret = ContextRestore()) == NOTOK)
00151     goto end;
00152 
00153   //
00154   // Set the result list only after the context was restored otherwise
00155   // it will interfere with the re-initialization of the context because
00156   // WalkNext will skip previously seen documents.
00157   //
00158   if((ret = expr->SetResults(results)) != OK)
00159     goto end;
00160 
00161   ret = SearchLoop(expr, length);
00162 
00163   //
00164   // Don't bother saving the context if at end of 
00165   // search (WORD_WALK_ATEND) or error (NOTOK)
00166   //
00167   if(ret != NOTOK && (ret = ContextSave(ret)) == NOTOK)
00168     goto end;
00169 
00170 end:
00171   expr->WalkFinish();
00172 
00173   if(results) {
00174     if(results->PutMatchesTotal(matches_total) != OK)
00175       return NOTOK;
00176   }
00177 
00178   return ret;
00179 }
00180 
00181 int WordSearch::SearchLoop(WordTree *expr, unsigned int length)
00182 {
00183   int ret = OK;
00184   unsigned int i;
00185   WordResults* results = expr->GetResults();
00186   unsigned int count;
00187 
00188   if(results->Count(count) != OK)
00189     return NOTOK;
00190 
00191   for(i = 0; i < length; i++) {
00192     if((ret = expr->WalkNext()) != OK) {
00193       if(ret != WORD_WALK_ATEND)
00194         return ret;
00195       break;
00196     } else {
00197       WordMatch* match = matches->matches[matches->length];
00198       match->match = expr->GetDocument();
00199       if(expr->IsA() != WORD_TREE_LITERAL)
00200         match->info = ((WordTreeOperand*)expr)->GetInfo();
00201       if((ret = results->Put(*match, count + i)) != OK)
00202         return ret;
00203       if(verbose) fprintf(stderr, "WordSearch::SearchLoop: match %s\n", (char*)match->Get());
00204       matches->length = (matches->length + 1) % limit_count;
00205     }
00206   }
00207 
00208   if(i == 0) {
00209     ;
00210   } else {
00211     if(matches->length == 0) matches->length = limit_count;
00212     limit_base = ((count + i - 1) / limit_count) * limit_count;
00213   }
00214 
00215   //
00216   // Invalidate matches that are above the list of valid matches
00217   //
00218   for(i = 0; i < matches->size; i++)
00219     matches->matches[i]->valid = i < matches->length;
00220 
00221   return ret;
00222 }

Generated on Sun Jun 8 10:56:40 2008 for GNUmifluz by  doxygen 1.5.5