WordCursor.h

Go to the documentation of this file.
00001 //
00002 // WordCursor.h
00003 //
00004 // NAME
00005 // 
00006 // abstract class to search and retrieve entries in a WordList object.
00007 //
00008 // SYNOPSIS
00009 // 
00010 // #include <WordList.h>
00011 //
00012 // int callback(WordList *, WordDBCursor& , const WordReference *, Object &)
00013 // {
00014 //    ...
00015 // }
00016 //
00017 // Object* data = ...
00018 //
00019 // WordList *words = ...;
00020 //
00021 // WordCursor *search = words->Cursor(WordKey("word <UNDEF> <UNDEF>"), HTDIG_WORDLIST_COLLECTOR);
00022 //
00023 // if(search->Walk() == NOTOK) bark;
00024 // List* results = search->GetResults();
00025 //
00026 // WordCursor *search = words->Cursor(callback, data);
00027 // WordCursor *search = words->Cursor(WordKey("word <UNDEF> <UNDEF>"));
00028 // WordCursor *search = words->Cursor(WordKey("word <UNDEF> <UNDEF>"), callback, data);
00029 // WordCursor *search = words->Cursor(WordKey());
00030 //
00031 // search->WalkInit();
00032 // if(search->WalkNext() == OK)
00033 //   dosomething(search->GetFound());
00034 // search->WalkFinish();
00035 // 
00036 // DESCRIPTION
00037 // 
00038 // WordCursor is an iterator on an inverted index. It is created by
00039 // calling the <i>Cursor</i> method of a <i>WordList</i> object. There is
00040 // no other way to create a WordCursor object.
00041 // When the <i>Walk*</i> methods return,
00042 // the WordCursor object contains the result of the search and 
00043 // status information that indicates if it reached the end of 
00044 // the list (IsAtEnd() method).
00045 //
00046 // The <b>callback</b> function that is called each time a match is
00047 // found takes the following arguments:
00048 // <pre>
00049 // WordList* words pointer to the inverted index handle.
00050 // WordDBCursor& cursor to call Del() and delete the current match
00051 // WordReference* wordRef is the match
00052 // Object& data is the user data provided by the caller when
00053 //              search began.
00054 // </pre>
00055 //
00056 // The <i>WordKey</i> object that specifies the search criterion
00057 // may be used as follows (assuming word is followed by DOCID and
00058 // LOCATION):
00059 // 
00060 // Ex1: <b>WordKey()</b> walk the entire list of occurrences.
00061 //
00062 // Ex2: <b>WordKey("word <UNDEF> <UNDEF>")</b> find all occurrences
00063 // of <i>word</i>.
00064 //
00065 // Ex3: <b>WordKey("meet <UNDEF> 1")</b> find all occurrences of
00066 // <i>meet</i> that occur at LOCATION 1 in any DOCID. This can
00067 // be inefficient since the search has to scan all occurrences
00068 // of <i>meet</i> to find the ones that occur at LOCATION 1.
00069 //
00070 // Ex4: <b>WordKey("meet 2 <UNDEF>")</b> find all occurrences of
00071 // <i>meet</i> that occur in DOCID 2, at any location.
00072 //
00073 // WordCursor is an abstract class and cannot be instantiated. 
00074 // See the WordCursorOne manual page for an actual implementation of
00075 // a WordCursor object.
00076 //
00077 // END
00078 //
00079 // Part of the ht://Dig package   <http://www.htdig.org/>
00080 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group
00081 // For copyright details, see the file COPYING in your distribution
00082 // or the GNU General Public License version 2 or later
00083 // <http://www.gnu.org/copyleft/gpl.html>
00084 //
00085 // $Id: WordCursor_8h-source.html,v 1.1 2008/06/08 10:13:00 sebdiaz Exp $
00086 //
00087 
00088 #ifndef _WordCursor_h_
00089 #define _WordCursor_h_
00090 
00091 #ifndef SWIG
00092 #include "htString.h"
00093 #include "WordKey.h"
00094 #include "WordDB.h"
00095 
00096 class WordList;
00097 class WordDBCursor;
00098 class WordDead;
00099 //
00100 // Possible values of the action argument of WordList::Walk
00101 // check walk function in WordList.cc for info on these:
00102 // COLLECTOR stores each match in collectRes, WALKER calls the callback.
00103 #define HTDIG_WORDLIST_COLLECTOR        0x0001
00104 #define HTDIG_WORDLIST_WALKER           0x0002
00105 
00106 // Type of the callback argument in WordCursor. Arguments, in order:
00107 // inverted index handle, cursor on the current match, the match itself,
00108 // caller supplied data. See WORD_WALK_CALLBACK_FAILED for a NOTOK return.
00109 typedef int (*wordlist_walk_callback_t)(WordList *, WordDBCursor& , const WordReference *, Object &);
00110 #endif /* SWIG */
00111 
00112 //
00113 // Possible values of the status member (distinct bits, except WORD_WALK_FAILED)
00114 //
00115 
00116 //
00117 // WalkNext reached the end of the matches
00118 //
00119 #define WORD_WALK_ATEND                 0x0001
00120 //
00121 // Failed to acquire Berkeley DB cursor
00122 //
00123 #define WORD_WALK_CURSOR_FAILED         0x0002
00124 //
00125 // Berkeley DB Get operation failed
00126 //
00127 #define WORD_WALK_GET_FAILED            0x0004
00128 //
00129 // WalkNextStep hit an entry that does not match the
00130 // searched key.
00131 //
00132 #define WORD_WALK_NOMATCH_FAILED        0x0008
00133 //
00134 // Mask that shows only the values described above.
00135 // The range of values 0xfffff000 can be used by applications
00136 // and will never be used by mifluz.
00137 // Bits 0x0ff0 are cleared by this mask (it is applied by WordCursor::GetStatus()).
00138 #define WORD_WALK_RESULT_MASK       0xfffff00f
00139 
00140 //
00141 // Callback function returned NOTOK
00142 // (this bit is hidden by WORD_WALK_RESULT_MASK)
00143 #define WORD_WALK_CALLBACK_FAILED       0x0010
00144 //
00145 // WalkNextStep went beyond search criterion but is not at end of the index
00146 // (this bit is hidden by WORD_WALK_RESULT_MASK; WordCursor::IsNoMatch() tests it)
00147 #define WORD_WALK_ATEND_NOMATCH         0x0020
00148 
00149 //
00150 // WordCursor contains undefined data
00151 //
00152 #define WORD_WALK_FAILED                0xffffffff
00153 
00154 //
00155 // Possible return values of the IsA() method
00156 // (WordCursor::IsA() itself returns WORD_CURSOR)
00157 #define WORD_CURSOR                     1
00158 #define WORD_CURSORS                    2
00159 
00160 // Abstract iterator over the entries of a WordList inverted index.
00161 // Wordlist::Walk uses WordCursor for :
00162 // state information : cursor
00163 // search term description
00164 // debug/trace/benchmarking
00165 // search result format description
00166 // Derived classes must implement the pure virtual methods declared below.
00167 class WordCursor
00168 {
00169  public:
00170 #ifndef SWIG
00171   WordCursor(WordContext *context) :
00172     searchKey(context),
00173     found(context) {}
00174   // NOTE(review): only searchKey and found are set by the constructor above.
00175   virtual ~WordCursor() { }
00176 #endif /* SWIG */
00177   //-
00178   // Clear all data in object, set <b>GetResults()</b> data to NULL but
00179   // do not delete it (the application is responsible for that).
00180   //
00181   virtual void Clear() = 0;
00182 #ifndef SWIG
00183   virtual void ClearInternal() = 0; // NOTE(review): undocumented here, see derived classes
00184   virtual void ClearResult() = 0; // NOTE(review): undocumented here, see derived classes
00185 #endif /* SWIG */
00186 
00187   //-
00188   // Returns the type of the object. May be overridden by
00189   // derived classes to differentiate them at runtime.
00190   // Returns WORD_CURSOR.
00191   //
00192   virtual inline int IsA() const { return WORD_CURSOR; }
00193 
00194   //-
00195   // Optimize the cursor before starting a Walk.
00196   // Returns OK on success, NOTOK otherwise.
00197   // This default implementation does nothing and returns OK.
00198   virtual inline int Optimize() { return OK; }
00199 
00200   //-
00201   // Save in <b>buffer</b> all the information necessary to resume
00202   // the walk at the point it left. The ASCII representation of the
00203   // last key found (GetFound()) is written in <b>buffer</b> using the
00204   // WordKey::Get method.
00205   //
00206   virtual int ContextSave(String& buffer) const = 0;
00207   //-
00208   // Restore from buffer all the information necessary to 
00209   // resume the walk at the point it left. The <b>buffer</b> is expected
00210   // to contain an ASCII representation of a WordKey (see WordKey::Set
00211   // method). A <b>Seek</b> is done on the key and the object is prepared
00212   // to jump to the next occurrence when <b>WalkNext</b> is called (the
00213   // cursor_get_flags is set to <i>DB_NEXT</i>).
00214   //
00215   virtual int ContextRestore(const String& buffer) = 0;
00216 
00217   //-
00218   // Walk and collect data from the index. 
00219   // Returns OK on success, NOTOK otherwise.
00220   //
00221   virtual int Walk() = 0;
00222   //-
00223   // Must be called before other Walk methods are used.
00224   // Fill internal state according to input parameters 
00225   // and move before the first matching entry.
00226   // Returns OK on success, NOTOK otherwise.
00227   //
00228   virtual int WalkInit() = 0;
00229   //-
00230   // Move before the first index matching entry.
00231   // Returns OK on success, NOTOK otherwise.
00232   //
00233   virtual int WalkRewind() = 0;
00234   //-
00235   // Move to the next matching entry.  At end of list, WORD_WALK_ATEND
00236   // is returned.  Returns OK on success, NOTOK otherwise. When OK
00237   // is returned, the GetFound() method returns the matched entry.
00238   // When WORD_WALK_ATEND is returned, the GetFound() method returns
00239   // an empty object if the end of the index was reached or the match
00240   // that was found and that is greater than the specified search
00241   // criterion.
00242   //
00243   virtual int WalkNext() = 0;
00244 #ifndef SWIG
00245   //-
00246   // Advance the cursor one step. The entry pointed to by the cursor may
00247   // or may not match the requirements.  Returns OK if entry pointed
00248   // by cursor matches requirements.  Returns NOTOK on
00249   // failure. Returns WORD_WALK_NOMATCH_FAILED if the current entry
00250   // does not match requirements, it's safe to call WalkNextStep again
00251   // until either OK or NOTOK is returned.
00252   //
00253   virtual int WalkNextStep() = 0;
00254   //-
00255   // Return 0 if this key must not be returned by WalkNext as a valid
00256   // match. The WalkNextStep method calls this virtual method immediately
00257   // after jumping to the next entry in the database. This may be used,
00258   // for instance, to skip entries that were selected by a previous 
00259   // search.
00260   // NOTE(review): this default returns 0 for every key; confirm the convention.
00261   virtual int WalkNextExclude(const WordKey& key) { return 0; }
00262 #endif /* SWIG */
00263   //-
00264   // Terminate Walk, free allocated resources.
00265   // Returns OK on success, NOTOK otherwise.
00266   //
00267   virtual int WalkFinish() = 0;
00268 
00269   //-
00270   // Move before the inverted index position specified in <b>patch.</b>
00271   // May only be called after a successful call to the <i>WalkNext</i>
00272   // or <i>WalkNextStep</i> method.
00273   // Copy defined fields from <b>patch</b> into a copy of the 
00274   // <i>found</i> data member and 
00275   // initialize internal state so that <i>WalkNext</i> jumps to
00276   // this key next time it's called (cursor_get_flag set to DB_SET_RANGE).
00277   // Returns OK if successful, NOTOK otherwise.
00278   //
00279   virtual int Seek(const WordKey& patch) = 0;
00280 
00281   //-
00282   // Returns true if cursor is positioned after the last possible
00283   // match, false otherwise.
00284   // True only when GetStatus() is exactly WORD_WALK_ATEND.
00285   virtual inline int IsAtEnd() const { return GetStatus() == WORD_WALK_ATEND; }
00286   //-
00287   // Returns non-zero if cursor hit a value that does not match search criterion.
00288   // Tests the WORD_WALK_ATEND_NOMATCH bit of the unmasked status member.
00289   virtual inline int IsNoMatch() const { return status & WORD_WALK_ATEND_NOMATCH; }
00290 
00291   //
00292   // Accessors for input parameters
00293   //
00294   //-
00295   // Returns the search criterion.
00296   //
00297   inline WordKey& GetSearch() { return searchKey; }
00298 #ifndef SWIG
00299   inline const WordKey& GetSearch() const { return searchKey; }
00300 #endif /* SWIG */
00301   //-
00302   // Returns the type of action when a matching entry
00303   // is found.
00304   //
00305   inline int GetAction() const { return action; }
00306   //
00307   // Accessors for output parameters
00308   //
00309   //-
00310   // Returns the list of WordReference found. The application
00311   // is responsible for deallocation of the list. If the <b>action</b>
00312   // input flag bit HTDIG_WORDLIST_COLLECTOR is not set, return a NULL
00313   // pointer.
00314   //
00315   inline List *GetResults() { return collectRes; }
00316 #ifndef SWIG
00317   //-
00318   // For debugging purposes. Returns the list of WordReference hit 
00319   // during the search
00320   // process. Some of them match the searched key, some don't.
00321   // The application is responsible for deallocation of the list.
00322   //
00323   inline List *GetTraces() { return traceRes; }
00324   //-
00325   // For debugging purposes. Set the list of WordReference hit
00326   // during the search process. 
00327   //
00328   inline void SetTraces(List* traceRes_arg) { traceRes = traceRes_arg; }
00329 #endif /* SWIG */
00330   //-
00331   // Returns the last entry hit by the search. Only contains
00332   // a valid value if the last <i>WalkNext</i> or <i>WalkNextStep</i>
00333   // call was successful (i.e. returned OK).
00334   //
00335   inline const WordReference& GetFound() { return found; }
00336   //-
00337   // Returns the status of the cursor which may be 
00338   // OK or WORD_WALK_ATEND.
00339   // The status member is filtered through WORD_WALK_RESULT_MASK.
00340   inline int GetStatus() const { return status & WORD_WALK_RESULT_MASK; }
00341 
00342 #ifndef SWIG
00343   //-
00344   // Convert the whole structure to an ASCII string description.
00345   // Returns OK if successful, NOTOK otherwise.
00346   //
00347   virtual int Get(String& bufferout) const = 0;
00348 #endif /* SWIG */
00349   //-
00350   // Convert the whole structure to an ASCII string description
00351   // and return it.
00352   // The status returned by Get(String&) is ignored.
00353   inline String Get() const { String tmp; Get(tmp);  return tmp; }
00354 
00355 #ifndef SWIG
00356  protected:
00357 
00358   //-
00359   // Protected method. Derived classes should use this function to initialize
00360   // the object if they do not call a WordCursor constructor in their own
00361   // constructor. Initialization may occur after the object is created
00362   // and must occur before a <b>Walk*</b> method is called. See the 
00363   // DESCRIPTION section for the semantics of the arguments.
00364   // Return OK on success, NOTOK on error.
00365   //
00366   virtual int Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object * ncallback_data, int naction) = 0;
00367 
00368 
00369   //
00370   // Input parameters
00371   //
00372   //-
00373   // Input data. The key to be searched, see DESCRIPTION for more information.
00374   //
00375   WordKey searchKey;
00376   //
00377   // Input data. What to do when a WordReference is found.
00378   // Can either be
00379   // HTDIG_WORDLIST_COLLECTOR  WordReference found stored in collectRes
00380   // HTDIG_WORDLIST_WALKER     callback is called for each WordReference found
00381   //
00382   int action;
00383 
00384   //
00385   // Input data. Callback function called for each match found.
00386   //
00387   wordlist_walk_callback_t callback;
00388   //
00389   // Input data. Argument given to callback, contains arbitrary 
00390   // caller defined data.
00391   //
00392   Object *callback_data;
00393 
00394   //
00395   // Output parameters
00396   //
00397   //
00398   // Output data. List of WordReference found in the search.
00399   //
00400   List *collectRes;
00401 
00402   //-
00403   // Output data. Last match found. Use GetFound() to retrieve it.
00404   //
00405   WordReference found;
00406   //-
00407   // Output data. WORD_WALK_ATEND if cursor is past last match, 
00408   // OK otherwise. Use GetStatus() to retrieve it.
00409   //
00410   int status;
00411 
00412   //
00413   // Debugging section. Do not use unless you know exactly what you do.
00414   //
00415   //
00416   // Collect everything found while searching (not necessarily matching)
00417   //
00418   List *traceRes;
00419 
00420   //
00421   // Internal state
00422   //
00423   //-
00424   // The inverted index used by this cursor.
00425   //
00426   WordList *words;
00427 #endif /* SWIG */
00428 };
00429 
00430 #endif /* _WordCursor_h_ */

Generated on Sun Jun 8 10:56:39 2008 for GNUmifluz by  doxygen 1.5.5