00001 // 00002 // WordCursor.h 00003 // 00004 // NAME 00005 // 00006 // abstract class to search and retrieve entries in a WordList object. 00007 // 00008 // SYNOPSIS 00009 // 00010 // #include <WordList.h> 00011 // 00012 // int callback(WordList *, WordDBCursor& , const WordReference *, Object &) 00013 // { 00014 // ... 00015 // } 00016 // 00017 // Object* data = ... 00018 // 00019 // WordList *words = ...; 00020 // 00021 // WordCursor *search = words->Cursor(WordKey("word <UNDEF> <UNDEF>"), HTDIG_WORDLIST_COLLECTOR); 00022 // 00023 // if(search->Walk() == NOTOK) bark; 00024 // List* results = search->GetResults(); 00025 // 00026 // WordCursor *search = words->Cursor(callback, data); 00027 // WordCursor *search = words->Cursor(WordKey("word <UNDEF> <UNDEF>")); 00028 // WordCursor *search = words->Cursor(WordKey("word <UNDEF> <UNDEF>"), callback, data); 00029 // WordCursor *search = words->Cursor(WordKey()); 00030 // 00031 // search->WalkInit(); 00032 // if(search->WalkNext() == OK) 00033 // dosomething(search->GetFound()); 00034 // search->WalkFinish(); 00035 // 00036 // DESCRIPTION 00037 // 00038 // WordCursor is an iterator on an inverted index. It is created by 00039 // asking a <i>WordList</i> object with the <i>Cursor.</i> There is 00040 // no other way to create a WordCursor object. 00041 // When the <i>Walk*</i> methods return, 00042 // the WordCursor object contains the result of the search and 00043 // status information that indicates if it reached the end of 00044 // the list (IsAtEnd() method). 00045 // 00046 // The <b>callback</b> function that is called each time a match is 00047 // found takes the following arguments: 00048 // <pre> 00049 // WordList* words pointer to the inverted index handle. 00050 // WordDBCursor& cursor to call Del() and delete the current match 00051 // WordReference* wordRef is the match 00052 // Object& data is the user data provided by the caller when 00053 // search began. 00054 // </pre> 00055 // 00056 // The <i>WordKey</i> object that specifies the search criterion 00057 // may be used as follows (assuming word is followed by DOCID and 00058 // LOCATION): 00059 // 00060 // Ex1: <b>WordKey()</b> walk the entire list of occurences. 00061 // 00062 // Ex2: <b>WordKey("word <UNDEF> <UNDEF>")</b> find all occurrences 00063 // of <i>word</i>. 00064 // 00065 // Ex3: <b>WordKey("meet <UNDEF> 1")</b> find all occurrences of 00066 // <i>meet</i> that occur at LOCATION 1 in any DOCID. This can 00067 // be inefficient since the search has to scan all occurrences 00068 // of <i>meet</i> to find the ones that occur at LOCATION 1. 00069 // 00070 // Ex4: <b>WordKey("meet 2 <UNDEF>")</b> find all occurrences of 00071 // <i>meet</i> that occur in DOCID 2, at any location. 00072 // 00073 // WordList is an abstract class and cannot be instanciated. 00074 // See the WordCursorOne manual page for an actual implementation of 00075 // a WordCursor object. 00076 // 00077 // END 00078 // 00079 // Part of the ht://Dig package <http://www.htdig.org/> 00080 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group 00081 // For copyright details, see the file COPYING in your distribution 00082 // or the GNU General Public License version 2 or later 00083 // <http://www.gnu.org/copyleft/gpl.html> 00084 // 00085 // $Id: WordCursor_8h-source.html,v 1.1 2008/06/08 10:13:00 sebdiaz Exp $ 00086 // 00087 00088 #ifndef _WordCursor_h_ 00089 #define _WordCursor_h_ 00090 00091 #ifndef SWIG 00092 #include "htString.h" 00093 #include "WordKey.h" 00094 #include "WordDB.h" 00095 00096 class WordList; 00097 class WordDBCursor; 00098 class WordDead; 00099 // 00100 // Possible values of the action argument of WordList::Walk 00101 // check walk function in WordList.cc for info on these: 00102 // 00103 #define HTDIG_WORDLIST_COLLECTOR 0x0001 00104 #define HTDIG_WORDLIST_WALKER 0x0002 00105 00106 // 00107 // Type of the callback argument in WordCursor 00108 // 00109 typedef int (*wordlist_walk_callback_t)(WordList *, WordDBCursor& , const WordReference *, Object &); 00110 #endif /* SWIG */ 00111 00112 // 00113 // Possible values of the status member 00114 // 00115 00116 // 00117 // WalkNext reached the end of the matches 00118 // 00119 #define WORD_WALK_ATEND 0x0001 00120 // 00121 // Failed to acquire Berkeley DB cursor 00122 // 00123 #define WORD_WALK_CURSOR_FAILED 0x0002 00124 // 00125 // Berkeley DB Get operation failed 00126 // 00127 #define WORD_WALK_GET_FAILED 0x0004 00128 // 00129 // WalkNextStep hit an entry that does not match the 00130 // searched key. 00131 // 00132 #define WORD_WALK_NOMATCH_FAILED 0x0008 00133 // 00134 // Mask that shows only the values described above. 00135 // The range of values 0xfffff000 can be used by applications 00136 // and will never be used by mifluz. 00137 // 00138 #define WORD_WALK_RESULT_MASK 0xfffff00f 00139 00140 // 00141 // Callback function returned NOTOK 00142 // 00143 #define WORD_WALK_CALLBACK_FAILED 0x0010 00144 // 00145 // WalkNextStep went beyond search criterion but is not at end of the index 00146 // 00147 #define WORD_WALK_ATEND_NOMATCH 0x0020 00148 00149 // 00150 // WordCursor contains undefined data 00151 // 00152 #define WORD_WALK_FAILED 0xffffffff 00153 00154 // 00155 // Possible return values of the IsA() method 00156 // 00157 #define WORD_CURSOR 1 00158 #define WORD_CURSORS 2 00159 00160 // 00161 // Wordlist::Walk uses WordCursor for : 00162 // state information : cursor 00163 // search term description 00164 // debug/trace/benchmarking 00165 // search result format description 00166 // 00167 class WordCursor 00168 { 00169 public: 00170 #ifndef SWIG 00171 WordCursor(WordContext *context) : 00172 searchKey(context), 00173 found(context) {} 00174 00175 virtual ~WordCursor() { } 00176 #endif /* SWIG */ 00177 //- 00178 // Clear all data in object, set <b>GetResult()</b> data to NULL but 00179 // do not delete it (the application is responsible for that). 00180 // 00181 virtual void Clear() = 0; 00182 #ifndef SWIG 00183 virtual void ClearInternal() = 0; 00184 virtual void ClearResult() = 0; 00185 #endif /* SWIG */ 00186 00187 //- 00188 // Returns the type of the object. May be overloaded by 00189 // derived classes to differentiate them at runtime. 00190 // Returns WORD_CURSOR. 00191 // 00192 virtual inline int IsA() const { return WORD_CURSOR; } 00193 00194 //- 00195 // Optimize the cursor before starting a Walk. 00196 // Returns OK on success, NOTOK otherwise. 00197 // 00198 virtual inline int Optimize() { return OK; } 00199 00200 //- 00201 // Save in <b>buffer</b> all the information necessary to resume 00202 // the walk at the point it left. The ASCII representation of the 00203 // last key found (GetFound()) is written in <b>buffer</b> using the 00204 // WordKey::Get method. 00205 // 00206 virtual int ContextSave(String& buffer) const = 0; 00207 //- 00208 // Restore from buffer all the information necessary to 00209 // resume the walk at the point it left. The <b>buffer</b> is expected 00210 // to contain an ASCII representation of a WordKey (see WordKey::Set 00211 // method). A <b>Seek</b> is done on the key and the object is prepared 00212 // to jump to the next occurrence when <b>WalkNext</b> is called (the 00213 // cursor_get_flags is set to <i>DB_NEXT.</i> 00214 // 00215 virtual int ContextRestore(const String& buffer) = 0; 00216 00217 //- 00218 // Walk and collect data from the index. 00219 // Returns OK on success, NOTOK otherwise. 00220 // 00221 virtual int Walk() = 0; 00222 //- 00223 // Must be called before other Walk methods are used. 00224 // Fill internal state according to input parameters 00225 // and move before the first matching entry. 00226 // Returns OK on success, NOTOK otherwise. 00227 // 00228 virtual int WalkInit() = 0; 00229 //- 00230 // Move before the first index matching entry. 00231 // Returns OK on success, NOTOK otherwise. 00232 // 00233 virtual int WalkRewind() = 0; 00234 //- 00235 // Move to the next matching entry. At end of list, WORD_WALK_ATEND 00236 // is returned. Returns OK on success, NOTOK otherwise. When OK 00237 // is returned, the GetFound() method returns the matched entry. 00238 // When WORD_WALK_ATEND is returned, the GetFound() method returns 00239 // an empty object if the end of the index was reached or the match 00240 // that was found and that is greated than the specified search 00241 // criterion. 00242 // 00243 virtual int WalkNext() = 0; 00244 #ifndef SWIG 00245 //- 00246 // Advance the cursor one step. The entry pointed to by the cursor may 00247 // or may not match the requirements. Returns OK if entry pointed 00248 // by cursor matches requirements. Returns NOTOK on 00249 // failure. Returns WORD_WALK_NOMATCH_FAILED if the current entry 00250 // does not match requirements, it's safe to call WalkNextStep again 00251 // until either OK or NOTOK is returned. 00252 // 00253 virtual int WalkNextStep() = 0; 00254 //- 00255 // Return 0 if this key must not be returned by WalkNext as a valid 00256 // match. The WalkNextStep method calls this virtual method immediately 00257 // after jumping to the next entry in the database. This may be used, 00258 // for instance, to skip entries that were selected by a previous 00259 // search. 00260 // 00261 virtual int WalkNextExclude(const WordKey& key) { return 0; } 00262 #endif /* SWIG */ 00263 //- 00264 // Terminate Walk, free allocated resources. 00265 // Returns OK on success, NOTOK otherwise. 00266 // 00267 virtual int WalkFinish() = 0; 00268 00269 //- 00270 // Move before the inverted index position specified in <b>patch.</b> 00271 // May only be called after a successfull call to the <i>WalkNext</i> 00272 // or <i>WalkNextStep</i>method. 00273 // Copy defined fields from <b>patch</b> into a copy of the 00274 // <i>found</i> data member and 00275 // initialize internal state so that <i>WalkNext</i> jumps to 00276 // this key next time it's called (cursor_get_flag set to DB_SET_RANGE). 00277 // Returns OK if successfull, NOTOK otherwise. 00278 // 00279 virtual int Seek(const WordKey& patch) = 0; 00280 00281 //- 00282 // Returns true if cursor is positioned after the last possible 00283 // match, false otherwise. 00284 // 00285 virtual inline int IsAtEnd() const { return GetStatus() == WORD_WALK_ATEND; } 00286 //- 00287 // Returns true if cursor hit a value that does not match search criterion. 00288 // 00289 virtual inline int IsNoMatch() const { return status & WORD_WALK_ATEND_NOMATCH; } 00290 00291 // 00292 // Accessors for input parameters 00293 // 00294 //- 00295 // Returns the search criterion. 00296 // 00297 inline WordKey& GetSearch() { return searchKey; } 00298 #ifndef SWIG 00299 inline const WordKey& GetSearch() const { return searchKey; } 00300 #endif /* SWIG */ 00301 //- 00302 // Returns the type of action when a matching entry 00303 // is found. 00304 // 00305 inline int GetAction() const { return action; } 00306 // 00307 // Accessors for output parameters 00308 // 00309 //- 00310 // Returns the list of WordReference found. The application 00311 // is responsible for deallocation of the list. If the <b>action</b> 00312 // input flag bit HTDIG_WORDLIST_COLLECTOR is not set, return a NULL 00313 // pointer. 00314 // 00315 inline List *GetResults() { return collectRes; } 00316 #ifndef SWIG 00317 //- 00318 // For debugging purposes. Returns the list of WordReference hit 00319 // during the search 00320 // process. Some of them match the searched key, some don't. 00321 // The application is responsible for deallocation of the list. 00322 // 00323 inline List *GetTraces() { return traceRes; } 00324 //- 00325 // For debugging purposes. Set the list of WordReference hit 00326 // during the search process. 00327 // 00328 inline void SetTraces(List* traceRes_arg) { traceRes = traceRes_arg; } 00329 #endif /* SWIG */ 00330 //- 00331 // Returns the last entry hit by the search. Only contains 00332 // a valid value if the last <i>WalkNext</i> or <i>WalkNextStep</i> 00333 // call was successfull (i.e. returned OK). 00334 // 00335 inline const WordReference& GetFound() { return found; } 00336 //- 00337 // Returns the status of the cursor which may be 00338 // OK or WORD_WALK_ATEND. 00339 // 00340 inline int GetStatus() const { return status & WORD_WALK_RESULT_MASK; } 00341 00342 #ifndef SWIG 00343 //- 00344 // Convert the whole structure to an ASCII string description. 00345 // Returns OK if successfull, NOTOK otherwise. 00346 // 00347 virtual int Get(String& bufferout) const = 0; 00348 #endif /* SWIG */ 00349 //- 00350 // Convert the whole structure to an ASCII string description 00351 // and return it. 00352 // 00353 inline String Get() const { String tmp; Get(tmp); return tmp; } 00354 00355 #ifndef SWIG 00356 protected: 00357 00358 //- 00359 // Protected method. Derived classes should use this function to initialize 00360 // the object if they do not call a WordCursor constructor in their own 00361 // constructutor. Initialization may occur after the object is created 00362 // and must occur before a <b>Walk*</b> method is called. See the 00363 // DESCRIPTION section for the semantics of the arguments. 00364 // Return OK on success, NOTOK on error. 00365 // 00366 virtual int Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object * ncallback_data, int naction) = 0; 00367 00368 00369 // 00370 // Input parameters 00371 // 00372 //- 00373 // Input data. The key to be searched, see DESCRIPTION for more information. 00374 // 00375 WordKey searchKey; 00376 // 00377 // Input data. What do do when a WordReference is found. 00378 // Can either be 00379 // HTDIG_WORDLIST_COLLECTOR WordReference found stored in collectRes 00380 // HTDIG_WORDLIST_WALKER callback is called for each WordReference found 00381 // 00382 int action; 00383 00384 // 00385 // Input data. Callback function called for each match found. 00386 // 00387 wordlist_walk_callback_t callback; 00388 // 00389 // Input data. Argument given to callback, contains arbitrary 00390 // caller defined data. 00391 // 00392 Object *callback_data; 00393 00394 // 00395 // Output parameters 00396 // 00397 // 00398 // Output data. List of WordReference found in the search. 00399 // 00400 List *collectRes; 00401 00402 //- 00403 // Output data. Last match found. Use GetFound() to retrieve it. 00404 // 00405 WordReference found; 00406 //- 00407 // Output data. WORD_WALK_ATEND if cursor is past last match, 00408 // OK otherwise. Use GetStatus() to retrieve it. 00409 // 00410 int status; 00411 00412 // 00413 // Debugging section. Do not use unless you know exactly what you do. 00414 // 00415 // 00416 // Collect everything found while searching (not necessarily matching) 00417 // 00418 List *traceRes; 00419 00420 // 00421 // Internal state 00422 // 00423 //- 00424 // The inverted index used by this cursor. 00425 // 00426 WordList *words; 00427 #endif /* SWIG */ 00428 }; 00429 00430 #endif /* _WordCursor_h_ */