WordPermute.h

Go to the documentation of this file.
00001 //
00002 // Part of the ht://Dig package   <http://www.htdig.org/>
00003 // Copyright (c) 1999, 2000, 2001 The ht://Dig Group
00004 // For copyright details, see the file COPYING in your distribution
00005 // or the GNU General Public License version 2 or later
00006 // <http://www.gnu.org/copyleft/gpl.html>
00007 //
00008 // $Id: WordPermute_8h-source.html,v 1.1 2008/06/08 10:13:20 sebdiaz Exp $
00009 //
00010 //
00011 // NAME
00012 //
00013 // WordExclude specialization with proximity toggle
00014 //
00015 // SYNOPSIS
00016 //
00017 // #include <WordPermute.h>
00018 //
00019 // #define BITS 5
00020 //
00021 // WordPermute permute;
00022 // permute.Initialize(BITS, 0, 0, WORD_PERMUTE_PROXIMITY_TOGGLE);
00023 // while(permute.Next() == WORD_PERMUTE_OK)
00024 //    if(permute.Proximity()) ...
00025 //
00026 // DESCRIPTION
00027 //
00028 // In WORD_PERMUTE_PROXIMITY_TOGGLE mode each WordExclude permutation is
00029 // used twice by Next: once with the proximity flag set and once with the
00030 // proximity flag cleared. In that mode, if the length of the bit field (length
00031 // argument of Initialize) is less than or equal to 1, the proximity flag is always false.
00032 //
00033 //
00034 // END
00035 //
00036 
00037 #ifndef _WordPermute_h
00038 #define _WordPermute_h
00039 
00040 #include <WordExcludeMask.h>
00041 
00042 // Return values of the WordPermute methods: aliases of the WordExclude codes.
00043 // Next() compares the result of WordExcludeMask::Next() against these values.
00044 #define WORD_PERMUTE_OK         WORD_EXCLUDE_OK
00045 #define WORD_PERMUTE_END        WORD_EXCLUDE_END
00046 
00047 // Proximity modes accepted as the nuse_proximity argument of WordPermute::Initialize:
00048 // NO = proximity flag always cleared, TOGGLE = each permutation is issued with the
00049 // flag set and then with the flag cleared, ONLY = proximity flag always set.
00050 #define WORD_PERMUTE_PROXIMITY_NO       0
00051 #define WORD_PERMUTE_PROXIMITY_TOGGLE   1
00052 #define WORD_PERMUTE_PROXIMITY_ONLY     2
00053 
00054 //
00055 // Deals with word exclusion and proximity permutations for
00056 // the implementation of the Optional retrieval model.
00057 //
00058 class WordPermute : public WordExcludeMask {
00059 public:
00060   //-
00061   // The <b>nuse_proximity</b> may be set to the following:
00062   //
00063   // WORD_PERMUTE_PROXIMITY_NO so that the object behaves as
00064   // WordExcludeMask and Proximity() always return false.
00065   //
00066   // WORD_PERMUTE_PROXIMITY_TOGGLE so that each permutation is issued twice: 
00067   // once with the proximity flag set (Proximity() method) and once with
00068   // the proximity flag cleared. 
00069   //
00070   // WORD_PERMUTE_PROXIMITY_ONLY so that the object behaves as
00071   // WordExcludeMask and Proximity() always return true.
00072   //
00073   virtual inline int Initialize(unsigned int length, unsigned int ignore, unsigned int ignore_mask_arg, int nuse_proximity) {
00074     if(WordExcludeMask::Initialize(length, ignore, ignore_mask_arg, 0) != OK)
00075       return NOTOK;
00076 
00077     use_proximity = nuse_proximity;
00078     switch(use_proximity) {
00079     case WORD_PERMUTE_PROXIMITY_NO:
00080       proximity = 0;
00081       break;
00082     case WORD_PERMUTE_PROXIMITY_TOGGLE:
00083       //
00084       // Don't bother to try proximity search if only one word
00085       // is involved.
00086       //
00087       proximity = (WordExcludeMask::Maxi() - WordExcludeMask::ExcludedCount()) > 1;
00088       break;
00089     case WORD_PERMUTE_PROXIMITY_ONLY:
00090       proximity = 1;
00091       break;
00092     default:
00093       fprintf(stderr, "WordPermute::Initialize: unexpected use_proximity = %d\n", use_proximity);
00094       return NOTOK;
00095     }
00096     return OK;
00097   }
00098 
00099   //-
00100   // Return true if the proximity flag is set, false if it is 
00101   // cleared.
00102   //
00103   inline int Proximity() { return proximity; }
00104 
00105   //-
00106   // Return WORD_PERMUTE_PROXIMITY_NO, WORD_PERMUTE_PROXIMITY_TOGGLE or
00107   // WORD_PERMUTE_PROXIMITY_ONLY.
00108   //
00109   inline int UseProximity() { return use_proximity; }
00110 
00111   //-
00112   // Find the next permutation. If <b>WORD_PERMUTE_PROXIMITY_TOGGLE<b> was
00113   // specified in Initialize each permutation is issued twice (see
00114   // Proximity() to differentiate them), except when the mask 
00115   // only contains one non exluded bit (NotExcludeCount() <= 1).
00116   // In both case the last permutation with all bits excluded
00117   // (i.e. when NotExcludedCount() <= 0) is never returned because 
00118   // it is useless.
00119   // 
00120   virtual int Next() {
00121     if(WordExcludeMask::Maxi() <= 1)
00122       return WORD_PERMUTE_END;
00123 
00124     int ret = WORD_PERMUTE_OK;
00125     int check_useless = 0;
00126     if(use_proximity == WORD_PERMUTE_PROXIMITY_TOGGLE) {
00127       //
00128       // Move to next permutation as follows: 
00129       // exclude mask 1 + use proximity
00130       // exclude mask 1 + don't use proximity
00131       // exclude mask 2 + use proximity 
00132       // exclude mask 2 + don't use proximity
00133       // and so on.
00134       // If only one word is involved never use proximity.
00135       //
00136       if(proximity) {
00137         proximity = 0;
00138       } else {
00139         proximity = 1;
00140         if((ret = WordExcludeMask::Next()) == WORD_PERMUTE_OK) {
00141           //
00142           // Do not toggle proximity for only one non excluded word
00143           //
00144           if(NotExcludedCount() <= 1)
00145             proximity = 0;
00146           check_useless = 1;
00147         } else if(ret == WORD_PERMUTE_END)
00148           proximity = 0;
00149       }
00150     } else {
00151       ret = WordExcludeMask::Next();
00152       check_useless = 1;
00153     }
00154 
00155     if(check_useless && ret == WORD_PERMUTE_OK) {
00156       //
00157       // If no bits are ignored or all ignore_mask bits are set to
00158       // one, the last permutation has all exclude bits set, which
00159       // is useless. Just skip it and expect to be at the end of
00160       // all permutations.
00161       //
00162       if(NotExcludedCount() <= 0) {
00163         ret = WordExcludeMask::Next();
00164         if(ret != WORD_PERMUTE_END) {
00165           fprintf(stderr, "WordPermute::Next: expected WORD_PERMUTE_END\n");
00166           ret = NOTOK;
00167         }
00168       }
00169     }
00170 
00171     return ret;
00172   }
00173 
00174   //-
00175   // The semantic is the same as the Get method of Wordexclude
00176   // but a letter T is appended to the string if the proximity
00177   // flag is set, or F is appended to the string if the proximity
00178   // is clear.
00179   //
00180   virtual inline void Get(String& buffer) const {
00181     WordExcludeMask::Get(buffer);
00182     if(use_proximity == WORD_PERMUTE_PROXIMITY_TOGGLE)
00183       buffer << (proximity ? 'T' : 'F');
00184   }
00185 
00186   //-
00187   // The semantic is the same as the Get method of Wordexclude
00188   // but if the string end with a T the proximity flag is set
00189   // and if the string end with a F the proximity flag is cleared.
00190   //
00191   virtual inline int Set(const String& buffer) {
00192     if(buffer.length() < 1) {
00193       fprintf(stderr, "WordPermute::Set: buffer length < 1\n");
00194       return NOTOK;
00195     }
00196     int ret = OK;
00197     if(use_proximity == WORD_PERMUTE_PROXIMITY_TOGGLE) {
00198       if((ret = WordExcludeMask::Set(buffer.sub(0, buffer.length() - 1))) == OK)
00199         proximity = buffer.last() == 'T';
00200     } else {
00201       ret = WordExcludeMask::Set(buffer);
00202     }
00203 
00204     return ret;
00205   }
00206 
00207 protected:
00208   int use_proximity;
00209   int proximity;
00210 };
00211 
00212 #endif /* _WordPermute_h */

Generated on Sun Jun 8 10:56:40 2008 for GNUmifluz by  doxygen 1.5.5