hash_func.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1990, 1993
00009  *      Margo Seltzer.  All rights reserved.
00010  */
00011 /*
00012  * Copyright (c) 1990, 1993
00013  *      The Regents of the University of California.  All rights reserved.
00014  *
00015  * This code is derived from software contributed to Berkeley by
00016  * Margo Seltzer.
00017  *
00018  * Redistribution and use in source and binary forms, with or without
00019  * modification, are permitted provided that the following conditions
00020  * are met:
00021  * 1. Redistributions of source code must retain the above copyright
00022  *    notice, this list of conditions and the following disclaimer.
00023  * 2. Redistributions in binary form must reproduce the above copyright
00024  *    notice, this list of conditions and the following disclaimer in the
00025  *    documentation and/or other materials provided with the distribution.
00026  * 3. Neither the name of the University nor the names of its contributors
00027  *    may be used to endorse or promote products derived from this software
00028  *    without specific prior written permission.
00029  *
00030  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00031  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00032  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00033  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00034  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00035  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00036  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00037  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00038  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00039  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00040  * SUCH DAMAGE.
00041  */
00042 
00043 #include "config.h"
00044 
00045 #ifndef lint
00046 static const char revid[] = "$Id: hash__func_8c-source.html,v 1.1 2008/06/08 10:19:31 sebdiaz Exp $";
00047 #endif /* not lint */
00048 
00049 #ifndef NO_SYSTEM_INCLUDES
00050 #include <sys/types.h>
00051 #endif
00052 
00053 #include "db_int.h"
00054 #include "db_page.h"
00055 #include "hash.h"
00056 
00057 /*
00058  * CDB___ham_func2 --
00059  *      Phong Vo's linear congruential hash.
00060  *
00061  * PUBLIC: u_int32_t CDB___ham_func2 __P((const void *, u_int32_t));
00062  */
00063 #define DCHARHASH(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))
00064 
00065 u_int32_t
00066 CDB___ham_func2(key, len)
00067         const void *key;
00068         u_int32_t len;
00069 {
00070         const u_int8_t *e, *k;
00071         u_int32_t h;
00072         u_int8_t c;
00073 
00074         k = key;
00075         e = k + len;
00076         for (h = 0; k != e;) {
00077                 c = *k++;
00078                 if (!c && k > e)
00079                         break;
00080                 DCHARHASH(h, c);
00081         }
00082         return (h);
00083 }
00084 
00085 /*
00086  * CDB___ham_func3 --
00087  *      Ozan Yigit's original sdbm hash.
00088  *
00089  * Ugly, but fast.  Break the string up into 8 byte units.  On the first time
00090  * through the loop get the "leftover bytes" (strlen % 8).  On every other
00091  * iteration, perform 8 HASHC's so we handle all 8 bytes.  Essentially, this
00092  * saves us 7 cmp & branch instructions.
00093  *
00094  * PUBLIC: u_int32_t CDB___ham_func3 __P((const void *, u_int32_t));
00095  */
00096 u_int32_t
00097 CDB___ham_func3(key, len)
00098         const void *key;
00099         u_int32_t len;
00100 {
00101         const u_int8_t *k;
00102         u_int32_t n, loop;
00103 
00104         if (len == 0)
00105                 return (0);
00106 
00107 #define HASHC   n = *k++ + 65599 * n
00108         n = 0;
00109         k = key;
00110 
00111         loop = (len + 8 - 1) >> 3;
00112         switch (len & (8 - 1)) {
00113         case 0:
00114                 do {
00115                         HASHC;
00116         case 7:
00117                         HASHC;
00118         case 6:
00119                         HASHC;
00120         case 5:
00121                         HASHC;
00122         case 4:
00123                         HASHC;
00124         case 3:
00125                         HASHC;
00126         case 2:
00127                         HASHC;
00128         case 1:
00129                         HASHC;
00130                 } while (--loop);
00131         }
00132         return (n);
00133 }
00134 
00135 /*
00136  * CDB___ham_func4 --
00137  *      Chris Torek's hash function.  Although this function performs only
00138  *      slightly worse than CDB___ham_func5 on strings, it performs horribly on
00139  *      numbers.
00140  *
00141  * PUBLIC: u_int32_t CDB___ham_func4 __P((const void *, u_int32_t));
00142  */
00143 u_int32_t
00144 CDB___ham_func4(key, len)
00145         const void *key;
00146         u_int32_t len;
00147 {
00148         const u_int8_t *k;
00149         u_int32_t h, loop;
00150 
00151         if (len == 0)
00152                 return (0);
00153 
00154 #define HASH4a  h = (h << 5) - h + *k++;
00155 #define HASH4b  h = (h << 5) + h + *k++;
00156 #define HASH4   HASH4b
00157         h = 0;
00158         k = key;
00159 
00160         loop = (len + 8 - 1) >> 3;
00161         switch (len & (8 - 1)) {
00162         case 0:
00163                 do {
00164                         HASH4;
00165         case 7:
00166                         HASH4;
00167         case 6:
00168                         HASH4;
00169         case 5:
00170                         HASH4;
00171         case 4:
00172                         HASH4;
00173         case 3:
00174                         HASH4;
00175         case 2:
00176                         HASH4;
00177         case 1:
00178                         HASH4;
00179                 } while (--loop);
00180         }
00181         return (h);
00182 }
00183 
00184 /*
00185  * Fowler/Noll/Vo hash
00186  *
00187  * The basis of the hash algorithm was taken from an idea sent by email to the
00188  * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and
00189  * Glenn Fowler (gsf@research.att.com).  Landon Curt Noll (chongo@toad.com)
00190  * later improved on their algorithm.
00191  *
00192  * The magic is in the interesting relationship between the special prime
00193  * 16777619 (2^24 + 403) and 2^32 and 2^8.
00194  *
00195  * This hash produces the fewest collisions of any function that we've seen so
00196  * far, and works well on both numbers and strings.
00197  *
00198  * PUBLIC: u_int32_t CDB___ham_func5 __P((const void *, u_int32_t));
00199  */
00200 u_int32_t
00201 CDB___ham_func5(key, len)
00202         const void *key;
00203         u_int32_t len;
00204 {
00205         const u_int8_t *k, *e;
00206         u_int32_t h;
00207 
00208         k = key;
00209         e = k + len;
00210         for (h = 0; k < e; ++k) {
00211                 h *= 16777619;
00212                 h ^= *k;
00213         }
00214         return (h);
00215 }

Generated on Sun Jun 8 10:56:37 2008 for GNUmifluz by  doxygen 1.5.5