hash_page.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1990, 1993, 1994
00009  *      Margo Seltzer.  All rights reserved.
00010  */
00011 /*
00012  * Copyright (c) 1990, 1993, 1994
00013  *      The Regents of the University of California.  All rights reserved.
00014  *
00015  * This code is derived from software contributed to Berkeley by
00016  * Margo Seltzer.
00017  *
00018  * Redistribution and use in source and binary forms, with or without
00019  * modification, are permitted provided that the following conditions
00020  * are met:
00021  * 1. Redistributions of source code must retain the above copyright
00022  *    notice, this list of conditions and the following disclaimer.
00023  * 2. Redistributions in binary form must reproduce the above copyright
00024  *    notice, this list of conditions and the following disclaimer in the
00025  *    documentation and/or other materials provided with the distribution.
00026  * 3. Neither the name of the University nor the names of its contributors
00027  *    may be used to endorse or promote products derived from this software
00028  *    without specific prior written permission.
00029  *
00030  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00031  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00032  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00033  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00034  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00035  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00036  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00037  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00038  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00039  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00040  * SUCH DAMAGE.
00041  */
00042 
00043 #include "config.h"
00044 
00045 #ifndef lint
00046 static const char revid[] = "$Id: hash__page_8c-source.html,v 1.1 2008/06/08 10:19:34 sebdiaz Exp $";
00047 #endif /* not lint */
00048 
00049 /*
00050  * PACKAGE:  hashing
00051  *
00052  * DESCRIPTION:
00053  *      Page manipulation for hashing package.
00054  *
00055  * ROUTINES:
00056  *
00057  * External
00058  *      __get_page
00059  *      __add_ovflpage
00060  *      __overflow_page
00061  * Internal
00062  *      open_temp
00063  */
00064 
00065 #ifndef NO_SYSTEM_INCLUDES
00066 #include <sys/types.h>
00067 
00068 #include <errno.h>
00069 #include <string.h>
00070 #endif
00071 
00072 #include "db_int.h"
00073 #include "db_page.h"
00074 #include "db_shash.h"
00075 #include "hash.h"
00076 #include "lock.h"
00077 #include "txn.h"
00078 
00079 /*
00080  * PUBLIC: int CDB___ham_item __P((DBC *, db_lockmode_t, db_pgno_t *));
00081  */
00082 int
00083 CDB___ham_item(dbc, mode, pgnop)
00084         DBC *dbc;
00085         db_lockmode_t mode;
00086         db_pgno_t *pgnop;
00087 {
00088         DB *dbp;
00089         HASH_CURSOR *hcp;
00090         db_pgno_t next_pgno;
00091         int ret;
00092 
00093         dbp = dbc->dbp;
00094         hcp = (HASH_CURSOR *)dbc->internal;
00095 
00096         if (F_ISSET(hcp, H_DELETED)) {
00097                 CDB___db_err(dbp->dbenv, "Attempt to return a deleted item");
00098                 return (EINVAL);
00099         }
00100         F_CLR(hcp, H_OK | H_NOMORE);
00101 
00102         /* Check if we need to get a page for this cursor. */
00103         if ((ret = CDB___ham_get_cpage(dbc, mode)) != 0)
00104                 return (ret);
00105 
00106 recheck:
00107         /* Check if we are looking for space in which to insert an item. */
00108         if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID
00109             && hcp->seek_size < P_FREESPACE(hcp->page))
00110                 hcp->seek_found_page = hcp->pgno;
00111 
00112         /* Check for off-page duplicates. */
00113         if (hcp->indx < NUM_ENT(hcp->page) &&
00114             HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) {
00115                 memcpy(pgnop,
00116                     HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)),
00117                     sizeof(db_pgno_t));
00118                 F_SET(hcp, H_OK);
00119                 return (0);
00120         }
00121 
00122         /* Check if we need to go on to the next page. */
00123         if (F_ISSET(hcp, H_ISDUP))
00124                 /*
00125                  * ISDUP is set, and offset is at the beginning of the datum.
00126                  * We need to grab the length of the datum, then set the datum
00127                  * pointer to be the beginning of the datum.
00128                  */
00129                 memcpy(&hcp->dup_len,
00130                     HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)) +
00131                     hcp->dup_off, sizeof(db_indx_t));
00132 
00133         if (hcp->indx >= (db_indx_t)NUM_ENT(hcp->page)) {
00134                 /* Fetch next page. */
00135                 if (NEXT_PGNO(hcp->page) == PGNO_INVALID) {
00136                         F_SET(hcp, H_NOMORE);
00137                         return (DB_NOTFOUND);
00138                 }
00139                 next_pgno = NEXT_PGNO(hcp->page);
00140                 hcp->indx = 0;
00141                 if ((ret = CDB___ham_next_cpage(dbc, next_pgno, 0)) != 0)
00142                         return (ret);
00143                 goto recheck;
00144         }
00145 
00146         F_SET(hcp, H_OK);
00147         return (0);
00148 }
00149 
00150 /*
00151  * PUBLIC: int CDB___ham_item_reset __P((DBC *));
00152  */
00153 int
00154 CDB___ham_item_reset(dbc)
00155         DBC *dbc;
00156 {
00157         HASH_CURSOR *hcp;
00158         DB *dbp;
00159         int ret;
00160 
00161         ret = 0;
00162         dbp = dbc->dbp;
00163         hcp = (HASH_CURSOR *)dbc->internal;
00164         if (hcp->page != NULL)
00165                 ret = CDB___ham_put_page(dbp, hcp->page, 0);
00166 
00167         CDB___ham_item_init(dbc);
00168         return (ret);
00169 }
00170 
00171 /*
00172  * PUBLIC: void CDB___ham_item_init __P((DBC *));
00173  */
00174 void
00175 CDB___ham_item_init(dbc)
00176         DBC *dbc;
00177 {
00178         HASH_CURSOR *hcp;
00179 
00180         hcp = (HASH_CURSOR *)dbc->internal;
00181         /*
00182          * If this cursor still holds any locks, we must
00183          * release them if we are not running with transactions.
00184          */
00185         if (hcp->lock.off != LOCK_INVALID && dbc->txn == NULL)
00186             (void)CDB_lock_put(dbc->dbp->dbenv, &hcp->lock);
00187 
00188         /*
00189          * The following fields must *not* be initialized here
00190          * because they may have meaning across inits.
00191          *      hlock, hdr, split_buf, stats
00192          */
00193         hcp->bucket = BUCKET_INVALID;
00194         hcp->lbucket = BUCKET_INVALID;
00195         hcp->lock.off = LOCK_INVALID;
00196         hcp->lock_mode = DB_LOCK_NG;
00197         hcp->dup_off = 0;
00198         hcp->dup_len = 0;
00199         hcp->dup_tlen = 0;
00200         hcp->seek_size = 0;
00201         hcp->seek_found_page = PGNO_INVALID;
00202         hcp->flags = 0;
00203 
00204         hcp->pgno = PGNO_INVALID;
00205         hcp->indx = NDX_INVALID;
00206         hcp->page = NULL;
00207 }
00208 
00209 /*
00210  * Returns the last item in a bucket.
00211  *
00212  * PUBLIC: int CDB___ham_item_last __P((DBC *, db_lockmode_t, db_pgno_t *));
00213  */
00214 int
00215 CDB___ham_item_last(dbc, mode, pgnop)
00216         DBC *dbc;
00217         db_lockmode_t mode;
00218         db_pgno_t *pgnop;
00219 {
00220         HASH_CURSOR *hcp;
00221         int ret;
00222 
00223         hcp = (HASH_CURSOR *)dbc->internal;
00224         if ((ret = CDB___ham_item_reset(dbc)) != 0)
00225                 return (ret);
00226 
00227         hcp->bucket = hcp->hdr->max_bucket;
00228         hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
00229         F_SET(hcp, H_OK);
00230         return (CDB___ham_item_prev(dbc, mode, pgnop));
00231 }
00232 
00233 /*
00234  * PUBLIC: int CDB___ham_item_first __P((DBC *, db_lockmode_t, db_pgno_t *));
00235  */
00236 int
00237 CDB___ham_item_first(dbc, mode, pgnop)
00238         DBC *dbc;
00239         db_lockmode_t mode;
00240         db_pgno_t *pgnop;
00241 {
00242         HASH_CURSOR *hcp;
00243         int ret;
00244 
00245         hcp = (HASH_CURSOR *)dbc->internal;
00246         if ((ret = CDB___ham_item_reset(dbc)) != 0)
00247                 return (ret);
00248         F_SET(hcp, H_OK);
00249         hcp->bucket = 0;
00250         hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
00251         return (CDB___ham_item_next(dbc, mode, pgnop));
00252 }
00253 
00254 /*
00255  * CDB___ham_item_prev --
00256  *      Returns a pointer to key/data pair on a page.  In the case of
00257  *      bigkeys, just returns the page number and index of the bigkey
00258  *      pointer pair.
00259  *
00260  * PUBLIC: int CDB___ham_item_prev __P((DBC *, db_lockmode_t, db_pgno_t *));
00261  */
00262 int
00263 CDB___ham_item_prev(dbc, mode, pgnop)
00264         DBC *dbc;
00265         db_lockmode_t mode;
00266         db_pgno_t *pgnop;
00267 {
00268         DB *dbp;
00269         HASH_CURSOR *hcp;
00270         db_pgno_t next_pgno;
00271         int ret;
00272 
00273         dbp = dbc->dbp;
00274         hcp = (HASH_CURSOR *)dbc->internal;
00275         /*
00276          * There are 5 cases for backing up in a hash file.
00277          * Case 1: In the middle of a page, no duplicates, just dec the index.
00278          * Case 2: In the middle of a duplicate set, back up one.
00279          * Case 3: At the beginning of a duplicate set, get out of set and
00280          *      back up to next key.
00281          * Case 4: At the beginning of a page; go to previous page.
00282          * Case 5: At the beginning of a bucket; go to prev bucket.
00283          */
00284         F_CLR(hcp, H_OK | H_NOMORE | H_DELETED);
00285 
00286         if ((ret = CDB___ham_get_cpage(dbc, mode)) != 0)
00287                 return (ret);
00288 
00289         /*
00290          * First handle the duplicates.  Either you'll get the key here
00291          * or you'll exit the duplicate set and drop into the code below
00292          * to handle backing up through keys.
00293          */
00294         if (!F_ISSET(hcp, H_NEXT_NODUP) && F_ISSET(hcp, H_ISDUP)) {
00295                 if (HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) {
00296                         memcpy(pgnop,
00297                             HOFFDUP_PGNO(H_PAIRDATA(hcp->page, hcp->indx)),
00298                             sizeof(db_pgno_t));
00299                         F_SET(hcp, H_OK);
00300                         return (0);
00301                 }
00302 
00303                 /* Duplicates are on-page. */
00304                 if (hcp->dup_off != 0) {
00305                         memcpy(&hcp->dup_len, HKEYDATA_DATA(
00306                             H_PAIRDATA(hcp->page, hcp->indx))
00307                             + hcp->dup_off - sizeof(db_indx_t),
00308                             sizeof(db_indx_t));
00309                         hcp->dup_off -=
00310                             DUP_SIZE(hcp->dup_len);
00311                         return (CDB___ham_item(dbc, mode, pgnop));
00312                 }
00313         }
00314 
00315         /*
00316          * If we get here, we are not in a duplicate set, and just need
00317          * to back up the cursor.  There are still three cases:
00318          * midpage, beginning of page, beginning of bucket.
00319          */
00320 
00321         if (F_ISSET(hcp, H_DUPONLY)) {
00322                 F_CLR(hcp, H_OK);
00323                 F_SET(hcp, H_NOMORE);
00324                 return (0);
00325         } else
00326                 /*
00327                  * We are no longer in a dup set;  flag this so the dup code
00328                  * will reinitialize should we stumble upon another one.
00329                  */
00330                 F_CLR(hcp, H_ISDUP);
00331 
00332         if (hcp->indx == 0) {           /* Beginning of page. */
00333                 hcp->pgno = PREV_PGNO(hcp->page);
00334                 if (hcp->pgno == PGNO_INVALID) {
00335                         /* Beginning of bucket. */
00336                         F_SET(hcp, H_NOMORE);
00337                         return (DB_NOTFOUND);
00338                 } else if ((ret =
00339                     CDB___ham_next_cpage(dbc, hcp->pgno, 0)) != 0)
00340                         return (ret);
00341                 else
00342                         hcp->indx = NUM_ENT(hcp->page);
00343         }
00344 
00345         /*
00346          * Either we've got the cursor set up to be decremented, or we
00347          * have to find the end of a bucket.
00348          */
00349         if (hcp->indx == NDX_INVALID) {
00350                 DB_ASSERT(hcp->page != NULL);
00351 
00352                 hcp->indx = NUM_ENT(hcp->page);
00353                 for (next_pgno = NEXT_PGNO(hcp->page);
00354                     next_pgno != PGNO_INVALID;
00355                     next_pgno = NEXT_PGNO(hcp->page)) {
00356                         if ((ret = CDB___ham_next_cpage(dbc, next_pgno, 0)) != 0)
00357                                 return (ret);
00358                         hcp->indx = NUM_ENT(hcp->page);
00359                 }
00360 
00361                 if (hcp->indx == 0) {
00362                         /* Bucket was empty. */
00363                         F_SET(hcp, H_NOMORE);
00364                         return (DB_NOTFOUND);
00365                 }
00366         }
00367 
00368         hcp->indx -= 2;
00369 
00370         return (CDB___ham_item(dbc, mode, pgnop));
00371 }
00372 
00373 /*
00374  * Sets the cursor to the next key/data pair on a page.
00375  *
00376  * PUBLIC: int CDB___ham_item_next __P((DBC *, db_lockmode_t, db_pgno_t *));
00377  */
00378 int
00379 CDB___ham_item_next(dbc, mode, pgnop)
00380         DBC *dbc;
00381         db_lockmode_t mode;
00382         db_pgno_t *pgnop;
00383 {
00384         HASH_CURSOR *hcp;
00385         int ret;
00386 
00387         hcp = (HASH_CURSOR *)dbc->internal;
00388 
00389         if ((ret = CDB___ham_get_cpage(dbc, mode)) != 0)
00390                 return (ret);
00391 
00392         /*
00393          * Deleted on-page duplicates are a weird case. If we delete the last
00394          * one, then our cursor is at the very end of a duplicate set and
00395          * we actually need to go on to the next key.
00396          */
00397         if (F_ISSET(hcp, H_DELETED)) {
00398                 if (hcp->indx != NDX_INVALID &&
00399                     F_ISSET(hcp, H_ISDUP) &&
00400                     HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx))
00401                         == H_DUPLICATE && hcp->dup_tlen == hcp->dup_off) {
00402                         if (F_ISSET(hcp, H_DUPONLY)) {
00403                                 F_CLR(hcp, H_OK);
00404                                 F_SET(hcp, H_NOMORE);
00405                                 return (0);
00406                         } else {
00407                                 F_CLR(hcp, H_ISDUP);
00408                                 hcp->indx += 2;
00409                         }
00410                 } else if (!F_ISSET(hcp, H_ISDUP) && F_ISSET(hcp, H_DUPONLY)) {
00411                         F_CLR(hcp, H_OK);
00412                         F_SET(hcp, H_NOMORE);
00413                         return (0);
00414                 } else if (F_ISSET(hcp, H_ISDUP) &&
00415                     F_ISSET(hcp, H_NEXT_NODUP)) {
00416                         F_CLR(hcp, H_ISDUP);
00417                         hcp->indx += 2;
00418                 }
00419                 F_CLR(hcp, H_DELETED);
00420         } else if (hcp->indx == NDX_INVALID) {
00421                 hcp->indx = 0;
00422                 F_CLR(hcp, H_ISDUP);
00423         } else if (F_ISSET(hcp, H_NEXT_NODUP)) {
00424                 hcp->indx += 2;
00425                 F_CLR(hcp, H_ISDUP);
00426         } else if (F_ISSET(hcp, H_ISDUP) && hcp->dup_tlen != 0) {
00427                 if (hcp->dup_off + DUP_SIZE(hcp->dup_len) >=
00428                     hcp->dup_tlen && F_ISSET(hcp, H_DUPONLY)) {
00429                         F_CLR(hcp, H_OK);
00430                         F_SET(hcp, H_NOMORE);
00431                         return (0);
00432                 }
00433                 hcp->dup_off += DUP_SIZE(hcp->dup_len);
00434                 if (hcp->dup_off >= hcp->dup_tlen) {
00435                         F_CLR(hcp, H_ISDUP);
00436                         hcp->indx += 2;
00437                 }
00438         } else if (F_ISSET(hcp, H_DUPONLY)) {
00439                 F_CLR(hcp, H_OK);
00440                 F_SET(hcp, H_NOMORE);
00441                 return (0);
00442         } else {
00443                 hcp->indx += 2;
00444                 F_CLR(hcp, H_ISDUP);
00445         }
00446 
00447         return (CDB___ham_item(dbc, mode, pgnop));
00448 }
00449 
00450 /*
00451  * PUBLIC: void CDB___ham_putitem __P((PAGE *p, const DBT *, int));
00452  *
00453  * This is a little bit sleazy in that we're overloading the meaning
00454  * of the H_OFFPAGE type here.  When we recover deletes, we have the
00455  * entire entry instead of having only the DBT, so we'll pass type
00456  * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
00457  * an H_KEYDATA around it.
00458  */
00459 void
00460 CDB___ham_putitem(p, dbt, type)
00461         PAGE *p;
00462         const DBT *dbt;
00463         int type;
00464 {
00465         u_int16_t n, off;
00466 
00467         n = NUM_ENT(p);
00468 
00469         /* Put the item element on the page. */
00470         if (type == H_OFFPAGE) {
00471                 off = HOFFSET(p) - dbt->size;
00472                 HOFFSET(p) = p->inp[n] = off;
00473                 memcpy(P_ENTRY(p, n), dbt->data, dbt->size);
00474         } else {
00475                 off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
00476                 HOFFSET(p) = p->inp[n] = off;
00477                 PUT_HKEYDATA(P_ENTRY(p, n), dbt->data, dbt->size, type);
00478         }
00479 
00480         /* Adjust page info. */
00481         NUM_ENT(p) += 1;
00482 }
00483 
00484 /*
00485  * PUBLIC: void CDB___ham_reputpair
00486  * PUBLIC:    __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *));
00487  *
00488  * This is a special case to restore a key/data pair to its original
00489  * location during recovery.  We are guaranteed that the pair fits
00490  * on the page and is not the last pair on the page (because if it's
00491  * the last pair, the normal insert works).
00492  */
00493 void
00494 CDB___ham_reputpair(p, psize, ndx, key, data)
00495         PAGE *p;
00496         u_int32_t psize, ndx;
00497         const DBT *key, *data;
00498 {
00499         db_indx_t i, movebytes, newbytes;
00500         u_int8_t *from;
00501 
00502         /* First shuffle the existing items up on the page.  */
00503         movebytes =
00504             (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 2)]) - HOFFSET(p);
00505         newbytes = key->size + data->size;
00506         from = (u_int8_t *)p + HOFFSET(p);
00507         memmove(from - newbytes, from, movebytes);
00508 
00509         /*
00510          * Adjust the indices and move them up 2 spaces. Note that we
00511          * have to check the exit condition inside the loop just in case
00512          * we are dealing with index 0 (db_indx_t's are unsigned).
00513          */
00514         for (i = NUM_ENT(p) - 1; ; i-- ) {
00515                 p->inp[i + 2] = p->inp[i] - newbytes;
00516                 if (i == H_KEYINDEX(ndx))
00517                         break;
00518         }
00519 
00520         /* Put the key and data on the page. */
00521         p->inp[H_KEYINDEX(ndx)] =
00522             (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 2)]) - key->size;
00523         p->inp[H_DATAINDEX(ndx)] = p->inp[H_KEYINDEX(ndx)] - data->size;
00524         memcpy(P_ENTRY(p, H_KEYINDEX(ndx)), key->data, key->size);
00525         memcpy(P_ENTRY(p, H_DATAINDEX(ndx)), data->data, data->size);
00526 
00527         /* Adjust page info. */
00528         HOFFSET(p) -= newbytes;
00529         NUM_ENT(p) += 2;
00530 }
00531 
00532 /*
00533  * PUBLIC: int CDB___ham_del_pair __P((DBC *, int));
00534  */
00535 int
00536 CDB___ham_del_pair(dbc, reclaim_page)
00537         DBC *dbc;
00538         int reclaim_page;
00539 {
00540         DB *dbp;
00541         HASH_CURSOR *hcp;
00542         DBT data_dbt, key_dbt;
00543         DB_ENV *dbenv;
00544         DB_LSN new_lsn, *n_lsn, tmp_lsn;
00545         PAGE *p;
00546         db_indx_t ndx;
00547         db_pgno_t chg_pgno, pgno;
00548         int ret, t_ret;
00549 
00550         dbp = dbc->dbp;
00551         hcp = (HASH_CURSOR *)dbc->internal;
00552 
00553         dbenv = dbp->dbenv;
00554         ndx = hcp->indx;
00555 
00556         if (hcp->page == NULL &&
00557             (ret = CDB___ham_get_page(dbp, hcp->pgno, (PAGE **)&hcp->page)) != 0)
00558                 return (ret);
00559         p = hcp->page;
00560 
00561         /*
00562          * We optimize for the normal case which is when neither the key nor
00563          * the data are large.  In this case, we write a single log record
00564          * and do the delete.  If either is large, we'll call __big_delete
00565          * to remove the big item and then update the page to remove the
00566          * entry referring to the big item.
00567          */
00568         ret = 0;
00569         if (HPAGE_PTYPE(H_PAIRKEY(p, ndx)) == H_OFFPAGE) {
00570                 memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(p, H_KEYINDEX(ndx))),
00571                     sizeof(db_pgno_t));
00572                 ret = CDB___db_doff(dbc, pgno);
00573         }
00574 
00575         if (ret == 0)
00576                 switch (HPAGE_PTYPE(H_PAIRDATA(p, ndx))) {
00577                 case H_OFFPAGE:
00578                         memcpy(&pgno,
00579                             HOFFPAGE_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
00580                             sizeof(db_pgno_t));
00581                         ret = CDB___db_doff(dbc, pgno);
00582                         break;
00583                 case H_OFFDUP:
00584                 case H_DUPLICATE:
00585                         /*
00586                          * If we delete a pair that is/was a duplicate, then
00587                          * we had better clear the flag so that we update the
00588                          * cursor appropriately.
00589                          */
00590                         F_CLR(hcp, H_ISDUP);
00591                         break;
00592                 }
00593 
00594         if (ret)
00595                 return (ret);
00596 
00597         /* Now log the delete off this page. */
00598         if (DB_LOGGING(dbc)) {
00599                 key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx));
00600                 key_dbt.size = LEN_HITEM(p, dbp->pgsize, H_KEYINDEX(ndx));
00601                 data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx));
00602                 data_dbt.size = LEN_HITEM(p, dbp->pgsize, H_DATAINDEX(ndx));
00603 
00604                 if ((ret = CDB___ham_insdel_log(dbenv,
00605                     dbc->txn, &new_lsn, 0, DELPAIR,
00606                     dbp->log_fileid, PGNO(p), (u_int32_t)ndx,
00607                     &LSN(p), &key_dbt, &data_dbt)) != 0)
00608                         return (ret);
00609 
00610                 /* Move lsn onto page. */
00611                 LSN(p) = new_lsn;
00612         }
00613 
00614         CDB___ham_dpair(dbp, p, ndx);
00615 
00616         /*
00617          * If we are locking, we will not maintain this, because it is
00618          * a hot spot.
00619          *
00620          * XXX
00621          * Perhaps we can retain incremental numbers and apply them later.
00622          */
00623         if (!STD_LOCKING(dbc))
00624                 --hcp->hdr->nelem;
00625 
00626         /*
00627          * If we need to reclaim the page, then check if the page is empty.
00628          * There are two cases.  If it's empty and it's not the first page
00629          * in the bucket (i.e., the bucket page) then we can simply remove
00630          * it. If it is the first chain in the bucket, then we need to copy
00631          * the second page into it and remove the second page.
00632          */
00633         if (reclaim_page && NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID &&
00634             NEXT_PGNO(p) != PGNO_INVALID) {
00635                 PAGE *n_pagep, *nn_pagep;
00636                 db_pgno_t tmp_pgno;
00637 
00638                 /*
00639                  * First page in chain is empty and we know that there
00640                  * are more pages in the chain.
00641                  */
00642                 if ((ret =
00643                     CDB___ham_get_page(dbp, NEXT_PGNO(p), &n_pagep)) != 0)
00644                         return (ret);
00645 
00646                 if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
00647                         if ((ret =
00648                             CDB___ham_get_page(dbp, NEXT_PGNO(n_pagep),
00649                             &nn_pagep)) != 0) {
00650                                 (void) CDB___ham_put_page(dbp, n_pagep, 0);
00651                                 return (ret);
00652                         }
00653                 }
00654 
00655                 if (DB_LOGGING(dbc)) {
00656                         key_dbt.data = n_pagep;
00657                         key_dbt.size = dbp->pgsize;
00658                         if ((ret = CDB___ham_copypage_log(dbenv,
00659                             dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(p),
00660                             &LSN(p), PGNO(n_pagep), &LSN(n_pagep),
00661                             NEXT_PGNO(n_pagep),
00662                             NEXT_PGNO(n_pagep) == PGNO_INVALID ? NULL :
00663                             &LSN(nn_pagep), &key_dbt)) != 0)
00664                                 return (ret);
00665 
00666                         /* Move lsn onto page. */
00667                         LSN(p) = new_lsn;       /* Structure assignment. */
00668                         LSN(n_pagep) = new_lsn;
00669                         if (NEXT_PGNO(n_pagep) != PGNO_INVALID)
00670                                 LSN(nn_pagep) = new_lsn;
00671                 }
00672                 if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
00673                         PREV_PGNO(nn_pagep) = PGNO(p);
00674                         (void)CDB___ham_put_page(dbp, nn_pagep, 1);
00675                 }
00676 
00677                 tmp_pgno = PGNO(p);
00678                 tmp_lsn = LSN(p);
00679                 memcpy(p, n_pagep, dbp->pgsize);
00680                 PGNO(p) = tmp_pgno;
00681                 LSN(p) = tmp_lsn;
00682                 PREV_PGNO(p) = PGNO_INVALID;
00683 
00684                 /*
00685                  * We need to update this cursor and possibly all others.
00686                  * The ordering of this is critical to avoid marking valid
00687                  * entries as deleted.
00688                  * First, we update all the relevant cursors to reflect
00689                  * this deletion.
00690                  * Second, we update all the cursors to reflect the fact
00691                  * that some other entries changed pages.
00692                  * Finally, we advance this cursor to the next page.
00693                  */
00694                 CDB___ham_c_update(dbc, PGNO(p), 0, 0, 0);
00695                 CDB___ham_c_chgpg(dbc,
00696                     PGNO(n_pagep), NDX_INVALID, PGNO(p), NDX_INVALID);
00697                 hcp->indx = 0;
00698                 hcp->pgno = PGNO(p);
00699                 F_SET(hcp, H_DELETED);
00700                 if ((ret = CDB___ham_dirty_page(dbp, p)) != 0 ||
00701                     (ret = CDB___db_free(dbc, n_pagep)) != 0)
00702                         return (ret);
00703                 F_CLR(hcp, H_OK);
00704                 return (ret);
00705         } else if (reclaim_page &&
00706             NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) {
00707                 PAGE *n_pagep, *p_pagep;
00708 
00709                 if ((ret =
00710                     CDB___ham_get_page(dbp, PREV_PGNO(p), &p_pagep)) != 0)
00711                         return (ret);
00712 
00713                 if (NEXT_PGNO(p) != PGNO_INVALID) {
00714                         if ((ret = CDB___ham_get_page(dbp,
00715                             NEXT_PGNO(p), &n_pagep)) != 0) {
00716                                 (void)CDB___ham_put_page(dbp, p_pagep, 0);
00717                                 return (ret);
00718                         }
00719                         n_lsn = &LSN(n_pagep);
00720                 } else {
00721                         n_pagep = NULL;
00722                         n_lsn = NULL;
00723                 }
00724 
00725                 NEXT_PGNO(p_pagep) = NEXT_PGNO(p);
00726                 if (n_pagep != NULL)
00727                         PREV_PGNO(n_pagep) = PGNO(p_pagep);
00728 
00729                 if (DB_LOGGING(dbc)) {
00730                         if ((ret = CDB___ham_newpage_log(dbenv,
00731                             dbc->txn, &new_lsn, 0, DELOVFL,
00732                             dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep),
00733                             PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
00734                                 return (ret);
00735 
00736                         /* Move lsn onto page. */
00737                         LSN(p_pagep) = new_lsn; /* Structure assignment. */
00738                         if (n_pagep)
00739                                 LSN(n_pagep) = new_lsn;
00740                         LSN(p) = new_lsn;
00741                 }
00742                 hcp->pgno = NEXT_PGNO(p);
00743                 hcp->indx = 0;
00744                 /*
00745                  * Since we are about to delete the cursor page and we have
00746                  * just moved the cursor, we need to make sure that the
00747                  * old page pointer isn't left hanging around in the cursor.
00748                  */
00749                 hcp->page = NULL;
00750                 chg_pgno = PGNO(p);
00751                 ret = CDB___db_free(dbc, p);
00752                 if ((t_ret = CDB___ham_put_page(dbp, p_pagep, 1)) != 0 && ret == 0)
00753                         ret = t_ret;
00754                 if (n_pagep != NULL &&
00755                     (t_ret = CDB___ham_put_page(dbp, n_pagep, 1)) != 0 && ret == 0)
00756                         ret = t_ret;
00757                 if (ret != 0)
00758                         return (ret);
00759         } else {
00760                 /*
00761                  * Mark item deleted so that we don't try to return it, and
00762                  * so that we update the cursor correctly on the next call
00763                  * to next.
00764                  */
00765                 F_SET(hcp, H_DELETED);
00766                 chg_pgno = hcp->pgno;
00767                 ret = CDB___ham_dirty_page(dbp, p);
00768         }
00769         CDB___ham_c_update(dbc, chg_pgno, 0, 0, 0);
00770 
00771         F_CLR(hcp, H_OK);
00772         return (ret);
00773 }
00774 
00775 /*
00776  * CDB___ham_replpair --
00777  *      Given the key data indicated by the cursor, replace part/all of it
00778  *      according to the fields in the dbt.
00779  *
00780  * PUBLIC: int CDB___ham_replpair __P((DBC *, DBT *, u_int32_t));
00781  */
00782 int
00783 CDB___ham_replpair(dbc, dbt, make_dup)
00784         DBC *dbc;
00785         DBT *dbt;
00786         u_int32_t make_dup;
00787 {
00788         DB *dbp;
00789         HASH_CURSOR *hcp;
00790         DBT old_dbt, tdata, tmp;
00791         DB_LSN  new_lsn;
00792         int32_t change;                 /* XXX: Possible overflow. */
00793         u_int32_t dup, len, memsize;
00794         int is_big, ret, type;
00795         u_int8_t *beg, *dest, *end, *hk, *src;
00796         void *memp;
00797 
00798         /*
00799          * Big item replacements are handled in generic code.
00800          * Items that fit on the current page fall into 4 classes.
00801          * 1. On-page element, same size
00802          * 2. On-page element, new is bigger (fits)
00803          * 3. On-page element, new is bigger (does not fit)
00804          * 4. On-page element, old is bigger
00805          * Numbers 1, 2, and 4 are essentially the same (and should
00806          * be the common case).  We handle case 3 as a delete and
00807          * add.
00808          */
00809         dbp = dbc->dbp;
00810         hcp = (HASH_CURSOR *)dbc->internal;
00811 
00812         /*
00813          * We need to compute the number of bytes that we are adding or
00814          * removing from the entry.  Normally, we can simply substract
00815          * the number of bytes we are replacing (dbt->dlen) from the
00816          * number of bytes we are inserting (dbt->size).  However, if
00817          * we are doing a partial put off the end of a record, then this
00818          * formula doesn't work, because we are essentially adding
00819          * new bytes.
00820          */
00821         change = dbt->size - dbt->dlen;
00822 
00823         hk = H_PAIRDATA(hcp->page, hcp->indx);
00824         is_big = HPAGE_PTYPE(hk) == H_OFFPAGE;
00825 
00826         if (is_big)
00827                 memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
00828         else
00829                 len = LEN_HKEYDATA(hcp->page,
00830                     dbp->pgsize, H_DATAINDEX(hcp->indx));
00831 
00832         if (dbt->doff + dbt->dlen > len)
00833                 change += dbt->doff + dbt->dlen - len;
00834 
00835         if (change > (int32_t)P_FREESPACE(hcp->page) || is_big) {
00836                 /*
00837                  * Case 3 -- two subcases.
00838                  * A. This is not really a partial operation, but an overwrite.
00839                  *    Simple del and add works.
00840                  * B. This is a partial and we need to construct the data that
00841                  *    we are really inserting (yuck).
00842                  * In both cases, we need to grab the key off the page (in
00843                  * some cases we could do this outside of this routine; for
00844                  * cleanliness we do it here.  If you happen to be on a big
00845                  * key, this could be a performance hit).
00846                  */
00847                 memset(&tmp, 0, sizeof(tmp));
00848                 if ((ret =
00849                     CDB___db_ret(dbp, hcp->page, H_KEYINDEX(hcp->indx),
00850                     &tmp, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
00851                         return (ret);
00852 
00853                 /* Preserve duplicate info. */
00854                 dup = F_ISSET(hcp, H_ISDUP);
00855                 if (dbt->doff == 0 && dbt->dlen == len) {
00856                         ret = CDB___ham_del_pair(dbc, 0);
00857                         if (ret == 0)
00858                             ret = CDB___ham_add_el(dbc,
00859                                 &tmp, dbt, dup ? H_DUPLICATE : H_KEYDATA);
00860                 } else {                                        /* Case B */
00861                         type = HPAGE_PTYPE(hk) != H_OFFPAGE ?
00862                             HPAGE_PTYPE(hk) : H_KEYDATA;
00863                         memset(&tdata, 0, sizeof(tdata));
00864                         memp = NULL;
00865                         memsize = 0;
00866                         if ((ret = CDB___db_ret(dbp, hcp->page,
00867                             H_DATAINDEX(hcp->indx), &tdata, &memp, &memsize))
00868                             != 0)
00869                                 goto err;
00870 
00871                         /* Now we can delete the item. */
00872                         if ((ret = CDB___ham_del_pair(dbc, 0)) != 0) {
00873                                 CDB___os_free(memp, memsize);
00874                                 goto err;
00875                         }
00876 
00877                         /* Now shift old data around to make room for new. */
00878                         if (change > 0) {
00879                                  if ((ret = CDB___os_realloc(dbp->dbenv,
00880                                      tdata.size + change,
00881                                      NULL, &tdata.data)) != 0)
00882                                         return (ret);
00883                                 memp = tdata.data;
00884                                 memsize = tdata.size + change;
00885                                 memset((u_int8_t *)tdata.data + tdata.size,
00886                                     0, change);
00887                         }
00888                         end = (u_int8_t *)tdata.data + tdata.size;
00889 
00890                         src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen;
00891                         if (src < end && tdata.size > dbt->doff + dbt->dlen) {
00892                                 len = tdata.size - dbt->doff - dbt->dlen;
00893                                 dest = src + change;
00894                                 memmove(dest, src, len);
00895                         }
00896                         memcpy((u_int8_t *)tdata.data + dbt->doff,
00897                             dbt->data, dbt->size);
00898                         tdata.size += change;
00899 
00900                         /* Now add the pair. */
00901                         ret = CDB___ham_add_el(dbc, &tmp, &tdata, type);
00902                         CDB___os_free(memp, memsize);
00903                 }
00904                 F_SET(hcp, dup);
00905 err:            return (ret);
00906         }
00907 
00908         /*
00909          * Set up pointer into existing data. Do it before the log
00910          * message so we can use it inside of the log setup.
00911          */
00912         beg = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx));
00913         beg += dbt->doff;
00914 
00915         /*
00916          * If we are going to have to move bytes at all, figure out
00917          * all the parameters here.  Then log the call before moving
00918          * anything around.
00919          */
00920         if (DB_LOGGING(dbc)) {
00921                 old_dbt.data = beg;
00922                 old_dbt.size = dbt->dlen;
00923                 if ((ret = CDB___ham_replace_log(dbp->dbenv,
00924                     dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(hcp->page),
00925                     (u_int32_t)H_DATAINDEX(hcp->indx), &LSN(hcp->page),
00926                     (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
00927                         return (ret);
00928 
00929                 LSN(hcp->page) = new_lsn;       /* Structure assignment. */
00930         }
00931 
00932         CDB___ham_onpage_replace(hcp->page, dbp->pgsize,
00933             (u_int32_t)H_DATAINDEX(hcp->indx), (int32_t)dbt->doff, change, dbt);
00934 
00935         return (0);
00936 }
00937 
00938 /*
00939  * Replace data on a page with new data, possibly growing or shrinking what's
00940  * there.  This is called on two different occasions. On one (from replpair)
00941  * we are interested in changing only the data.  On the other (from recovery)
00942  * we are replacing the entire data (header and all) with a new element.  In
00943  * the latter case, the off argument is negative.
00944  * pagep: the page that we're changing
00945  * ndx: page index of the element that is growing/shrinking.
00946  * off: Offset at which we are beginning the replacement.
00947  * change: the number of bytes (+ or -) that the element is growing/shrinking.
00948  * dbt: the new data that gets written at beg.
00949  * PUBLIC: void CDB___ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
00950  * PUBLIC:     int32_t,  DBT *));
00951  */
00952 void
00953 CDB___ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt)
00954         PAGE *pagep;
00955         size_t pgsize;
00956         u_int32_t ndx;
00957         int32_t off;
00958         int32_t change;
00959         DBT *dbt;
00960 {
00961         db_indx_t i;
00962         int32_t len;
00963         u_int8_t *src, *dest;
00964         int zero_me;
00965 
00966         if (change != 0) {
00967                 zero_me = 0;
00968                 src = (u_int8_t *)(pagep) + HOFFSET(pagep);
00969                 if (off < 0)
00970                         len = pagep->inp[ndx] - HOFFSET(pagep);
00971                 else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) {
00972                         len = HKEYDATA_DATA(P_ENTRY(pagep, ndx)) +
00973                             LEN_HKEYDATA(pagep, pgsize, ndx) - src;
00974                         zero_me = 1;
00975                 } else
00976                         len = (HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off) - src;
00977                 dest = src - change;
00978                 memmove(dest, src, len);
00979                 if (zero_me)
00980                         memset(dest + len, 0, change);
00981 
00982                 /* Now update the indices. */
00983                 for (i = ndx; i < NUM_ENT(pagep); i++)
00984                         pagep->inp[i] -= change;
00985                 HOFFSET(pagep) -= change;
00986         }
00987         if (off >= 0)
00988                 memcpy(HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off,
00989                     dbt->data, dbt->size);
00990         else
00991                 memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size);
00992 }
00993 
00994 /*
00995  * PUBLIC: int CDB___ham_split_page __P((DBC *, u_int32_t, u_int32_t));
00996  */
00997 int
00998 CDB___ham_split_page(dbc, obucket, nbucket)
00999         DBC *dbc;
01000         u_int32_t obucket, nbucket;
01001 {
01002         DB *dbp;
01003         DBC **carray;
01004         HASH_CURSOR *hcp, *cp;
01005         DBT key, page_dbt;
01006         DB_ENV *dbenv;
01007         DB_LSN new_lsn;
01008         PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
01009         db_indx_t n;
01010         db_pgno_t bucket_pgno, npgno, next_pgno;
01011         u_int32_t big_len, len;
01012         int i, ret, t_ret;
01013         void *big_buf;
01014 
01015         dbp = dbc->dbp;
01016         hcp = (HASH_CURSOR *)dbc->internal;
01017         dbenv = dbp->dbenv;
01018         temp_pagep = old_pagep = new_pagep = NULL;
01019 
01020         if ((ret = CDB___ham_get_clist(dbp, obucket, NDX_INVALID, &carray)) != 0)
01021                 return (ret);
01022 
01023         bucket_pgno = BUCKET_TO_PAGE(hcp, obucket);
01024         if ((ret = CDB___ham_get_page(dbp, bucket_pgno, &old_pagep)) != 0)
01025                 goto err;
01026 
01027         /* Properly initialize the new bucket page. */
01028         npgno = BUCKET_TO_PAGE(hcp, nbucket);
01029         if ((ret = CDB___ham_get_page(dbp, npgno, &new_pagep)) != 0)
01030                 goto err;
01031         P_INIT(new_pagep,
01032             dbp->pgsize, npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH, dbp->tags);
01033 
01034         temp_pagep = hcp->split_buf;
01035         memcpy(temp_pagep, old_pagep, dbp->pgsize);
01036 
01037         if (DB_LOGGING(dbc)) {
01038                 page_dbt.size = dbp->pgsize;
01039                 page_dbt.data = old_pagep;
01040                 if ((ret = CDB___ham_splitdata_log(dbenv,
01041                     dbc->txn, &new_lsn, 0, dbp->log_fileid, SPLITOLD,
01042                     PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0)
01043                         goto err;
01044         }
01045 
01046         P_INIT(old_pagep, dbp->pgsize, PGNO(old_pagep), PGNO_INVALID,
01047             PGNO_INVALID, 0, P_HASH, dbp->tags);
01048 
01049         if (DB_LOGGING(dbc))
01050                 LSN(old_pagep) = new_lsn;       /* Structure assignment. */
01051 
01052         big_len = 0;
01053         big_buf = NULL;
01054         key.flags = 0;
01055         while (temp_pagep != NULL) {
01056                 for (n = 0; n < (db_indx_t)NUM_ENT(temp_pagep); n += 2) {
01057                         if ((ret =
01058                             CDB___db_ret(dbp, temp_pagep, H_KEYINDEX(n),
01059                             &key, &big_buf, &big_len)) != 0)
01060                                 goto err;
01061 
01062                         if (CDB___ham_call_hash(dbc, key.data, key.size)
01063                             == obucket)
01064                                 pp = &old_pagep;
01065                         else
01066                                 pp = &new_pagep;
01067 
01068                         /*
01069                          * Figure out how many bytes we need on the new
01070                          * page to store the key/data pair.
01071                          */
01072 
01073                         len = LEN_HITEM(temp_pagep, dbp->pgsize,
01074                             H_DATAINDEX(n)) +
01075                             LEN_HITEM(temp_pagep, dbp->pgsize,
01076                             H_KEYINDEX(n)) +
01077                             2 * sizeof(db_indx_t);
01078 
01079                         if (P_FREESPACE(*pp) < len) {
01080                                 if (DB_LOGGING(dbc)) {
01081                                         page_dbt.size = dbp->pgsize;
01082                                         page_dbt.data = *pp;
01083                                         if ((ret = CDB___ham_splitdata_log(
01084                                             dbenv, dbc->txn,
01085                                             &new_lsn, 0, dbp->log_fileid,
01086                                             SPLITNEW, PGNO(*pp), &page_dbt,
01087                                             &LSN(*pp))) != 0)
01088                                                 goto err;
01089                                         LSN(*pp) = new_lsn;
01090                                 }
01091                                 if ((ret =
01092                                     CDB___ham_add_ovflpage(dbc, *pp, 1, pp)) != 0)
01093                                         goto err;
01094                         }
01095 
01096                         /* Check if we need to update a cursor. */
01097                         if (carray != NULL) {
01098                                 for (i = 0; carray[i] != NULL; i++) {
01099                                         cp =
01100                                             (HASH_CURSOR *)carray[i]->internal;
01101                                         if (cp->pgno == PGNO(temp_pagep)
01102                                             && cp->indx == n) {
01103                                                 cp->pgno = PGNO(*pp);
01104                                                 cp->indx = NUM_ENT(*pp);
01105                                         }
01106                                 }
01107                         }
01108                         CDB___ham_copy_item(dbp->pgsize,
01109                             temp_pagep, H_KEYINDEX(n), *pp);
01110                         CDB___ham_copy_item(dbp->pgsize,
01111                             temp_pagep, H_DATAINDEX(n), *pp);
01112                 }
01113                 next_pgno = NEXT_PGNO(temp_pagep);
01114 
01115                 /* Clear temp_page; if it's a link overflow page, free it. */
01116                 if (PGNO(temp_pagep) != bucket_pgno && (ret =
01117                     CDB___db_free(dbc, temp_pagep)) != 0)
01118                         goto err;
01119 
01120                 if (next_pgno == PGNO_INVALID)
01121                         temp_pagep = NULL;
01122                 else if ((ret =
01123                     CDB___ham_get_page(dbp, next_pgno, &temp_pagep)) != 0)
01124                         goto err;
01125 
01126                 if (temp_pagep != NULL && DB_LOGGING(dbc)) {
01127                         page_dbt.size = dbp->pgsize;
01128                         page_dbt.data = temp_pagep;
01129                         if ((ret = CDB___ham_splitdata_log(dbenv,
01130                             dbc->txn, &new_lsn, 0, dbp->log_fileid,
01131                             SPLITOLD, PGNO(temp_pagep),
01132                             &page_dbt, &LSN(temp_pagep))) != 0)
01133                                 goto err;
01134                         LSN(temp_pagep) = new_lsn;
01135                 }
01136         }
01137         if (big_buf != NULL)
01138                 CDB___os_free(big_buf, big_len);
01139 
01140         /*
01141          * If the original bucket spanned multiple pages, then we've got
01142          * a pointer to a page that used to be on the bucket chain.  It
01143          * should be deleted.
01144          */
01145         if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno &&
01146             (ret = CDB___db_free(dbc, temp_pagep)) != 0)
01147                 goto err;
01148 
01149         /*
01150          * Write new buckets out.
01151          */
01152         if (DB_LOGGING(dbc)) {
01153                 page_dbt.size = dbp->pgsize;
01154                 page_dbt.data = old_pagep;
01155                 if ((ret = CDB___ham_splitdata_log(dbenv, dbc->txn, &new_lsn, 0,
01156                     dbp->log_fileid, SPLITNEW, PGNO(old_pagep), &page_dbt,
01157                     &LSN(old_pagep))) != 0)
01158                         goto err;
01159                 LSN(old_pagep) = new_lsn;
01160 
01161                 page_dbt.data = new_pagep;
01162                 if ((ret = CDB___ham_splitdata_log(dbenv, dbc->txn, &new_lsn, 0,
01163                     dbp->log_fileid, SPLITNEW, PGNO(new_pagep), &page_dbt,
01164                     &LSN(new_pagep))) != 0)
01165                         goto err;
01166                 LSN(new_pagep) = new_lsn;
01167         }
01168         ret = CDB___ham_put_page(dbp, old_pagep, 1);
01169         if ((t_ret = CDB___ham_put_page(dbp, new_pagep, 1)) != 0 && ret == 0)
01170                 ret = t_ret;
01171 
01172         if (0) {
01173 err:            if (old_pagep != NULL)
01174                         (void)CDB___ham_put_page(dbp, old_pagep, 1);
01175                 if (new_pagep != NULL)
01176                         (void)CDB___ham_put_page(dbp, new_pagep, 1);
01177                 if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
01178                         (void)CDB___ham_put_page(dbp, temp_pagep, 1);
01179         }
01180         if (carray != NULL)             /* We never knew its size. */
01181                 CDB___os_free(carray, 0);
01182         return (ret);
01183 }
01184 
01185 /*
01186  * Add the given pair to the page.  The page in question may already be
01187  * held (i.e. it was already gotten).  If it is, then the page is passed
01188  * in via the pagep parameter.  On return, pagep will contain the page
01189  * to which we just added something.  This allows us to link overflow
01190  * pages and return the new page having correctly put the last page.
01191  *
01192  * PUBLIC: int CDB___ham_add_el __P((DBC *, const DBT *, const DBT *, int));
01193  */
01194 int
01195 CDB___ham_add_el(dbc, key, val, type)
01196         DBC *dbc;
01197         const DBT *key, *val;
01198         int type;
01199 {
01200         DB *dbp;
01201         HASH_CURSOR *hcp;
01202         const DBT *pkey, *pdata;
01203         DBT key_dbt, data_dbt;
01204         DB_LSN new_lsn;
01205         HOFFPAGE doff, koff;
01206         db_pgno_t next_pgno;
01207         u_int32_t data_size, key_size, pairsize, rectype;
01208         int do_expand, is_keybig, is_databig, ret;
01209         int key_type, data_type;
01210 
01211         dbp = dbc->dbp;
01212         hcp = (HASH_CURSOR *)dbc->internal;
01213         do_expand = 0;
01214 
01215         if (hcp->page == NULL && (ret = CDB___ham_get_page(dbp,
01216             hcp->seek_found_page != PGNO_INVALID ?  hcp->seek_found_page :
01217             hcp->pgno, (PAGE **)&hcp->page)) != 0)
01218                 return (ret);
01219 
01220         key_size = HKEYDATA_PSIZE(key->size);
01221         data_size = HKEYDATA_PSIZE(val->size);
01222         is_keybig = ISBIG(hcp, key->size);
01223         is_databig = ISBIG(hcp, val->size);
01224         if (is_keybig)
01225                 key_size = HOFFPAGE_PSIZE;
01226         if (is_databig)
01227                 data_size = HOFFPAGE_PSIZE;
01228 
01229         pairsize = key_size + data_size;
01230 
01231         /* Advance to first page in chain with room for item. */
01232         while (H_NUMPAIRS(hcp->page) && NEXT_PGNO(hcp->page) != PGNO_INVALID) {
01233                 /*
01234                  * This may not be the end of the chain, but the pair may fit
01235                  * anyway.  Check if it's a bigpair that fits or a regular
01236                  * pair that fits.
01237                  */
01238                 if (P_FREESPACE(hcp->page) >= pairsize)
01239                         break;
01240                 next_pgno = NEXT_PGNO(hcp->page);
01241                 if ((ret =
01242                     CDB___ham_next_cpage(dbc, next_pgno, 0)) != 0)
01243                         return (ret);
01244         }
01245 
01246         /*
01247          * Check if we need to allocate a new page.
01248          */
01249         if (P_FREESPACE(hcp->page) < pairsize) {
01250                 do_expand = 1;
01251                 if ((ret = CDB___ham_add_ovflpage(dbc,
01252                     (PAGE *)hcp->page, 1, (PAGE **)&hcp->page)) !=  0)
01253                         return (ret);
01254                 hcp->pgno = PGNO(hcp->page);
01255         }
01256 
01257         /*
01258          * Update cursor.
01259          */
01260         hcp->indx = NUM_ENT(hcp->page);
01261         F_CLR(hcp, H_DELETED);
01262         if (is_keybig) {
01263                 koff.type = H_OFFPAGE;
01264                 UMRW(koff.unused[0]);
01265                 UMRW(koff.unused[1]);
01266                 UMRW(koff.unused[2]);
01267                 if ((ret = CDB___db_poff(dbc, key, &koff.pgno)) != 0)
01268                         return (ret);
01269                 koff.tlen = key->size;
01270                 key_dbt.data = &koff;
01271                 key_dbt.size = sizeof(koff);
01272                 pkey = &key_dbt;
01273                 key_type = H_OFFPAGE;
01274         } else {
01275                 pkey = key;
01276                 key_type = H_KEYDATA;
01277         }
01278 
01279         if (is_databig) {
01280                 doff.type = H_OFFPAGE;
01281                 UMRW(doff.unused[0]);
01282                 UMRW(doff.unused[1]);
01283                 UMRW(doff.unused[2]);
01284                 if ((ret = CDB___db_poff(dbc, val, &doff.pgno)) != 0)
01285                         return (ret);
01286                 doff.tlen = val->size;
01287                 data_dbt.data = &doff;
01288                 data_dbt.size = sizeof(doff);
01289                 pdata = &data_dbt;
01290                 data_type = H_OFFPAGE;
01291         } else {
01292                 pdata = val;
01293                 data_type = type;
01294         }
01295 
01296         if (DB_LOGGING(dbc)) {
01297                 rectype = PUTPAIR;
01298                 if (is_databig)
01299                         rectype |= PAIR_DATAMASK;
01300                 if (is_keybig)
01301                         rectype |= PAIR_KEYMASK;
01302                 if (type == H_DUPLICATE)
01303                         rectype |= PAIR_DUPMASK;
01304 
01305                 if ((ret = CDB___ham_insdel_log(dbp->dbenv, dbc->txn, &new_lsn, 0,
01306                     rectype, dbp->log_fileid, PGNO(hcp->page),
01307                     (u_int32_t)NUM_ENT(hcp->page), &LSN(hcp->page), pkey,
01308                     pdata)) != 0)
01309                         return (ret);
01310 
01311                 /* Move lsn onto page. */
01312                 LSN(hcp->page) = new_lsn;       /* Structure assignment. */
01313         }
01314 
01315         CDB___ham_putitem(hcp->page, pkey, key_type);
01316         CDB___ham_putitem(hcp->page, pdata, data_type);
01317 
01318         /*
01319          * For splits, we are going to update item_info's page number
01320          * field, so that we can easily return to the same page the
01321          * next time we come in here.  For other operations, this shouldn't
01322          * matter, since odds are this is the last thing that happens before
01323          * we return to the user program.
01324          */
01325         hcp->pgno = PGNO(hcp->page);
01326 
01327         /*
01328          * XXX
01329          * Maybe keep incremental numbers here.
01330          */
01331         if (!STD_LOCKING(dbc))
01332                 hcp->hdr->nelem++;
01333 
01334         if (do_expand || (hcp->hdr->ffactor != 0 &&
01335             (u_int32_t)H_NUMPAIRS(hcp->page) > hcp->hdr->ffactor))
01336                 F_SET(hcp, H_EXPAND);
01337         return (0);
01338 }
01339 
01340 /*
01341  * Special __putitem call used in splitting -- copies one entry to
01342  * another.  Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
01343  * H_DUPLICATE, H_OFFDUP).  Since we log splits at a high level, we
01344  * do not need to do any logging here.
01345  *
01346  * PUBLIC: void CDB___ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *));
01347  */
01348 void
01349 CDB___ham_copy_item(pgsize, src_page, src_ndx, dest_page)
01350         size_t pgsize;
01351         PAGE *src_page;
01352         u_int32_t src_ndx;
01353         PAGE *dest_page;
01354 {
01355         u_int32_t len;
01356         void *src, *dest;
01357 
01358         /*
01359          * Copy the key and data entries onto this new page.
01360          */
01361         src = P_ENTRY(src_page, src_ndx);
01362 
01363         /* Set up space on dest. */
01364         len = LEN_HITEM(src_page, pgsize, src_ndx);
01365         HOFFSET(dest_page) -= len;
01366         dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
01367         dest = P_ENTRY(dest_page, NUM_ENT(dest_page));
01368         NUM_ENT(dest_page)++;
01369 
01370         memcpy(dest, src, len);
01371 }
01372 
01373 /*
01374  *
01375  * Returns:
01376  *      pointer on success
01377  *      NULL on error
01378  *
01379  * PUBLIC: int CDB___ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **));
01380  */
01381 int
01382 CDB___ham_add_ovflpage(dbc, pagep, release, pp)
01383         DBC *dbc;
01384         PAGE *pagep;
01385         int release;
01386         PAGE **pp;
01387 {
01388         DB *dbp;
01389         HASH_CURSOR *hcp;
01390         DB_LSN new_lsn;
01391         PAGE *new_pagep;
01392         int ret;
01393 
01394         dbp = dbc->dbp;
01395         hcp = (HASH_CURSOR *)dbc->internal;
01396 
01397         if ((ret = CDB___db_new(dbc, (P_HASH | dbp->tags), &new_pagep)) != 0)
01398                 return (ret);
01399 
01400         if (DB_LOGGING(dbc)) {
01401                 if ((ret = CDB___ham_newpage_log(dbp->dbenv, dbc->txn, &new_lsn, 0,
01402                     PUTOVFL, dbp->log_fileid, PGNO(pagep), &LSN(pagep),
01403                     PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0)
01404                         return (ret);
01405 
01406                 /* Move lsn onto page. */
01407                 LSN(pagep) = LSN(new_pagep) = new_lsn;
01408         }
01409         NEXT_PGNO(pagep) = PGNO(new_pagep);
01410         PREV_PGNO(new_pagep) = PGNO(pagep);
01411 
01412         if (release)
01413                 ret = CDB___ham_put_page(dbp, pagep, 1);
01414 
01415         *pp = new_pagep;
01416         return (ret);
01417 }
01418 
01419 /*
01420  * PUBLIC: int CDB___ham_put_page __P((DB *, PAGE *, int32_t));
01421  */
01422 int
01423 CDB___ham_put_page(dbp, pagep, is_dirty)
01424         DB *dbp;
01425         PAGE *pagep;
01426         int32_t is_dirty;
01427 {
01428         return (CDB_memp_fput(dbp->mpf, pagep, (is_dirty ? DB_MPOOL_DIRTY : 0)));
01429 }
01430 
01431 /*
01432  * CDB___ham_dirty_page --
01433  *      Mark a page dirty.
01434  *
01435  * PUBLIC: int CDB___ham_dirty_page __P((DB *, PAGE *));
01436  */
01437 int
01438 CDB___ham_dirty_page(dbp, pagep)
01439         DB *dbp;
01440         PAGE *pagep;
01441 {
01442         return (CDB_memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY));
01443 }
01444 
01445 /*
01446  * PUBLIC: int CDB___ham_get_page __P((DB *, db_pgno_t, PAGE **));
01447  */
01448 int
01449 CDB___ham_get_page(dbp, addr, pagep)
01450         DB *dbp;
01451         db_pgno_t addr;
01452         PAGE **pagep;
01453 {
01454         return (CDB_memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, pagep));
01455 }
01456 
01457 /*
01458  * PUBLIC: db_pgno_t CDB___bucket_to_page __P((HASH_CURSOR *, db_pgno_t));
01459  */
01460 db_pgno_t
01461 CDB___bucket_to_page(hcp, n)
01462         HASH_CURSOR *hcp;
01463         db_pgno_t n;
01464 {
01465         int ret_val;
01466 
01467         ret_val = n + 1;
01468         if (n != 0)
01469                 ret_val += hcp->hdr->spares[CDB___db_log2(n + 1) - 1];
01470         return (ret_val);
01471 }
01472 
01473 /*
01474  * PUBLIC: int CDB___ham_get_cpage __P((DBC *, db_lockmode_t));
01475  */
01476 int
01477 CDB___ham_get_cpage(dbc, mode)
01478         DBC *dbc;
01479         db_lockmode_t mode;
01480 {
01481         DB *dbp;
01482         DB_LOCK tmp_lock;
01483         HASH_CURSOR *hcp;
01484         int ret;
01485 
01486         dbp = dbc->dbp;
01487         hcp = (HASH_CURSOR *)dbc->internal;
01488         ret = 0;
01489 
01490         /*
01491          * There are four cases with respect to buckets and locks.
01492          * 1. If there is no lock held, then if we are locking, we should
01493          *    get the lock.
01494          * 2. If there is a lock held, it's for the current bucket, and it's
01495          *    for the right mode, we don't need to do anything.
01496          * 3. If there is a lock held for the current bucket but it's not
01497          *    strong enough, we need to upgrade.
01498          * 4. If there is a lock, but it's for a different bucket, then we need
01499          *    to release the existing lock and get a new lock.
01500          */
01501         tmp_lock.off = LOCK_INVALID;
01502         if (STD_LOCKING(dbc)) {
01503                 if (hcp->lock.off != LOCK_INVALID &&
01504                     hcp->lbucket != hcp->bucket) {              /* Case 4 */
01505                         if (dbc->txn == NULL &&
01506                             (ret = CDB_lock_put(dbp->dbenv, &hcp->lock)) != 0)
01507                                 return (ret);
01508                         hcp->lock.off = LOCK_INVALID;
01509                 }
01510                 if ((hcp->lock.off != LOCK_INVALID &&
01511                     (hcp->lock_mode == DB_LOCK_READ &&
01512                     mode == DB_LOCK_WRITE))) {
01513                         /* Case 3. */
01514                         tmp_lock = hcp->lock;
01515                         hcp->lock.off = LOCK_INVALID;
01516                 }
01517 
01518                 /* Acquire the lock. */
01519                 if (hcp->lock.off == LOCK_INVALID)
01520                         /* Cases 1, 3, and 4. */
01521                         if ((ret = CDB___ham_lock_bucket(dbc, mode)) != 0)
01522                                 return (ret);
01523 
01524                 if (ret == 0) {
01525                         hcp->lock_mode = mode;
01526                         hcp->lbucket = hcp->bucket;
01527                         if (tmp_lock.off != LOCK_INVALID)
01528                                 /* Case 3: release the original lock. */
01529                                 ret = CDB_lock_put(dbp->dbenv, &tmp_lock);
01530                 } else if (tmp_lock.off != LOCK_INVALID)
01531                         hcp->lock = tmp_lock;
01532         }
01533 
01534         if (ret == 0 && hcp->page == NULL) {
01535                 if (hcp->pgno == PGNO_INVALID)
01536                         hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
01537                 if ((ret =
01538                     CDB___ham_get_page(dbp, hcp->pgno, (PAGE **)&hcp->page)) != 0)
01539                         return (ret);
01540         }
01541 
01542         return (0);
01543 }
01544 
01545 /*
01546  * Get a new page at the cursor, putting the last page if necessary.
01547  * If the flag is set to H_ISDUP, then we are talking about the
01548  * duplicate page, not the CDB_main page.
01549  *
01550  * PUBLIC: int CDB___ham_next_cpage __P((DBC *, db_pgno_t, int));
01551  */
01552 int
01553 CDB___ham_next_cpage(dbc, pgno, dirty)
01554         DBC *dbc;
01555         db_pgno_t pgno;
01556         int dirty;
01557 {
01558         DB *dbp;
01559         HASH_CURSOR *hcp;
01560         PAGE *p;
01561         int ret;
01562 
01563         dbp = dbc->dbp;
01564         hcp = (HASH_CURSOR *)dbc->internal;
01565 
01566         if (hcp->page != NULL &&
01567             (ret = CDB___ham_put_page(dbp, hcp->page, dirty)) != 0)
01568                 return (ret);
01569 
01570         if ((ret = CDB___ham_get_page(dbp, pgno, &p)) != 0)
01571                 return (ret);
01572 
01573         hcp->page = p;
01574         hcp->pgno = pgno;
01575         hcp->indx = 0;
01576 
01577         return (0);
01578 }
01579 
01580 /*
01581  * CDB___ham_lock_bucket --
01582  *      Get the lock on a particular bucket.
01583  *
01584  * PUBLIC: int CDB___ham_lock_bucket __P((DBC *, db_lockmode_t));
01585  */
01586 int
01587 CDB___ham_lock_bucket(dbc, mode)
01588         DBC *dbc;
01589         db_lockmode_t mode;
01590 {
01591         HASH_CURSOR *hcp;
01592         u_int32_t flags;
01593         int gotmeta, ret;
01594 
01595         hcp = (HASH_CURSOR *)dbc->internal;
01596         gotmeta = hcp->hdr == NULL ? 1 : 0;
01597         if (gotmeta)
01598                 if ((ret = CDB___ham_get_meta(dbc)) != 0)
01599                         return (ret);
01600         dbc->lock.pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
01601         if (gotmeta)
01602                 if ((ret = CDB___ham_release_meta(dbc)) != 0)
01603                         return (ret);
01604 
01605         flags = 0;
01606         if (DB_NONBLOCK(dbc))
01607                 LF_SET(DB_LOCK_NOWAIT);
01608 
01609         ret = CDB_lock_get(dbc->dbp->dbenv,
01610                     dbc->locker, flags, &dbc->lock_dbt, mode, &hcp->lock);
01611 
01612         hcp->lock_mode = mode;
01613         return (ret);
01614 }
01615 
01616 /*
01617  * CDB___ham_dpair --
01618  *      Delete a pair on a page, paying no attention to what the pair
01619  *      represents.  The caller is responsible for freeing up duplicates
01620  *      or offpage entries that might be referenced by this pair.
01621  *
01622  * PUBLIC: void CDB___ham_dpair __P((DB *, PAGE *, u_int32_t));
01623  */
01624 void
01625 CDB___ham_dpair(dbp, p, indx)
01626         DB *dbp;
01627         PAGE *p;
01628         u_int32_t indx;
01629 {
01630         db_indx_t delta, n;
01631         u_int8_t *dest, *src;
01632 
01633         /*
01634          * Compute "delta", the amount we have to shift all of the
01635          * offsets.  To find the delta, we just need to calculate
01636          * the size of the pair of elements we are removing.
01637          */
01638         delta = H_PAIRSIZE(p, dbp->pgsize, indx);
01639 
01640         /*
01641          * The hard case: we want to remove something other than
01642          * the last item on the page.  We need to shift data and
01643          * offsets down.
01644          */
01645         if ((db_indx_t)indx != NUM_ENT(p) - 2) {
01646                 /*
01647                  * Move the data: src is the first occupied byte on
01648                  * the page. (Length is delta.)
01649                  */
01650                 src = (u_int8_t *)p + HOFFSET(p);
01651 
01652                 /*
01653                  * Destination is delta bytes beyond src.  This might
01654                  * be an overlapping copy, so we have to use memmove.
01655                  */
01656                 dest = src + delta;
01657                 memmove(dest, src, p->inp[H_DATAINDEX(indx)] - HOFFSET(p));
01658         }
01659 
01660         /* Adjust page metadata. */
01661         HOFFSET(p) = HOFFSET(p) + delta;
01662         NUM_ENT(p) = NUM_ENT(p) - 2;
01663 
01664         /* Adjust the offsets. */
01665         for (n = (db_indx_t)indx; n < (db_indx_t)(NUM_ENT(p)); n++)
01666                 p->inp[n] = p->inp[n + 2] + delta;
01667 
01668 }

Generated on Sun Jun 8 10:56:37 2008 for GNUmifluz by  doxygen 1.5.5