bt_rsearch.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1990, 1993, 1994, 1995, 1996
00009  *      Keith Bostic.  All rights reserved.
00010  */
00011 /*
00012  * Copyright (c) 1990, 1993
00013  *      The Regents of the University of California.  All rights reserved.
00014  *
00015  * Redistribution and use in source and binary forms, with or without
00016  * modification, are permitted provided that the following conditions
00017  * are met:
00018  * 1. Redistributions of source code must retain the above copyright
00019  *    notice, this list of conditions and the following disclaimer.
00020  * 2. Redistributions in binary form must reproduce the above copyright
00021  *    notice, this list of conditions and the following disclaimer in the
00022  *    documentation and/or other materials provided with the distribution.
00023  * 3. Neither the name of the University nor the names of its contributors
00024  *    may be used to endorse or promote products derived from this software
00025  *    without specific prior written permission.
00026  *
00027  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00028  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00029  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00030  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00031  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00032  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00033  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00034  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00035  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00036  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00037  * SUCH DAMAGE.
00038  */
00039 
00040 #include "config.h"
00041 
00042 #ifndef lint
00043 static const char revid[] = "$Id: bt__rsearch_8c-source.html,v 1.1 2008/06/08 10:13:44 sebdiaz Exp $";
00044 #endif /* not lint */
00045 
00046 #ifndef NO_SYSTEM_INCLUDES
00047 #include <sys/types.h>
00048 #endif
00049 
00050 #include "db_int.h"
00051 #include "db_page.h"
00052 #include "btree.h"
00053 #include "db_shash.h"
00054 #include "lock.h"
00055 
00056 /*
00057  * CDB___bam_rsearch --
00058  *      Search a btree for a record number.
00059  *
00060  * PUBLIC: int CDB___bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *));
00061  */
00062 int
00063 CDB___bam_rsearch(dbc, recnop, flags, stop, exactp)
00064         DBC *dbc;
00065         db_recno_t *recnop;
00066         u_int32_t flags;
00067         int stop, *exactp;
00068 {
00069         BINTERNAL *bi;
00070         BTREE_CURSOR *cp;
00071         DB *dbp;
00072         DB_LOCK lock;
00073         PAGE *h;
00074         RINTERNAL *ri;
00075         db_indx_t adjust, deloffset, indx, top;
00076         db_lockmode_t lock_mode;
00077         db_pgno_t pg;
00078         db_recno_t recno, t_recno, total;
00079         int ret, stack;
00080 
00081         dbp = dbc->dbp;
00082         cp = (BTREE_CURSOR *)dbc->internal;
00083 
00084         BT_STK_CLR(cp);
00085 
00086         /*
00087          * There are several ways we search a btree tree.  The flags argument
00088          * specifies if we're acquiring read or write locks and if we are
00089          * locking pairs of pages.  In addition, if we're adding or deleting
00090          * an item, we have to lock the entire tree, regardless.  See btree.h
00091          * for more details.
00092          *
00093          * If write-locking pages, we need to know whether or not to acquire a
00094          * write lock on a page before getting it.  This depends on how deep it
00095          * is in tree, which we don't know until we acquire the root page.  So,
00096          * if we need to lock the root page we may have to upgrade it later,
00097          * because we won't get the correct lock initially.
00098          *
00099          * Retrieve the root page.
00100          */
00101         pg = cp->root;
00102         stack = LF_ISSET(S_STACK);
00103         lock_mode = stack ? DB_LOCK_WRITE : DB_LOCK_READ;
00104         if ((ret = CDB___db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
00105                 return (ret);
00106         if ((ret = CDB_memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
00107                 /* Did not read it, so we can release the lock */
00108                 (void)__LPUT(dbc, lock);
00109                 return (ret);
00110         }
00111 
00112         /*
00113          * Decide if we need to save this page; if we do, write lock it.
00114          * We deliberately don't lock-couple on this call.  If the tree
00115          * is tiny, i.e., one page, and two threads are busily updating
00116          * the root page, we're almost guaranteed deadlocks galore, as
00117          * each one gets a read lock and then blocks the other's attempt
00118          * for a write lock.
00119          */
00120         if (!stack &&
00121             ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
00122             (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
00123                 (void)CDB_memp_fput(dbp->mpf, h, 0);
00124                 (void)__LPUT(dbc, lock);
00125                 lock_mode = DB_LOCK_WRITE;
00126                 if ((ret = CDB___db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
00127                         return (ret);
00128                 if ((ret = CDB_memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
00129                         /* Did not read it, so we can release the lock */
00130                         (void)__LPUT(dbc, lock);
00131                         return (ret);
00132                 }
00133                 stack = 1;
00134         }
00135 
00136         /*
00137          * If appending to the tree, set the record number now -- we have the
00138          * root page locked.
00139          *
00140          * Delete only deletes exact matches, read only returns exact matches.
00141          * Note, this is different from CDB___bam_search(), which returns non-exact
00142          * matches for read.
00143          *
00144          * The record may not exist.  We can only return the correct location
00145          * for the record immediately after the last record in the tree, so do
00146          * a fast check now.
00147          */
00148         total = RE_NREC(h);
00149         if (LF_ISSET(S_APPEND)) {
00150                 *exactp = 0;
00151                 *recnop = recno = total + 1;
00152         } else {
00153                 recno = *recnop;
00154                 if (recno <= total)
00155                         *exactp = 1;
00156                 else {
00157                         *exactp = 0;
00158                         if (!LF_ISSET(S_PAST_EOF) || recno > total + 1) {
00159                                 /*
00160                                  * Keep the page locked for serializability.
00161                                  *
00162                                  * XXX
00163                                  * This leaves the root page locked, which will
00164                                  * eliminate any concurrency.  A possible fix
00165                                  * would be to lock the last leaf page instead.
00166                                  */
00167                                 (void)CDB_memp_fput(dbp->mpf, h, 0);
00168                                 (void)__TLPUT(dbc, lock);
00169                                 return (DB_NOTFOUND);
00170                         }
00171                 }
00172         }
00173 
00174         /*
00175          * !!!
00176          * Record numbers in the tree are 0-based, but the recno is
00177          * 1-based.  All of the calculations below have to take this
00178          * into account.
00179          */
00180         for (total = 0;;) {
00181                 switch (TYPE(h)) {
00182                 case P_LBTREE:
00183                 case P_LDUP:
00184                         recno -= total;
00185                         /*
00186                          * There may be logically deleted records on the page.
00187                          * If there are enough, the record may not exist.
00188                          */
00189                         if (TYPE(h) == P_LBTREE) {
00190                                 adjust = P_INDX;
00191                                 deloffset = O_INDX;
00192                         } else {
00193                                 adjust = O_INDX;
00194                                 deloffset = 0;
00195                         }
00196                         for (t_recno = 0, indx = 0;; indx += adjust) {
00197                                 if (indx >= NUM_ENT(h)) {
00198                                         *exactp = 0;
00199                                         if (!LF_ISSET(S_PAST_EOF) ||
00200                                             recno > t_recno + 1) {
00201                                                 ret = DB_NOTFOUND;
00202                                                 goto err;
00203                                         }
00204                                 }
00205                                 if (!B_DISSET(
00206                                     GET_BKEYDATA(h, indx + deloffset)->type) &&
00207                                     ++t_recno == recno)
00208                                         break;
00209                         }
00210 
00211                         /* Correct from 1-based to 0-based for a page offset. */
00212                         BT_STK_ENTER(dbp->dbenv,
00213                             cp, h, indx, lock, lock_mode, ret);
00214                         if (ret != 0)
00215                                 goto err;
00216                         return (0);
00217                 case P_IBTREE:
00218                         for (indx = 0, top = NUM_ENT(h);;) {
00219                                 bi = GET_BINTERNAL(h, indx);
00220                                 if (++indx == top || total + bi->nrecs >= recno)
00221                                         break;
00222                                 total += bi->nrecs;
00223                         }
00224                         pg = bi->pgno;
00225                         break;
00226                 case P_LRECNO:
00227                         recno -= total;
00228 
00229                         /* Correct from 1-based to 0-based for a page offset. */
00230                         --recno;
00231                         BT_STK_ENTER(dbp->dbenv,
00232                             cp, h, recno, lock, lock_mode, ret);
00233                         if (ret != 0)
00234                                 goto err;
00235                         return (0);
00236                 case P_IRECNO:
00237                         for (indx = 0, top = NUM_ENT(h);;) {
00238                                 ri = GET_RINTERNAL(h, indx);
00239                                 if (++indx == top || total + ri->nrecs >= recno)
00240                                         break;
00241                                 total += ri->nrecs;
00242                         }
00243                         pg = ri->pgno;
00244                         break;
00245                 default:
00246                         return (CDB___db_pgfmt(dbp, h->pgno));
00247                 }
00248                 --indx;
00249 
00250                 if (stack) {
00251                         /* Return if this is the lowest page wanted. */
00252                         if (LF_ISSET(S_PARENT) && stop == h->level) {
00253                                 BT_STK_ENTER(dbp->dbenv,
00254                                     cp, h, indx, lock, lock_mode, ret);
00255                                 if (ret != 0)
00256                                         goto err;
00257                                 return (0);
00258                         }
00259                         BT_STK_PUSH(dbp->dbenv,
00260                             cp, h, indx, lock, lock_mode, ret);
00261                         if (ret != 0)
00262                                 goto err;
00263 
00264                         lock_mode = DB_LOCK_WRITE;
00265                         if ((ret =
00266                             CDB___db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
00267                                 goto err;
00268                 } else {
00269                         /*
00270                          * Decide if we want to return a pointer to the next
00271                          * page in the stack.  If we do, write lock it and
00272                          * never unlock it.
00273                          */
00274                         if ((LF_ISSET(S_PARENT) &&
00275                             (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
00276                             (h->level - 1) == LEAFLEVEL)
00277                                 stack = 1;
00278 
00279                         (void)CDB_memp_fput(dbp->mpf, h, 0);
00280 
00281                         lock_mode = stack &&
00282                             LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ;
00283                         if ((ret = CDB___db_lget(dbc,
00284                             LCK_COUPLE, pg, lock_mode, 0, &lock)) != 0) {
00285                                 /*
00286                                  * If we fail, discard the lock we held.  This
00287                                  * is OK because this only happens when we are
00288                                  * descending the tree holding read-locks.
00289                                  */
00290                                 __LPUT(dbc, lock);
00291                                 goto err;
00292                         }
00293                 }
00294 
00295                 if ((ret = CDB_memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
00296                         goto err;
00297         }
00298         /* NOTREACHED */
00299 
00300 err:    BT_STK_POP(cp);
00301         CDB___bam_stkrel(dbc, 0);
00302         return (ret);
00303 }
00304 
00305 /*
00306  * CDB___bam_adjust --
00307  *      Adjust the tree after adding or deleting a record.
00308  *
00309  * PUBLIC: int CDB___bam_adjust __P((DBC *, int32_t));
00310  */
00311 int
00312 CDB___bam_adjust(dbc, adjust)
00313         DBC *dbc;
00314         int32_t adjust;
00315 {
00316         BTREE_CURSOR *cp;
00317         DB *dbp;
00318         EPG *epg;
00319         PAGE *h;
00320         db_pgno_t root_pgno;
00321         int ret;
00322 
00323         dbp = dbc->dbp;
00324         cp = (BTREE_CURSOR *)dbc->internal;
00325         root_pgno = cp->root;
00326 
00327         /* Update the record counts for the tree. */
00328         for (epg = cp->sp; epg <= cp->csp; ++epg) {
00329                 h = epg->page;
00330                 if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) {
00331                         if (DB_LOGGING(dbc) &&
00332                             (ret = CDB___bam_cadjust_log(dbp->dbenv,
00333                             dbc->txn, &LSN(h), 0, dbp->log_fileid,
00334                             PGNO(h), &LSN(h), (u_int32_t)epg->indx, adjust,
00335                             PGNO(h) == root_pgno ? CAD_UPDATEROOT : 0)) != 0)
00336                                 return (ret);
00337 
00338                         if (TYPE(h) == P_IBTREE)
00339                                 GET_BINTERNAL(h, epg->indx)->nrecs += adjust;
00340                         else
00341                                 GET_RINTERNAL(h, epg->indx)->nrecs += adjust;
00342 
00343                         if (PGNO(h) == root_pgno)
00344                                 RE_NREC_ADJ(h, adjust);
00345 
00346                         if ((ret = CDB_memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0)
00347                                 return (ret);
00348                 }
00349         }
00350         return (0);
00351 }
00352 
00353 /*
00354  * CDB___bam_nrecs --
00355  *      Return the number of records in the tree.
00356  *
00357  * PUBLIC: int CDB___bam_nrecs __P((DBC *, db_recno_t *));
00358  */
00359 int
00360 CDB___bam_nrecs(dbc, rep)
00361         DBC *dbc;
00362         db_recno_t *rep;
00363 {
00364         DB *dbp;
00365         DB_LOCK lock;
00366         PAGE *h;
00367         db_pgno_t pgno;
00368         int ret;
00369 
00370         dbp = dbc->dbp;
00371 
00372         pgno = dbc->internal->root;
00373         if ((ret = CDB___db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0)
00374                 return (ret);
00375         if ((ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
00376                 return (ret);
00377 
00378         *rep = RE_NREC(h);
00379 
00380         (void)CDB_memp_fput(dbp->mpf, h, 0);
00381         (void)__TLPUT(dbc, lock);
00382 
00383         return (0);
00384 }
00385 
00386 /*
00387  * CDB___bam_total --
00388  *      Return the number of records below a page.
00389  *
00390  * PUBLIC: db_recno_t CDB___bam_total __P((PAGE *));
00391  */
00392 db_recno_t
00393 CDB___bam_total(h)
00394         PAGE *h;
00395 {
00396         db_recno_t nrecs;
00397         db_indx_t indx, top;
00398 
00399         nrecs = 0;
00400         top = NUM_ENT(h);
00401 
00402         switch (TYPE(h)) {
00403         case P_LBTREE:
00404                 /* Check for logically deleted records. */
00405                 for (indx = 0; indx < top; indx += P_INDX)
00406                         if (!B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
00407                                 ++nrecs;
00408                 break;
00409         case P_LDUP:
00410                 /* Check for logically deleted records. */
00411                 for (indx = 0; indx < top; indx += O_INDX)
00412                         if (!B_DISSET(GET_BKEYDATA(h, indx)->type))
00413                                 ++nrecs;
00414                 break;
00415         case P_IBTREE:
00416                 for (indx = 0; indx < top; indx += O_INDX)
00417                         nrecs += GET_BINTERNAL(h, indx)->nrecs;
00418                 break;
00419         case P_LRECNO:
00420                 nrecs = NUM_ENT(h);
00421                 break;
00422         case P_IRECNO:
00423                 for (indx = 0; indx < top; indx += O_INDX)
00424                         nrecs += GET_RINTERNAL(h, indx)->nrecs;
00425                 break;
00426         }
00427 
00428         return (nrecs);
00429 }

Generated on Sun Jun 8 10:56:35 2008 for GNUmifluz by  doxygen 1.5.5