bt_verify.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: bt__verify_8c-source.html,v 1.1 2008/06/08 10:13:47 sebdiaz Exp $
00008  */
00009 
00010 #include "config.h"
00011 
00012 #ifndef lint
00013 static const char revid[] = "$Id: bt__verify_8c-source.html,v 1.1 2008/06/08 10:13:47 sebdiaz Exp $";
00014 #endif /* not lint */
00015 
00016 #ifndef NO_SYSTEM_INCLUDES
00017 #include <sys/types.h>
00018 
00019 #include <errno.h>
00020 #include <string.h>
00021 #endif
00022 
00023 #include "db_int.h"
00024 #include "db_page.h"
00025 #include "db_verify.h"
00026 #include "btree.h"
00027 
00028 static int __bam_safe_getdata __P((DB *, PAGE *, u_int32_t, int, DBT *, int *));
00029 static int __bam_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
00030     db_indx_t *, u_int32_t));
00031 static int __bam_vrfy_treeorder __P((DB *, db_pgno_t, PAGE *, BINTERNAL *,
00032     BINTERNAL *, u_int32_t));
00033 static int __ram_vrfy_inp __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
00034     db_indx_t *, u_int32_t));
00035 
00036 #define OKFLAGS (DB_AGGRESSIVE | DB_NOORDERCHK | DB_SALVAGE)
00037 
00038 /*
00039  * CDB___bam_vrfy_meta --
00040  *      Verify the btree-specific part of a metadata page.
00041  *
00042  * PUBLIC: int CDB___bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *,
00043  * PUBLIC:     db_pgno_t, u_int32_t));
00044  */
00045 int
00046 CDB___bam_vrfy_meta(dbp, vdp, meta, pgno, flags)
00047         DB *dbp;
00048         VRFY_DBINFO *vdp;
00049         BTMETA *meta;
00050         db_pgno_t pgno;
00051         u_int32_t flags;
00052 {
00053         VRFY_PAGEINFO *pip;
00054         int isbad, t_ret, ret;
00055 
00056         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00057                 return (ret);
00058 
00059         isbad = 0;
00060 
00061         /*
00062          * If VRFY_INCOMPLETE is not set, then we didn't come through
00063          * __db_vrfy_pagezero and didn't incompletely
00064          * check this page--we haven't checked it at all.
00065          * Thus we need to call CDB___db_vrfy_meta and check the common fields.
00066          *
00067          * If VRFY_INCOMPLETE is set, we've already done all the same work
00068          * in __db_vrfy_pagezero, so skip the check.
00069          */
00070         if (!F_ISSET(pip, VRFY_INCOMPLETE) &&
00071             (ret = CDB___db_vrfy_meta(dbp, vdp, &meta->dbmeta, pgno, flags)) != 0) {
00072                 if (ret == DB_VERIFY_BAD)
00073                         isbad = 1;
00074                 else
00075                         goto err;
00076         }
00077 
00078         /* bt_minkey:  must be >= 2, < (pagesize / BKEYDATA_PSIZE(0) */
00079         if (meta->minkey < 2 ||
00080             meta->minkey > (dbp->pgsize / BKEYDATA_PSIZE(0))) {
00081                 pip->bt_minkey = 0;
00082                 isbad = 1;
00083                 EPRINT((dbp->dbenv,
00084                     "Nonsensical bt_minkey value %lu on metadata page %lu",
00085                     meta->minkey, pgno));
00086         } else
00087                 pip->bt_minkey = meta->minkey;
00088 
00089         /* bt_maxkey: no constraints (XXX: right?) */
00090         pip->bt_maxkey = meta->maxkey;
00091 
00092         /* re_len: no constraints on this (may be zero or huge--we make rope) */
00093         pip->re_len = meta->re_len;
00094 
00095         /*
00096          * The root must not be current page or 0 and it must be within
00097          * database.  If this metadata page is the master meta data page
00098          * of the file, then the root page had better be page 1.
00099          */
00100         pip->root = 0;
00101         if (meta->root == PGNO_INVALID
00102             || meta->root == pgno || !IS_VALID_PGNO(meta->root) ||
00103             (pgno == PGNO_BASE_MD && meta->root != 1)) {
00104                 isbad = 1;
00105                 EPRINT((dbp->dbenv,
00106                     "Nonsensical root page %lu on metadata page %lu",
00107                     meta->root, vdp->last_pgno));
00108         } else
00109                 pip->root = meta->root;
00110 
00111         /* Flags. */
00112         if (F_ISSET(&meta->dbmeta, BTM_RENUMBER))
00113                 F_SET(pip, VRFY_IS_RRECNO);
00114 
00115         if (F_ISSET(&meta->dbmeta, BTM_SUBDB)) {
00116                 /*
00117                  * If this is a master db meta page, it had better not have
00118                  * duplicates.
00119                  */
00120                 if (F_ISSET(&meta->dbmeta, BTM_DUP) && pgno == PGNO_BASE_MD) {
00121                         isbad = 1;
00122                         EPRINT((dbp->dbenv,
00123         "Btree metadata page %lu has both duplicates and multiple databases",
00124                             pgno));
00125                 }
00126                 F_SET(pip, VRFY_HAS_SUBDBS);
00127         }
00128 
00129         if (F_ISSET(&meta->dbmeta, BTM_DUP))
00130                 F_SET(pip, VRFY_HAS_DUPS);
00131         if (F_ISSET(&meta->dbmeta, BTM_DUPSORT))
00132                 F_SET(pip, VRFY_HAS_DUPSORT);
00133         if (F_ISSET(&meta->dbmeta, BTM_RECNUM))
00134                 F_SET(pip, VRFY_HAS_RECNUMS);
00135         if (F_ISSET(pip, VRFY_HAS_RECNUMS) && F_ISSET(pip, VRFY_HAS_DUPS)) {
00136                 EPRINT((dbp->dbenv,
00137             "Btree metadata page %lu illegally has both recnums and dups",
00138                     pgno));
00139                 isbad = 1;
00140         }
00141 
00142         if (F_ISSET(&meta->dbmeta, BTM_RECNO)) {
00143                 F_SET(pip, VRFY_IS_RECNO);
00144                 dbp->type = DB_RECNO;
00145         } else if (F_ISSET(pip, VRFY_IS_RRECNO)) {
00146                 isbad = 1;
00147                 EPRINT((dbp->dbenv,
00148                     "Metadata page %lu has renumber flag set but is not recno",
00149                     pgno));
00150         }
00151 
00152         if (F_ISSET(pip, VRFY_IS_RECNO) && F_ISSET(pip, VRFY_HAS_DUPS)) {
00153                 EPRINT((dbp->dbenv,
00154                     "Recno metadata page %lu specifies duplicates", pgno));
00155                 isbad = 1;
00156         }
00157 
00158         if (F_ISSET(&meta->dbmeta, BTM_FIXEDLEN))
00159                 F_SET(pip, VRFY_IS_FIXEDLEN);
00160         else if (pip->re_len > 0) {
00161                 /*
00162                  * It's wrong to have an re_len if it's not a fixed-length
00163                  * database
00164                  */
00165                 isbad = 1;
00166                 EPRINT((dbp->dbenv,
00167                     "re_len of %lu in non-fixed-length database",
00168                     pip->re_len));
00169         }
00170 
00171         /*
00172          * We do not check that the rest of the page is 0, because it may
00173          * not be and may still be correct.
00174          */
00175 
00176 err:    if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
00177                 ret = t_ret;
00178         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
00179 }
00180 
00181 /*
00182  * CDB___ram_vrfy_leaf --
00183  *      Verify a recno leaf page.
00184  *
00185  * PUBLIC: int CDB___ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
00186  * PUBLIC:     u_int32_t));
00187  */
00188 int
00189 CDB___ram_vrfy_leaf(dbp, vdp, h, pgno, flags)
00190         DB *dbp;
00191         VRFY_DBINFO *vdp;
00192         PAGE *h;
00193         db_pgno_t pgno;
00194         u_int32_t flags;
00195 {
00196         BKEYDATA *bk;
00197         VRFY_PAGEINFO *pip;
00198         db_indx_t i;
00199         int ret, t_ret, isbad;
00200         u_int32_t re_len_guess, len;
00201 
00202         isbad = 0;
00203         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00204                 return (ret);
00205 
00206         if ((ret = CDB___db_fchk(dbp->dbenv,
00207             "CDB___ram_vrfy_leaf", flags, OKFLAGS)) != 0)
00208                 goto err;
00209 
00210         if (TYPE(h) != P_LRECNO) {
00211                 /* We should not have been called. */
00212                 TYPE_ERR_PRINT(dbp->dbenv, "CDB___ram_vrfy_leaf", pgno, TYPE(h));
00213                 DB_ASSERT(0);
00214                 ret = EINVAL;
00215                 goto err;
00216         }
00217 
00218         /*
00219          * Verify (and, if relevant, save off) page fields common to
00220          * all PAGEs.
00221          */
00222         if ((ret = CDB___db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
00223                 if (ret == DB_VERIFY_BAD)
00224                         isbad = 1;
00225                 else
00226                         goto err;
00227         }
00228 
00229         /*
00230          * Verify inp[].  Return immediately if it returns DB_VERIFY_BAD;
00231          * further checks are dangerous.
00232          */
00233         if ((ret = __bam_vrfy_inp(dbp,
00234             vdp, h, pgno, &pip->entries, flags)) != 0)
00235                 goto err;
00236 
00237         if (F_ISSET(pip, VRFY_HAS_DUPS)) {
00238                 EPRINT((dbp->dbenv,
00239                     "Recno database has dups on page %lu", pgno));
00240                 ret = DB_VERIFY_BAD;
00241                 goto err;
00242         }
00243 
00244         /*
00245          * Walk through inp and see if the lengths of all the records are the
00246          * same--if so, this may be a fixed-length database, and we want to
00247          * save off this value.  We know inp to be safe if we've gotten this
00248          * far.
00249          */
00250         re_len_guess = 0;
00251         for (i = 0; i < NUM_ENT(h); i++) {
00252                 bk = GET_BKEYDATA(h, i);
00253                 /* KEYEMPTY.  Go on. */
00254                 if (B_DISSET(bk->type))
00255                         continue;
00256                 if (bk->type == B_OVERFLOW)
00257                         len = ((BOVERFLOW *)bk)->tlen;
00258                 else if (bk->type == B_KEYDATA)
00259                         len = bk->len;
00260                 else {
00261                         isbad = 1;
00262                         EPRINT((dbp->dbenv, "Nonsensical type for item %lu, page %lu",
00263                             i, pgno));
00264                         continue;
00265                 }
00266                 if (re_len_guess == 0)
00267                         re_len_guess = len;
00268 
00269                 /*
00270                  * Is this item's len the same as the last one's?  If not,
00271                  * reset to 0 and break--we don't have a single re_len.
00272                  * Otherwise, go on to the next item.
00273                  */
00274                 if (re_len_guess != len) {
00275                         re_len_guess = 0;
00276                         break;
00277                 }
00278         }
00279         pip->re_len = re_len_guess;
00280 
00281         /* Save off record count. */
00282         pip->rec_cnt = NUM_ENT(h);
00283 
00284 err:    if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
00285                 ret = t_ret;
00286         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : 0);
00287 }
00288 
00289 /*
00290  * CDB___bam_vrfy --
00291  *      Verify a btree leaf or internal page.
00292  *
00293  * PUBLIC: int CDB___bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t,
00294  * PUBLIC:     u_int32_t));
00295  */
00296 int
00297 CDB___bam_vrfy(dbp, vdp, h, pgno, flags)
00298         DB *dbp;
00299         VRFY_DBINFO *vdp;
00300         PAGE *h;
00301         db_pgno_t pgno;
00302         u_int32_t flags;
00303 {
00304         VRFY_PAGEINFO *pip;
00305         int ret, t_ret, isbad;
00306 
00307         isbad = 0;
00308         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00309                 return (ret);
00310 
00311         switch (TYPE(h)) {
00312         case P_IBTREE:
00313         case P_IRECNO:
00314         case P_LBTREE:
00315         case P_LDUP:
00316                 break;
00317         default:
00318                 TYPE_ERR_PRINT(dbp->dbenv, "CDB___bam_vrfy", pgno, TYPE(h));
00319                 DB_ASSERT(0);
00320                 ret = EINVAL;
00321                 goto err;
00322         }
00323 
00324         /*
00325          * Verify (and, if relevant, save off) page fields common to
00326          * all PAGEs.
00327          */
00328         if ((ret = CDB___db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) {
00329                 if (ret == DB_VERIFY_BAD)
00330                         isbad = 1;
00331                 else
00332                         goto err;
00333         }
00334 
00335         /*
00336          * The record count is, on internal pages, stored in an overloaded
00337          * next_pgno field.  Save it off;  we'll verify it when we check
00338          * overall database structure.  We could overload the field
00339          * in VRFY_PAGEINFO, too, but this seems gross, and space
00340          * is not at such a premium.
00341          */
00342         pip->rec_cnt = RE_NREC(h);
00343 
00344         /*
00345          * Verify inp[].
00346          */
00347         if (TYPE(h) == P_IRECNO) {
00348                 if ((ret = __ram_vrfy_inp(dbp,
00349                     vdp, h, pgno, &pip->entries, flags)) != 0)
00350                         goto err;
00351         } else if ((ret = __bam_vrfy_inp(dbp,
00352             vdp, h, pgno, &pip->entries, flags)) != 0) {
00353                 if (ret == DB_VERIFY_BAD)
00354                         isbad = 1;
00355                 else
00356                         goto err;
00357                 EPRINT((dbp->dbenv,
00358                     "item order check on page %lu unsafe: skipping", pgno));
00359         } else if (!LF_ISSET(DB_NOORDERCHK) && (ret =
00360             CDB___bam_vrfy_itemorder(dbp, vdp, h, pgno, 0, 0, 0, flags)) != 0) {
00361                 /*
00362                  * We know that the elements of inp are reasonable.
00363                  *
00364                  * Check that elements fall in the proper order.
00365                  */
00366                 if (ret == DB_VERIFY_BAD)
00367                         isbad = 1;
00368                 else
00369                         goto err;
00370         }
00371 
00372 err:    if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
00373                 ret = t_ret;
00374         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : 0);
00375 }
00376 
00377 /*
00378  * __ram_vrfy_inp --
00379  *      Verify that all entries in a P_IRECNO inp[] array are reasonable,
00380  *      and count them.  Note that P_LRECNO uses __bam_vrfy_inp;
00381  *      P_IRECNOs are a special, and simpler, case, since they have
00382  *      RINTERNALs rather than BKEYDATA/BINTERNALs.
00383  */
00384 static int
00385 __ram_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
00386         DB *dbp;
00387         VRFY_DBINFO *vdp;
00388         PAGE *h;
00389         db_pgno_t pgno;
00390         db_indx_t *nentriesp;
00391         u_int32_t flags;
00392 {
00393         RINTERNAL *ri;
00394         VRFY_CHILDINFO child;
00395         VRFY_PAGEINFO *pip;
00396         int ret, t_ret, isbad;
00397         db_indx_t himark, i, offset, nentries;
00398         u_int8_t *pagelayout, *p;
00399 
00400         isbad = 0;
00401         memset(&child, 0, sizeof(VRFY_CHILDINFO));
00402         nentries = 0;
00403         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00404                 return (ret);
00405 
00406         if (TYPE(h) != P_IRECNO) {
00407                 TYPE_ERR_PRINT(dbp->dbenv, "__ram_vrfy_inp", pgno, TYPE(h));
00408                 DB_ASSERT(0);
00409                 ret = EINVAL;
00410                 goto err;
00411         }
00412 
00413         himark = dbp->pgsize;
00414         if ((ret =
00415             CDB___os_malloc(dbp->dbenv, dbp->pgsize, NULL, &pagelayout)) != 0)
00416                 goto err;
00417         memset(pagelayout, 0, dbp->pgsize);
00418         for (i = 0; i < NUM_ENT(h); i++) {
00419                 if ((u_int8_t *)h->inp + i >= (u_int8_t *)h + himark) {
00420                         EPRINT((dbp->dbenv,
00421                             "Page %lu entries listing %lu overlaps data",
00422                             pgno, i));
00423                         ret = DB_VERIFY_BAD;
00424                         goto err;
00425                 }
00426                 offset = h->inp[i];
00427                 /*
00428                  * Check that the item offset is reasonable:  it points
00429                  * somewhere after the inp array and before the end of the
00430                  * page.
00431                  */
00432                 if (offset <= ((u_int8_t *)h->inp + i - (u_int8_t *)h) ||
00433                     offset > dbp->pgsize - RINTERNAL_SIZE) {
00434                         isbad = 1;
00435                         EPRINT((dbp->dbenv,
00436                             "Bad offset %lu at page %lu index %lu",
00437                             offset, pgno, i));
00438                         continue;
00439                 }
00440 
00441                 /* Update the high-water mark (what HOFFSET should be) */
00442                 if (offset < himark)
00443                         himark = offset;
00444 
00445                 nentries++;
00446 
00447                 /* Make sure this RINTERNAL is not multiply referenced. */
00448                 ri = GET_RINTERNAL(h, i);
00449                 if (pagelayout[offset] == 0) {
00450                         pagelayout[offset] = 1;
00451                         child.pgno = ri->pgno;
00452                         child.type = V_RECNO;
00453                         child.nrecs = ri->nrecs;
00454                         if ((ret = CDB___db_vrfy_childput(vdp, pgno, &child)) != 0)
00455                                 goto err;
00456                 } else {
00457                         EPRINT((dbp->dbenv,
00458                 "RINTERNAL structure at offset %lu, page %lu referenced twice",
00459                             offset, pgno));
00460                         isbad = 1;
00461                 }
00462         }
00463 
00464         for (p = pagelayout + himark;
00465             p < pagelayout + dbp->pgsize;
00466             p += RINTERNAL_SIZE)
00467                 if (*p != 1) {
00468                         EPRINT((dbp->dbenv,
00469                             "Gap between items at offset %lu, page %lu",
00470                             p - pagelayout, pgno));
00471                         isbad = 1;
00472                 }
00473 
00474         if (himark != HOFFSET(h)) {
00475                 EPRINT((dbp->dbenv, "Bad HOFFSET %lu, appears to be %lu",
00476                     HOFFSET(h), himark));
00477                 isbad = 1;
00478         }
00479 
00480         *nentriesp = nentries;
00481 
00482 err:    if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
00483                 ret = t_ret;
00484         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
00485 }
00486 
00487 /*
00488  * __bam_vrfy_inp --
00489  *      Verify that all entries in inp[] array are reasonable;
00490  *      count them.
00491  */
00492 static int
00493 __bam_vrfy_inp(dbp, vdp, h, pgno, nentriesp, flags)
00494         DB *dbp;
00495         VRFY_DBINFO *vdp;
00496         PAGE *h;
00497         db_pgno_t pgno;
00498         db_indx_t *nentriesp;
00499         u_int32_t flags;
00500 {
00501         BKEYDATA *bk;
00502         BOVERFLOW *bo;
00503         VRFY_CHILDINFO child;
00504         VRFY_PAGEINFO *pip;
00505         int isbad, initem, isdupitem, ret, t_ret;
00506         u_int32_t himark, offset; /* These would be db_indx_ts but for algnmt.*/
00507         db_indx_t i, endoff, nentries;
00508         u_int8_t *pagelayout;
00509 
00510         isbad = isdupitem = 0;
00511         nentries = 0;
00512         memset(&child, 0, sizeof(VRFY_CHILDINFO));
00513         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00514                 return (ret);
00515 
00516         switch (TYPE(h)) {
00517         case P_IBTREE:
00518         case P_LBTREE:
00519         case P_LDUP:
00520         case P_LRECNO:
00521                 break;
00522         default:
00523                 /*
00524                  * In the salvager, we might call this from a page which
00525                  * we merely suspect is a btree page.  Otherwise, it
00526                  * shouldn't get called--if it is, that's a verifier bug.
00527                  */
00528                 if (LF_ISSET(DB_SALVAGE))
00529                         break;
00530                 TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_inp", pgno, TYPE(h));
00531                 DB_ASSERT(0);
00532                 ret = EINVAL;
00533                 goto err;
00534         }
00535 
00536         /*
00537          * Loop through inp[], the array of items, until we either
00538          * run out of entries or collide with the data.  Keep track
00539          * of h_offset in himark.
00540          *
00541          * For each element in inp[i], make sure it references a region
00542          * that starts after the end of the inp array (as defined by
00543          * NUM_ENT(h)), ends before the beginning of the page, doesn't
00544          * overlap any other regions, and doesn't have a gap between
00545          * it and the region immediately after it.
00546          */
00547         himark = dbp->pgsize;
00548         if ((ret = CDB___os_malloc(dbp->dbenv,
00549             dbp->pgsize, NULL, &pagelayout)) != 0)
00550                 goto err;
00551         memset(pagelayout, 0, dbp->pgsize);
00552         for (i = 0; i < NUM_ENT(h); i++) {
00553 
00554                 ret = CDB___db_vrfy_inpitem(dbp,
00555                     h, pgno, i, 1, flags, &himark, &offset);
00556                 if (ret == DB_VERIFY_BAD) {
00557                         isbad = 1;
00558                         continue;
00559                 } else if (ret == DB_VERIFY_FATAL) {
00560                         isbad = 1;
00561                         goto err;
00562                 } else if (ret != 0)
00563                         DB_ASSERT(0);
00564 
00565                 /*
00566                  * We now have a plausible beginning for the item, and we know
00567                  * its length is safe.
00568                  *
00569                  * Mark the beginning and end in pagelayout so we can make sure
00570                  * items have no overlaps or gaps.
00571                  */
00572                 bk = GET_BKEYDATA(h, i);
00573 #define ITEM_BEGIN      1
00574 #define ITEM_END        2
00575                 if (pagelayout[offset] == 0)
00576                         pagelayout[offset] = ITEM_BEGIN;
00577                 else if (pagelayout[offset] == ITEM_BEGIN) {
00578                         /*
00579                          * Having two inp entries that point at the same patch
00580                          * of page is legal if and only if the page is
00581                          * a btree leaf and they're onpage duplicate keys--
00582                          * that is, if (i % P_INDX) == 0.
00583                          */
00584                         if ((i % P_INDX == 0) && (TYPE(h) == P_LBTREE)) {
00585                                 /* Flag for later. */
00586                                 F_SET(pip, VRFY_HAS_DUPS);
00587 
00588                                 /* Bump up nentries so we don't undercount. */
00589                                 nentries++;
00590 
00591                                 /*
00592                                  * We'll check to make sure the end is
00593                                  * equal, too.
00594                                  */
00595                                 isdupitem = 1;
00596                         } else {
00597                                 isbad = 1;
00598                                 EPRINT((dbp->dbenv, "Duplicated item %lu on page %lu",
00599                                     i, pgno));
00600                         }
00601                 }
00602 
00603                 /*
00604                  * Mark the end.  Its location varies with the page type
00605                  * and the item type.
00606                  *
00607                  * If the end already has a sign other than 0, do nothing--
00608                  * it's an overlap that we'll catch later.
00609                  */
00610                 switch(B_TYPE(bk->type)) {
00611                 case B_KEYDATA:
00612                         if (TYPE(h) == P_IBTREE)
00613                                 /* It's a BINTERNAL. */
00614                                 endoff = offset + BINTERNAL_SIZE(bk->len) - 1;
00615                         else
00616                                 endoff = offset + BKEYDATA_SIZE(bk->len) - 1;
00617                         break;
00618                 case B_DUPLICATE:
00619                         /*
00620                          * Flag that we have dups; we'll check whether
00621                          * that's okay during the structure check.
00622                          */
00623                         F_SET(pip, VRFY_HAS_DUPS);
00624                         /* FALLTHROUGH */
00625                 case B_OVERFLOW:
00626                         /*
00627                          * Overflow entries on internal pages are stored
00628                          * as the _data_ of a BINTERNAL;  overflow entries
00629                          * on leaf pages are stored as the entire entry.
00630                          */
00631                         endoff = offset +
00632                             ((TYPE(h) == P_IBTREE) ?
00633                             BINTERNAL_SIZE(BOVERFLOW_SIZE) :
00634                             BOVERFLOW_SIZE) - 1;
00635                         break;
00636                 default:
00637                         /*
00638                          * We'll complain later;  for now, just mark
00639                          * a minimum.
00640                          */
00641                         endoff = offset + BKEYDATA_SIZE(0) - 1;
00642                         break;
00643                 }
00644 
00645                 /*
00646                  * If this is an onpage duplicate key we've seen before,
00647                  * the end had better coincide too.
00648                  */
00649                 if (isdupitem && pagelayout[endoff] != ITEM_END) {
00650                         EPRINT((dbp->dbenv, "Duplicated item %lu on page %lu", i,
00651                             pgno));
00652                         isbad = 1;
00653                 } else if (pagelayout[endoff] == 0)
00654                         pagelayout[endoff] = ITEM_END;
00655                 isdupitem = 0;
00656 
00657                 /*
00658                  * There should be no deleted items in a quiescent tree,
00659                  * except in recno.
00660                  */
00661                 if (B_DISSET(bk->type) && TYPE(h) != P_LRECNO) {
00662                         isbad = 1;
00663                         EPRINT((dbp->dbenv,
00664                             "Item %lu on page %lu marked deleted", i, pgno));
00665                 }
00666 
00667                 /*
00668                  * Check the type and such of bk--make sure it's reasonable
00669                  * for the pagetype.
00670                  */
00671                 switch (B_TYPE(bk->type)) {
00672                 case B_KEYDATA:
00673                         /*
00674                          * This is a normal, non-overflow BKEYDATA or BINTERNAL.
00675                          * The only thing to check is the len, and that's
00676                          * already been done.
00677                          */
00678                         break;
00679                 case B_DUPLICATE:
00680                         if (TYPE(h) == P_IBTREE) {
00681                                 isbad = 1;
00682                                 EPRINT((dbp->dbenv,
00683         "Duplicate page referenced by internal btree page %lu at item %lu",
00684                                     pgno, i));
00685                                 break;
00686                         } else if (TYPE(h) == P_LRECNO) {
00687                                 isbad = 1;
00688                                 EPRINT((dbp->dbenv,
00689         "Duplicate page referenced by recno page %lu at item %lu",
00690                                     pgno, i));
00691                                 break;
00692                         }
00693                         /* FALLTHROUGH */
00694                 case B_OVERFLOW:
00695                         bo = (TYPE(h) == P_IBTREE) ?
00696                             (BOVERFLOW *)(((BINTERNAL *)bk)->data) :
00697                             (BOVERFLOW *)bk;
00698 
00699                         if (B_TYPE(bk->type) == B_OVERFLOW)
00700                                 /* Make sure tlen is reasonable. */
00701                                 if (bo->tlen > dbp->pgsize * vdp->last_pgno) {
00702                                         isbad = 1;
00703                                         EPRINT((dbp->dbenv,
00704                                 "Impossible tlen %lu, item %lu, page %lu",
00705                                             bo->tlen, i, pgno));
00706                                         /* Don't save as a child. */
00707                                         break;
00708                                 }
00709 
00710                         if (!IS_VALID_PGNO(bo->pgno) || bo->pgno == pgno ||
00711                             bo->pgno == PGNO_INVALID) {
00712                                 isbad = 1;
00713                                 EPRINT((dbp->dbenv,
00714                                     "Offpage item %lu, page %lu has bad pgno",
00715                                     i, pgno));
00716                                 /* Don't save as a child. */
00717                                 break;
00718                         }
00719 
00720                         child.pgno = bo->pgno;
00721                         child.type = (B_TYPE(bk->type) == B_OVERFLOW ?
00722                             V_OVERFLOW : V_DUPLICATE);
00723                         child.tlen = bo->tlen;
00724                         if ((ret = CDB___db_vrfy_childput(vdp, pgno, &child)) != 0)
00725                                 goto err;
00726                         break;
00727                 default:
00728                         isbad = 1;
00729                         EPRINT((dbp->dbenv,
00730                             "Item %lu on page %lu of invalid type %lu",
00731                             i, pgno));
00732                         break;
00733                 }
00734         }
00735 
00736         /*
00737          * Now, loop through and make sure the items are contiguous and
00738          * non-overlapping.
00739          */
00740         initem = 0;
00741         for (i = himark; i < dbp->pgsize; i++)
00742                 if (initem == 0)
00743                         switch (pagelayout[i]) {
00744                         case 0:
00745                                 /* May be just for alignment. */
00746                                 if (i != ALIGN(i, 4))
00747                                         continue;
00748 
00749                                 isbad = 1;
00750                                 EPRINT((dbp->dbenv,
00751                                     "Gap between items, page %lu offset %lu",
00752                                     pgno, i));
00753                                 /* Find the end of the gap */
00754                                 for ( ; pagelayout[i + 1] == 0 &&
00755                                     (size_t)(i + 1) < dbp->pgsize; i++)
00756                                         ;
00757                                 break;
00758                         case ITEM_BEGIN:
00759                                 /* We've found an item. Check its alignment. */
00760                                 if (i != ALIGN(i, 4)) {
00761                                         isbad = 1;
00762                                         EPRINT((dbp->dbenv,
00763                                             "Offset %lu page %lu unaligned",
00764                                             i, pgno));
00765                                 }
00766                                 initem = 1;
00767                                 nentries++;
00768                                 break;
00769                         case ITEM_END:
00770                                 /*
00771                                  * We've hit the end of an item even though
00772                                  * we don't think we're in one;  must
00773                                  * be an overlap.
00774                                  */
00775                                 isbad = 1;
00776                                 EPRINT((dbp->dbenv,
00777                                     "Overlapping items, page %lu offset %lu",
00778                                     pgno, i));
00779                                 break;
00780                         default:
00781                                 /* Should be impossible. */
00782                                 DB_ASSERT(0);
00783                                 ret = EINVAL;
00784                                 goto err;
00785                         }
00786                 else
00787                         switch (pagelayout[i]) {
00788                         case 0:
00789                                 /* In the middle of an item somewhere. Okay. */
00790                                 break;
00791                         case ITEM_END:
00792                                 /* End of an item; switch to out-of-item mode.*/
00793                                 initem = 0;
00794                                 break;
00795                         case ITEM_BEGIN:
00796                                 /*
00797                                  * Hit a second item beginning without an
00798                                  * end.  Overlap.
00799                                  */
00800                                 isbad = 1;
00801                                 EPRINT((dbp->dbenv,
00802                                     "Overlapping items, page %lu offset %lu",
00803                                     pgno, i));
00804                                 break;
00805                         }
00806 
00807         (void)CDB___os_free(pagelayout, dbp->pgsize);
00808 
00809         /* Verify HOFFSET. */
00810         if (himark != HOFFSET(h)) {
00811                 EPRINT((dbp->dbenv, "Bad HOFFSET %lu, appears to be %lu",
00812                     HOFFSET(h), himark));
00813                 isbad = 1;
00814         }
00815 
00816 err:    if (nentriesp != NULL)
00817                 *nentriesp = nentries;
00818 
00819         if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
00820                 ret = t_ret;
00821 
00822         return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
00823 }
00824 
00825 /*
00826  * CDB___bam_vrfy_itemorder --
00827  *      Make sure the items on a page sort correctly.
00828  *
00829  *      Assumes that NUM_ENT(h) and inp[0]..inp[NUM_ENT(h) - 1] are
00830  *      reasonable;  be sure that __bam_vrfy_inp has been called first.
00831  *
00832  *      If ovflok is set, it also assumes that overflow page chains
00833  *      hanging off the current page have been sanity-checked, and so we
00834  *      can use CDB___bam_cmp to verify their ordering.  If it is not set,
00835  *      and we run into an overflow page, carp and return DB_VERIFY_BAD;
00836  *      we shouldn't be called if any exist.
00837  *
00838  * PUBLIC: int CDB___bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, PAGE *,
00839  * PUBLIC:     db_pgno_t, u_int32_t, int, int, u_int32_t));
00840  */
00841 int
00842 CDB___bam_vrfy_itemorder(dbp, vdp, h, pgno, nentries, ovflok, hasdups, flags)
00843         DB *dbp;
00844         VRFY_DBINFO *vdp;
00845         PAGE *h;
00846         db_pgno_t pgno;
00847         u_int32_t nentries;
00848         int ovflok, hasdups;
00849         u_int32_t flags;
00850 {
00851         DBT dbta, dbtb, dup1, dup2, *p1, *p2, *tmp;
00852         BTREE *bt;
00853         BINTERNAL *bi;
00854         BKEYDATA *bk;
00855         BOVERFLOW *bo;
00856         VRFY_PAGEINFO *pip;
00857         db_indx_t i;
00858         int cmp, freedup1, freedup2, isbad, ret, t_ret;
00859         int (*dupfunc) __P((const DBT *, const DBT *));
00860         int (*func) __P((const DBT *, const DBT *));
00861         void *buf1, *buf2, *tmpbuf;
00862 
00863         /*
00864          * We need to work in the ORDERCHKONLY environment where we might
00865          * not have a pip, but we also may need to work in contexts where
00866          * NUM_ENT isn't safe.
00867          */
00868         if (vdp != NULL) {
00869                 if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00870                         return (ret);
00871                 nentries = pip->entries;
00872         } else
00873                 pip = NULL;
00874 
00875         ret = isbad = 0;
00876         bo = NULL;                      /* Shut up compiler. */
00877 
00878         memset(&dbta, 0, sizeof(DBT));
00879         F_SET(&dbta, DB_DBT_REALLOC);
00880 
00881         memset(&dbtb, 0, sizeof(DBT));
00882         F_SET(&dbtb, DB_DBT_REALLOC);
00883 
00884         buf1 = buf2 = NULL;
00885 
00886         DB_ASSERT(!LF_ISSET(DB_NOORDERCHK));
00887 
00888         dupfunc = (dbp->dup_compare == NULL) ? CDB___bam_defcmp : dbp->dup_compare;
00889         if (TYPE(h) == P_LDUP)
00890                 func = dupfunc;
00891         else {
00892                 func = CDB___bam_defcmp;
00893                 if (dbp->bt_internal != NULL) {
00894                         bt = (BTREE *)dbp->bt_internal;
00895                         if (bt->bt_compare != NULL)
00896                                 func = bt->bt_compare;
00897                 }
00898         }
00899 
00900         /*
00901          * We alternate our use of dbta and dbtb so that we can walk
00902          * through the page key-by-key without copying a dbt twice.
00903          * p1 is always the dbt for index i - 1, and p2 for index i.
00904          */
00905         p1 = &dbta;
00906         p2 = &dbtb;
00907 
00908         /*
00909          * Loop through the entries.  nentries ought to contain the
00910          * actual count, and so is a safe way to terminate the loop;  whether
00911          * we inc. by one or two depends on whether we're a leaf page--
00912          * on a leaf page, we care only about keys.  On internal pages
00913          * and LDUP pages, we want to check the order of all entries.
00914          *
00915          * Note that on IBTREE pages, we start with item 1, since item
00916          * 0 doesn't get looked at by CDB___bam_cmp.
00917          */
00918         for (i = (TYPE(h) == P_IBTREE) ? 1 : 0; i < nentries;
00919             i += (TYPE(h) == P_LBTREE) ? P_INDX : O_INDX) {
00920                 /*
00921                  * Put key i-1, now in p2, into p1, by swapping DBTs and bufs.
00922                  */
00923                 tmp = p1;
00924                 p1 = p2;
00925                 p2 = tmp;
00926                 tmpbuf = buf1;
00927                 buf1 = buf2;
00928                 buf2 = tmpbuf;
00929 
00930                 /*
00931                  * Get key i into p2.
00932                  */
00933                 switch (TYPE(h)) {
00934                 case P_IBTREE:
00935                         bi = GET_BINTERNAL(h, i);
00936                         if (B_TYPE(bi->type) == B_OVERFLOW) {
00937                                 bo = (BOVERFLOW *)(bi->data);
00938                                 goto overflow;
00939                         } else {
00940                                 p2->data = bi->data;
00941                                 p2->size = bi->len;
00942                         }
00943 
00944                         /*
00945                          * The leftmost key on an internal page must be
00946                          * len 0, since it's just a placeholder and
00947                          * automatically sorts less than all keys.
00948                          *
00949                          * XXX
00950                          * This criterion does not currently hold!
00951                          * See todo list item #1686.  Meanwhile, it's harmless
00952                          * to just not check for it.
00953                          */
00954 #if 0
00955                         if (i == 0 && bi->len != 0) {
00956                                 isbad = 1;
00957                                 EPRINT((dbp->dbenv,
00958                         "Lowest key on internal page %lu of nonzero length",
00959                                     pgno));
00960                         }
00961 #endif
00962                         break;
00963                 case P_LBTREE:
00964                 case P_LDUP:
00965                         bk = GET_BKEYDATA(h, i);
00966                         if (B_TYPE(bk->type) == B_OVERFLOW) {
00967                                 bo = (BOVERFLOW *)bk;
00968                                 goto overflow;
00969                         } else {
00970                                 p2->data = bk->data;
00971                                 p2->size = bk->len;
00972                         }
00973                         break;
00974                 default:
00975                         /*
00976                          * This means our caller screwed up and sent us
00977                          * an inappropriate page.
00978                          */
00979                         TYPE_ERR_PRINT(dbp->dbenv,
00980                             "CDB___bam_vrfy_itemorder", pgno, TYPE(h))
00981                         DB_ASSERT(0);
00982                         ret = EINVAL;
00983                         goto err;
00984                         /* NOTREACHED */
00985                 }
00986 
00987                 if (0) {
00988                         /*
00989                          * If ovflok != 1, we can't safely go chasing
00990                          * overflow pages with the normal routines now;
00991                          * they might be unsafe or nonexistent.  Mark this
00992                          * page as incomplete and return.
00993                          *
00994                          * Note that we don't need to worry about freeing
00995                          * buffers, since they can't have been allocated
00996                          * if overflow items are unsafe.
00997                          */
00998 overflow:               if (!ovflok) {
00999                                 F_SET(pip, VRFY_INCOMPLETE);
01000                                 goto err;
01001                         }
01002 
01003                         /*
01004                          * Overflow items are safe to chase.  Do so.
01005                          * Fetch the overflow item into p2->data,
01006                          * NULLing it or reallocing it as appropriate.
01007                          *
01008                          * (We set p2->data to buf2 before the call
01009                          * so we're sure to realloc if we can and if p2
01010                          * was just pointing at a non-overflow item.)
01011                          */
01012                         p2->data = buf2;
01013                         if ((ret = CDB___db_goff(dbp,
01014                             p2, bo->tlen, bo->pgno, NULL, NULL)) != 0) {
01015                                 isbad = 1;
01016                                 EPRINT((dbp->dbenv,
01017                             "Error %lu in fetching overflow item %lu, page %lu",
01018                                     ret, i, pgno));
01019                         }
01020                         /* In case it got realloc'ed and thus changed. */
01021                         buf2 = p2->data;
01022                 }
01023 
01024                 /* Compare with the last key. */
01025                 if (p1->data != NULL && p2->data != NULL) {
01026                         cmp = func(p1, p2);
01027 
01028                         /* comparison succeeded */
01029                         if (cmp > 0) {
01030                                 isbad = 1;
01031                                 EPRINT((dbp->dbenv,
01032                                     "Out-of-order key, page %lu item %lu",
01033                                     pgno, i));
01034                                 /* proceed */
01035                         } else if (cmp == 0) {
01036                                 /*
01037                                  * If they compared equally, this
01038                                  * had better be a (sub)database with dups.
01039                                  * Mark it so we can check during the
01040                                  * structure check.
01041                                  */
01042                                 if (pip != NULL)
01043                                         F_SET(pip, VRFY_HAS_DUPS);
01044                                 else if (hasdups == 0) {
01045                                         isbad = 1;
01046                                         EPRINT((dbp->dbenv,
01047         "Database with no duplicates has duplicated keys on page %lu", pgno));
01048                                 }
01049 
01050                                 /*
01051                                  * If we're a btree leaf, check to see
01052                                  * if the data items of these on-page dups are
01053                                  * in sorted order.  If not, flag this, so
01054                                  * that we can make sure during the
01055                                  * structure checks that the DUPSORT flag
01056                                  * is unset.
01057                                  *
01058                                  * At this point i points to a duplicate key.
01059                                  * Compare the datum before it (same key)
01060                                  * to the datum after it, i.e. i-1 to i+1.
01061                                  */
01062                                 if (TYPE(h) == P_LBTREE) {
01063                                         /*
01064                                          * Unsafe;  continue and we'll pick
01065                                          * up the bogus nentries later.
01066                                          */
01067                                         if (i + 1 >= (db_indx_t)nentries)
01068                                                 continue;
01069 
01070                                         /*
01071                                          * We don't bother with clever memory
01072                                          * management with on-page dups,
01073                                          * as it's only really a big win
01074                                          * in the overflow case, and overflow
01075                                          * dups are probably (?) rare.
01076                                          */
01077                                         if (((ret = __bam_safe_getdata(dbp,
01078                                             h, i - 1, ovflok, &dup1,
01079                                             &freedup1)) != 0) ||
01080                                             ((ret = __bam_safe_getdata(dbp,
01081                                             h, i + 1, ovflok, &dup2,
01082                                             &freedup2)) != 0))
01083                                                 goto err;
01084 
01085                                         /*
01086                                          * If either of the data are NULL,
01087                                          * it's because they're overflows and
01088                                          * it's not safe to chase them now.
01089                                          * Mark an incomplete and return.
01090                                          */
01091                                         if (dup1.data == NULL ||
01092                                             dup2.data == NULL) {
01093                                                 DB_ASSERT(!ovflok);
01094                                                 F_SET(pip, VRFY_INCOMPLETE);
01095                                                 goto err;
01096                                         }
01097 
01098                                         /*
01099                                          * If the dups are out of order,
01100                                          * flag this.  It's not an error
01101                                          * until we do the structure check
01102                                          * and see whether DUPSORT is set.
01103                                          */
01104                                         if (dupfunc(&dup1, &dup2) > 0)
01105                                                 F_SET(pip, VRFY_DUPS_UNSORTED);
01106 
01107                                         if (freedup1)
01108                                                 CDB___os_free(dup1.data, 0);
01109                                         if (freedup2)
01110                                                 CDB___os_free(dup2.data, 0);
01111                                 }
01112                         }
01113                 }
01114         }
01115 
01116 err:    if (pip != NULL &&
01117             ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0) && ret == 0)
01118                 ret = t_ret;
01119 
01120         if (buf1 != NULL)
01121                 CDB___os_free(buf1, 0);
01122         if (buf2 != NULL)
01123                 CDB___os_free(buf2, 0);
01124 
01125         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
01126 }
01127 
01128 /*
01129  * CDB___bam_vrfy_structure --
01130  *      Verify the tree structure of a btree database (including the master
01131  *      database containing subdbs).
01132  *
01133  * PUBLIC: int CDB___bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t,
01134  * PUBLIC:     u_int32_t));
01135  */
01136 int
01137 CDB___bam_vrfy_structure(dbp, vdp, meta_pgno, flags)
01138         DB *dbp;
01139         VRFY_DBINFO *vdp;
01140         db_pgno_t meta_pgno;
01141         u_int32_t flags;
01142 {
01143         DB *pgset;
01144         VRFY_PAGEINFO *mip, *rip;
01145         db_pgno_t root, p;
01146         int t_ret, ret;
01147         u_int32_t nrecs, level, relen, stflags;
01148 
01149         mip = rip = 0;
01150         pgset = vdp->pgset;
01151 
01152         if ((ret = CDB___db_vrfy_getpageinfo(vdp, meta_pgno, &mip)) != 0)
01153                 return (ret);
01154 
01155         if ((ret = CDB___db_vrfy_pgset_get(pgset, meta_pgno, (int *)&p)) != 0)
01156                 goto err;
01157         if (p != 0) {
01158                 EPRINT((dbp->dbenv,
01159                     "Btree metadata page number %lu observed twice",
01160                     meta_pgno));
01161                 ret = DB_VERIFY_BAD;
01162                 goto err;
01163         }
01164         if ((ret = CDB___db_vrfy_pgset_inc(pgset, meta_pgno)) != 0)
01165                 goto err;
01166 
01167         root = mip->root;
01168 
01169         if (root == 0) {
01170                 EPRINT((dbp->dbenv,
01171                     "Btree metadata page %lu has no root", meta_pgno));
01172                 ret = DB_VERIFY_BAD;
01173                 goto err;
01174         }
01175 
01176         if ((ret = CDB___db_vrfy_getpageinfo(vdp, root, &rip)) != 0)
01177                 goto err;
01178 
01179         switch (rip->type) {
01180         case P_IBTREE:
01181         case P_LBTREE:
01182                 stflags = flags | ST_TOPLEVEL;
01183                 if (F_ISSET(mip, VRFY_HAS_DUPS))
01184                         stflags |= ST_DUPOK;
01185                 if (F_ISSET(mip, VRFY_HAS_DUPSORT))
01186                         stflags |= ST_DUPSORT;
01187                 if (F_ISSET(mip, VRFY_HAS_RECNUMS))
01188                         stflags |= ST_RECNUM;
01189                 ret = CDB___bam_vrfy_subtree(dbp,
01190                     vdp, root, NULL, NULL, stflags, NULL, NULL, NULL);
01191                 break;
01192         case P_IRECNO:
01193         case P_LRECNO:
01194                 stflags = flags | ST_RECNUM | ST_IS_RECNO | ST_TOPLEVEL;
01195                 if (mip->re_len > 0)
01196                         stflags |= ST_RELEN;
01197                 if ((ret = CDB___bam_vrfy_subtree(dbp, vdp,
01198                     root, NULL, NULL, stflags, &level, &nrecs, &relen)) != 0)
01199                         goto err;
01200                 /*
01201                  * Even if mip->re_len > 0, re_len may come back zero if the
01202                  * tree is empty.  It should be okay to just skip the check in
01203                  * this case, as if there are any non-deleted keys at all,
01204                  * that should never happen.
01205                  */
01206                 if (mip->re_len > 0 && relen > 0 && mip->re_len != relen) {
01207                         EPRINT((dbp->dbenv,
01208                  "Recno database with meta page %lu has bad re_len %lu",
01209                             meta_pgno, relen));
01210                         ret = DB_VERIFY_BAD;
01211                         goto err;
01212                 }
01213                 ret = 0;
01214                 break;
01215         case P_LDUP:
01216                 EPRINT((dbp->dbenv,
01217                     "Duplicate tree referenced from metadata page %lu",
01218                     meta_pgno));
01219                 ret = DB_VERIFY_BAD;
01220                 break;
01221         default:
01222                 EPRINT((dbp->dbenv, "%s%s", "Btree root of incorrect type ",
01223                     "%lu specified on meta page %lu", rip->type, meta_pgno));
01224                 ret = DB_VERIFY_BAD;
01225                 break;
01226         }
01227 
01228 err:    if (mip != NULL &&
01229             ((t_ret = CDB___db_vrfy_putpageinfo(vdp, mip)) != 0) && ret == 0)
01230                 t_ret = ret;
01231         if (rip != NULL &&
01232             ((t_ret = CDB___db_vrfy_putpageinfo(vdp, rip)) != 0) && ret == 0)
01233                 t_ret = ret;
01234         return (ret);
01235 }
01236 
01237 /*
01238  * CDB___bam_vrfy_subtree--
01239  *      Verify a subtree (or entire) btree with specified root.
01240  *
01241  *      Note that this is public because it must be called to verify
01242  *      offpage dup trees, including from hash.
01243  *
01244  * PUBLIC: int CDB___bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *,
01245  * PUBLIC:     void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *));
01246  */
01247 int
01248 CDB___bam_vrfy_subtree(dbp,
01249     vdp, pgno, l, r, flags, levelp, nrecsp, relenp)
01250         DB *dbp;
01251         VRFY_DBINFO *vdp;
01252         db_pgno_t pgno;
01253         void *l, *r;
01254         u_int32_t flags, *levelp, *nrecsp, *relenp;
01255 {
01256         BINTERNAL *li, *ri, *lp, *rp;
01257         DB *pgset;
01258         PAGE *h;
01259         VRFY_CHILDINFO *child;
01260         VRFY_PAGEINFO *pip;
01261         db_recno_t nrecs, child_nrecs;
01262         db_indx_t i;
01263         int ret, t_ret, isbad, toplevel, p;
01264         u_int32_t level, child_level, stflags, child_relen, relen;
01265         DBC *cc;
01266 
01267         ret = isbad = 0;
01268         nrecs = 0;
01269         h = NULL;
01270         relen = 0;
01271         rp = (BINTERNAL *)r;
01272         lp = (BINTERNAL *)l;
01273 
01274         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
01275                 return (ret);
01276 
01277         cc = NULL;
01278         level = pip->bt_level;
01279 
01280         toplevel = LF_ISSET(ST_TOPLEVEL);
01281         LF_CLR(ST_TOPLEVEL);
01282 
01283         /*
01284          * We are recursively descending a btree, starting from the root
01285          * and working our way out to the leaves.
01286          *
01287          * There are four cases we need to deal with:
01288          *      1. pgno is a recno leaf page.  Any children are overflows.
01289          *      2. pgno is a duplicate leaf page.  Any children
01290          *         are overflow pages;  traverse them, and then return
01291          *         level and nrecs.
01292          *      3. pgno is an ordinary leaf page.  Check whether dups are
01293          *         allowed, and if so, traverse any off-page dups or
01294          *         overflows.  Then return nrecs and level.
01295          *      4. pgno is a recno internal page.  Recursively check any
01296          *         child pages, making sure their levels are one lower
01297          *         and their nrecs sum to ours.
01298          *      5. pgno is a btree internal page.  Same as #4, plus we
01299          *         must verify that for each pair of BINTERNAL entries
01300          *         N and N+1, the leftmost item on N's child sorts
01301          *         greater than N, and the rightmost item on N's child
01302          *         sorts less than N+1.
01303          *
01304          * Furthermore, in any sorted page type (P_LDUP, P_LBTREE, P_IBTREE),
01305          * we need to verify the internal sort order is correct if,
01306          * due to overflow items, we were not able to do so earlier.
01307          */
01308         switch (pip->type) {
01309         case P_LRECNO:
01310         case P_LDUP:
01311         case P_LBTREE:
01312                 /*
01313                  * Cases 1, 2 and 3 (overflow pages are common to all three);
01314                  * traverse child list, looking for overflows.
01315                  */
01316                 if ((ret = CDB___db_vrfy_childcursor(vdp, &cc)) != 0)
01317                         goto err;
01318                 for (ret = CDB___db_vrfy_ccset(cc, pgno, &child); ret == 0;
01319                     ret = CDB___db_vrfy_ccnext(cc, &child))
01320                         if (child->type == V_OVERFLOW &&
01321                             (ret = CDB___db_vrfy_ovfl_structure(dbp, vdp,
01322                             child->pgno, child->tlen,
01323                             flags | ST_OVFL_LEAF)) != 0) {
01324                                 if (ret == DB_VERIFY_BAD)
01325                                         isbad = 1;
01326                                 else
01327                                         goto done;
01328                         }
01329 
01330                 if ((ret = CDB___db_vrfy_ccclose(cc)) != 0)
01331                         goto err;
01332                 cc = NULL;
01333 
01334                 /* Case 1 */
01335                 if (pip->type == P_LRECNO) {
01336                         if (!LF_ISSET(ST_IS_RECNO) &&
01337                             !(LF_ISSET(ST_DUPOK) && !LF_ISSET(ST_DUPSORT))) {
01338                                 isbad = 1;
01339                                 EPRINT((dbp->dbenv,
01340                                     "Recno leaf page %lu in non-recno tree",
01341                                     pgno));
01342                                 goto done;
01343                         }
01344                         goto leaf;
01345                 } else if (LF_ISSET(ST_IS_RECNO)){
01346                         /*
01347                          * It's a non-recno leaf.  Had better not be a recno
01348                          * subtree.
01349                          */
01350                         isbad = 1;
01351                         EPRINT((dbp->dbenv,
01352                             "Non-recno leaf page %lu in recno tree",
01353                             pgno));
01354                         goto done;
01355                 }
01356 
01357                 /* Case 2--no more work. */
01358                 if (pip->type == P_LDUP)
01359                         goto leaf;
01360 
01361                 /* Case 3 */
01362 
01363                 /* Check if we have any dups. */
01364                 if (F_ISSET(pip, VRFY_HAS_DUPS)) {
01365                         /* If dups aren't allowed in this btree, trouble. */
01366                         if (!LF_ISSET(ST_DUPOK)) {
01367                                 isbad = 1;
01368                                 EPRINT((dbp->dbenv,
01369                                     "Duplicates on page %lu in non-dup btree",
01370                                     pgno));
01371                         } else {
01372                                 /*
01373                                  * We correctly have dups.  If any are off-page,
01374                                  * traverse those btrees recursively.
01375                                  */
01376                                 if ((ret =
01377                                     CDB___db_vrfy_childcursor(vdp, &cc)) != 0)
01378                                         goto err;
01379                                 for (ret = CDB___db_vrfy_ccset(cc, pgno, &child);
01380                                     ret == 0;
01381                                     ret = CDB___db_vrfy_ccnext(cc, &child)) {
01382                                         stflags = flags | ST_RECNUM;
01383                                         /* Skip any overflow entries. */
01384                                         if (child->type == V_DUPLICATE) {
01385                                                 if ((ret = CDB___db_vrfy_duptype(
01386                                                     dbp, vdp, child->pgno,
01387                                                     stflags)) != 0) {
01388                                                         isbad = 1;
01389                                                         /* Next child. */
01390                                                         continue;
01391                                                 }
01392                                                 if ((ret = CDB___bam_vrfy_subtree(
01393                                                     dbp, vdp, child->pgno, NULL,
01394                                                     NULL, stflags, NULL, NULL,
01395                                                     NULL)) != 0) {
01396                                                         if (ret !=
01397                                                             DB_VERIFY_BAD)
01398                                                                 goto err;
01399                                                         else
01400                                                                 isbad = 1;
01401                                                 }
01402                                         }
01403                                 }
01404 
01405                                 if ((ret = CDB___db_vrfy_ccclose(cc)) != 0)
01406                                         goto err;
01407                                 cc = NULL;
01408 
01409                                 /*
01410                                  * If VRFY_DUPS_UNSORTED is set,
01411                                  * ST_DUPSORT had better not be.
01412                                  */
01413                                 if (F_ISSET(pip, VRFY_DUPS_UNSORTED) &&
01414                                     LF_ISSET(ST_DUPSORT)) {
01415                                         EPRINT((dbp->dbenv,
01416                     "Unsorted duplicate set at page %lu in sorted-dup database",
01417                                             pgno));
01418                                         isbad = 1;
01419                                 }
01420                         }
01421                 }
01422                 goto leaf;
01423                 break;
01424         case P_IBTREE:
01425         case P_IRECNO:
01426                 /* We handle these below. */
01427                 break;
01428         default:
01429                 /* This should never get called if there's any doubt. */
01430                 TYPE_ERR_PRINT(dbp->dbenv, "CDB___bam_vrfy_subtree", pgno, pip->type);
01431                 DB_ASSERT(0);
01432                 ret = EINVAL;
01433                 goto done;
01434                 /* NOTREACHED */
01435         }
01436 
01437         /*
01438          * Cases 4 & 5: This is a btree or recno internal page.  For each child,
01439          * recurse, keeping a running count of nrecs and making sure the level
01440          * is always reasonable.
01441          */
01442         if ((ret = CDB___db_vrfy_childcursor(vdp, &cc)) != 0)
01443                 goto err;
01444         for (ret = CDB___db_vrfy_ccset(cc, pgno, &child); ret == 0;
01445             ret = CDB___db_vrfy_ccnext(cc, &child))
01446                 if (child->type == V_RECNO) {
01447                         if (pip->type != P_IRECNO) {
01448                                 TYPE_ERR_PRINT(dbp->dbenv, "CDB___bam_vrfy_subtree",
01449                                     pgno, pip->type);
01450                                 DB_ASSERT(0);
01451                                 ret = EINVAL;
01452                                 goto err;
01453                         }
01454                         if ((ret = CDB___bam_vrfy_subtree(dbp, vdp, child->pgno,
01455                             NULL, NULL, flags, &child_level, &child_nrecs,
01456                             &child_relen)) != 0) {
01457                                 if (ret != DB_VERIFY_BAD)
01458                                         goto done;
01459                                 else
01460                                         isbad = 1;
01461                         }
01462 
01463                         if (LF_ISSET(ST_RELEN)) {
01464                                 if (relen == 0)
01465                                         relen = child_relen;
01466                                 /*
01467                                  * child_relen may be zero if the child subtree
01468                                  * is empty.
01469                                  */
01470                                 else if (child_relen > 0 &&
01471                                     relen != child_relen) {
01472                                         isbad = 1;
01473                                         EPRINT((dbp->dbenv,
01474                                            "Recno page %lu returned bad re_len",
01475                                             child->pgno));
01476                                 }
01477                                 if (relenp)
01478                                         *relenp = relen;
01479                         }
01480                         if (LF_ISSET(ST_RECNUM))
01481                                 nrecs += child_nrecs;
01482                         if (level != child_level + 1) {
01483                                 isbad = 1;
01484                                 EPRINT((dbp->dbenv, "%s%lu%s%lu%s%lu",
01485                                     "Recno level incorrect on page ",
01486                                     child->pgno, ": got ", child_level,
01487                                     ", expected ", level - 1));
01488                         }
01489                 } else if (child->type == V_OVERFLOW &&
01490                     (ret = CDB___db_vrfy_ovfl_structure(dbp, vdp,
01491                     child->pgno, child->tlen, flags)) != 0) {
01492                         if (ret == DB_VERIFY_BAD)
01493                                 isbad = 1;
01494                         else
01495                                 goto done;
01496                 }
01497 
01498         if ((ret = CDB___db_vrfy_ccclose(cc)) != 0)
01499                 goto err;
01500         cc = NULL;
01501 
01502         /* We're done with case 4. */
01503         if (pip->type == P_IRECNO)
01504                 goto done;
01505 
01506         /*
01507          * Case 5.  Btree internal pages.
01508          * As described above, we need to iterate through all the
01509          * items on the page and make sure that our children sort appropriately
01510          * with respect to them.
01511          *
01512          * For each entry, li will be the "left-hand" key for the entry
01513          * itself, which must sort lower than all entries on its child;
01514          * ri will be the key to its right, which must sort greater.
01515          */
01516         if (h == NULL && (ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
01517                 goto err;
01518         for (i = 0; i < pip->entries; i += O_INDX) {
01519                 li = GET_BINTERNAL(h, i);
01520                 ri = (i + O_INDX < pip->entries) ?
01521                     GET_BINTERNAL(h, i + O_INDX) : NULL;
01522 
01523                 /*
01524                  * The leftmost key is forcibly sorted less than all entries,
01525                  * so don't bother passing it.
01526                  */
01527                 if ((ret = CDB___bam_vrfy_subtree(dbp, vdp, li->pgno,
01528                     i == 0 ? NULL : li, ri, flags, &child_level,
01529                     &child_nrecs, NULL)) != 0) {
01530                         if (ret != DB_VERIFY_BAD)
01531                                 goto done;
01532                         else
01533                                 isbad = 1;
01534                 }
01535 
01536                 if (LF_ISSET(ST_RECNUM)) {
01537                         /*
01538                          * Keep a running tally on the actual record count so
01539                          * we can return it to our parent (if we have one) or
01540                          * compare it to the NRECS field if we're a root page.
01541                          */
01542                         nrecs += child_nrecs;
01543 
01544                         /*
01545                          * Make sure the actual record count of the child
01546                          * is equal to the value in the BINTERNAL structure.
01547                          */
01548                         if (li->nrecs != child_nrecs) {
01549                                 isbad = 1;
01550                                 EPRINT((dbp->dbenv,
01551         "Item %lu page %lu has incorrect record count of %lu, should be %lu",
01552                                     i, pgno, li->nrecs, child_nrecs));
01553                         }
01554                 }
01555 
01556                 if (level != child_level + 1) {
01557                         isbad = 1;
01558                         EPRINT((dbp->dbenv, "%s%lu%s%lu%s%lu",
01559                             "Btree level incorrect on page ", li->pgno,
01560                             ": got ", child_level, ", expected ", level - 1));
01561                 }
01562         }
01563 
01564         if (0) {
01565 leaf:           level = LEAFLEVEL;
01566                 if (LF_ISSET(ST_RECNUM))
01567                         nrecs = pip->rec_cnt;
01568 
01569                 /* XXX
01570                  * We should verify that the record count on a leaf page
01571                  * is the sum of the number of keys and the number of
01572                  * records in its off-page dups.  This requires looking
01573                  * at the page again, however, and it may all be changing
01574                  * soon, so for now we don't bother.
01575                  */
01576 
01577                 if (LF_ISSET(ST_RELEN) && relenp)
01578                         *relenp = pip->re_len;
01579         }
01580 done:   if (F_ISSET(pip, VRFY_INCOMPLETE) && isbad == 0 && ret == 0) {
01581                 /*
01582                  * During the page-by-page pass, item order verification was
01583                  * not finished due to the presence of overflow items.  If
01584                  * isbad == 0, though, it's now safe to do so, as we've
01585                  * traversed any child overflow pages.  Do it.
01586                  */
01587                 if (h == NULL && (ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
01588                         goto err;
01589                 if ((ret = CDB___bam_vrfy_itemorder(dbp,
01590                     vdp, h, pgno, 0, 1, 0, flags)) != 0)
01591                         goto err;
01592                 F_CLR(pip, VRFY_INCOMPLETE);
01593         }
01594 
01595         /*
01596          * Our parent has sent us BINTERNAL pointers to parent records
01597          * so that we can verify our place with respect to them.  If it's
01598          * appropriate--we have a default sort function--verify this.
01599          */
01600         if (isbad == 0 && ret == 0 && !LF_ISSET(DB_NOORDERCHK) && lp != NULL) {
01601                 if (h == NULL && (ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
01602                         goto err;
01603                 if ((ret =
01604                     __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, flags)) != 0) {
01605                         if (ret == DB_VERIFY_BAD)
01606                                 isbad = 1;
01607                         else
01608                                 goto err;
01609                 }
01610         }
01611 
01612         /*
01613          * This is guaranteed to succeed for leaf pages, but no harm done.
01614          *
01615          * Internal pages below the top level do not store their own
01616          * record numbers, so we skip them.
01617          */
01618         if (LF_ISSET(ST_RECNUM) && nrecs != pip->rec_cnt && toplevel) {
01619                 isbad = 1;
01620                 EPRINT((dbp->dbenv,
01621                     "Bad record count on page %lu: got %lu, expected %lu",
01622                     pgno, nrecs, pip->rec_cnt));
01623         }
01624 
01625         if (levelp)
01626                 *levelp = level;
01627         if (nrecsp)
01628                 *nrecsp = nrecs;
01629 
01630         pgset = vdp->pgset;
01631         if ((ret = CDB___db_vrfy_pgset_get(pgset, pgno, &p)) != 0)
01632                 goto err;
01633         if (p != 0) {
01634                 isbad = 1;
01635                 EPRINT((dbp->dbenv, "Page %lu linked twice", pgno));
01636         } else if ((ret = CDB___db_vrfy_pgset_inc(pgset, pgno)) != 0)
01637                 goto err;
01638 
01639 err:    if (h != NULL && (t_ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0)
01640                 ret = t_ret;
01641         if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
01642                 ret = t_ret;
01643         if (cc != NULL && ((t_ret = CDB___db_vrfy_ccclose(cc)) != 0) && ret == 0)
01644                 ret = t_ret;
01645         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
01646 }
01647 
01648 /*
01649  * __bam_vrfy_treeorder --
01650  *      Verify that the lowest key on a page sorts greater than the
01651  *      BINTERNAL which points to it (lp), and the highest key
01652  *      sorts less than the BINTERNAL above that (rp).
01653  *
01654  *      If lp is NULL, this means that it was the leftmost key on the
01655  *      parent, which (regardless of sort function) sorts less than
01656  *      all keys.  No need to check it.
01657  *
01658  *      If rp is NULL, lp was the highest key on the parent, so there's
01659  *      no higher key we must sort less than.
01660  */
01661 static int
01662 __bam_vrfy_treeorder(dbp, pgno, h, lp, rp, flags)
01663         DB *dbp;
01664         db_pgno_t pgno;
01665         PAGE *h;
01666         BINTERNAL *lp, *rp;
01667         u_int32_t flags;
01668 {
01669         BOVERFLOW *bo;
01670         BTREE *t;
01671         DBT dbt;
01672         db_indx_t last;
01673         int (*func)__P((const DBT *, const DBT *));
01674         int ret, cmp;
01675 
01676         memset(&dbt, 0, sizeof(DBT));
01677         F_SET(&dbt, DB_DBT_MALLOC);
01678         t = dbp->bt_internal;
01679         func = (t->bt_compare != NULL) ? t->bt_compare : CDB___bam_defcmp;
01680         ret = 0;
01681 
01682         switch (TYPE(h)) {
01683         case P_IBTREE:
01684         case P_LDUP:
01685                 last = NUM_ENT(h) - O_INDX;
01686                 break;
01687         case P_LBTREE:
01688                 last = NUM_ENT(h) - P_INDX;
01689                 break;
01690         default:
01691                 TYPE_ERR_PRINT(dbp->dbenv, "__bam_vrfy_treeorder", pgno, TYPE(h));
01692                 DB_ASSERT(0);
01693                 return (EINVAL);
01694         }
01695 
01696         /*
01697          * The key on page h, the child page, is more likely to be
01698          * an overflow page, so we pass its offset, rather than lp/rp's,
01699          * into CDB___bam_cmp.  This will take advantage of CDB___db_moff.
01700          */
01701 
01702         /*
01703          * Skip first-item check if we're an internal page--the first
01704          * entry on an internal page is treated specially by CDB___bam_cmp,
01705          * so what's on the page shouldn't matter.  (Plus, since we're passing
01706          * our page and item 0 as to CDB___bam_cmp, we'll sort before our
01707          * parent and falsely report a failure.)
01708          */
01709         if (lp != NULL && TYPE(h) != P_IBTREE) {
01710                 if (lp->type == B_KEYDATA) {
01711                         dbt.data = lp->data;
01712                         dbt.size = lp->len;
01713                 } else if (lp->type == B_OVERFLOW) {
01714                         bo = (BOVERFLOW *)lp->data;
01715                         if ((ret = CDB___db_goff(dbp, &dbt, bo->tlen, bo->pgno,
01716                             NULL, NULL)) != 0)
01717                                 return (ret);
01718                 } else {
01719                         DB_ASSERT(0);
01720                         EPRINT((dbp->dbenv, "Unknown type for internal record"));
01721                         return (EINVAL);
01722                 }
01723 
01724                 /* On error, fall through, free if neeeded, and return. */
01725                 if ((ret = CDB___bam_cmp(dbp, &dbt, h, 0, func, &cmp)) == 0) {
01726                         if (cmp > 0) {
01727                                 EPRINT((dbp->dbenv,
01728                     "First item on page %lu sorted greater than parent entry",
01729                                     PGNO(h)));
01730                                 ret = DB_VERIFY_BAD;
01731                         }
01732                 } else
01733                         EPRINT((dbp->dbenv,
01734                             "First item on page %lu had comparison error",
01735                             PGNO(h)));
01736 
01737                 if (dbt.data != lp->data)
01738                         CDB___os_free(dbt.data, 0);
01739                 if (ret != 0)
01740                         return (ret);
01741         }
01742 
01743         if (rp != NULL) {
01744                 if (rp->type == B_KEYDATA) {
01745                         dbt.data = rp->data;
01746                         dbt.size = rp->len;
01747                 } else if (rp->type == B_OVERFLOW) {
01748                         bo = (BOVERFLOW *)rp->data;
01749                         if ((ret = CDB___db_goff(dbp, &dbt, bo->tlen, bo->pgno,
01750                             NULL, NULL)) != 0)
01751                                 return (ret);
01752                 } else {
01753                         DB_ASSERT(0);
01754                         EPRINT((dbp->dbenv, "Unknown type for internal record"));
01755                         return (EINVAL);
01756                 }
01757 
01758                 /* On error, fall through, free if neeeded, and return. */
01759                 if ((ret = CDB___bam_cmp(dbp, &dbt, h, last, func, &cmp)) == 0) {
01760                         if (cmp < 0) {
01761                                 EPRINT((dbp->dbenv,
01762                     "Last item on page %lu sorted greater than parent entry",
01763                                     PGNO(h)));
01764                                 ret = DB_VERIFY_BAD;
01765                         }
01766                 } else
01767                         EPRINT((dbp->dbenv,
01768                             "Last item on page %lu had comparison error",
01769                             PGNO(h)));
01770 
01771                 if (dbt.data != rp->data)
01772                         CDB___os_free(dbt.data, 0);
01773         }
01774 
01775         return (ret);
01776 }
01777 
01778 /*
01779  * CDB___bam_salvage --
01780  *      Safely dump out anything that looks like a key on an alleged
01781  *      btree leaf page.
01782  *
01783  * PUBLIC: int CDB___bam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t,
01784  * PUBLIC:     PAGE *, void *, int (*)(void *, const void *), DBT *,
01785  * PUBLIC:     u_int32_t));
01786  */
01787 int
01788 CDB___bam_salvage(dbp, vdp, pgno, pgtype, h, handle, callback, key, flags)
01789         DB *dbp;
01790         VRFY_DBINFO *vdp;
01791         db_pgno_t pgno;
01792         u_int32_t pgtype;
01793         PAGE *h;
01794         void *handle;
01795         int (*callback) __P((void *, const void *));
01796         DBT *key;
01797         u_int32_t flags;
01798 {
01799         DBT dbt, unkdbt;
01800         BKEYDATA *bk;
01801         BOVERFLOW *bo;
01802         db_indx_t i, beg, end;
01803         u_int32_t himark;
01804         u_int8_t *pgmap;
01805         void *ovflbuf;
01806         int t_ret, ret, err_ret;
01807 
01808         /* Shut up lint. */
01809         COMPQUIET(end, 0);
01810 
01811         ovflbuf = pgmap = NULL;
01812         err_ret = ret = 0;
01813 
01814         memset(&dbt, 0, sizeof(DBT));
01815         dbt.flags = DB_DBT_REALLOC;
01816 
01817         memset(&unkdbt, 0, sizeof(DBT));
01818         unkdbt.size = strlen("UNKNOWN") + 1;
01819         unkdbt.data = "UNKNOWN";
01820 
01821         /*
01822          * Allocate a buffer for overflow items.  Start at one page;
01823          * CDB___db_safe_goff will realloc as needed.
01824          */
01825         if ((ret = CDB___os_malloc(dbp->dbenv, dbp->pgsize, NULL, &ovflbuf)) != 0)
01826                 return (ret);
01827 
01828         if (LF_ISSET(DB_AGGRESSIVE)) {
01829                 if ((ret =
01830                     CDB___os_malloc(dbp->dbenv, dbp->pgsize, NULL, &pgmap)) != 0)
01831                         goto err;
01832                 memset(pgmap, 0, dbp->pgsize);
01833         }
01834 
01835         /*
01836          * Loop through the inp array, spitting out key/data pairs.
01837          *
01838          * If we're salvaging normally, loop from 0 through NUM_ENT(h).
01839          * If we're being aggressive, loop until we hit the end of the page--
01840          * NUM_ENT() may be bogus.
01841          */
01842         i = 0;
01843         himark = dbp->pgsize;
01844         for (;;) {
01845                 /* If we're not aggressive, break when we hit NUM_ENT(h). */
01846                 if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h))
01847                         break;
01848 
01849                 /* Verify the current item. */
01850                 ret = CDB___db_vrfy_inpitem(dbp,
01851                     h, pgno, i, 1, flags, &himark, NULL);
01852                 /* If this returned a fatality, it's time to break. */
01853                 if (ret == DB_VERIFY_FATAL)
01854                         break;
01855                 /*
01856                  * If this returned 0, it's safe to print or (carefully)
01857                  * try to fetch.
01858                  */
01859                 if (ret == 0) {
01860                         /*
01861                          * We're going to go try to print the next item.  If
01862                          * key is non-NULL, we're a dup page, so we've got to
01863                          * print the key first, unless SA_SKIPFIRSTKEY is set
01864                          * and we're on the first entry.
01865                          */
01866                         if (key != NULL &&
01867                             (i != 0 || !LF_ISSET(SA_SKIPFIRSTKEY)))
01868                                 if ((ret = CDB___db_prdbt(key,
01869                                     0, " ", handle, callback, 0, NULL)) != 0)
01870                                         err_ret = ret;
01871 
01872                         bk = GET_BKEYDATA(h, i);
01873                         beg = h->inp[i];
01874                         switch (bk->type) {
01875                         case B_DUPLICATE:
01876                                 end = beg + BOVERFLOW_SIZE - 1;
01877                                 /*
01878                                  * If we're not on a normal btree leaf page,
01879                                  * there shouldn't be off-page
01880                                  * dup sets.  Something's confused;  just
01881                                  * drop it, and the code to pick up unlinked
01882                                  * offpage dup sets will print it out
01883                                  * with key "UNKNOWN" later.
01884                                  */
01885                                 if (pgtype != P_LBTREE)
01886                                         break;
01887 
01888                                 bo = (BOVERFLOW *)bk;
01889 
01890                                 /*
01891                                  * If the page number is unreasonable, or
01892                                  * if this is supposed to be a key item,
01893                                  * just spit out "UNKNOWN"--the best we
01894                                  * can do is run into the data items in the
01895                                  * unlinked offpage dup pass.
01896                                  */
01897                                 if (!IS_VALID_PGNO(bo->pgno) ||
01898                                     (i % P_INDX == 0)) {
01899                                         /* Not much to do on failure. */
01900                                         if ((ret = CDB___db_prdbt(&unkdbt, 0, " ",
01901                                             handle, callback, 0, NULL)) != 0)
01902                                                 err_ret = ret;
01903                                         break;
01904                                 }
01905 
01906                                 if ((ret = CDB___db_salvage_duptree(dbp,
01907                                     vdp, bo->pgno, &dbt, handle, callback,
01908                                         flags | SA_SKIPFIRSTKEY)) != 0)
01909                                         err_ret = ret;
01910 
01911                                 break;
01912                         default:
01913                                 /*
01914                                  * If we're being aggressive, fall through
01915                                  * and treat as a B_KEYDATA.  Seems unlikely
01916                                  * that the length would be okay and the type
01917                                  * bogus, but we can never be sure.
01918                                  */
01919                                 if (!LF_ISSET(DB_AGGRESSIVE))
01920                                         break;
01921                                 /* FALLTHROUGH */
01922                         case B_KEYDATA:
01923                                 end = ALIGN(beg + bk->len, 4) - 1;
01924                                 dbt.data = bk->data;
01925                                 dbt.size = bk->len;
01926                                 if ((ret = CDB___db_prdbt(&dbt,
01927                                     0, " ", handle, callback, 0, NULL)) != 0)
01928                                         err_ret = ret;
01929                                 break;
01930                         case B_OVERFLOW:
01931                                 end = beg + BOVERFLOW_SIZE - 1;
01932                                 bo = (BOVERFLOW *)bk;
01933                                 if ((ret = CDB___db_safe_goff(dbp, vdp,
01934                                     bo->pgno, &dbt, &ovflbuf, flags)) != 0) {
01935                                         err_ret = ret;
01936                                         /* We care about err_ret more. */
01937                                         (void)CDB___db_prdbt(&unkdbt, 0, " ",
01938                                             handle, callback, 0, NULL);
01939                                         break;
01940                                 }
01941                                 if ((ret = CDB___db_prdbt(&dbt,
01942                                     0, " ", handle, callback, 0, NULL)) != 0)
01943                                         err_ret = ret;
01944                                 break;
01945                         }
01946 
01947                         /*
01948                          * If we're being aggressive, mark the beginning
01949                          * and end of the item;  we'll come back and print
01950                          * whatever "junk" is in the gaps in case we had
01951                          * any bogus inp elements and thereby missed stuff.
01952                          */
01953                         if (LF_ISSET(DB_AGGRESSIVE)) {
01954                                 pgmap[beg] = ITEM_BEGIN;
01955                                 pgmap[end] = ITEM_END;
01956                         }
01957                 }
01958                 i += O_INDX;
01959         }
01960 
01961         /*
01962          * If i is odd and this is a btree leaf, we've printed out a key but not
01963          * a datum; fix this imbalance by printing an "UNKNOWN".
01964          */
01965         if (pgtype == P_LBTREE && (i % P_INDX == 1) && ((ret =
01966             CDB___db_prdbt(&unkdbt, 0, " ", handle, callback, 0, NULL)) != 0))
01967                 err_ret = ret;
01968 
01969 err:    if (pgmap != NULL)
01970                 CDB___os_free(pgmap, 0);
01971         CDB___os_free(ovflbuf, 0);
01972 
01973         /* Mark this page as done. */
01974         if ((t_ret = CDB___db_salvage_markdone(vdp, pgno)) != 0)
01975                 return (t_ret);
01976 
01977         return ((err_ret != 0) ? err_ret : ret);
01978 }
01979 
01980 /*
01981  * CDB___bam_salvage_walkdupint --
01982  *      Walk a known-good btree or recno internal page which is part of
01983  *      a dup tree, calling CDB___db_salvage_duptree on each child page.
01984  *
01985  * PUBLIC: int CDB___bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *,
01986  * PUBLIC:     DBT *, void *, int (*)(void *, const void *), u_int32_t));
01987  */
01988 int
01989 CDB___bam_salvage_walkdupint(dbp, vdp, h, key, handle, callback, flags)
01990         DB *dbp;
01991         VRFY_DBINFO *vdp;
01992         PAGE *h;
01993         DBT *key;
01994         void *handle;
01995         int (*callback) __P((void *, const void *));
01996         u_int32_t flags;
01997 {
01998         RINTERNAL *ri;
01999         BINTERNAL *bi;
02000         int ret, t_ret;
02001         db_indx_t i;
02002 
02003         ret = 0;
02004         for (i = 0; i < NUM_ENT(h); i++) {
02005                 switch (TYPE(h)) {
02006                 case P_IBTREE:
02007                         bi = GET_BINTERNAL(h, i);
02008                         if ((t_ret = CDB___db_salvage_duptree(dbp,
02009                             vdp, bi->pgno, key, handle, callback, flags)) != 0)
02010                                 ret = t_ret;
02011                 case P_IRECNO:
02012                         ri = GET_RINTERNAL(h, i);
02013                         if ((t_ret = CDB___db_salvage_duptree(dbp,
02014                             vdp, ri->pgno, key, handle, callback, flags)) != 0)
02015                                 ret = t_ret;
02016                         break;
02017                 default:
02018                         CDB___db_err(dbp->dbenv,
02019                             "CDB___bam_salvage_walkdupint called on non-int. page");
02020                         DB_ASSERT(0);
02021                         return (EINVAL);
02022                 }
02023                 /* Pass SA_SKIPFIRSTKEY, if set, on to the 0th child only. */
02024                 flags &= ~LF_ISSET(SA_SKIPFIRSTKEY);
02025         }
02026 
02027         return (ret);
02028 }
02029 
02030 /*
02031  * CDB___bam_meta2pgset --
02032  *      Given a known-good meta page, return in pgsetp a 0-terminated list of
02033  *      db_pgno_t's corresponding to the pages in the btree.
02034  *
02035  *      We do this by a somewhat sleazy method, to avoid having to traverse the
02036  *      btree structure neatly:  we walk down the left side to the very
02037  *      first leaf page, then we mark all the pages in the chain of
02038  *      NEXT_PGNOs (being wary of cycles and invalid ones), then we
02039  *      consolidate our scratch array into a nice list, and return.  This
02040  *      avoids the memory management hassles of recursion and the
02041  *      trouble of walking internal pages--they just don't matter, except
02042  *      for the left branch.
02043  *
02044  * PUBLIC: int CDB___bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *,
02045  * PUBLIC:     u_int32_t, DB *));
02046  */
02047 int
02048 CDB___bam_meta2pgset(dbp, vdp, btmeta, flags, pgset)
02049         DB *dbp;
02050         VRFY_DBINFO *vdp;
02051         BTMETA *btmeta;
02052         u_int32_t flags;
02053         DB *pgset;
02054 {
02055         BINTERNAL *bi;
02056         PAGE *h;
02057         RINTERNAL *ri;
02058         db_pgno_t current, p;
02059         int err_ret, ret;
02060 
02061         h = NULL;
02062         ret = err_ret = 0;
02063         DB_ASSERT(pgset != NULL);
02064         for (current = btmeta->root;;) {
02065                 if (!IS_VALID_PGNO(current) || current == PGNO(btmeta)) {
02066                         err_ret = DB_VERIFY_BAD;
02067                         goto err;
02068                 }
02069                 if ((ret = CDB_memp_fget(dbp->mpf, &current, 0, &h)) != 0) {
02070                         err_ret = ret;
02071                         goto err;
02072                 }
02073 
02074                 switch (TYPE(h)) {
02075                 case P_IBTREE:
02076                 case P_IRECNO:
02077                         if ((ret = CDB___bam_vrfy(dbp,
02078                             vdp, h, current, flags | DB_NOORDERCHK)) != 0) {
02079                                 err_ret = ret;
02080                                 goto err;
02081                         }
02082                         if (TYPE(h) == P_IBTREE) {
02083                                 bi = GET_BINTERNAL(h, 0);
02084                                 current = bi->pgno;
02085                         } else {        /* P_IRECNO */
02086                                 ri = GET_RINTERNAL(h, 0);
02087                                 current = ri->pgno;
02088                         }
02089                         break;
02090                 case P_LBTREE:
02091                 case P_LRECNO:
02092                         goto traverse;
02093                         /* NOTREACHED */
02094                 default:
02095                         err_ret = DB_VERIFY_BAD;
02096                         goto err;
02097                 }
02098 
02099                 if ((ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0)
02100                         err_ret = ret;
02101                 h = NULL;
02102         }
02103 
02104         /*
02105          * At this point, current is the pgno of leaf page h, the 0th in the
02106          * tree we're concerned with.
02107          */
02108 traverse:
02109         while (IS_VALID_PGNO(current) && current != PGNO_INVALID) {
02110                 if (h == NULL &&
02111                     (ret = CDB_memp_fget(dbp->mpf, &current, 0, &h) != 0)) {
02112                         err_ret = ret;
02113                         break;
02114                 }
02115 
02116                 if ((ret = CDB___db_vrfy_pgset_get(pgset, current, (int *)&p)) != 0)
02117                         goto err;
02118 
02119                 if (p != 0) {
02120                         /*
02121                          * We've found a cycle.  Return success anyway--
02122                          * our caller may as well use however much of
02123                          * the pgset we've come up with.
02124                          */
02125                         break;
02126                 }
02127                 if ((ret = CDB___db_vrfy_pgset_inc(pgset, current)) != 0)
02128                         goto err;
02129 
02130                 current = NEXT_PGNO(h);
02131                 if ((ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0)
02132                         err_ret = ret;
02133                 h = NULL;
02134         }
02135 
02136 err:    if (h != NULL)
02137                 (void)CDB_memp_fput(dbp->mpf, h, 0);
02138 
02139         return (ret == 0 ? err_ret : ret);
02140 }
02141 
02142 /*
02143  * __bam_safe_getdata --
02144  *
02145  *      Utility function for CDB___bam_vrfy_itemorder.  Safely gets the datum at
02146  *      index i, page h, and sticks it in DBT dbt.  If ovflok is 1 and i's an
02147  *      overflow item, we do a safe_goff to get the item and signal that we need
02148  *      to free dbt->data;  if ovflok is 0, we leaves the DBT zeroed.
02149  */
02150 static int
02151 __bam_safe_getdata(dbp, h, i, ovflok, dbt, freedbtp)
02152         DB *dbp;
02153         PAGE *h;
02154         u_int32_t i;
02155         int ovflok;
02156         DBT *dbt;
02157         int *freedbtp;
02158 {
02159         BKEYDATA *bk;
02160         BOVERFLOW *bo;
02161 
02162         memset(dbt, 0, sizeof(DBT));
02163         *freedbtp = 0;
02164 
02165         bk = GET_BKEYDATA(h, i);
02166         if (B_TYPE(bk->type) == B_OVERFLOW) {
02167                 if (!ovflok)
02168                         return(0);
02169 
02170                 bo = (BOVERFLOW *)bk;
02171                 F_SET(dbt, DB_DBT_MALLOC);
02172 
02173                 *freedbtp = 1;
02174                 return (CDB___db_goff(dbp, dbt, bo->tlen, bo->pgno, NULL, NULL));
02175                 /* NOTREACHED */
02176         } else {
02177                 dbt->data = bk->data;
02178                 dbt->size = bk->len;
02179         }
02180 
02181         return (0);
02182 }

Generated on Sun Jun 8 10:56:35 2008 for GNUmifluz by  doxygen 1.5.5