db_vrfy.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: db__vrfy_8c-source.html,v 1.1 2008/06/08 10:18:28 sebdiaz Exp $
00008  */
00009 
00010 #include "config.h"
00011 
00012 #ifndef lint
00013 static const char revid[] = "$Id: db__vrfy_8c-source.html,v 1.1 2008/06/08 10:18:28 sebdiaz Exp $";
00014 #endif /* not lint */
00015 
00016 #ifndef NO_SYSTEM_INCLUDES
00017 #include <sys/types.h>
00018 
00019 #include <errno.h>
00020 #include <string.h>
00021 #endif
00022 
00023 #include "db_int.h"
00024 #include "db_page.h"
00025 #include "db_swap.h"
00026 #include "db_verify.h"
00027 #include "db_ext.h"
00028 #include "btree.h"
00029 #include "hash.h"
00030 #include "qam.h"
00031 
/*
 * Forward declarations for the file-local (static) helpers used by the
 * verifier/salvager below.  Parameter lists are wrapped in __P(()), as in
 * the rest of this file (presumably so pre-ANSI compilers can drop them --
 * confirm against the definition of __P in the project headers).
 */
static int  __db_guesspgsize __P((DB_ENV *, DB_FH *));
static int  __db_is_valid_magicno __P((u_int32_t, DBTYPE *));
static int  __db_is_valid_pagetype __P((u_int32_t));
static int  __db_meta2pgset
                __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, DB *));
static int  __db_salvage_subdbs
                __P((DB *, VRFY_DBINFO *, void *,
                int(*)(void *, const void *), u_int32_t, int *));
static int  __db_salvage_unknowns
                __P((DB *, VRFY_DBINFO *, void *,
                int (*)(void *, const void *), u_int32_t));
static int  __db_vrfy_common
                __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
static int  __db_vrfy_freelist __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
static int  __db_vrfy_invalid
                __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
static int  __db_vrfy_orderchkonly __P((DB *,
                VRFY_DBINFO *, const char *, const char *, u_int32_t));
/* Checks the master metadata page (page 0) by hand. */
static int  __db_vrfy_pagezero __P((DB *, VRFY_DBINFO *, DB_FH *, u_int32_t));
static int  __db_vrfy_subdbs __P((DB *, VRFY_DBINFO *, char *, u_int32_t));
/* Inter-page structure verification, run after the page walk. */
static int  __db_vrfy_structure
                __P((DB *, VRFY_DBINFO *, char *, db_pgno_t, u_int32_t));
/* Page-by-page main loop of the verifier/salvager. */
static int  __db_vrfy_walkpages
                __P((DB *, VRFY_DBINFO *, void *, int (*)(void *, const void *),
                u_int32_t));
00057 
00058 /*
00059  * This is the code for DB->verify, the DB database consistency checker.
00060  * For now, it checks all subdatabases in a database, and verifies
00061  * everything it knows how to (i.e. it's all-or-nothing, and one can't
00062  * check only for a subset of possible problems).
00063  */
00064 
00065 /*
00066  * CDB___db_verify --
00067  *      Walk the entire file page-by-page, either verifying with or without
00068  *      dumping in db_dump -d format, or DB_SALVAGE-ing whatever key/data
00069  *      pairs can be found and dumping them in standard (db_load-ready)
00070  *      dump format.
00071  *
00072  *      (Salvaging isn't really a verification operation, but we put it
00073  *      here anyway because it requires essentially identical top-level
00074  *      code.)
00075  *
00076  *      flags may be 0, DB_NOORDERCHK, DB_ORDERCHKONLY, or DB_SALVAGE
00077  *      (and optionally DB_AGGRESSIVE).
00078  *
00079  *      CDB___db_verify itself is simply a wrapper to CDB___db_verify_internal,
00080  *      which lets us pass appropriate equivalents to FILE * in from the
00081  *      non-C APIs.
00082  *
00083  * PUBLIC: int CDB___db_verify
00084  * PUBLIC:     __P((DB *, const char *, const char *, FILE *, u_int32_t));
00085  */
00086 int
00087 CDB___db_verify(dbp, file, database, outfile, flags)
00088         DB *dbp;
00089         const char *file, *database;
00090         FILE *outfile;
00091         u_int32_t flags;
00092 {
00093 
00094         return (CDB___db_verify_internal(dbp,
00095             file, database, outfile, CDB___db_verify_callback, flags));
00096 }
00097 
/*
 * CDB___db_verify_callback --
 *      Callback function for using pr_* functions from C: writes the
 *      NUL-terminated string str_arg to the stdio stream passed as handle.
 *
 *      The string is always printed through a constant "%s" format.  It is
 *      data (it may contain salvaged key/data bytes), never a format
 *      string, so a '%' in it must be written literally rather than being
 *      interpreted as a conversion specification.
 *
 *      handle  -- a FILE * opened for writing.
 *      str_arg -- the NUL-terminated text to emit.
 *
 *      Returns 0 if the whole string was written, EIO otherwise.
 *
 * PUBLIC: int  CDB___db_verify_callback __P((void *, const void *));
 */
int
CDB___db_verify_callback(handle, str_arg)
        void *handle;
        const void *str_arg;
{
        const char *str;
        FILE *f;

        str = (const char *)str_arg;
        f = (FILE *)handle;

        /* A short or failed write is reported as an I/O error. */
        if (fprintf(f, "%s", str) != (int)strlen(str))
                return (EIO);

        return (0);
}
00120 
00121 /*
00122  * CDB___db_verify_internal --
00123  *      Inner meat of CDB___db_verify.
00124  *
00125  * PUBLIC: int CDB___db_verify_internal __P((DB *, const char *,
00126  * PUBLIC:     const char *, void *, int (*)(void *, const void *), u_int32_t));
00127  */
00128 int
00129 CDB___db_verify_internal(dbp_orig, name, subdb, handle, callback, flags)
00130         DB *dbp_orig;
00131         const char *name, *subdb;
00132         void *handle;
00133         int (*callback) __P((void *, const void *));
00134         u_int32_t flags;
00135 {
00136         DB *dbp;
00137         DB_ENV *dbenv;
00138         DB_FH fh, *fhp;
00139         PAGE *h;
00140         VRFY_DBINFO *vdp;
00141         db_pgno_t last;
00142         int has, ret, isbad;
00143         char *real_name;
00144 
00145         dbenv = dbp_orig->dbenv;
00146         vdp = NULL;
00147         real_name = NULL;
00148         ret = isbad = 0;
00149 
00150         memset(&fh, 0, sizeof(fh));
00151         fhp = &fh;
00152 
00153         PANIC_CHECK(dbenv);
00154         DB_ILLEGAL_AFTER_OPEN(dbp_orig, "verify");
00155 
00156 #define OKFLAGS (DB_AGGRESSIVE | DB_NOORDERCHK | DB_ORDERCHKONLY | DB_SALVAGE)
00157         if ((ret = CDB___db_fchk(dbenv, "DB->verify", flags, OKFLAGS)) != 0)
00158                 return (ret);
00159 
00160         /*
00161          * DB_SALVAGE is mutually exclusive with the other flags except
00162          * DB_AGGRESSIVE.
00163          */
00164         if (LF_ISSET(DB_SALVAGE) &&
00165             (flags & ~DB_AGGRESSIVE) != DB_SALVAGE)
00166                 return (CDB___db_ferr(dbenv, "CDB___db_verify", 1));
00167 
00168         if (LF_ISSET(DB_ORDERCHKONLY) && flags != DB_ORDERCHKONLY)
00169                 return (CDB___db_ferr(dbenv, "CDB___db_verify", 1));
00170 
00171         if (LF_ISSET(DB_ORDERCHKONLY) && subdb == NULL) {
00172                 CDB___db_err(dbenv, "DB_ORDERCHKONLY requires a database name");
00173                 return (EINVAL);
00174         }
00175 
00176         /*
00177          * Forbid working in an environment that uses transactions or
00178          * locking;  we're going to be looking at the file freely,
00179          * and while we're not going to modify it, we aren't obeying
00180          * locking conventions either.
00181          */
00182         if (TXN_ON(dbenv) || LOCKING_ON(dbenv) || LOGGING_ON(dbenv)) {
00183                 dbp_orig->errx(dbp_orig,
00184             "verify may not be used with transactions, logging, or locking");
00185                 return (EINVAL);
00186                 /* NOTREACHED */
00187         }
00188 
00189         /* Create a dbp to use internally, which we can close at our leisure. */
00190         if ((ret = CDB_db_create(&dbp, dbenv, 0)) != 0)
00191                 goto err;
00192 
00193         /* Copy the supplied pagesize, which we use if the file one is bogus. */
00194         if (dbp_orig->pgsize >= DB_MIN_PGSIZE &&
00195             dbp_orig->pgsize <= DB_MAX_PGSIZE)
00196                 dbp->set_pagesize(dbp, dbp_orig->pgsize);
00197 
00198         /*
00199          * We don't know how large the cache is, and if the database
00200          * in question uses a small page size--which we don't know
00201          * yet!--it may be uncomfortably small for the default page
00202          * size [#2143].  However, the things we need temporary
00203          * databases for in dbinfo are largely tiny, so using a
00204          * 1024-byte pagesize is probably not going to be a big hit,
00205          * and will make us fit better into small spaces.
00206          */
00207         if ((ret = CDB___db_vrfy_dbinfo_create(dbenv, 1024, &vdp)) != 0)
00208                 goto err;
00209 
00210         /* Find the real name of the file. */
00211         if ((ret = CDB___db_appname(dbenv,
00212             DB_APP_DATA, NULL, name, 0, NULL, &real_name)) != 0)
00213                 goto err;
00214 
00215         /*
00216          * Our first order of business is to verify page 0, which is
00217          * the metadata page for the master database of subdatabases
00218          * or of the only database in the file.  We want to do this by hand
00219          * rather than just calling CDB___db_open in case it's corrupt--various
00220          * things in CDB___db_open might act funny.
00221          *
00222          * Once we know the metadata page is healthy, I believe that it's
00223          * safe to open the database normally and then use the page swapping
00224          * code, which makes life easier.
00225          */
00226         if ((ret = CDB___os_open(dbenv, real_name, DB_OSO_RDONLY, 0444, fhp)) != 0)
00227                 goto err;
00228 
00229         /* Verify the metadata page 0; set pagesize and type. */
00230         if ((ret = __db_vrfy_pagezero(dbp, vdp, fhp, flags)) != 0) {
00231                 if (ret == DB_VERIFY_BAD)
00232                         isbad = 1;
00233                 else
00234                         goto err;
00235         }
00236 
00237         /*
00238          * We can assume at this point that dbp->pagesize and dbp->type are
00239          * set correctly, or at least as well as they can be, and that
00240          * locking, logging, and txns are not in use.  Thus we can trust
00241          * the memp code not to look at the page, and thus to be safe
00242          * enough to use.
00243          *
00244          * The dbp is not open, but the file is open in the fhp, and we
00245          * cannot assume that CDB___db_open is safe.  Call CDB___db_dbenv_setup,
00246          * the [safe] part of CDB___db_open that initializes the environment--
00247          * and the mpool--manually.
00248          */
00249         if ((ret = CDB___db_dbenv_setup(dbp,
00250             name, DB_ODDFILESIZE | DB_RDONLY)) != 0)
00251                 return (ret);
00252 
00253         /*
00254          * Find out the page number of the last page in the database.
00255          *
00256          * XXX: This currently fails if the last page is of bad type,
00257          * because it calls CDB___db_pgin and that pukes.  This is bad.
00258          */
00259         if ((ret = CDB_memp_fget(dbp->mpf, &last, DB_MPOOL_LAST, &h)) != 0)
00260                 goto err;
00261         if ((ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0)
00262                 goto err;
00263 
00264         vdp->last_pgno = last;
00265 
00266         /*
00267          * DB_ORDERCHKONLY is a special case;  our file consists of
00268          * several subdatabases, which use different hash, bt_compare,
00269          * and/or dup_compare functions.  Consequently, we couldn't verify
00270          * sorting and hashing simply by calling DB->verify() on the file.
00271          * DB_ORDERCHKONLY allows us to come back and check those things;  it
00272          * requires a subdatabase, and assumes that everything but that
00273          * database's sorting/hashing is correct.
00274          */
00275         if (LF_ISSET(DB_ORDERCHKONLY)) {
00276                 ret = __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags);
00277                 goto done;
00278         }
00279 
00280         /*
00281          * When salvaging, we use a db to keep track of whether we've seen a
00282          * given overflow or dup page in the course of traversing normal data.
00283          * If in the end we have not, we assume its key got lost and print it
00284          * with key "UNKNOWN".
00285          */
00286         if (LF_ISSET(DB_SALVAGE)) {
00287                 if ((ret = CDB___db_salvage_init(vdp)) != 0)
00288                         return (ret);
00289 
00290                 /*
00291                  * If we're not being aggressive, attempt to crack subdbs.
00292                  * "has" will indicate whether the attempt has succeeded
00293                  * (even in part), meaning that we have some semblance of
00294                  * subdbs;  on the walkpages pass, we print out
00295                  * whichever data pages we have not seen.
00296                  */
00297                 has = 0;
00298                 if (!LF_ISSET(DB_AGGRESSIVE) && (__db_salvage_subdbs(dbp,
00299                     vdp, handle, callback, flags, &has)) != 0)
00300                         isbad = 1;
00301 
00302                 /*
00303                  * If we have subdatabases, we need to signal that if
00304                  * any keys are found that don't belong to a subdatabase,
00305                  * they'll need to have an "__OTHER__" subdatabase header
00306                  * printed first.  Flag this.  Else, print a header for
00307                  * the normal, non-subdb database.
00308                  */
00309                 if (has == 1)
00310                         F_SET(vdp, SALVAGE_PRINTHEADER);
00311                 else if ((ret = CDB___db_prheader(dbp,
00312                     NULL, 0, 0, handle, callback, vdp, PGNO_BASE_MD)) != 0)
00313                         goto err;
00314         }
00315 
00316         if ((ret =
00317             __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)) != 0) {
00318                 if (ret == DB_VERIFY_BAD)
00319                         isbad = 1;
00320                 else if (ret != 0)
00321                         goto err;
00322         }
00323 
00324         /* If we're verifying, verify inter-page structure. */
00325         if (!LF_ISSET(DB_SALVAGE) && isbad == 0)
00326                 if ((ret =
00327                     __db_vrfy_structure(dbp, vdp, real_name, 0, flags)) != 0) {
00328                         if (ret == DB_VERIFY_BAD)
00329                                 isbad = 1;
00330                         else if (ret != 0)
00331                                 goto err;
00332                 }
00333 
00334         /*
00335          * If we're salvaging, output with key UNKNOWN any overflow or dup pages
00336          * we haven't been able to put in context.  Then destroy the salvager's
00337          * state-saving database.
00338          */
00339         if (LF_ISSET(DB_SALVAGE)) {
00340                 if ((ret = __db_salvage_unknowns(dbp,
00341                     vdp, handle, callback, flags)) != 0)
00342                         isbad = 1;
00343                 /* No return value, since there's little we can do. */
00344                 CDB___db_salvage_destroy(vdp);
00345         }
00346 
00347         if (0) {
00348 err:            (void)CDB___db_err(dbenv, "%s: %s", name, CDB_db_strerror(ret));
00349         }
00350 
00351         if (LF_ISSET(DB_SALVAGE) &&
00352             (has == 0 || F_ISSET(vdp, SALVAGE_PRINTFOOTER)))
00353                 (void)CDB___db_prfooter(handle, callback);
00354 
00355 done:   if (F_ISSET(fhp, DB_FH_VALID))
00356                 (void)CDB___os_closehandle(fhp);
00357         if (dbp)
00358                 (void)dbp->close(dbp, 0);
00359         if (vdp)
00360                 (void)CDB___db_vrfy_dbinfo_destroy(vdp);
00361         if (real_name)
00362                 CDB___os_freestr(real_name);
00363 
00364         if ((ret == 0 && isbad == 1) || ret == DB_VERIFY_FATAL)
00365                 ret = DB_VERIFY_BAD;
00366 
00367         return (ret);
00368 }
00369 
00370 /*
00371  * __db_vrfy_pagezero --
00372  *      Verify the master metadata page.  Use seek, read, and a local buffer
00373  *      rather than the DB paging code, for safety.
00374  *
00375  *      Must correctly (or best-guess) set dbp->type and dbp->pagesize.
00376  */
00377 static int
00378 __db_vrfy_pagezero(dbp, vdp, fhp, flags)
00379         DB *dbp;
00380         VRFY_DBINFO *vdp;
00381         DB_FH *fhp;
00382         u_int32_t flags;
00383 {
00384         DBMETA *meta;
00385         DB_ENV *dbenv;
00386         VRFY_PAGEINFO *pip;
00387         db_pgno_t freelist;
00388         int t_ret, ret, nr, swapped;
00389         u_int8_t mbuf[DBMETASIZE];
00390 
00391         swapped = ret = t_ret = 0;
00392         freelist = 0;
00393         dbenv = dbp->dbenv;
00394         meta = (DBMETA *)mbuf;
00395         dbp->type = DB_UNKNOWN;
00396 
00397         /*
00398          * Seek to the metadata page.
00399          * Note that if we're just starting a verification, dbp->pgsize
00400          * may be zero;  this is okay, as we want page zero anyway and
00401          * 0*0 == 0.
00402          */
00403         if ((ret = CDB___os_seek(dbenv, fhp, 0, 0, 0, 0, DB_OS_SEEK_SET)) != 0)
00404                 goto err;
00405 
00406         if ((ret = CDB___os_read(dbenv, fhp, mbuf, DBMETASIZE, (size_t *)&nr)) != 0)
00407                 goto err;
00408 
00409         if (nr != DBMETASIZE) {
00410                 EPRINT((dbp->dbenv,
00411                     "Incomplete metadata page %lu", PGNO_BASE_MD));
00412                 t_ret = DB_VERIFY_FATAL;
00413                 goto err;
00414         }
00415 
00416         /*
00417          * Check all of the fields that we can.
00418          */
00419 
00420         /* 08-11: Current page number.  Must == pgno. */
00421         /* Note that endianness doesn't matter--it's zero. */
00422         if (meta->pgno != PGNO_BASE_MD) {
00423                 EPRINT((dbp->dbenv, "Bad pgno: was %lu, should be %lu",
00424                     meta->pgno, PGNO_BASE_MD));
00425                 ret = DB_VERIFY_BAD;
00426         }
00427 
00428         /* 12-15: Magic number.  Must be one of valid set. */
00429         if (__db_is_valid_magicno(meta->magic, &dbp->type))
00430                 swapped = 0;
00431         else {
00432                 M_32_SWAP(meta->magic);
00433                 if (__db_is_valid_magicno(meta->magic,
00434                     &dbp->type))
00435                         swapped = 1;
00436                 else {
00437                         EPRINT((dbp->dbenv, "Bad magic no.: %lu", meta->magic));
00438                         ret = DB_VERIFY_BAD;
00439                 }
00440         }
00441 
00442         /*
00443          * 16-19: Version.  Must be current;  for now, we
00444          * don't support verification of old versions.
00445          */
00446         if (swapped)
00447                 M_32_SWAP(meta->version);
00448         if ((dbp->type == DB_BTREE && meta->version != DB_BTREEVERSION) ||
00449             (dbp->type == DB_HASH && meta->version != DB_HASHVERSION) ||
00450             (dbp->type == DB_QUEUE && meta->version != DB_QAMVERSION)) {
00451                 ret = DB_VERIFY_BAD;
00452                 EPRINT((dbp->dbenv, "%s%s", "Old or incorrect DB ",
00453                     "version; extraneous errors may result"));
00454         }
00455 
00456         /*
00457          * 20-23: Pagesize.  Must be power of two,
00458          * greater than 512, and less than 64K.
00459          */
00460         if (swapped)
00461                 M_32_SWAP(meta->pagesize);
00462         if (IS_VALID_PAGESIZE(meta->pagesize))
00463                 dbp->pgsize = meta->pagesize;
00464         else {
00465                 EPRINT((dbp->dbenv, "Bad page size: %lu",
00466                     meta->pagesize));
00467                 ret = DB_VERIFY_BAD;
00468 
00469                 /*
00470                  * Now try to settle on a pagesize to use.
00471                  * If the user-supplied one is reasonable,
00472                  * use it;  else, guess.
00473                  */
00474                 if (!IS_VALID_PAGESIZE(dbp->pgsize))
00475                         dbp->pgsize = __db_guesspgsize(dbenv, fhp);
00476         }
00477 
00478         /*
00479          * 25: Page type.  Must be correct for dbp->type,
00480          * which is by now set as well as it can be.
00481          */
00482         /* Needs no swapping--only one byte! */
00483         if ((dbp->type == DB_BTREE && meta->type != P_BTREEMETA) ||
00484             (dbp->type == DB_HASH && meta->type != P_HASHMETA) ||
00485             (dbp->type == DB_QUEUE && meta->type != P_QAMMETA)) {
00486                 ret = DB_VERIFY_BAD;
00487                 EPRINT((dbp->dbenv, "Bad page type: %lu", meta->type));
00488         }
00489 
00490         /*
00491          * 28-31: Free list page number.
00492          * We'll verify its sensibility when we do inter-page
00493          * verification later;  for now, just store it.
00494          */
00495         if (swapped)
00496             M_32_SWAP(meta->free);
00497         freelist = meta->free;
00498 
00499         /*
00500          * Initialize vdp->pages to fit a single pageinfo structure for
00501          * this one page.  We'll realloc later when we know how many
00502          * pages there are.
00503          */
00504         if ((ret = CDB___db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0)
00505                 return (ret);
00506         pip->pgno = PGNO_BASE_MD;
00507         pip->type = meta->type;
00508 
00509         /*
00510          * Signal that we still have to check the info specific to
00511          * a given type of meta page.
00512          */
00513         F_SET(pip, VRFY_INCOMPLETE);
00514 
00515         pip->free = freelist;
00516 
00517         if ((ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0)
00518                 return (ret);
00519 
00520         if (0) {
00521 err:            CDB___db_err(dbenv, "%s", CDB_db_strerror(ret));
00522         }
00523 
00524         if (swapped == 1)
00525                 F_SET(dbp, DB_AM_SWAP);
00526         if (t_ret != 0)
00527                 ret = t_ret;
00528         return (ret);
00529 }
00530 
00531 /*
00532  * __db_vrfy_walkpages --
00533  *      Main loop of the verifier/salvager.  Walks through,
00534  *      page by page, and verifies all pages and/or prints all data pages.
00535  */
00536 static int
00537 __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)
00538         DB *dbp;
00539         VRFY_DBINFO *vdp;
00540         void *handle;
00541         int (*callback) __P((void *, const void *));
00542         u_int32_t flags;
00543 {
00544         DB_ENV *dbenv;
00545         PAGE *h;
00546         db_pgno_t i;
00547         int ret, t_ret, isbad;
00548 
00549         ret = isbad = t_ret = 0;
00550         dbenv = dbp->dbenv;
00551 
00552         if ((ret = CDB___db_fchk(dbenv,
00553             "__db_vrfy_walkpages", flags, OKFLAGS)) != 0)
00554                 return (ret);
00555 
00556         for (i = 0; i <= vdp->last_pgno; i++) {
00557                 /*
00558                  * If DB_SALVAGE is set, we inspect our database of
00559                  * completed pages, and skip any we've already printed in
00560                  * the subdb pass.
00561                  */
00562                 if (LF_ISSET(DB_SALVAGE) && (CDB___db_salvage_isdone(vdp, i) != 0))
00563                         continue;
00564 
00565                 /* If an individual page get fails, keep going. */
00566                 if ((t_ret = CDB_memp_fget(dbp->mpf, &i, 0, &h)) != 0) {
00567                         if (ret == 0)
00568                                 ret = t_ret;
00569                         continue;
00570                 }
00571 
00572                 if (LF_ISSET(DB_SALVAGE)) {
00573                         /*
00574                          * We pretty much don't want to quit unless a
00575                          * bomb hits.  May as well return that something
00576                          * was screwy, however.
00577                          */
00578                         if ((t_ret = CDB___db_salvage(dbp,
00579                             vdp, i, h, handle, callback, flags)) != 0) {
00580                                 if (ret == 0)
00581                                         ret = t_ret;
00582                                 isbad = 1;
00583                         }
00584                 } else {
00585                         /*
00586                          * Verify info common to all page
00587                          * types.
00588                          */
00589                         if (i != PGNO_BASE_MD)
00590                                 if ((t_ret = __db_vrfy_common(dbp,
00591                                     vdp, h, i, flags)) == DB_VERIFY_BAD)
00592                                         isbad = 1;
00593 
00594                         switch (TYPE(h)) {
00595                         case P_INVALID:
00596                                 t_ret = __db_vrfy_invalid(dbp,
00597                                     vdp, h, i, flags);
00598                                 break;
00599                         case __P_DUPLICATE:
00600                                 isbad = 1;
00601                                 EPRINT((dbp->dbenv,
00602                                     "Old-style dup page %lu", i));
00603                                 break;
00604                         case P_HASH:
00605                                 t_ret = CDB___ham_vrfy(dbp,
00606                                     vdp, h, i, flags);
00607                                 break;
00608                         case P_IBTREE:
00609                         case P_IRECNO:
00610                         case P_LBTREE:
00611                         case P_LDUP:
00612                                 t_ret = CDB___bam_vrfy(dbp,
00613                                     vdp, h, i, flags);
00614                                 break;
00615                         case P_LRECNO:
00616                                 t_ret = CDB___ram_vrfy_leaf(dbp,
00617                                     vdp, h, i, flags);
00618                                 break;
00619                         case P_OVERFLOW:
00620                                 t_ret = CDB___db_vrfy_overflow(dbp,
00621                                     vdp, h, i, flags);
00622                                 break;
00623                         case P_HASHMETA:
00624                                 t_ret = CDB___ham_vrfy_meta(dbp,
00625                                     vdp, (HMETA *)h, i, flags);
00626                                 break;
00627                         case P_BTREEMETA:
00628                                 t_ret = CDB___bam_vrfy_meta(dbp,
00629                                     vdp, (BTMETA *)h, i, flags);
00630                                 break;
00631                         case P_QAMMETA:
00632                                 t_ret = CDB___qam_vrfy_meta(dbp,
00633                                     vdp, (QMETA *)h, i, flags);
00634                                 break;
00635                         case P_QAMDATA:
00636                                 t_ret = CDB___qam_vrfy_data(dbp,
00637                                     vdp, (QPAGE *)h, i, flags);
00638                                 break;
00639                         default:
00640                                 EPRINT((dbp->dbenv,
00641                                     "Unknown page type: %lu", TYPE(h)));
00642                                 isbad = 1;
00643                                 break;
00644                         }
00645 
00646                         /*
00647                          * Set up error return.
00648                          */
00649                         if (t_ret == DB_VERIFY_BAD)
00650                                 isbad = 1;
00651                         else if (t_ret == DB_VERIFY_FATAL)
00652                                 goto err;
00653                         else
00654                                 ret = t_ret;
00655                 }
00656 
00657                 if ((t_ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0)
00658                         ret = t_ret;
00659         }
00660 
00661         if (0) {
00662 err:            if ((t_ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0)
00663                         return (ret == 0 ? t_ret : ret);
00664                 return (DB_VERIFY_BAD);
00665         }
00666 
00667         return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
00668 }
00669 
00670 /*
00671  * __db_vrfy_structure--
00672  *      After a beginning-to-end walk through the database has been
00673  *      completed, put together the information that has been collected
00674  *      to verify the overall database structure.
00675  *
00676  *      Should only be called if we want to do a database verification,
00677  *      i.e. if DB_SALVAGE is not set.
00678  */
00679 static int
00680 __db_vrfy_structure(dbp, vdp, dbname, meta_pgno, flags)
00681         DB *dbp;
00682         VRFY_DBINFO *vdp;
00683         char *dbname;
00684         db_pgno_t meta_pgno;
00685         u_int32_t flags;
00686 {
00687         DB *pgset;
00688         DB_ENV *dbenv;
00689         VRFY_PAGEINFO *pip;
00690         db_pgno_t i;
00691         int ret, isbad, hassubs, p;
00692 
00693         isbad = 0;
00694         pip = NULL;
00695         dbenv = dbp->dbenv;
00696         pgset = vdp->pgset;
00697 
00698         if ((ret = CDB___db_fchk(dbenv, "DB->verify", flags, OKFLAGS)) != 0)
00699                 return (ret);
00700         if (LF_ISSET(DB_SALVAGE)) {
00701                 CDB___db_err(dbenv, "__db_vrfy_structure called with DB_SALVAGE");
00702                 return (EINVAL);
00703         }
00704 
00705         /*
00706          * Call the appropriate function to downwards-traverse the db type.
00707          */
00708         switch(dbp->type) {
00709         case DB_BTREE:
00710         case DB_RECNO:
00711                 if ((ret = CDB___bam_vrfy_structure(dbp, vdp, 0, flags)) != 0) {
00712                         if (ret == DB_VERIFY_BAD)
00713                                 isbad = 1;
00714                         else
00715                                 goto err;
00716                 }
00717 
00718                 /*
00719                  * If we have subdatabases and we know that the database is,
00720                  * thus far, sound, it's safe to walk the tree of subdatabases.
00721                  * Do so, and verify the structure of the databases within.
00722                  */
00723                 if ((ret = CDB___db_vrfy_getpageinfo(vdp, 0, &pip)) != 0)
00724                         goto err;
00725                 hassubs = F_ISSET(pip, VRFY_HAS_SUBDBS);
00726                 if ((ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0)
00727                         goto err;
00728 
00729                 if (isbad == 0 && hassubs)
00730                         if ((ret =
00731                             __db_vrfy_subdbs(dbp, vdp, dbname, flags)) != 0) {
00732                                 if (ret == DB_VERIFY_BAD)
00733                                         isbad = 1;
00734                                 else
00735                                         goto err;
00736                         }
00737                 break;
00738         case DB_HASH:
00739                 if ((ret = CDB___ham_vrfy_structure(dbp, vdp, 0, flags)) != 0) {
00740                         if (ret == DB_VERIFY_BAD)
00741                                 isbad = 1;
00742                         else
00743                                 goto err;
00744                 }
00745                 break;
00746         case DB_QUEUE:
00747                 if ((ret = CDB___qam_vrfy_structure(dbp, vdp, flags)) != 0) {
00748                         if (ret == DB_VERIFY_BAD)
00749                                 isbad = 1;
00750                 }
00751 
00752                 /*
00753                  * Queue pages may be unreferenced and totally zeroed, if
00754                  * they're empty;  queue doesn't have much structure, so
00755                  * this is unlikely to be wrong in any troublesome sense.
00756                  * Skip to "err".
00757                  */
00758                 goto err;
00759                 /* NOTREACHED */
00760         default:
00761                 /* This should only happen if the verifier is somehow broken. */
00762                 DB_ASSERT(0);
00763                 ret = EINVAL;
00764                 goto err;
00765                 /* NOTREACHED */
00766         }
00767 
00768         /* Walk free list. */
00769         if ((ret =
00770             __db_vrfy_freelist(dbp, vdp, meta_pgno, flags)) == DB_VERIFY_BAD)
00771                 isbad = 1;
00772 
00773         /*
00774          * If structure checks up until now have failed, it's likely that
00775          * checking what pages have been missed will result in oodles of
00776          * extraneous error messages being EPRINTed.  Skip to the end
00777          * if this is the case;  we're going to be printing at least one
00778          * error anyway, and probably all the more salient ones.
00779          */
00780         if (ret != 0 || isbad == 1)
00781                 goto err;
00782 
00783         /*
00784          * Make sure no page has been missed and that no page is still marked
00785          * "all zeroes" (only certain hash pages can be, and they're unmarked
00786          * in CDB___ham_vrfy_structure).
00787          */
00788         for (i = 0; i < vdp->last_pgno + 1; i++) {
00789                 if ((ret = CDB___db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
00790                         goto err;
00791                 if ((ret = CDB___db_vrfy_pgset_get(pgset, i, &p)) != 0)
00792                         goto err;
00793                 if (p == 0) {
00794                         EPRINT((dbp->dbenv, "Unreferenced page %lu", i));
00795                         /* isbad = 1;  */
00796                         /* XXX: this is a db bug */
00797                         if (pip->type != P_LRECNO && pip->type != P_LDUP)
00798                                 isbad = 1;
00799                 }
00800 
00801                 if (F_ISSET(pip, VRFY_IS_ALLZEROES)) {
00802                         EPRINT((dbp->dbenv, "Totally zeroed page %lu", i));
00803                         isbad = 1;
00804                 }
00805                 if ((ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0)
00806                         goto err;
00807                 pip = NULL;
00808         }
00809 
00810 err:    if (pip != NULL)
00811                 (void)CDB___db_vrfy_putpageinfo(vdp, pip);
00812 
00813         return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
00814 }
00815 
00816 /*
00817  * __db_is_valid_pagetype
00818  */
00819 static int
00820 __db_is_valid_pagetype(type)
00821         u_int32_t type;
00822 {
00823         switch (type) {
00824         case P_INVALID:                 /* Order matches ordinal value. */
00825         case P_HASH:
00826         case P_IBTREE:
00827         case P_IRECNO:
00828         case P_LBTREE:
00829         case P_LRECNO:
00830         case P_OVERFLOW:
00831         case P_HASHMETA:
00832         case P_BTREEMETA:
00833         case P_QAMMETA:
00834         case P_QAMDATA:
00835         case P_LDUP:
00836                 return (1);
00837         }
00838         return (0);
00839 }
00840 
00841 /*
00842  * __db_is_valid_magicno
00843  */
00844 static int
00845 __db_is_valid_magicno(magic, typep)
00846         u_int32_t magic;
00847         DBTYPE *typep;
00848 {
00849         switch (magic) {
00850         case DB_BTREEMAGIC:
00851                 *typep = DB_BTREE;
00852                 return (1);
00853         case DB_HASHMAGIC:
00854                 *typep = DB_HASH;
00855                 return (1);
00856         case DB_QAMMAGIC:
00857                 *typep = DB_QUEUE;
00858                 return (1);
00859         }
00860         *typep = DB_UNKNOWN;
00861         return (0);
00862 }
00863 
00864 /*
00865  * __db_vrfy_common --
00866  *      Verify info common to all page types.
00867  */
00868 static int
00869 __db_vrfy_common(dbp, vdp, h, pgno, flags)
00870         DB *dbp;
00871         VRFY_DBINFO *vdp;
00872         PAGE *h;
00873         db_pgno_t pgno;
00874         u_int32_t flags;
00875 {
00876         VRFY_PAGEINFO *pip;
00877         int ret, t_ret;
00878         u_int8_t *p;
00879 
00880         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00881                 return (ret);
00882 
00883         pip->pgno = pgno;
00884         F_CLR(pip, VRFY_IS_ALLZEROES);
00885 
00886         /*
00887          * Hash expands the table by leaving some pages between the
00888          * old last and the new last totally zeroed.  Its pgin function
00889          * should fix things, but we might not be using that (e.g. if
00890          * we're a subdatabase).
00891          */
00892         if (pgno != 0 && PGNO(h) == 0) {
00893                 for (p = (u_int8_t *)h; p < (u_int8_t *)h + dbp->pgsize; p++)
00894                         if (*p != 0) {
00895                                 EPRINT((dbp->dbenv,
00896                                     "Hash page %lu should be zeroed and is not",
00897                                     pgno));
00898                                 ret = DB_VERIFY_BAD;
00899                                 goto err;
00900                         }
00901                 /*
00902                  * It's totally zeroed;  mark it as a hash, and we'll
00903                  * check that that makes sense structurally later.
00904                  */
00905                 pip->type = P_HASH;
00906                 F_SET(pip, VRFY_IS_ALLZEROES);
00907                 ret = 0;
00908                 goto err;       /* well, not really an err. */
00909         }
00910 
00911         if (PGNO(h) != pgno) {
00912                 EPRINT((dbp->dbenv,
00913                     "Bad page number: %lu should be %lu", h->pgno, pgno));
00914                 ret = DB_VERIFY_BAD;
00915         }
00916 
00917         if (!__db_is_valid_pagetype(h->type)) {
00918                 EPRINT((dbp->dbenv, "Bad page type: %lu", h->type));
00919                 ret = DB_VERIFY_BAD;
00920         }
00921         pip->type = h->type;
00922 
00923 err:    if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
00924                 ret = t_ret;
00925 
00926         return (ret);
00927 }
00928 
00929 /*
00930  * __db_vrfy_invalid --
00931  *      Verify P_INVALID page.
00932  *      (Yes, there's not much to do here.)
00933  */
00934 static int
00935 __db_vrfy_invalid(dbp, vdp, h, pgno, flags)
00936         DB *dbp;
00937         VRFY_DBINFO *vdp;
00938         PAGE *h;
00939         db_pgno_t pgno;
00940         u_int32_t flags;
00941 {
00942         VRFY_PAGEINFO *pip;
00943         int ret, t_ret;
00944 
00945         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00946                 return (ret);
00947         pip->next_pgno = pip->prev_pgno = 0;
00948 
00949         if (!IS_VALID_PGNO(NEXT_PGNO(h))) {
00950                 EPRINT((dbp->dbenv,
00951                     "Invalid next_pgno %lu on page %lu", NEXT_PGNO(h), pgno));
00952                 ret = DB_VERIFY_BAD;
00953         } else
00954                 pip->next_pgno = NEXT_PGNO(h);
00955 
00956         if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
00957                 ret = t_ret;
00958         return (ret);
00959 }
00960 
00961 /*
00962  * CDB___db_vrfy_datapage --
00963  *      Verify elements common to data pages (P_HASH, P_LBTREE,
00964  *      P_IBTREE, P_IRECNO, P_LRECNO, P_OVERFLOW, P_DUPLICATE)--i.e.,
00965  *      those defined in the PAGE structure.
00966  *
00967  *      Called from each of the per-page routines, after the
00968  *      all-page-type-common elements of pip have been verified and filled
00969  *      in.
00970  *
00971  * PUBLIC: int CDB___db_vrfy_datapage
00972  * PUBLIC:     __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t));
00973  */
00974 int
00975 CDB___db_vrfy_datapage(dbp, vdp, h, pgno, flags)
00976         DB *dbp;
00977         VRFY_DBINFO *vdp;
00978         PAGE *h;
00979         db_pgno_t pgno;
00980         u_int32_t flags;
00981 {
00982         VRFY_PAGEINFO *pip;
00983         int isbad, ret, t_ret;
00984 
00985         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
00986                 return (ret);
00987         isbad = 0;
00988 
00989         /*
00990          * prev_pgno and next_pgno:  store for inter-page checks,
00991          * verify that they point to actual pages and not to self.
00992          *
00993          * !!!
00994          * Internal btree pages do not maintain these fields (indeed,
00995          * they overload them).  Skip.
00996          */
00997         if (TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO) {
00998                 if (!IS_VALID_PGNO(PREV_PGNO(h)) || PREV_PGNO(h) == pip->pgno) {
00999                         isbad = 1;
01000                         EPRINT((dbp->dbenv, "Page %lu: Invalid prev_pgno %lu",
01001                             pip->pgno, PREV_PGNO(h)));
01002                 }
01003                 if (!IS_VALID_PGNO(NEXT_PGNO(h)) || NEXT_PGNO(h) == pip->pgno) {
01004                         isbad = 1;
01005                         EPRINT((dbp->dbenv, "Page %lu: Invalid next_pgno %lu",
01006                             pip->pgno, NEXT_PGNO(h)));
01007                 }
01008                 pip->prev_pgno = PREV_PGNO(h);
01009                 pip->next_pgno = NEXT_PGNO(h);
01010         }
01011 
01012         /*
01013          * Verify the number of entries on the page.
01014          * There is no good way to determine if this is accurate;  the
01015          * best we can do is verify that it's not more than can, in theory,
01016          * fit on the page.  Then, we make sure there are at least
01017          * this many valid elements in inp[], and hope that this catches
01018          * most cases.
01019          */
01020         if (TYPE(h) != P_OVERFLOW) {
01021                 if (BKEYDATA_PSIZE(0) * NUM_ENT(h) > dbp->pgsize) {
01022                         isbad = 1;
01023                         EPRINT((dbp->dbenv,
01024                             "Page %lu: Too many entries: %lu",
01025                             pgno, NUM_ENT(h)));
01026                 }
01027                 pip->entries = NUM_ENT(h);
01028         }
01029 
01030         /*
01031          * btree level.  Should be zero unless we're a btree;
01032          * if we are a btree, should be between LEAFLEVEL and MAXBTREELEVEL,
01033          * and we need to save it off.
01034          */
01035         switch (TYPE(h)) {
01036         case P_IBTREE:
01037         case P_IRECNO:
01038                 if (LEVEL(h) < LEAFLEVEL + 1 || LEVEL(h) > MAXBTREELEVEL) {
01039                         isbad = 1;
01040                         EPRINT((dbp->dbenv, "Bad btree level %lu on page %lu",
01041                             LEVEL(h), pgno));
01042                 }
01043                 pip->bt_level = LEVEL(h);
01044                 break;
01045         case P_LBTREE:
01046         case P_LDUP:
01047         case P_LRECNO:
01048                 if (LEVEL(h) != LEAFLEVEL) {
01049                         isbad = 1;
01050                         EPRINT((dbp->dbenv,
01051                             "Btree leaf page %lu has incorrect level %lu",
01052                             pgno, LEVEL(h)));
01053                 }
01054                 break;
01055         default:
01056                 if (LEVEL(h) != 0) {
01057                         isbad = 1;
01058                         EPRINT((dbp->dbenv,
01059                             "Nonzero level %lu in non-btree database page %lu",
01060                             LEVEL(h), pgno));
01061                 }
01062                 break;
01063         }
01064 
01065         /*
01066          * Even though inp[] occurs in all PAGEs, we look at it in the
01067          * access-method-specific code, since btree and hash treat
01068          * item lengths very differently, and one of the most important
01069          * things we want to verify is that the data--as specified
01070          * by offset and length--cover the right part of the page
01071          * without overlaps, gaps, or violations of the page boundary.
01072          */
01073         if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
01074                 ret = t_ret;
01075 
01076         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
01077 }
01078 
01079 /*
01080  * CDB___db_vrfy_meta--
01081  *      Verify the access-method common parts of a meta page, using
01082  *      normal mpool routines.
01083  *
01084  * PUBLIC: int CDB___db_vrfy_meta
01085  * PUBLIC:     __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t));
01086  */
01087 int
01088 CDB___db_vrfy_meta(dbp, vdp, meta, pgno, flags)
01089         DB *dbp;
01090         VRFY_DBINFO *vdp;
01091         DBMETA *meta;
01092         db_pgno_t pgno;
01093         u_int32_t flags;
01094 {
01095         DBTYPE dbtype, magtype;
01096         VRFY_PAGEINFO *pip;
01097         int isbad, ret, t_ret;
01098 
01099         isbad = 0;
01100         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
01101                 return (ret);
01102 
01103         /* type plausible for a meta page */
01104         switch (meta->type) {
01105         case P_BTREEMETA:
01106                 dbtype = DB_BTREE;
01107                 break;
01108         case P_HASHMETA:
01109                 dbtype = DB_HASH;
01110                 break;
01111         case P_QAMMETA:
01112                 dbtype = DB_QUEUE;
01113                 break;
01114         default:
01115                 /* The verifier should never let us get here. */
01116                 DB_ASSERT(0);
01117                 ret = EINVAL;
01118                 goto err;
01119         }
01120 
01121         /* magic number valid */
01122         if (!__db_is_valid_magicno(meta->magic, &magtype)) {
01123                 isbad = 1;
01124                 EPRINT((dbp->dbenv, "Magic number invalid on page %lu", pgno));
01125         }
01126         if (magtype != dbtype) {
01127                 isbad = 1;
01128                 EPRINT((dbp->dbenv,
01129                     "Magic number does not match type of page %lu", pgno));
01130         }
01131 
01132         /* version */
01133         if ((dbtype == DB_BTREE && meta->version != DB_BTREEVERSION) ||
01134             (dbtype == DB_HASH && meta->version != DB_HASHVERSION) ||
01135             (dbtype == DB_QUEUE && meta->version != DB_QAMVERSION)) {
01136                 isbad = 1;
01137                 EPRINT((dbp->dbenv, "%s%s", "Old of incorrect DB ",
01138                     "version; extraneous errors may result"));
01139         }
01140 
01141         /* pagesize */
01142         if (meta->pagesize != dbp->pgsize) {
01143                 isbad = 1;
01144                 EPRINT((dbp->dbenv,
01145                     "Invalid pagesize %lu on page %lu", meta->pagesize, pgno));
01146         }
01147 
01148         /* free list */
01149         /* Can correctly be PGNO_INVALID--that's just the end of the list. */
01150         if (meta->free != PGNO_INVALID && IS_VALID_PGNO(meta->free))
01151                 pip->free = meta->free;
01152         else if (!IS_VALID_PGNO(meta->free)) {
01153                 isbad = 1;
01154                 EPRINT((dbp->dbenv,
01155                     "Nonsensical free list pgno %lu on page %lu",
01156                     meta->free, pgno));
01157         }
01158 
01159 err:    if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0 && ret == 0)
01160                 ret = t_ret;
01161 
01162         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
01163 }
01164 
01165 /*
01166  * __db_vrfy_freelist --
01167  *      Walk free list, checking off pages and verifying absence of
01168  *      loops.
01169  */
01170 static int
01171 __db_vrfy_freelist(dbp, vdp, meta, flags)
01172         DB *dbp;
01173         VRFY_DBINFO *vdp;
01174         db_pgno_t meta;
01175         u_int32_t flags;
01176 {
01177         DB *pgset;
01178         VRFY_PAGEINFO *pip;
01179         db_pgno_t pgno;
01180         int p, ret, t_ret;
01181 
01182         pgset = vdp->pgset;
01183         DB_ASSERT(pgset != NULL);
01184 
01185         if ((ret = CDB___db_vrfy_getpageinfo(vdp, meta, &pip)) != 0)
01186                 return (ret);
01187         for (pgno = pip->free; pgno != PGNO_INVALID; pgno = pip->next_pgno) {
01188                 if ((ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0)
01189                         return (ret);
01190 
01191                 /* This shouldn't happen, but just in case. */
01192                 if (!IS_VALID_PGNO(pgno)) {
01193                         EPRINT((dbp->dbenv,
01194                             "Invalid next_pgno on free list page %lu", pgno));
01195                         return (DB_VERIFY_BAD);
01196                 }
01197 
01198                 /* Detect cycles. */
01199                 if ((ret = CDB___db_vrfy_pgset_get(pgset, pgno, &p)) != 0)
01200                         return (ret);
01201                 if (p != 0) {
01202                         EPRINT((dbp->dbenv,
01203                             "Page %lu encountered a second time on free list",
01204                             pgno));
01205                         return (DB_VERIFY_BAD);
01206                 }
01207                 if ((ret = CDB___db_vrfy_pgset_inc(pgset, pgno)) != 0)
01208                         return (ret);
01209 
01210                 if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
01211                         return (ret);
01212 
01213                 if (pip->type != P_INVALID) {
01214                         EPRINT((dbp->dbenv, "Non-invalid page %lu on free list",
01215                             pgno, pip->type));
01216                         ret = DB_VERIFY_BAD;      /* unsafe to continue */
01217                         break;
01218                 }
01219         }
01220 
01221         if ((t_ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0)
01222                 ret = t_ret;
01223         return (ret);
01224 }
01225 
01226 /*
01227  * __db_vrfy_subdbs --
01228  *      Walk the known-safe master database of subdbs with a cursor,
01229  *      verifying the structure of each subdatabase we encounter.
01230  */
01231 static int
01232 __db_vrfy_subdbs(dbp, vdp, dbname, flags)
01233         DB *dbp;
01234         VRFY_DBINFO *vdp;
01235         char *dbname;
01236         u_int32_t flags;
01237 {
01238         DB *mdbp;
01239         DBC *dbc;
01240         DBT key, data;
01241         VRFY_PAGEINFO *pip;
01242         db_pgno_t meta_pgno;
01243         int ret, t_ret, isbad;
01244         u_int8_t type;
01245 
01246         isbad = 0;
01247         dbc = NULL;
01248 
01249         if ((ret = CDB___db_master_open(dbp, dbname, DB_RDONLY, 0, &mdbp)) != 0)
01250                 return (ret);
01251 
01252         if ((ret =
01253             CDB___db_icursor(mdbp, NULL, DB_BTREE, PGNO_INVALID, 0, &dbc)) != 0)
01254                 goto err;
01255 
01256         memset(&key, 0, sizeof(key));
01257         memset(&data, 0, sizeof(data));
01258         while ((ret = dbc->c_get(dbc, &key, &data, DB_NEXT)) == 0) {
01259                 if (data.size != sizeof(db_pgno_t)) {
01260                         EPRINT((dbp->dbenv, "Database entry of invalid size"));
01261                         isbad = 1;
01262                         goto err;
01263                 }
01264                 memcpy(&meta_pgno, data.data, data.size);
01265                 /*
01266                  * Subdatabase meta pgnos are stored in network byte
01267                  * order for cross-endian compatibility.  Swap if appropriate.
01268                  */
01269                 DB_NTOHL(&meta_pgno);
01270                 if (meta_pgno == PGNO_INVALID || meta_pgno > vdp->last_pgno) {
01271                         EPRINT((dbp->dbenv,
01272                             "Database entry references invalid page %lu",
01273                             meta_pgno));
01274                         isbad = 1;
01275                         goto err;
01276                 }
01277                 if ((ret = CDB___db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0)
01278                         goto err;
01279                 type = pip->type;
01280                 if ((ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0)
01281                         goto err;
01282                 switch (type) {
01283                 case P_BTREEMETA:
01284                         if ((ret = CDB___bam_vrfy_structure(
01285                             dbp, vdp, meta_pgno, flags)) != 0) {
01286                                 if (ret == DB_VERIFY_BAD)
01287                                         isbad = 1;
01288                                 else
01289                                         goto err;
01290                         }
01291                         break;
01292                 case P_HASHMETA:
01293                         if ((ret = CDB___ham_vrfy_structure(
01294                             dbp, vdp, meta_pgno, flags)) != 0) {
01295                                 if (ret == DB_VERIFY_BAD)
01296                                         isbad = 1;
01297                                 else
01298                                         goto err;
01299                         }
01300                         break;
01301                 case P_QAMMETA:
01302                 default:
01303                         EPRINT((dbp->dbenv,
01304             "Database entry references page %lu of invalid type %lu",
01305                             meta_pgno, type));
01306                         ret = DB_VERIFY_BAD;
01307                         goto err;
01308                         /* NOTREACHED */
01309                 }
01310         }
01311 
01312         if (ret == DB_NOTFOUND)
01313                 ret = 0;
01314 
01315 err:    if (dbc != NULL && (t_ret = CDB___db_c_close(dbc)) != 0 && ret == 0)
01316                 ret = t_ret;
01317 
01318         if ((t_ret = mdbp->close(mdbp, 0)) != 0 && ret == 0)
01319                 ret = t_ret;
01320 
01321         return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret);
01322 }
01323 
01324 /*
01325  * __db_vrfy_orderchkonly --
01326  *      Do an sort-order/hashing check on a known-otherwise-good subdb.
01327  */
01328 static int
01329 __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags)
01330         DB *dbp;
01331         VRFY_DBINFO *vdp;
01332         const char *name, *subdb;
01333         u_int32_t flags;
01334 {
01335         BTMETA *btmeta;
01336         DB *mdbp, *pgset;
01337         DBC *pgsc;
01338         DBT key, data;
01339         HASH *h_internal;
01340         HMETA *hmeta;
01341         PAGE *h, *currpg;
01342         db_pgno_t meta_pgno, p, pgno;
01343         u_int32_t bucket;
01344         int t_ret, ret;
01345 
01346         currpg = h = NULL;
01347         pgsc = NULL;
01348         pgset = NULL;
01349 
01350         LF_CLR(DB_NOORDERCHK);
01351 
01352         /* Open the master database and get the meta_pgno for the subdb. */
01353         if ((ret = CDB_db_create(&mdbp, NULL, 0)) != 0)
01354                 return (ret);
01355         if ((ret = CDB___db_master_open(dbp, name, DB_RDONLY, 0, &mdbp)) != 0)
01356                 goto err;
01357 
01358         memset(&key, 0, sizeof(key));
01359         key.data = (void *)subdb;
01360         memset(&data, 0, sizeof(data));
01361         if ((ret = dbp->get(dbp, NULL, &key, &data, 0)) != 0)
01362                 goto err;
01363 
01364         if (data.size != sizeof(db_pgno_t)) {
01365                 EPRINT((dbp->dbenv, "Database entry of invalid size"));
01366                 ret = DB_VERIFY_BAD;
01367                 goto err;
01368         }
01369 
01370         memcpy(&meta_pgno, data.data, data.size);
01371 
01372         if ((ret = CDB_memp_fget(dbp->mpf, &meta_pgno, 0, &h)) != 0)
01373                 goto err;
01374 
01375         if ((ret = CDB___db_vrfy_pgset(dbp->dbenv, dbp->pgsize, &pgset)) != 0)
01376                 goto err;
01377 
01378         switch (TYPE(h)) {
01379         case P_BTREEMETA:
01380                 btmeta = (BTMETA *)h;
01381                 if (F_ISSET(&btmeta->dbmeta, BTM_RECNO)) {
01382                         /* Recnos have no order to check. */
01383                         ret = 0;
01384                         goto err;
01385                 }
01386                 if ((ret =
01387                     __db_meta2pgset(dbp, vdp, meta_pgno, flags, pgset)) != 0)
01388                         goto err;
01389                 if ((ret = pgset->cursor(pgset, NULL, &pgsc, 0)) != 0)
01390                         goto err;
01391                 while ((ret = CDB___db_vrfy_pgset_next(pgsc, &p)) == 0) {
01392                         if ((ret = CDB_memp_fget(dbp->mpf, &p, 0, &currpg)) != 0)
01393                                 goto err;
01394                         if ((ret = CDB___bam_vrfy_itemorder(dbp,
01395                             NULL, currpg, p, NUM_ENT(currpg), 1,
01396                             F_ISSET(&btmeta->dbmeta, BTM_DUP), flags)) != 0)
01397                                 goto err;
01398                         if ((ret = CDB_memp_fput(dbp->mpf, currpg, 0)) != 0)
01399                                 goto err;
01400                         currpg = NULL;
01401                 }
01402                 if ((ret = pgsc->c_close(pgsc)) != 0)
01403                         goto err;
01404                 break;
01405         case P_HASHMETA:
01406                 hmeta = (HMETA *)h;
01407                 h_internal = (HASH *)dbp->h_internal;
01408                 /*
01409                  * Make sure h_charkey is right.
01410                  */
01411                 if (h_internal == NULL || h_internal->h_hash == NULL) {
01412                         EPRINT((dbp->dbenv,
01413                     "DB_ORDERCHKONLY requires that a hash function be set"));
01414                         ret = DB_VERIFY_BAD;
01415                         goto err;
01416                 }
01417                 if (hmeta->h_charkey !=
01418                     h_internal->h_hash(CHARKEY, sizeof(CHARKEY))) {
01419                         EPRINT((dbp->dbenv,
01420                             "Incorrect hash function for database"));
01421                         ret = DB_VERIFY_BAD;
01422                         goto err;
01423                 }
01424 
01425                 /*
01426                  * Foreach bucket, verify hashing on each page in the
01427                  * corresponding chain of pages.
01428                  */
01429                 for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) {
01430                         pgno = hmeta->spares[CDB___db_log2(bucket + 1)];
01431                         while (pgno != PGNO_INVALID) {
01432                                 if ((ret = CDB_memp_fget(dbp->mpf,
01433                                     &pgno, 0, &currpg)) != 0)
01434                                         goto err;
01435                                 if ((ret = CDB___ham_vrfy_hashing(dbp,
01436                                     NUM_ENT(currpg),hmeta, bucket, pgno,
01437                                     flags, h_internal->h_hash)) != 0)
01438                                         goto err;
01439                                 pgno = NEXT_PGNO(currpg);
01440                                 if ((ret = CDB_memp_fput(dbp->mpf, currpg, 0)) != 0)
01441                                         goto err;
01442                                 currpg = NULL;
01443                         }
01444                 }
01445                 break;
01446         default:
01447                 EPRINT((dbp->dbenv, "Database meta page %lu of bad type %lu",
01448                     meta_pgno, TYPE(h)));
01449                 ret = DB_VERIFY_BAD;
01450                 break;
01451         }
01452 
01453 err:    if (pgsc != NULL)
01454                 (void)pgsc->c_close(pgsc);
01455         if (pgset != NULL)
01456                 (void)pgset->close(pgset, 0);
01457         if (h != NULL && (t_ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0)
01458                 ret = t_ret;
01459         if (currpg != NULL && (t_ret = CDB_memp_fput(dbp->mpf, currpg, 0)) != 0)
01460                 ret = t_ret;
01461         if ((t_ret = mdbp->close(mdbp, 0)) != 0)
01462                 ret = t_ret;
01463         return (ret);
01464 }
01465 
01466 /*
01467  * CDB___db_salvage --
01468  *      Walk through a page, salvaging all likely or plausible (w/
01469  *      DB_AGGRESSIVE) key/data pairs.
01470  *
01471  * PUBLIC: int CDB___db_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *,
01472  * PUBLIC:     void *, int (*)(void *, const void *), u_int32_t));
01473  */
01474 int
01475 CDB___db_salvage(dbp, vdp, pgno, h, handle, callback, flags)
01476         DB *dbp;
01477         VRFY_DBINFO *vdp;
01478         db_pgno_t pgno;
01479         PAGE *h;
01480         void *handle;
01481         int (*callback) __P((void *, const void *));
01482         u_int32_t flags;
01483 {
01484         DB_ASSERT(LF_ISSET(DB_SALVAGE));
01485 
01486         /* If we got this page in the subdb pass, we can safely skip it. */
01487         if (CDB___db_salvage_isdone(vdp, pgno))
01488                 return (0);
01489 
01490         switch (TYPE(h)) {
01491         case P_HASH:
01492                 return (CDB___ham_salvage(dbp,
01493                     vdp, pgno, h, handle, callback, flags));
01494                 /* NOTREACHED */
01495         case P_LBTREE:
01496                 return (CDB___bam_salvage(dbp,
01497                     vdp, pgno, P_LBTREE, h, handle, callback, NULL, flags));
01498                 /* NOTREACHED */
01499         case P_LDUP:
01500                 return (CDB___db_salvage_markneeded(vdp, pgno, SALVAGE_LDUP));
01501                 /* NOTREACHED */
01502         case P_OVERFLOW:
01503                 return (CDB___db_salvage_markneeded(vdp, pgno, SALVAGE_OVERFLOW));
01504                 /* NOTREACHED */
01505         case P_LRECNO:
01506                 /*
01507                  * Recnos are tricky -- they may represent dup pages, or
01508                  * they may be subdatabase/regular database pages in their
01509                  * own right.  If the former, they need to be printed with a
01510                  * key, preferably when we hit the corresponding datum in
01511                  * a btree/hash page.  If the latter, there is no key.
01512                  *
01513                  * If a database is sufficiently frotzed, we're not going
01514                  * to be able to get this right, so we best-guess:  just
01515                  * mark it needed now, and if we're really a normal recno
01516                  * database page, the "unknowns" pass will pick us up.
01517                  */
01518                 return (CDB___db_salvage_markneeded(vdp, pgno, SALVAGE_LRECNO));
01519                 /* NOTREACHED */
01520         case P_IBTREE:
01521         case P_INVALID:
01522         case P_IRECNO:
01523         case __P_DUPLICATE:
01524         default:
01525                 /* XXX: Should we be more aggressive here? */
01526                 break;
01527         }
01528         return (0);
01529 }
01530 
01531 /*
01532  * __db_salvage_unknowns --
01533  *      Walk through the salvager database, printing with key "UNKNOWN"
01534  *      any pages we haven't dealt with.
01535  */
01536 static int
01537 __db_salvage_unknowns(dbp, vdp, handle, callback, flags)
01538         DB *dbp;
01539         VRFY_DBINFO *vdp;
01540         void *handle;
01541         int (*callback) __P((void *, const void *));
01542         u_int32_t flags;
01543 {
01544         DBT unkdbt, key, *dbt;
01545         PAGE *h;
01546         db_pgno_t pgno;
01547         u_int32_t pgtype;
01548         int ret, err_ret;
01549         void *ovflbuf;
01550 
01551         memset(&unkdbt, 0, sizeof(DBT));
01552         unkdbt.size = strlen("UNKNOWN") + 1;
01553         unkdbt.data = "UNKNOWN";
01554 
01555         if ((ret = CDB___os_malloc(dbp->dbenv, dbp->pgsize, 0, &ovflbuf)) != 0)
01556                 return (ret);
01557 
01558         err_ret = 0;
01559         while ((ret = CDB___db_salvage_getnext(vdp, &pgno, &pgtype)) == 0) {
01560                 dbt = NULL;
01561 
01562                 if ((ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
01563                         err_ret = ret;
01564                         continue;
01565                 }
01566 
01567                 switch (pgtype) {
01568                 case SALVAGE_LDUP:
01569                 case SALVAGE_LRECNODUP:
01570                         dbt = &unkdbt;
01571                         /* FALLTHROUGH */
01572                 case SALVAGE_LBTREE:
01573                 case SALVAGE_LRECNO:
01574                         if ((ret = CDB___bam_salvage(dbp, vdp, pgno, pgtype,
01575                             h, handle, callback, dbt, flags)) != 0)
01576                                 err_ret = ret;
01577                         break;
01578                 case SALVAGE_OVERFLOW:
01579                         /*
01580                          * XXX:
01581                          * This may generate multiple "UNKNOWN" keys in
01582                          * a database with no dups.  What to do?
01583                          */
01584                         if ((ret = CDB___db_safe_goff(dbp,
01585                             vdp, pgno, &key, &ovflbuf, flags)) != 0) {
01586                                 err_ret = ret;
01587                                 continue;
01588                         }
01589                         if ((ret = CDB___db_prdbt(&key,
01590                             0, " ", handle, callback, 0, NULL)) != 0) {
01591                                 err_ret = ret;
01592                                 continue;
01593                         }
01594                         if ((ret = CDB___db_prdbt(&unkdbt,
01595                                 0, " ", handle, callback, 0, NULL)) != 0)
01596                                 err_ret = ret;
01597                         break;
01598                 case SALVAGE_HASH:
01599                         if ((ret = CDB___ham_salvage(
01600                             dbp, vdp, pgno, h, handle, callback, flags)) != 0)
01601                                 err_ret = ret;
01602                         break;
01603                 case SALVAGE_INVALID:
01604                 case SALVAGE_IGNORE:
01605                 default:
01606                         /*
01607                          * Shouldn't happen, but if it does, just do what the
01608                          * nice man says.
01609                          */
01610                         DB_ASSERT(0);
01611                         break;
01612                 }
01613                 if ((ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0)
01614                         err_ret = ret;
01615         }
01616 
01617         CDB___os_free(ovflbuf, 0);
01618 
01619         if (err_ret != 0 && ret == 0)
01620                 ret = err_ret;
01621 
01622         return (ret == DB_NOTFOUND ? 0 : ret);
01623 }
01624 
/*
 * Byte offset, measured from the start of page h, of the ith entry in the
 * inp array; we can compare this to the offset the entry stores.
 *
 * The index is applied to the inp pointer before the conversion to a byte
 * pointer, so that i counts whole inp entries rather than single bytes.
 */
#define INP_OFFSET(h, i)        \
    ((db_indx_t)((u_int8_t *)((h)->inp + (i)) - (u_int8_t *)(h)))
01631 
01632 /*
01633  * CDB___db_vrfy_inpitem --
01634  *      Verify that a single entry in the inp array is sane, and update
01635  *      the high water mark and current item offset.  (The former of these is
01636  *      used for state information between calls, and is required;  it must
01637  *      be initialized to the pagesize before the first call.)
01638  *
01639  *      Returns DB_VERIFY_FATAL if inp has collided with the data,
01640  *      since verification can't continue from there;  returns DB_VERIFY_BAD
01641  *      if anything else is wrong.
01642  *
01643  * PUBLIC: int CDB___db_vrfy_inpitem __P((DB *, PAGE *,
01644  * PUBLIC:     db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *));
01645  */
01646 int
01647 CDB___db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp)
01648         DB *dbp;
01649         PAGE *h;
01650         db_pgno_t pgno;
01651         u_int32_t i;
01652         int is_btree;
01653         u_int32_t flags, *himarkp, *offsetp;
01654 {
01655         BKEYDATA *bk;
01656         db_indx_t offset, len;
01657 
01658         DB_ASSERT(himarkp != NULL);
01659 
01660         /*
01661          * Check that the inp array, which grows from the beginning of the
01662          * page forward, has not collided with the data, which grow from the
01663          * end of the page backward.
01664          */
01665         if ((u_int8_t *)h->inp + i >= (u_int8_t *)h + *himarkp) {
01666                 /* We've collided with the data.  We need to bail. */
01667                 EPRINT((dbp->dbenv,
01668                     "Page %lu entries listing %lu overlaps data", pgno, i));
01669                 return (DB_VERIFY_FATAL);
01670         }
01671 
01672         offset = h->inp[i];
01673 
01674         /*
01675          * Check that the item offset is reasonable:  it points somewhere
01676          * after the inp array and before the end of the page.
01677          */
01678         if (offset <= INP_OFFSET(h, i) || offset > dbp->pgsize) {
01679                 EPRINT((dbp->dbenv,
01680                     "Bad offset %lu at page %lu index %lu", offset, pgno, i));
01681                 return (DB_VERIFY_BAD);
01682         }
01683 
01684         /* Update the high-water mark (what HOFFSET should be) */
01685         if (offset < *himarkp)
01686                 *himarkp = offset;
01687 
01688         if (is_btree) {
01689                 /*
01690                  * Check that the item length remains on-page.
01691                  */
01692                 bk = GET_BKEYDATA(h, i);
01693                 len = B_TYPE(bk->type) == B_KEYDATA ? bk->len : BOVERFLOW_SIZE;
01694                 if ((size_t)(offset + len) > dbp->pgsize) {
01695                         EPRINT((dbp->dbenv,
01696                             "Item %lu on page %lu extends past page boundary",
01697                             i, pgno));
01698                         return (DB_VERIFY_BAD);
01699                 }
01700         }
01701 
01702         if (offsetp != NULL)
01703                 *offsetp = offset;
01704         return (0);
01705 }
01706 
01707 /*
01708  * CDB___db_vrfy_duptype--
01709  *      Given a page number and a set of flags to CDB___bam_vrfy_subtree,
01710  *      verify that the dup tree type is correct--i.e., it's a recno
01711  *      if DUPSORT is not set and a btree if it is.
01712  *
01713  * PUBLIC: int CDB___db_vrfy_duptype
01714  * PUBLIC:     __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t));
01715  */
01716 int
01717 CDB___db_vrfy_duptype(dbp, vdp, pgno, flags)
01718         DB *dbp;
01719         VRFY_DBINFO *vdp;
01720         db_pgno_t pgno;
01721         u_int32_t flags;
01722 {
01723         VRFY_PAGEINFO *pip;
01724         int ret, isbad;
01725 
01726         isbad = 0;
01727 
01728         if ((ret = CDB___db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0)
01729                 return (ret);
01730 
01731         switch (pip->type) {
01732         case P_IBTREE:
01733         case P_LDUP:
01734                 if (!LF_ISSET(ST_DUPSORT)) {
01735                         EPRINT((dbp->dbenv,
01736             "Sorted duplicate set at page %lu in unsorted-dup database",
01737                             pgno));
01738                         isbad = 1;
01739                 }
01740                 break;
01741         case P_IRECNO:
01742         case P_LRECNO:
01743                 if (LF_ISSET(ST_DUPSORT)) {
01744                         EPRINT((dbp->dbenv,
01745             "Unsorted duplicate set at page %lu in sorted-dup database",
01746                             pgno));
01747                         isbad = 1;
01748                 }
01749                 break;
01750         default:
01751                 EPRINT((dbp->dbenv, "Duplicate page %lu of inappropriate type %lu",
01752                     pgno, pip->type));
01753                 isbad = 1;
01754                 break;
01755         }
01756 
01757         if ((ret = CDB___db_vrfy_putpageinfo(vdp, pip)) != 0)
01758                 return (ret);
01759         return (isbad == 1 ? DB_VERIFY_BAD : 0);
01760 }
01761 
01762 /*
01763  * CDB___db_salvage_duptree --
01764  *      Attempt to salvage a given duplicate tree, given its alleged root.
01765  *
01766  *      The key that corresponds to this dup set has been passed to us
01767  *      in DBT *key.  Because data items follow keys, though, it has been
01768  *      printed once already.
01769  *
01770  *      The basic idea here is that pgno ought to be a P_LDUP, a P_LRECNO, a
01771  *      P_IBTREE, or a P_IRECNO.  If it's an internal page, use the verifier
01772  *      functions to make sure it's safe;  if it's not, we simply bail and the
01773  *      data will have to be printed with no key later on.  if it is safe,
01774  *      recurse on each of its children.
01775  *
01776  *      Whether or not it's safe, if it's a leaf page, CDB___bam_salvage it.
01777  *
01778  *      At all times, use the DB hanging off vdp to mark and check what we've
01779  *      done, so each page gets printed exactly once and we don't get caught
01780  *      in any cycles.
01781  *
01782  * PUBLIC: int CDB___db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t,
01783  * PUBLIC:     DBT *, void *, int (*)(void *, const void *), u_int32_t));
01784  */
01785 int
01786 CDB___db_salvage_duptree(dbp, vdp, pgno, key, handle, callback, flags)
01787         DB *dbp;
01788         VRFY_DBINFO *vdp;
01789         db_pgno_t pgno;
01790         DBT *key;
01791         void *handle;
01792         int (*callback) __P((void *, const void *));
01793         u_int32_t flags;
01794 {
01795         PAGE *h;
01796         int ret, t_ret;
01797 
01798         if (pgno == PGNO_INVALID || !IS_VALID_PGNO(pgno))
01799                 return (DB_VERIFY_BAD);
01800 
01801         /* We have a plausible page.  Try it. */
01802         if ((ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
01803                 return (ret);
01804 
01805         switch (TYPE(h)) {
01806         case P_IBTREE:
01807         case P_IRECNO:
01808                 if ((ret = __db_vrfy_common(dbp, vdp, h, pgno, flags)) != 0)
01809                         goto err;
01810                 if ((ret = CDB___bam_vrfy(dbp,
01811                     vdp, h, pgno, flags | DB_NOORDERCHK)) != 0 ||
01812                     (ret = CDB___db_salvage_markdone(vdp, pgno)) != 0)
01813                         goto err;
01814                 /*
01815                  * We have a known-healthy internal page.  Walk it.
01816                  */
01817                 if ((ret = CDB___bam_salvage_walkdupint(dbp, vdp, h, key,
01818                     handle, callback, flags)) != 0)
01819                         goto err;
01820                 break;
01821         case P_LRECNO:
01822         case P_LDUP:
01823                 if ((ret = CDB___bam_salvage(dbp,
01824                     vdp, pgno, TYPE(h), h, handle, callback, key, flags)) != 0)
01825                         goto err;
01826                 break;
01827         default:
01828                 ret = DB_VERIFY_BAD;
01829                 goto err;
01830                 /* NOTREACHED */
01831         }
01832 
01833 err:    if ((t_ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0 && ret == 0)
01834                 ret = t_ret;
01835         return (ret);
01836 }
01837 
01838 /*
01839  * __db_salvage_subdbs --
01840  *      Check and see if this database has subdbs;  if so, try to salvage
01841  *      them independently.
01842  */
01843 static int
01844 __db_salvage_subdbs(dbp, vdp, handle, callback, flags, hassubsp)
01845         DB *dbp;
01846         VRFY_DBINFO *vdp;
01847         void *handle;
01848         int (*callback) __P((void *, const void *));
01849         u_int32_t flags;
01850         int *hassubsp;
01851 {
01852         BTMETA *btmeta;
01853         DB *pgset;
01854         DBC *pgsc;
01855         PAGE *h;
01856         db_pgno_t p, meta_pgno;
01857         int ret, err_ret;
01858 
01859         err_ret = 0;
01860         pgsc = NULL;
01861         pgset = NULL;
01862 
01863         meta_pgno = PGNO_BASE_MD;
01864         if ((ret = CDB_memp_fget(dbp->mpf, &meta_pgno, 0, &h)) != 0)
01865                 return (ret);
01866 
01867         if (TYPE(h) == P_BTREEMETA)
01868                 btmeta = (BTMETA *)h;
01869         else {
01870                 /* Not a btree metadata, ergo no subdbs, so just return. */
01871                 ret = 0;
01872                 goto err;
01873         }
01874 
01875         /* If it's not a safe page, bail on the attempt. */
01876         if ((ret = __db_vrfy_common(dbp, vdp, h, PGNO_BASE_MD, flags)) != 0 ||
01877            (ret = CDB___bam_vrfy_meta(dbp, vdp, btmeta, PGNO_BASE_MD, flags)) != 0)
01878                 goto err;
01879 
01880         if (!F_ISSET(&btmeta->dbmeta, BTM_SUBDB)) {
01881                 /* No subdbs, just return. */
01882                 ret = 0;
01883                 goto err;
01884         }
01885 
01886         /* We think we've got subdbs.  Mark it so. */
01887         *hassubsp = 1;
01888 
01889         if ((ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0)
01890                 return (ret);
01891 
01892         /*
01893          * We have subdbs.  Try to crack them.
01894          *
01895          * To do so, get a set of leaf pages in the master
01896          * database, and then walk each of the valid ones, salvaging
01897          * subdbs as we go.  If any prove invalid, just drop them;  we'll
01898          * pick them up on a later pass.
01899          */
01900         if ((ret = CDB___db_vrfy_pgset(dbp->dbenv, dbp->pgsize, &pgset)) != 0)
01901                 return (ret);
01902         if ((ret =
01903             __db_meta2pgset(dbp, vdp, PGNO_BASE_MD, flags, pgset)) != 0)
01904                 goto err;
01905 
01906         if ((ret = pgset->cursor(pgset, NULL, &pgsc, 0)) != 0)
01907                 goto err;
01908         while ((ret = CDB___db_vrfy_pgset_next(pgsc, &p)) == 0) {
01909                 if ((ret = CDB_memp_fget(dbp->mpf, &p, 0, &h)) != 0) {
01910                         err_ret = ret;
01911                         continue;
01912                 }
01913                 if ((ret = __db_vrfy_common(dbp, vdp, h, p, flags)) != 0 ||
01914                     (ret = CDB___bam_vrfy(dbp,
01915                     vdp, h, p, flags | DB_NOORDERCHK)) != 0)
01916                         goto nextpg;
01917                 if (TYPE(h) != P_LBTREE)
01918                         goto nextpg;
01919                 else if ((ret = CDB___db_salvage_subdbpg(
01920                     dbp, vdp, h, handle, callback, flags)) != 0)
01921                         err_ret = ret;
01922 nextpg:         if ((ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0)
01923                         err_ret = ret;
01924         }
01925 
01926         if (ret != DB_NOTFOUND)
01927                 goto err;
01928         if ((ret = pgsc->c_close(pgsc)) != 0)
01929                 goto err;
01930 
01931         ret = pgset->close(pgset, 0);
01932         return ((ret == 0 && err_ret != 0) ? err_ret : ret);
01933 
01934         /* NOTREACHED */
01935 
01936 err:    if (pgsc != NULL)
01937                 (void)pgsc->c_close(pgsc);
01938         if (pgset != NULL)
01939                 (void)pgset->close(pgset, 0);
01940         (void)CDB_memp_fput(dbp->mpf, h, 0);
01941         return (ret);
01942 }
01943 
01944 /*
01945  * CDB___db_salvage_subdbpg --
01946  *      Given a known-good leaf page in the master database, salvage all
01947  *      leaf pages corresponding to each subdb.
01948  *
01949  * PUBLIC: int CDB___db_salvage_subdbpg
01950  * PUBLIC:     __P((DB *, VRFY_DBINFO *, PAGE *, void *,
01951  * PUBLIC:     int (*)(void *, const void *), u_int32_t));
01952  */
01953 int
01954 CDB___db_salvage_subdbpg(dbp, vdp, master, handle, callback, flags)
01955         DB *dbp;
01956         VRFY_DBINFO *vdp;
01957         PAGE *master;
01958         void *handle;
01959         int (*callback) __P((void *, const void *));
01960         u_int32_t flags;
01961 {
01962         BKEYDATA *bkkey, *bkdata;
01963         BOVERFLOW *bo;
01964         DB *pgset;
01965         DBC *pgsc;
01966         DBT key;
01967         PAGE *subpg;
01968         db_indx_t i;
01969         db_pgno_t meta_pgno, p;
01970         int ret, err_ret, t_ret;
01971         char *subdbname;
01972 
01973         ret = err_ret = 0;
01974         subdbname = NULL;
01975 
01976         if ((ret = CDB___db_vrfy_pgset(dbp->dbenv, dbp->pgsize, &pgset)) != 0)
01977                 return (ret);
01978 
01979         /*
01980          * For each entry, get and salvage the set of pages
01981          * corresponding to that entry.
01982          */
01983         for (i = 0; i < NUM_ENT(master); i += P_INDX) {
01984                 bkkey = GET_BKEYDATA(master, i);
01985                 bkdata = GET_BKEYDATA(master, i + O_INDX);
01986 
01987                 /* Get the subdatabase name. */
01988                 if (B_TYPE(bkkey->type) == B_OVERFLOW) {
01989                         /*
01990                          * We can, in principle anyway, have a subdb
01991                          * name so long it overflows.  Ick.
01992                          */
01993                         bo = (BOVERFLOW *)bkkey;
01994                         if ((ret = CDB___db_safe_goff(dbp, vdp, bo->pgno, &key,
01995                             (void **)&subdbname, flags)) != 0) {
01996                                 err_ret = DB_VERIFY_BAD;
01997                                 continue;
01998                         }
01999 
02000                         /* Nul-terminate it. */
02001                         if ((ret = CDB___os_realloc(dbp->dbenv,
02002                             key.size + 1, NULL, &subdbname)) != 0)
02003                                 goto err;
02004                         subdbname[key.size] = '\0';
02005                 } else if (B_TYPE(bkkey->type == B_KEYDATA)) {
02006                         if ((ret = CDB___os_realloc(dbp->dbenv,
02007                             bkkey->len + 1, NULL, &subdbname)) != 0)
02008                                 goto err;
02009                         memcpy(subdbname, bkkey->data, bkkey->len);
02010                         subdbname[bkkey->len] = '\0';
02011                 }
02012 
02013                 /* Get the corresponding pgno. */
02014                 if (bkdata->len != sizeof(db_pgno_t)) {
02015                         err_ret = DB_VERIFY_BAD;
02016                         continue;
02017                 }
02018                 memcpy(&meta_pgno, bkdata->data, sizeof(db_pgno_t));
02019 
02020                 /* If we can't get the subdb meta page, just skip the subdb. */
02021                 if (!IS_VALID_PGNO(meta_pgno) ||
02022                     (ret = CDB_memp_fget(dbp->mpf, &meta_pgno, 0, &subpg)) != 0) {
02023                         err_ret = ret;
02024                         continue;
02025                 }
02026 
02027                 /*
02028                  * Verify the subdatabase meta page.  This has two functions.
02029                  * First, if it's bad, we have no choice but to skip the subdb
02030                  * and let the pages just get printed on a later pass.  Second,
02031                  * the access-method-specific meta verification routines record
02032                  * the various state info (such as the presence of dups)
02033                  * that we need for CDB___db_prheader().
02034                  */
02035                 if ((ret =
02036                     __db_vrfy_common(dbp, vdp, subpg, meta_pgno, flags)) != 0) {
02037                         err_ret = ret;
02038                         (void)CDB_memp_fput(dbp->mpf, subpg, 0);
02039                         continue;
02040                 }
02041                 switch (TYPE(subpg)) {
02042                 case P_BTREEMETA:
02043                         if ((ret = CDB___bam_vrfy_meta(dbp,
02044                             vdp, (BTMETA *)subpg, meta_pgno, flags)) != 0) {
02045                                 err_ret = ret;
02046                                 (void)CDB_memp_fput(dbp->mpf, subpg, 0);
02047                                 continue;
02048                         }
02049                         break;
02050                 case P_HASHMETA:
02051                         if ((ret = CDB___ham_vrfy_meta(dbp,
02052                             vdp, (HMETA *)subpg, meta_pgno, flags)) != 0) {
02053                                 err_ret = ret;
02054                                 (void)CDB_memp_fput(dbp->mpf, subpg, 0);
02055                                 continue;
02056                         }
02057                         break;
02058                 default:
02059                         /* This isn't an appropriate page;  skip this subdb. */
02060                         err_ret = DB_VERIFY_BAD;
02061                         continue;
02062                         /* NOTREACHED */
02063                 }
02064 
02065                 if ((ret = CDB_memp_fput(dbp->mpf, subpg, 0)) != 0) {
02066                         err_ret = ret;
02067                         continue;
02068                 }
02069 
02070                 /* Print a subdatabase header. */
02071                 if ((ret = CDB___db_prheader(dbp,
02072                     subdbname, 0, 0, handle, callback, vdp, meta_pgno)) != 0)
02073                         goto err;
02074 
02075                 if ((ret = __db_meta2pgset(dbp, vdp, meta_pgno,
02076                     flags, pgset)) != 0) {
02077                         err_ret = ret;
02078                         continue;
02079                 }
02080 
02081                 if ((ret = pgset->cursor(pgset, NULL, &pgsc, 0)) != 0)
02082                         goto err;
02083                 while ((ret = CDB___db_vrfy_pgset_next(pgsc, &p)) == 0) {
02084                         if ((ret = CDB_memp_fget(dbp->mpf, &p, 0, &subpg)) != 0) {
02085                                 err_ret = ret;
02086                                 continue;
02087                         }
02088                         if ((ret = CDB___db_salvage(dbp, vdp, p, subpg,
02089                             handle, callback, flags)) != 0)
02090                                 err_ret = ret;
02091                         if ((ret = CDB_memp_fput(dbp->mpf, subpg, 0)) != 0)
02092                                 err_ret = ret;
02093                 }
02094 
02095                 if (ret != DB_NOTFOUND)
02096                         goto err;
02097 
02098                 if ((ret = pgsc->c_close(pgsc)) != 0)
02099                         goto err;
02100                 if ((ret = CDB___db_prfooter(handle, callback)) != 0)
02101                         goto err;
02102         }
02103 err:    if (subdbname)
02104                 CDB___os_free(subdbname, 0);
02105 
02106         if ((t_ret = pgset->close(pgset, 0)) != 0)
02107                 ret = t_ret;
02108 
02109         if ((t_ret = CDB___db_salvage_markdone(vdp, PGNO(master))) != 0)
02110                 return (t_ret);
02111 
02112         return ((err_ret != 0) ? err_ret : ret);
02113 }
02114 
02115 /*
02116  * __db_meta2pgset --
02117  *      Given a known-safe meta page number, return the set of pages
02118  *      corresponding to the database it represents.  Return DB_VERIFY_BAD if
02119  *      it's not a suitable meta page or is invalid.
02120  */
02121 static int
02122 __db_meta2pgset(dbp, vdp, pgno, flags, pgset)
02123         DB *dbp;
02124         VRFY_DBINFO *vdp;
02125         db_pgno_t pgno;
02126         u_int32_t flags;
02127         DB *pgset;
02128 {
02129         PAGE *h;
02130         int ret, t_ret;
02131 
02132         if ((ret = CDB_memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
02133                 return (ret);
02134 
02135         switch (TYPE(h)) {
02136         case P_BTREEMETA:
02137                 ret = CDB___bam_meta2pgset(dbp, vdp, (BTMETA *)h, flags, pgset);
02138                 break;
02139         case P_HASHMETA:
02140                 ret = CDB___ham_meta2pgset(dbp, vdp, (HMETA *)h, flags, pgset);
02141                 break;
02142         default:
02143                 ret = DB_VERIFY_BAD;
02144                 break;
02145         }
02146 
02147         if ((t_ret = CDB_memp_fput(dbp->mpf, h, 0)) != 0)
02148                 return (t_ret);
02149         return (ret);
02150 }
02151 
02152 /*
02153  * __db_guesspgsize --
02154  *      Try to guess what the pagesize is if the one on the meta page
02155  *      and the one in the db are invalid.
02156  */
02157 static int
02158 __db_guesspgsize(dbenv, fhp)
02159         DB_ENV *dbenv;
02160         DB_FH *fhp;
02161 {
02162         db_pgno_t i;
02163         size_t nr;
02164         u_int32_t guess;
02165         u_int8_t type;
02166         int ret;
02167 
02168         for (guess = DB_MAX_PGSIZE; guess >= DB_MIN_PGSIZE; guess >>= 1) {
02169                 /*
02170                  * We try to read three pages ahead after the first one
02171                  * and make sure we have plausible types for all of them.
02172                  * If the seeks fail, continue with a smaller size;
02173                  * we're probably just looking past the end of the database.
02174                  * If they succeed but the types are wrong, also continue
02175                  * with a size smaller;  we may be looking at pages N,
02176                  * 2N, and 3N for some N > 1.
02177                  *
02178                  * As soon as we hit an invalid type, we stop and return
02179                  * our best guess; the last one was probably the page size.
02180                  */
02181                 for (i = 1; i <= 3; i++) {
02182                         if ((ret = CDB___os_seek(dbenv, fhp, guess,
02183                             i, SSZ(DBMETA, type), 0, DB_OS_SEEK_SET)) != 0)
02184                                 break;
02185                         if ((ret = CDB___os_read(dbenv,
02186                             fhp, &type, 1, &nr)) != 0 || nr == 0)
02187                                 break;
02188                         if (type == P_INVALID || type >= P_PAGETYPE_MAX)
02189                                 break;
02190                 }
02191         }
02192 
02193         return (guess);
02194 }

Generated on Sun Jun 8 10:56:37 2008 for GNUmifluz by  doxygen 1.5.5