htdb_dump.cc

Go to the documentation of this file.
00001 //
00002 // NAME
00003 // 
00004 // dump the content of an inverted index in Berkeley DB fashion
00005 //
00006 // SYNOPSIS
00007 //
00008 // htdb_dump [-klNprRVWz] [-S pagesize] [-C cachesize] [-d ahr] [-f file] [-h home] [-s subdb] db_file
00009 //
00010 // DESCRIPTION
00011 //
00012 // htdb_dump is a slightly modified version of the standard 
00013 // Berkeley DB db_dump utility.
00014 //
00015 // The htdb_dump utility reads the database file <b>db_file</b> and
00016 // writes it to the standard output using a portable flat-text format
00017 // understood by the <i>htdb_load</i>
00018 // utility. The argument <b>db_file</b> must be a file produced using
00019 // the Berkeley DB library functions.
00020 //
00021 // OPTIONS
00022 //
00023 // <dl>
00024 //
00025 // <dt><b>-W</b>
00026 // <dd>Initialize WordContext(3) before dumping. Combined with the <b>-z</b>
00027 // flag, this allows dumping inverted indexes that use the mifluz(3) specific
00028 // compression scheme. The MIFLUZ_CONFIG environment variable must be
00029 // set to a file containing the mifluz(3) configuration.
00030 //
00031 // <dt><b>-z</b>
00032 // <dd>The <b>db_file</b> is compressed. If <b>-W</b> is given the
00033 // mifluz(3) specific compression scheme is used. Otherwise the default
00034 // gzip compression scheme is used.
00035 //
00036 // <dt><b>-d</b>
00037 // <dd>Dump the specified database in a format helpful for debugging
00038 // the Berkeley DB library routines.
00039 // <dl>
00040 // <dt>
00041 // a
00042 // <dd>Display all information.
00043 // <dt>
00044 // h
00045 // <dd>Display only page headers.
00046 // <dt>
00047 // r
00048 // <dd>Do not display the free-list or pages on the free list.  This
00049 // mode is used by the recovery tests.
00050 // </dl>
00051 // The output format of the <b>-d</b> option is not standard and may change,
00052 // without notice, between releases of the Berkeley DB library.
00053 //
00054 // <dt><b>-f</b>
00055 // <dd>Write to the specified <b>file</b> instead of to the standard output.
00056 //
00057 // <dt><b>-h</b>
00058 // <dd>Specify a home directory for the database.
00059 // Berkeley DB versions before 2.0 did not support the concept of a
00060 // <i>database home.</i>
00061 //
00062 // <dt><b>-k</b>
00063 // <dd>Dump record numbers from Queue and Recno databases as keys.
00064 //
00065 // <dt><b>-l</b>
00066 // <dd>List the subdatabases stored in the database.
00067 //
00068 // <dt><b>-N</b>
00069 // <dd>Do not acquire shared region locks while running.  Other problems such
00070 // as potentially fatal errors in Berkeley DB will be ignored as well.  This option
00071 // is intended only for debugging errors and should not be used under any
00072 // other circumstances.
00073 // 
00074 // <dt><b>-p</b>
00075 // <dd>If characters in either the key or data items are printing characters
00076 // (as defined by <b>isprint</b>(3)), use printing characters in
00077 // <b>file</b> to represent them.  This option permits users to use standard
00078 // text editors and tools to modify the contents of databases.
00079 //
00080 // Note, different systems may have different notions as to what characters
00081 // are considered <i>printing characters</i>, and databases dumped in
00082 // this manner may be less portable to external systems.
00083 //
00084 // <dt><b>-s</b>
00085 // <dd>Specify a subdatabase to dump.  If no subdatabase is specified, all
00086 // subdatabases found in the database are dumped.
00087 //
00088 // <dt><b>-V</b>
00089 // <dd>Write the version number to the standard output and exit.
00090 //
00091 // </dl>
00092 // 
00093 // Dumping and reloading Hash databases that use user-defined hash functions
00094 // will result in new databases that use the default hash function.
00095 // While using the default hash function may not be optimal for the new database,
00096 // it will continue to work correctly.
00097 //
00098 // Dumping and reloading Btree databases that use user-defined prefix or
00099 // comparison functions will result in new databases that use the default
00100 // prefix and comparison functions.
00101 // <b>In this case, it is quite likely that the database will be damaged
00102 // beyond repair, permitting neither record storage nor retrieval.</b>
00103 //
00104 // The only available workaround for either case is to modify the sources
00105 // for the <i>htdb_load</i> utility to load the
00106 // database using the correct hash, prefix and comparison functions.
00107 //
00108 // ENVIRONMENT
00109 //
00110 // <b>DB_HOME</b>
00111 // If the <b>-h</b> option is not specified and the environment variable
00112 // DB_HOME is set, it is used as the path of the database home.
00113 // <br>
00114 // <b>MIFLUZ_CONFIG</b>
00115 // file name of configuration file read by WordContext(3). Defaults to
00116 // <b>~/.mifluz.</b> 
00117 //
00118 // AUTHORS
00119 //
00120 // Sleepycat Software http://www.sleepycat.com/
00121 //
00122 //
00123 // END
00124 /*-
00125  * See the file LICENSE for redistribution information.
00126  *
00127  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00128  *      Sleepycat Software.  All rights reserved.
00129  */
00130 
00131 #ifdef HAVE_CONFIG_H
00132 #include "config.h"
00133 #endif /* HAVE_CONFIG_H */
00134 
00135 #ifndef lint
00136 static const char copyright[] =
00137     "Copyright (c) 1996-2000\nSleepycat Software Inc.  All rights reserved.\n";
00138 static const char revid[] =
00139     "$Id: htdb__dump_8cc-source.html,v 1.1 2008/06/08 10:19:42 sebdiaz Exp $";
00140 #endif
00141 
00142 #ifndef NO_SYSTEM_INCLUDES
00143 #include <sys/types.h>
00144 
00145 #include <errno.h>
00146 #include <stdio.h>
00147 #include <stdlib.h>
00148 #include <string.h>
00149 #include <unistd.h>
00150 #endif
00151 
00152 #ifdef HAVE_GETOPT_H
00153 #include <getopt.h>
00154 #endif /* HAVE_GETOPT_H */
00155 
00156 extern "C" {
00157 #include "db_int.h"
00158 #include "db_page.h"
00159 #include "db_shash.h"
00160 #include "btree.h"
00161 #include "hash.h"
00162 #include "lock.h"
00163 }
00164 
00165 #include "util_sig.h"
00166 
00167 #include "WordContext.h"
00168 #include "WordDBCompress.h"
00169 
00170 void     configure __P((char *));
00171 int      db_init __P((char *));
00172 int      dump __P((DB *, int, int));
00173 int      dump_sub __P((DB *, char *, int, int));
00174 int      is_sub __P((DB *, int *));
00175 int      main __P((int, char *[]));
00176 int      show_subs __P((DB *));
00177 void     usage __P((void));
00178 static   u_int32_t pagesize = 0;
00179 
00180 DB_ENV  *dbenv;
00181 const char
00182         *progname = "htdb_dump";                                /* Program name. */
00183 
00184 int
00185 main(int argc, char *argv[])
00186 {
00187         extern char *optarg;
00188         extern int optind;
00189         DB *dbp;
00190         int ch, d_close;
00191         int e_close, exitval;
00192         int lflag, pflag, ret, rflag, Rflag, subs, keyflag;
00193         char *dopt, *home, *subname;
00194         int compress = 0;
00195         int wordlist = 0;
00196         u_int32_t cachesize = 0;
00197         WordContext *context = 0;
00198 
00199         dbp = NULL;
00200         d_close = e_close = exitval = lflag = pflag = rflag = Rflag = 0;
00201         keyflag = 0;
00202         dopt = home = subname = NULL;
00203         while ((ch = getopt(argc, argv, "d:f:h:klNprRs:VC:S:zW")) != EOF)
00204                 switch (ch) {
00205                 case 'd':
00206                         dopt = optarg;
00207                         break;
00208                 case 'f':
00209                         if (freopen(optarg, "w", stdout) == NULL) {
00210                                 fprintf(stderr, "%s: %s: reopen: %s\n",
00211                                     progname, optarg, strerror(errno));
00212                                 exit (1);
00213                         }
00214                         break;
00215                 case 'h':
00216                         home = optarg;
00217                         break;
00218                 case 'k':
00219                         keyflag = 1;
00220                         break;
00221                 case 'l':
00222                         lflag = 1;
00223                         break;
00224                 case 'N':
00225                         if ((ret = CDB_db_env_set_mutexlocks(0)) != 0) {
00226                                 fprintf(stderr,
00227                                     "%s: db_env_set_mutexlocks: %s\n",
00228                                     progname, CDB_db_strerror(ret));
00229                                 return (1);
00230                         }
00231                         if ((ret = CDB_db_env_set_panicstate(0)) != 0) {
00232                                 fprintf(stderr,
00233                                     "%s: db_env_set_panicstate: %s\n",
00234                                     progname, CDB_db_strerror(ret));
00235                                 return (1);
00236                         }
00237                         break;
00238                 case 'p':
00239                         pflag = 1;
00240                         break;
00241                 case 's':
00242                         subname = optarg;
00243                         break;
00244                 case 'R':
00245                         Rflag = 1;
00246                         /* DB_AGGRESSIVE requires DB_SALVAGE */
00247                         /* FALLTHROUGH */
00248                 case 'r':
00249                         rflag = 1;
00250                         break;
00251                 case 'V':
00252                         printf("%s\n", CDB_db_version(NULL, NULL, NULL));
00253                         exit(0);
00254                 case 'C':
00255                         cachesize = atoi(optarg);
00256                         break;
00257                 case 'S':
00258                         pagesize = atoi(optarg);
00259                         break;
00260                 case 'z':
00261                         compress = DB_COMPRESS;
00262                         break;
00263                 case 'W':
00264                         wordlist = 1;
00265                         break;
00266                 case '?':
00267                 default:
00268                         usage();
00269                 }
00270         argc -= optind;
00271         argv += optind;
00272 
00273         if (argc != 1)
00274                 usage();
00275 
00276         if (dopt != NULL && pflag) {
00277                 fprintf(stderr,
00278                     "%s: the -d and -p options may not both be specified\n",
00279                     progname);
00280                 exit (1);
00281         }
00282         if (lflag && subname != NULL) {
00283                 fprintf(stderr,
00284                     "%s: the -l and -s options may not both be specified\n",
00285                     progname);
00286                 exit (1);
00287         }
00288 
00289         if (keyflag && rflag) {
00290                 fprintf(stderr, "%s: %s",
00291                     "the -k and -r or -R options may not both be specified\n",
00292                     progname);
00293                 exit(1);
00294         }
00295 
00296         if (subname != NULL && rflag) {
00297                 fprintf(stderr, "%s: %s",
00298                     "the -s and -r or R options may not both be specified\n",
00299                     progname);
00300                 exit(1);
00301         }
00302 
00303         /* Handle possible interruptions. */
00304         __db_util_siginit();
00305 
00306 
00307         if(wordlist && compress) {
00308           static ConfigDefaults defaults[] = {
00309             { "wordlist_wordkey_description", "Word 24/DocID 32/Flag 8/Location 16"},
00310             { "wordlist_env_skip", "true"},
00311             { 0, 0, 0 }
00312           };
00313           context = new WordContext(defaults);
00314         }
00315         /*
00316            * Create an environment object and initialize it for error
00317            * reporting.
00318            */
00319         if ((ret = CDB_db_env_create(&dbenv, 0)) != 0) {
00320           fprintf(stderr,
00321                   "%s: CDB_db_env_create: %s\n", progname, CDB_db_strerror(ret));
00322           goto err;
00323         }
00324         e_close = 1;
00325 
00326         dbenv->set_errfile(dbenv, stderr);
00327         dbenv->set_errpfx(dbenv, progname);
00328         if(compress && wordlist) dbenv->mp_cmpr_info = (new WordDBCompress(context))->CmprInfo();
00329 
00330         /* Initialize the environment. */
00331         if (db_init(home) != 0)
00332                 goto err;
00333 
00334         /* Create the DB object and open the file. */
00335         if ((ret = CDB_db_create(&dbp, dbenv, 0)) != 0) {
00336                 dbenv->err(dbenv, ret, "CDB_db_create");
00337                 goto err;
00338         }
00339         if(cachesize > 0) dbp->set_cachesize(dbp, 0, cachesize, 1);
00340         if(pagesize > 0) dbp->set_pagesize(dbp, pagesize);
00341         d_close = 1;
00342 
00343         /*
00344          * If we're salvaging, don't do an open;  it might not be safe.
00345          * Dispatch now into the salvager.
00346          */
00347         if (rflag) {
00348                 if ((ret = dbp->verify(dbp, argv[0], NULL, stdout,
00349                     DB_SALVAGE | (Rflag ? DB_AGGRESSIVE : 0))) != 0)
00350                         goto err;
00351                 exitval = 0;
00352                 goto done;
00353         }
00354 
00355         if ((ret = dbp->open(dbp,
00356             argv[0], subname, DB_UNKNOWN, (DB_RDONLY | compress), 0)) != 0) {
00357                 dbp->err(dbp, ret, "open: %s", argv[0]);
00358                 goto err;
00359         }
00360 
00361         if (dopt != NULL) {
00362                 if (CDB___db_dump(dbp, dopt, NULL)) {
00363                         dbp->err(dbp, ret, "CDB___db_dump: %s", argv[0]);
00364                         goto err;
00365                 }
00366         } else if (lflag) {
00367                 if (is_sub(dbp, &subs))
00368                         goto err;
00369                 if (subs == 0) {
00370                         dbp->errx(dbp,
00371                             "%s: does not contain multiple databases", argv[0]);
00372                         goto err;
00373                 }
00374                 if (show_subs(dbp))
00375                         goto err;
00376         } else {
00377                 subs = 0;
00378                 if (subname == NULL && is_sub(dbp, &subs))
00379                         goto err;
00380                 if (subs) {
00381                         if (dump_sub(dbp, argv[0], pflag, keyflag))
00382                                 goto err;
00383                 } else
00384                         if (CDB___db_prheader(dbp, NULL, pflag, keyflag, stdout,
00385                             CDB___db_verify_callback, NULL, 0) ||
00386                             dump(dbp, pflag, keyflag))
00387                                 goto err;
00388         }
00389 
00390         if (0) {
00391 err:            exitval = 1;
00392         }
00393 done:   if (d_close && (ret = dbp->close(dbp, 0)) != 0) {
00394                 exitval = 1;
00395                 dbp->err(dbp, ret, "close");
00396         }
00397         if(wordlist && compress) {
00398           delete (WordDBCompress*)dbenv->mp_cmpr_info->user_data;
00399           delete dbenv->mp_cmpr_info;
00400         }
00401         if (e_close && (ret = dbenv->close(dbenv, 0)) != 0) {
00402                 exitval = 1;
00403                 fprintf(stderr,
00404                     "%s: dbenv->close: %s\n", progname, CDB_db_strerror(ret));
00405         }
00406 
00407         if(context) delete context;
00408 
00409         /* Resend any caught signal. */
00410         __db_util_sigresend();
00411 
00412         return (exitval);
00413 }
00414 
00415 /*
00416  * db_init --
00417  *      Initialize the environment.
00418  */
00419 int
00420 db_init(char *home)
00421 {
00422         u_int32_t flags;
00423         int ret;
00424 
00425         /*
00426          * Try and use the shared memory pool region when dumping a database,
00427          * so our information is as up-to-date as possible, even if the mpool
00428          * cache hasn't been flushed.
00429          */
00430         flags = DB_USE_ENVIRON | DB_INIT_MPOOL | DB_INIT_LOCK;
00431         if (dbenv->open(dbenv, home, flags, 0) == 0)
00432                 return (0);
00433 
00434         /*
00435          * An environment is required because we may be trying to look at
00436          * databases in directories other than the current one.  We could
00437          * avoid using an environment iff the -h option wasn't specified,
00438          * but that seems like more work than it's worth.
00439          *
00440          * No environment exists (or, at least no environment that includes
00441          * an mpool region exists).  Create one, but make it private so that
00442          * no files are actually created.
00443          */
00444         LF_SET(DB_CREATE | DB_PRIVATE);
00445         if ((ret = dbenv->open(dbenv, home, flags, 0)) == 0)
00446                 return (0);
00447 
00448         /* An environment is required. */
00449         dbenv->err(dbenv, ret, "open");
00450         return (1);
00451 }
00452 
00453 /*
00454  * is_sub --
00455  *      Return if the database contains subdatabases.
00456  */
00457 int
00458 is_sub(DB *dbp, int *yesno)
00459 {
00460         DB_BTREE_STAT *btsp = 0;
00461         DB_HASH_STAT *hsp = 0;
00462         int ret;
00463 
00464         switch (dbp->type) {
00465         case DB_BTREE:
00466         case DB_RECNO:
00467                 if ((ret = dbp->stat(dbp, &btsp, NULL, 0)) != 0) {
00468                         dbp->err(dbp, ret, "DB->stat");
00469                         return (ret);
00470                 }
00471                 *yesno = btsp->bt_metaflags & BTM_SUBDB ? 1 : 0;
00472                 break;
00473         case DB_HASH:
00474                 if ((ret = dbp->stat(dbp, &hsp, NULL, 0)) != 0) {
00475                         dbp->err(dbp, ret, "DB->stat");
00476                         return (ret);
00477                 }
00478                 *yesno = hsp->hash_metaflags & DB_HASH_SUBDB ? 1 : 0;
00479                 break;
00480         case DB_QUEUE:
00481                 break;
00482         default:
00483                 dbp->errx(dbp, "unknown database type");
00484                 return (1);
00485         }
00486         if(btsp) free(btsp);
00487         if(hsp) free(hsp);
00488         return (0);
00489 }
00490 
00491 /*
00492  * dump_sub --
00493  *      Dump out the records for a DB containing subdatabases.
00494  */
00495 int
00496 dump_sub(DB *parent_dbp, char *parent_name, int pflag, int keyflag)
00497 {
00498         DB *dbp;
00499         DBC *dbcp;
00500         DBT key, data;
00501         int ret;
00502         char *subdb;
00503 
00504         /*
00505          * Get a cursor and step through the database, dumping out each
00506          * subdatabase.
00507          */
00508         if ((ret = parent_dbp->cursor(parent_dbp, NULL, &dbcp, 0)) != 0) {
00509                 dbenv->err(dbenv, ret, "DB->cursor");
00510                 return (1);
00511         }
00512 
00513         memset(&key, 0, sizeof(key));
00514         memset(&data, 0, sizeof(data));
00515         while ((ret = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) {
00516                 /* Nul terminate the subdatabase name. */
00517                 if ((subdb = (char*)malloc(key.size + 1)) == NULL) {
00518                         dbenv->err(dbenv, ENOMEM, NULL);
00519                         return (1);
00520                 }
00521                 memcpy(subdb, key.data, key.size);
00522                 subdb[key.size] = '\0';
00523 
00524                 /* Create the DB object and open the file. */
00525                 if ((ret = CDB_db_create(&dbp, dbenv, 0)) != 0) {
00526                         dbenv->err(dbenv, ret, "CDB_db_create");
00527                         free(subdb);
00528                         return (1);
00529                 }
00530                 if(pagesize > 0) dbp->set_pagesize(dbp, pagesize);
00531                 if ((ret = dbp->open(dbp,
00532                     parent_name, subdb, DB_UNKNOWN, (DB_RDONLY | ((parent_dbp->flags & DB_AM_CMPR) ? DB_COMPRESS : 0)), 0)) != 0)
00533                         dbp->err(dbp, ret,
00534                             "DB->open: %s:%s", parent_name, subdb);
00535                 if (ret == 0 &&
00536                     (CDB___db_prheader(dbp, subdb, pflag, keyflag, stdout,
00537                     CDB___db_verify_callback, NULL, 0) ||
00538                      dump(dbp, pflag, keyflag)))
00539                         ret = 1;
00540                 (void)dbp->close(dbp, 0);
00541                 free(subdb);
00542                 if (ret != 0)
00543                         return (1);
00544         }
00545         if (ret != DB_NOTFOUND) {
00546                 dbp->err(dbp, ret, "DBcursor->get");
00547                 return (1);
00548         }
00549 
00550         if ((ret = dbcp->c_close(dbcp)) != 0) {
00551                 dbp->err(dbp, ret, "DBcursor->close");
00552                 return (1);
00553         }
00554 
00555         return (0);
00556 }
00557 
00558 /*
00559  * show_subs --
00560  *      Display the subdatabases for a database.
00561  */
00562 int
00563 show_subs(DB *dbp)
00564 {
00565         DBC *dbcp;
00566         DBT key, data;
00567         int ret;
00568 
00569         /*
00570          * Get a cursor and step through the database, printing out the key
00571          * of each key/data pair.
00572          */
00573         if ((ret = dbp->cursor(dbp, NULL, &dbcp, 0)) != 0) {
00574                 dbp->err(dbp, ret, "DB->cursor");
00575                 return (1);
00576         }
00577 
00578         memset(&key, 0, sizeof(key));
00579         memset(&data, 0, sizeof(data));
00580         while ((ret = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0) {
00581                 if ((ret = CDB___db_prdbt(&key, 1, NULL, stdout,
00582                     CDB___db_verify_callback, 0, NULL)) != 0) {
00583                         dbp->errx(dbp, NULL);
00584                         return (1);
00585                 }
00586         }
00587         if (ret != DB_NOTFOUND) {
00588                 dbp->err(dbp, ret, "DBcursor->get");
00589                 return (1);
00590         }
00591 
00592         if ((ret = dbcp->c_close(dbcp)) != 0) {
00593                 dbp->err(dbp, ret, "DBcursor->close");
00594                 return (1);
00595         }
00596         return (0);
00597 }
00598 
00599 /*
00600  * dump --
00601  *      Dump out the records for a DB.
00602  */
00603 int
00604 dump(DB *dbp, int pflag, int keyflag)
00605 {
00606         DBC *dbcp;
00607         DBT key, data;
00608         int ret, is_recno;
00609 
00610         /*
00611          * Get a cursor and step through the database, printing out each
00612          * key/data pair.
00613          */
00614         if ((ret = dbp->cursor(dbp, NULL, &dbcp, 0)) != 0) {
00615                 dbp->err(dbp, ret, "DB->cursor");
00616                 return (1);
00617         }
00618 
00619         memset(&key, 0, sizeof(key));
00620         memset(&data, 0, sizeof(data));
00621         is_recno = (dbp->type == DB_RECNO || dbp->type == DB_QUEUE);
00622         keyflag = is_recno ? keyflag : 1;
00623         while ((ret = dbcp->c_get(dbcp, &key, &data, DB_NEXT)) == 0)
00624                 if ((keyflag && (ret = CDB___db_prdbt(&key,
00625                     pflag, " ", stdout, CDB___db_verify_callback,
00626                     is_recno, NULL)) != 0) || (ret =
00627                     CDB___db_prdbt(&data, pflag, " ", stdout,
00628                         CDB___db_verify_callback, 0, NULL)) != 0) {
00629                         dbp->errx(dbp, NULL);
00630                         return (1);
00631                 }
00632         if (ret != DB_NOTFOUND) {
00633                 dbp->err(dbp, ret, "DBcursor->get");
00634                 return (1);
00635         }
00636 
00637         if ((ret = dbcp->c_close(dbcp)) != 0) {
00638                 dbp->err(dbp, ret, "DBcursor->close");
00639                 return (1);
00640         }
00641 
00642         (void)CDB___db_prfooter(stdout, CDB___db_verify_callback);
00643         return (0);
00644 }
00645 
/*
 * usage --
 *      Print the command-line synopsis on stderr and exit with status 1.
 */
void
usage()
{
        static const char synopsis[] =
"htdb_dump [-klNprRVWz] [-S pagesize] [-C cachesize] [-d ahr] [-f output] [-h home] [-s database] db_file\n";

        (void)fprintf(stderr, "usage: %s\n", synopsis);
        exit(1);
}

Generated on Sun Jun 8 10:56:39 2008 for GNUmifluz by  doxygen 1.5.5