htdb_load.cc

Go to the documentation of this file.
00001 //
00002 // NAME
00003 // 
00004 // loads key/data pairs from a text dump into a Berkeley DB database.
00005 //
00006 // SYNOPSIS
00007 //
00008 // htdb_load [-nTVzW] [-c name=value] [-f file] [-h home] [-C cachesize] [-t btree | hash | recno | queue] db_file
00009 //
00010 // DESCRIPTION
00011 //
00012 // The htdb_load utility reads from the standard input and loads it into
00013 // the database <b>db_file</b>.
00014 // The database <b>db_file</b> is created if it does not already exist.
00015 //
00016 // The input to htdb_load must be in the output format specified by the
00017 // htdb_dump utility, or as specified for the <b>-T</b> below.
00018 //
00019 // OPTIONS
00020 // 
00021 // <dl>
00022 //
00023 //
00024 // <dt><b>-W</b>
00025 // <dd>Initialize WordContext(3) before loading. Combined with the <b>-z</b>
00026 // flag, this allows loading inverted indexes using the mifluz(3) specific
00027 // compression scheme. The MIFLUZ_CONFIG environment variable must be
00028 // set to a file containing the mifluz(3) configuration.
00029 //
00030 // <dt><b>-z</b>
00031 // <dd>The <b>db_file</b> is compressed. If <b>-W</b> is given the
00032 // mifluz(3) specific compression scheme is used. Otherwise the default
00033 // gzip compression scheme is used.
00034 //
00035 // <dt><b>-c</b>
00036 // <dd>Specify configuration options for the DB structure 
00037 // ignoring any value they may have based on the input.
00038 // The command-line format is <b>name=value</b>.
00039 // See <i>Supported Keywords</i> for
00040 // a list of supported words for the <b>-c</b> option.
00041 //
00042 // <dt><b>-f</b>
00043 // <dd>Read from the specified <b>input</b> file instead of from
00044 // the standard input.
00045 //
00046 // <dt><b>-h</b>
00047 // <dd>Specify a home directory for the database.
00048 // If a home directory is specified, the database environment is opened using
00049 // the <i>DB_INIT_LOCK</i>, <i>DB_INIT_LOG</i>, <i>DB_INIT_MPOOL</i>,
00050 // <i>DB_INIT_TXN</i> and <i>DB_USE_ENVIRON</i> flags to
00051 // DBENV-&gt;open. This means that htdb_load can be used to load
00052 // data into databases while they are in use by other processes. If the
00053 // DBENV-&gt;open call fails, or if no home directory is specified, the
00054 // database is still updated, but the environment is ignored, e.g., no
00055 // locking is done.
00056 //
00057 // <dt><b>-n</b>
00058 // <dd>Do not overwrite existing keys in the database when loading into an
00059 // already existing database.
00060 // If a key/data pair cannot be loaded into the database for this reason,
00061 // a warning message is displayed on the standard error output and the
00062 // key/data pair are skipped.
00063 // 
00064 // <dt><b>-T</b>
00065 // <dd>The <b>-T</b>
00066 // option allows non-Berkeley DB applications to easily load text files 
00067 // into databases.
00068 //
00069 // If the database to be created is of type Btree or Hash, or the keyword
00070 // <b>keys</b> is specified as set, the input must be paired lines of text,
00071 // where the first line of the pair is the key item, and the second line of
00072 // the pair is its corresponding data item.  If the database to be created
00073 // is of type Queue or Recno and the keyword <b>keys</b> is not set, the
00074 // input must be lines of text, where each line is a new data item for the
00075 // database.
00076 // 
00077 // A simple escape mechanism, where newline and backslash (\)
00078 // characters are special, is applied to the text input.
00079 // Newline characters are interpreted as record separators.
00080 // Backslash characters in the text will be interpreted in one of two ways:
00081 // if the backslash character precedes another backslash character, the pair
00082 // will be interpreted as a literal backslash.
00083 // If the backslash character precedes any other character, the two characters
00084 // following the backslash will be interpreted as hexadecimal specification of
00085 // a single character, e.g., \0a is a newline character in the ASCII
00086 // character set.
00087 // 
00088 // For this reason, any backslash or newline characters that naturally
00089 // occur in the text input must be escaped to avoid misinterpretation by
00090 // htdb_load.
00091 // 
00092 // If the <b>-T</b> option is specified, the underlying access method type
00093 // must be specified using the <b>-t</b> option.
00094 //
00095 // <dt><b>-t</b>
00096 // <dd>Specify the underlying access method.
00097 // If no <b>-t</b> option is specified, the database will be loaded into a
00098 // database of the same type as was dumped, e.g., a Hash database will be
00099 // created if a Hash database was dumped.
00100 // 
00101 // Btree and Hash databases may be converted from one to the other.  Queue
00102 // and Recno databases may be converted from one to the other.  If the
00103 // <b>-k</b> option was specified on the call to htdb_dump then Queue
00104 // and Recno databases may be converted to Btree or Hash, with the key being
00105 // the integer record number.
00106 //
00107 // <dt><b>-V</b>
00108 // <dd>Write the version number to the standard output and exit.
00109 //
00110 // </dl>
00111 //
00112 // The htdb_load utility attaches to one or more of the Berkeley DB
00113 // shared memory regions.  In order to avoid region corruption, it 
00114 // should always be given
00115 // the chance to detach and exit gracefully.  To cause htdb_load to clean up
00116 // after itself and exit, send it an interrupt signal (SIGINT).
00117 //
00118 // The htdb_load utility exits 0 on success, 1 if one or more key/data
00119 // pairs were not loaded into the database because the key already existed,
00120 // and &gt;1 if an error occurs.
00121 // 
00122 // KEYWORDS
00123 //
00124 // The following keywords are supported for the <b>-c</b> command-line option
00125 // to the htdb_load utility. See DB-&gt;open for further discussion of
00126 // these keywords and what values should be specified.
00127 //
00128 // The parenthetical listing specifies how the value part of the
00129 // <b>name=value</b> pair is interpreted.
00130 // Items listed as (boolean) expect value to be <b>1</b> (set) or <b>0</b>
00131 // (unset).
00132 // Items listed as (number) convert value to a number.
00133 // Items listed as (string) use the string value without modification.
00134 //
00135 // <dl>
00136 // <dt>bt_minkey (number)
00137 // <dd>The minimum number of keys per page.
00138 // <dt>db_lorder (number)
00139 // <dd>The byte order for integers in the stored database metadata.
00140 // <dt>db_pagesize (number)
00141 // <dd>The size of pages used for nodes in the tree, in bytes.
00142 // <dt>duplicates (boolean)
00143 // <dd>The value of the DB_DUP flag.
00144 // <dt>h_ffactor (number)
00145 // <dd>The density within the Hash database.
00146 // <dt>h_nelem (number)
00147 // <dd>The size of the Hash database.
00148 // <dt>keys (boolean)
00149 // <dd>Specify if keys are present for Queue or Recno databases.
00150 // <dt>re_len (number)
00151 // <dd>Specify fixed-length records of the specified length.
00152 // <dt>re_pad (string)
00153 // <dd>Specify the fixed-length record pad character.
00154 // <dt>recnum (boolean)
00155 // <dd>The value of the DB_RECNUM flag.
00156 // <dt>renumber (boolean)
00157 // <dd>The value of the DB_RENUMBER flag.
00158 // <dt>subdatabase (string)
00159 // <dd>The subdatabase to load.
00160 // </dl>
00161 //
00162 // ENVIRONMENT
00163 //
00164 // <b>DB_HOME</b>
00165 // If the <b>-h</b> option is not specified and the environment variable
00166 // DB_HOME is set, it is used as the path of the database home.
00167 // <br>
00168 // <b>MIFLUZ_CONFIG</b>
00169 // file name of configuration file read by WordContext(3). Defaults to
00170 // <b>~/.mifluz.</b> 
00171 //
00172 // AUTHORS
00173 //
00174 // Sleepycat Software http://www.sleepycat.com/
00175 //
00176 //
00177 // END
00178 // 
00179 /*-
00180  * See the file LICENSE for redistribution information.
00181  *
00182  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00183  *      Sleepycat Software.  All rights reserved.
00184  */
00185 
00186 #ifdef HAVE_CONFIG_H
00187 #include "config.h"
00188 #endif /* HAVE_CONFIG_H */
00189 
00190 #ifndef lint
00191 static const char copyright[] =
00192     "Copyright (c) 1996-2000\nSleepycat Software Inc.  All rights reserved.\n";
00193 static const char revid[] =
00194     "$Id: htdb__load_8cc-source.html,v 1.1 2008/06/08 10:19:44 sebdiaz Exp $";
00195 #endif
00196 
00197 #ifndef NO_SYSTEM_INCLUDES
00198 #include <sys/types.h>
00199 
00200 #include <errno.h>
00201 #include <limits.h>
00202 #include <stdio.h>
00203 #include <stdlib.h>
00204 #include <string.h>
00205 #include <unistd.h>
00206 #endif
00207 
00208 #ifdef HAVE_GETOPT_H
00209 #include <getopt.h>
00210 #endif /* HAVE_GETOPT_H */
00211 
00212 extern "C" {
00213 #include "db_int.h"
00214 #include "db_page.h"
00215 #include "db_am.h"
00216 #include "clib.h"
00217 }
00218 
00219 #include "util_sig.h"
00220 
00221 #include "WordDBCompress.h"
00222 #include "WordContext.h"
00223 #include "WordKey.h"
00224 
00225 void    badend __P((void));
00226 void    badnum __P((void));
00227 int     configure __P((DB *, char **, char **, int *));
00228 int     db_init __P((char *));
00229 int     dbt_rdump __P((DBT *));
00230 int     dbt_rprint __P((DBT *));
00231 int     dbt_rrecno __P((DBT *));
00232 int     digitize __P((int, int *));
00233 int     load __P((char *, DBTYPE, char **, int, u_int32_t, int, WordContext *));
00234 int     main __P((int, char *[]));
00235 int     rheader __P((DB *, DBTYPE *, char **, int *, int*));
00236 void    usage __P((void));
00237 
00238 int     endodata;                       /* Reached the end of a database. */
00239 int     endofile;                       /* Reached the end of the input. */
00240 int     existed;                        /* Tried to load existing key. */
00241 u_long  lineno;                         /* Input file line number. */
00242 int     version = 1;                    /* Input version. */
00243 
00244 DB_ENV  *dbenv;
00245 const char
00246         *progname = "db_load";          /* Program name. */
00247 
00248 int
00249 main(int argc, char* argv[])
00250 {
00251         extern char *optarg;
00252         extern int optind;
00253         DBTYPE dbtype;
00254         u_int32_t db_nooverwrite;
00255         int ch, exitval, no_header, ret;
00256         char **clist, **clp, *home;
00257         u_int32_t cachesize = 0;
00258         int compress = 0;
00259         int wordlist = 0;
00260         WordContext *context = 0;
00261 
00262         home = NULL;
00263         db_nooverwrite = 0;
00264         exitval = no_header = 0;
00265         dbtype = DB_UNKNOWN;
00266 
00267         /* Allocate enough room for configuration arguments. */
00268         if ((clp = clist = (char **)calloc(argc + 1, sizeof(char *))) == NULL) {
00269                 fprintf(stderr, "%s: %s\n", progname, strerror(ENOMEM));
00270                 exit(1);
00271         }
00272 
00273         while ((ch = getopt(argc, argv, "c:f:h:nTt:C:S:zWV")) != EOF)
00274                 switch (ch) {
00275                 case 'c':
00276                         *clp++ = optarg;
00277                         break;
00278                 case 'f':
00279                         if (freopen(optarg, "r", stdin) == NULL) {
00280                                 fprintf(stderr, "%s: %s: reopen: %s\n",
00281                                     progname, optarg, strerror(errno));
00282                                 exit(1);
00283                         }
00284                         break;
00285                 case 'h':
00286                         home = optarg;
00287                         break;
00288                 case 'n':
00289                         db_nooverwrite = DB_NOOVERWRITE;
00290                         break;
00291                 case 'T':
00292                         no_header = 1;
00293                         break;
00294                 case 't':
00295                         if (strcmp(optarg, "btree") == 0) {
00296                                 dbtype = DB_BTREE;
00297                                 break;
00298                         }
00299                         if (strcmp(optarg, "hash") == 0) {
00300                                 dbtype = DB_HASH;
00301                                 break;
00302                         }
00303                         if (strcmp(optarg, "recno") == 0) {
00304                                 dbtype = DB_RECNO;
00305                                 break;
00306                         }
00307                         if (strcmp(optarg, "queue") == 0) {
00308                                 dbtype = DB_QUEUE;
00309                                 break;
00310                         }
00311                         usage();
00312                         /* NOTREACHED */
00313                 case 'V':
00314                         printf("%s\n", CDB_db_version(NULL, NULL, NULL));
00315                         exit(0);
00316                 case 'C':
00317                         cachesize = atoi(optarg);
00318                         break;
00319                 case 'z':
00320                         compress = DB_COMPRESS;
00321                         break;
00322                 case 'W':
00323                         wordlist = 1;
00324                         break;
00325                 case '?':
00326                 default:
00327                         usage();
00328                         /* NOTREACHED */
00329                 }
00330         argc -= optind;
00331         argv += optind;
00332 
00333         if (argc != 1)
00334                 usage();
00335 
00336         /* Handle possible interruptions. */
00337         __db_util_siginit();
00338 
00339         if(wordlist) {
00340           static ConfigDefaults defaults[] = {
00341             { "wordlist_wordkey_description", "Word 24/DocID 32/Flag 8/Location 16"},
00342             { "wordlist_env_skip", "true"},
00343             { 0, 0, 0 }
00344           };
00345           context = new WordContext(defaults);
00346         } 
00347 
00348         /*
00349          * Create an environment object initialized for error reporting, and
00350          * then open it.
00351          */
00352         if ((ret = CDB_db_env_create(&dbenv, 0)) != 0) {
00353           fprintf(stderr,
00354                   "%s: CDB_db_env_create: %s\n", progname, CDB_db_strerror(ret));
00355           goto shutdown;
00356         }
00357         dbenv->set_errfile(dbenv, stderr);
00358         dbenv->set_errpfx(dbenv, progname);
00359         if(cachesize > 0) dbenv->set_cachesize(dbenv, 0, cachesize, 1);
00360         if(compress && wordlist) dbenv->mp_cmpr_info = (new WordDBCompress(context))->CmprInfo();
00361 
00362         if (db_init(home) != 0)
00363                 goto shutdown;
00364 
00365         while (!endofile)
00366                 if (load(argv[0],
00367                     dbtype, clist, no_header, db_nooverwrite, compress, context) != 0)
00368                         goto shutdown;
00369 
00370         if (0) {
00371 shutdown:       exitval = 1;
00372         }
00373         if(wordlist && compress) {
00374           delete (WordDBCompress*)dbenv->mp_cmpr_info->user_data;
00375           delete dbenv->mp_cmpr_info;
00376         }
00377         if ((ret = dbenv->close(dbenv, 0)) != 0) {
00378                 exitval = 1;
00379                 fprintf(stderr,
00380                     "%s: dbenv->close: %s\n", progname, CDB_db_strerror(ret));
00381         }
00382 
00383         if(context) delete context;
00384         free(clist);
00385         /* Resend any caught signal. */
00386         __db_util_sigresend();
00387 
00388         /* Return 0 on success, 1 if keys existed already, and 2 on failure. */
00389         return (exitval == 0 ? (existed == 0 ? 0 : 1) : 2);
00390 }
00391 
00392 /*
00393  * load --
00394  *      Load a database.
00395  */
00396 int
00397 load(char *name, DBTYPE argtype, char **clist, int no_header, u_int32_t db_nooverwrite, int compress, WordContext* context)
00398 {
00399         DB *dbp;
00400         DBT key, rkey, data, *readp, *writep;
00401         DBTYPE dbtype;
00402         db_recno_t recno, datarecno;
00403         int checkprint, ret, rval, keys;
00404         int keyflag, ascii_recno;
00405         char *subdb;
00406 
00407         endodata = 0;
00408         subdb = NULL;
00409         memset(&key, 0, sizeof(DBT));
00410         memset(&data, 0, sizeof(DBT));
00411 
00412         /* Create the DB object. */
00413         if ((ret = CDB_db_create(&dbp, dbenv, 0)) != 0) {
00414                 dbenv->err(dbenv, ret, "CDB_db_create");
00415                 return (1);
00416         }
00417 
00418         dbtype = DB_UNKNOWN;
00419         keys = -1;
00420         keyflag = -1;
00421         /* Read the header -- if there's no header, we expect flat text. */
00422         if (no_header) {
00423                 checkprint = 1;
00424                 dbtype = argtype;
00425         } else {
00426                 if (rheader(dbp, &dbtype, &subdb, &checkprint, &keys) != 0)
00427                         goto err;
00428                 if (endofile)
00429                         goto done;
00430         }
00431 
00432         /*
00433          * Apply command-line configuration changes.  (We apply command-line
00434          * configuration changes to all databases that are loaded, e.g., all
00435          * subdatabases.)
00436          */
00437         if (configure(dbp, clist, &subdb, &keyflag))
00438                 goto err;
00439 
00440 #if 0
00441         if(subdb && !strcmp(subdb, "index") && context) dbp->set_bt_compare(dbp, word_db_cmp);
00442 #endif
00443 
00444         if (keys != 1) {
00445                 if (keyflag == 1) {
00446                         dbp->err(dbp, EINVAL, "No keys specified in file");
00447                         goto err;
00448                 }
00449         }
00450         else if (keyflag == 0) {
00451                 dbp->err(dbp, EINVAL, "Keys specified in file");
00452                 goto err;
00453         }
00454         else
00455                 keyflag = 1;
00456 
00457         if (dbtype == DB_BTREE || dbtype == DB_HASH) {
00458                 if (keyflag == 0)
00459                         dbp->err(dbp,
00460                             EINVAL, "Btree and Hash must specify keys");
00461                 else
00462                         keyflag = 1;
00463         }
00464 
00465         if (argtype != DB_UNKNOWN) {
00466 
00467                 if (dbtype == DB_RECNO || dbtype == DB_QUEUE)
00468                         if (keyflag != 1 && argtype != DB_RECNO
00469                              && argtype != DB_QUEUE){
00470                                 dbenv->errx(dbenv,
00471                            "improper database type conversion specified");
00472                                 goto err;
00473                         }
00474                 dbtype = argtype;
00475         }
00476 
00477         if (dbtype == DB_UNKNOWN) {
00478                 dbenv->errx(dbenv, "no database type specified");
00479                 goto err;
00480         }
00481 
00482         if (keyflag == -1)
00483                 keyflag = 0;
00484 
00485         if (keyflag == 1 && (dbtype == DB_RECNO || dbtype == DB_QUEUE))
00486                 ascii_recno = 1;
00487         else
00488                 ascii_recno = 0;
00489 
00490         /* Open the DB file. */
00491         if ((ret = dbp->open(dbp,
00492             name, subdb, dbtype, (DB_CREATE | compress), CDB___db_omode("rwrwrw"))) != 0) {
00493                 dbp->err(dbp, ret, "DB->open: %s", name);
00494                 goto err;
00495         }
00496 
00497         /* Initialize the key/data pair. */
00498         readp = &key;
00499         writep = &key;
00500         if (dbtype == DB_RECNO || dbtype == DB_QUEUE) {
00501                 key.size = sizeof(recno);
00502                 if (keyflag) {
00503                         key.data = &datarecno;
00504                         if (checkprint) {
00505                                 readp = &rkey;
00506                                 goto key_data;
00507                         }
00508                 }
00509                 else
00510                         key.data = &recno;
00511         } else
00512 key_data:       if ((readp->data =
00513                     (void *)malloc(readp->ulen = 1024)) == NULL) {
00514                         dbenv->err(dbenv, ENOMEM, NULL);
00515                         goto err;
00516                 }
00517         if ((data.data = (void *)malloc(data.ulen = 1024)) == NULL) {
00518                 dbenv->err(dbenv, ENOMEM, NULL);
00519                 goto err;
00520         }
00521 
00522         /* Get each key/data pair and add them to the database. */
00523         for (recno = 1; !__db_util_interrupted(); ++recno) {
00524                 if (!keyflag)
00525                         if (checkprint) {
00526                                 if (dbt_rprint(&data))
00527                                         goto err;
00528                         } else {
00529                                 if (dbt_rdump(&data))
00530                                         goto err;
00531                         }
00532                 else
00533                         if (checkprint) {
00534                                 if (dbt_rprint(readp))
00535                                         goto err;
00536                                 if (!endodata && dbt_rprint(&data))
00537                                         goto fmt;
00538                         } else {
00539                                 if (ascii_recno) {
00540                                         if (dbt_rrecno(readp))
00541                                                 goto err;
00542                                 } else
00543                                         if (dbt_rdump(readp))
00544                                                 goto err;
00545                                 if (!endodata && dbt_rdump(&data)) {
00546 fmt:                                    dbenv->errx(dbenv,
00547                                             "odd number of key/data pairs");
00548                                         goto err;
00549                                 }
00550                         }
00551                 if (endodata)
00552                         break;
00553                 if (readp != writep) {
00554                         if (sscanf((char*)readp->data, "%ud", &datarecno) != 1)
00555                                 dbenv->errx(dbenv,
00556                                     "%s: non-integer key at line: %d",
00557                                     name, !keyflag ? recno : recno * 2 - 1);
00558                         if (datarecno == 0)
00559                                 dbenv->errx(dbenv, "%s: zero key at line: %d",
00560                                     name,
00561                                     !keyflag ? recno : recno * 2 - 1);
00562                 }
00563                 switch (ret =
00564                     dbp->put(dbp, NULL, writep, &data, db_nooverwrite)) {
00565                 case 0:
00566                         break;
00567                 case DB_KEYEXIST:
00568                         existed = 1;
00569                         dbenv->errx(dbenv,
00570                             "%s: line %d: key already exists, not loaded:",
00571                             name,
00572                             !keyflag ? recno : recno * 2 - 1);
00573 
00574                         (void)CDB___db_prdbt(&key, checkprint, 0, stderr,
00575                             CDB___db_verify_callback, 0, NULL);
00576                         break;
00577                 default:
00578                         dbenv->err(dbenv, ret, NULL);
00579                         goto err;
00580                 }
00581         }
00582 done:   rval = 0;
00583 
00584         if (0) {
00585 err:            rval = 1;
00586         }
00587 
00588         /* Close the database. */
00589         if ((ret = dbp->close(dbp, 0)) != 0) {
00590                 dbp->err(dbp, ret, "DB->close");
00591                 rval = 1;
00592         }
00593 
00594         /* Free allocated memory. */
00595         if (subdb != NULL)
00596                 free(subdb);
00597         if (dbtype != DB_RECNO && dbtype != DB_QUEUE) {
00598                 if(key.data) free(key.data);
00599         }
00600         if(data.data) free(data.data);
00601 
00602         return (rval);
00603 }
00604 
00605 /*
00606  * db_init --
00607  *      Initialize the environment.
00608  */
00609 int
00610 db_init(char *home)
00611 {
00612         u_int32_t flags;
00613         int ret;
00614 
00615         /* We may be loading into a live environment.  Try and join. */
00616         flags = DB_USE_ENVIRON |
00617             DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN;
00618         if (dbenv->open(dbenv, home, flags, 0) == 0)
00619                 return (0);
00620 
00621         /*
00622          * We're trying to load a database.
00623          *
00624          * An environment is required because we may be trying to look at
00625          * databases in directories other than the current one.  We could
00626          * avoid using an environment iff the -h option wasn't specified,
00627          * but that seems like more work than it's worth.
00628          *
00629          * No environment exists (or, at least no environment that includes
00630          * an mpool region exists).  Create one, but make it private so that
00631          * no files are actually created.
00632          */
00633         LF_CLR(DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_TXN);
00634         LF_SET(DB_CREATE | DB_PRIVATE);
00635         if ((ret = dbenv->open(dbenv, home, flags, 0)) == 0)
00636                 return (0);
00637 
00638         /* An environment is required. */
00639         dbenv->err(dbenv, ret, "DBENV->open");
00640         return (1);
00641 }
00642 
/*
 * Helper macros for configure().  Each one expands to an "if" statement
 * that must sit directly inside configure()'s loop: on a keyword match it
 * either finishes the iteration with "continue", returns 1, or jumps to
 * the "nameerr" label.  They use configure()'s locals dbp, ret and val.
 */

/*
 * FLAG --
 *      Boolean keyword: "1" sets the flag through DB->set_flags, "0" is
 *      accepted and ignored, anything else is reported by badnum().
 */
#define FLAG(name, value, keyword, flag)                                \
        if (strcmp(name, keyword) == 0) {                               \
                if (*value == '1') {                                    \
                        ret = dbp->set_flags(dbp, flag);                \
                        if (ret != 0) {                                 \
                                dbp->err(dbp, ret, "%s: set_flags: %s", \
                                    progname, name);                    \
                                return (1);                             \
                        }                                               \
                } else if (*value != '0') {                             \
                        badnum();                                       \
                        return (1);                                     \
                }                                                       \
                continue;                                               \
        }

/*
 * NUMBER --
 *      Numeric keyword: convert the value with CDB___db_getlong (range
 *      1..LONG_MAX) and pass it to the DB method "func".
 */
#define NUMBER(name, value, keyword, func)                              \
        if (strcmp(name, keyword) == 0) {                               \
                if (CDB___db_getlong(dbp,                               \
                    NULL, value, 1, LONG_MAX, &val) != 0)               \
                        return (1);                                     \
                ret = dbp->func(dbp, val);                              \
                if (ret != 0)                                           \
                        goto nameerr;                                   \
                continue;                                               \
        }

/*
 * STRING --
 *      Character keyword: pass the first character of the value to the
 *      DB method "func".
 */
#define STRING(name, value, keyword, func)                              \
        if (strcmp(name, keyword) == 0) {                               \
                ret = dbp->func(dbp, value[0]);                         \
                if (ret != 0)                                           \
                        goto nameerr;                                   \
                continue;                                               \
        }
00676 
00677 /*
00678  * configure --
00679  *      Handle command-line configuration options.
00680  */
00681 int
00682 configure(DB *dbp, char **clp, char **subdbp, int *keysp)
00683 {
00684         long val;
00685         int ret, savech;
00686         char *name, *value;
00687 
00688         for (; (name = *clp) != NULL; *--value = savech, ++clp) {
00689                 if ((value = strchr(name, '=')) == NULL) {
00690                         dbp->errx(dbp,
00691                     "command-line configuration uses name=value format");
00692                         return (1);
00693                 }
00694                 savech = *value;
00695                 *value++ = '\0';
00696 
00697                 if (strcmp(name, "database") == 0 ||
00698                     strcmp(name, "subdatabase") == 0) {
00699                         if ((*subdbp = strdup(value)) == NULL) {
00700                                 dbp->err(dbp, ENOMEM, NULL);
00701                                 return (1);
00702                         }
00703                         continue;
00704                 }
00705                 if (strcmp(name, "keys") == 0) {
00706                         if (strcmp(value, "1") == 0)
00707                                 *keysp = 1;
00708                         else if (strcmp(value, "0") == 0)
00709                                 *keysp = 0;
00710                         else {
00711                                 badnum();
00712                                 return (1);
00713                         }
00714                         continue;
00715                 }
00716 
00717 #ifdef notyet
00718                 NUMBER(name, value, "bt_maxkey", set_bt_maxkey);
00719 #endif
00720                 NUMBER(name, value, "bt_minkey", set_bt_minkey);
00721                 NUMBER(name, value, "db_lorder", set_lorder);
00722                 NUMBER(name, value, "db_pagesize", set_pagesize);
00723                 FLAG(name, value, "duplicates", DB_DUP);
00724                 FLAG(name, value, "dupsort", DB_DUPSORT);
00725                 NUMBER(name, value, "h_ffactor", set_h_ffactor);
00726                 NUMBER(name, value, "h_nelem", set_h_nelem);
00727                 NUMBER(name, value, "re_len", set_re_len);
00728                 STRING(name, value, "re_pad", set_re_pad);
00729                 FLAG(name, value, "recnum", DB_RECNUM);
00730                 FLAG(name, value, "renumber", DB_RENUMBER);
00731 
00732                 dbp->errx(dbp,
00733                     "unknown command-line configuration keyword");
00734                 return (1);
00735         }
00736         return (0);
00737 
00738 nameerr:
00739         dbp->err(dbp, ret, "%s: %s=%s", progname, name, value);
00740         return (1);
00741 }
00742 
00743 /*
00744  * rheader --
00745  *      Read the header message.
00746  */
00747 int
00748 rheader(DB *dbp, DBTYPE *dbtypep, char **subdbp, int *checkprintp, int *keysp)
00749 {
00750         long val;
00751         int first, ret;
00752         char *name, *value, *p, buf[128];
00753 
00754         *dbtypep = DB_UNKNOWN;
00755         *checkprintp = 0;
00756 
00757         for (first = 1;; first = 0) {
00758                 ++lineno;
00759 
00760                 /* If we don't see the expected information, it's an error. */
00761                 if (fgets(buf, sizeof(buf), stdin) == NULL) {
00762                         if (!first || ferror(stdin))
00763                                 goto badfmt;
00764                         endofile = 1;
00765                         break;
00766                 }
00767                 if ((p = strchr(name = buf, '=')) == NULL)
00768                         goto badfmt;
00769                 *p++ = '\0';
00770                 if ((p = strchr(value = p, '\n')) == NULL)
00771                         goto badfmt;
00772                 *p = '\0';
00773                 if (name[0] == '\0' || value[0] == '\0')
00774                         goto badfmt;
00775 
00776                 if (strcmp(name, "HEADER") == 0)
00777                         break;
00778                 if (strcmp(name, "VERSION") == 0) {
00779                         /*
00780                          * Version 1 didn't have a "VERSION" header line, we
00781                          * only support versions 1 and 2 of the dump format.
00782                          */
00783                         version = atoi(value);
00784 
00785                         if (version != 2) {
00786                                 dbp->errx(dbp,
00787                                     "line %lu: VERSION %d is unsupported",
00788                                     lineno, version);
00789                                 return (1);
00790                         }
00791                         continue;
00792                 }
00793                 if (strcmp(name, "format") == 0) {
00794                         if (strcmp(value, "bytevalue") == 0) {
00795                                 *checkprintp = 0;
00796                                 continue;
00797                         }
00798                         if (strcmp(value, "print") == 0) {
00799                                 *checkprintp = 1;
00800                                 continue;
00801                         }
00802                         goto badfmt;
00803                 }
00804                 if (strcmp(name, "type") == 0) {
00805                         if (strcmp(value, "btree") == 0) {
00806                                 *dbtypep = DB_BTREE;
00807                                 continue;
00808                         }
00809                         if (strcmp(value, "hash") == 0) {
00810                                 *dbtypep = DB_HASH;
00811                                 continue;
00812                         }
00813                         if (strcmp(value, "recno") == 0) {
00814                                 *dbtypep = DB_RECNO;
00815                                 continue;
00816                         }
00817                         if (strcmp(value, "queue") == 0) {
00818                                 *dbtypep = DB_QUEUE;
00819                                 continue;
00820                         }
00821                         dbp->errx(dbp, "line %lu: unknown type", lineno);
00822                         return (1);
00823                 }
00824                 if (strcmp(name, "database") == 0 ||
00825                     strcmp(name, "subdatabase") == 0) {
00826                         if ((*subdbp = strdup(value)) == NULL) {
00827                                 dbp->err(dbp, ENOMEM, NULL);
00828                                 return (1);
00829                         }
00830                         continue;
00831                 }
00832                 if (strcmp(name, "keys") == 0) {
00833                         if (strcmp(value, "1") == 0)
00834                                 *keysp = 1;
00835                         else if (strcmp(value, "0") == 0)
00836                                 *keysp = 0;
00837                         else {
00838                                 badnum();
00839                                 return (1);
00840                         }
00841                         continue;
00842                 }
00843 
00844 #ifdef notyet
00845                 NUMBER(name, value, "bt_maxkey", set_bt_maxkey);
00846 #endif
00847                 NUMBER(name, value, "bt_minkey", set_bt_minkey);
00848                 NUMBER(name, value, "db_lorder", set_lorder);
00849                 NUMBER(name, value, "db_pagesize", set_pagesize);
00850                 FLAG(name, value, "duplicates", DB_DUP);
00851                 FLAG(name, value, "dupsort", DB_DUPSORT);
00852                 NUMBER(name, value, "h_ffactor", set_h_ffactor);
00853                 NUMBER(name, value, "h_nelem", set_h_nelem);
00854                 NUMBER(name, value, "re_len", set_re_len);
00855                 STRING(name, value, "re_pad", set_re_pad);
00856                 FLAG(name, value, "recnum", DB_RECNUM);
00857                 FLAG(name, value, "renumber", DB_RENUMBER);
00858 
00859                 dbp->errx(dbp,
00860                     "unknown input-file header configuration keyword");
00861                 return (1);
00862         }
00863         return (0);
00864 
00865 nameerr:
00866         dbp->err(dbp, ret, "%s: %s=%s", progname, name, value);
00867         return (1);
00868 
00869 badfmt:
00870         dbp->errx(dbp, "line %lu: unexpected format", lineno);
00871         return (1);
00872 }
00873 
00874 /*
00875  * dbt_rprint --
00876  *      Read a printable line into a DBT structure.
00877  */
00878 int
00879 dbt_rprint(DBT *dbtp)
00880 {
00881         u_int32_t len;
00882         u_int8_t *p;
00883         int c1, c2, e, escape, first;
00884         char buf[32];
00885 
00886         ++lineno;
00887 
00888         first = 1;
00889         e = escape = 0;
00890         for (p = (u_int8_t*)dbtp->data, len = 0; (c1 = getchar()) != '\n';) {
00891                 if (c1 == EOF) {
00892                         if (len == 0) {
00893                                 endofile = endodata = 1;
00894                                 return (0);
00895                         }
00896                         badend();
00897                         return (1);
00898                 }
00899                 if (first) {
00900                         first = 0;
00901                         if (version > 1) {
00902                                 if (c1 != ' ') {
00903                                         buf[0] = c1;
00904                                         if (fgets(buf + 1,
00905                                             sizeof(buf) - 1, stdin) == NULL ||
00906                                             strcmp(buf, "DATA=END\n") != 0) {
00907                                                 badend();
00908                                                 return (1);
00909                                         }
00910                                         endodata = 1;
00911                                         return (0);
00912                                 }
00913                                 continue;
00914                         }
00915                 }
00916                 if (escape) {
00917                         if (c1 != '\\') {
00918                                 if ((c2 = getchar()) == EOF) {
00919                                         badend();
00920                                         return (1);
00921                                 }
00922                                 c1 = digitize(c1, &e) << 4 | digitize(c2, &e);
00923                                 if (e)
00924                                         return (1);
00925                         }
00926                         escape = 0;
00927                 } else
00928                         if (c1 == '\\') {
00929                                 escape = 1;
00930                                 continue;
00931                         }
00932                 if (len >= dbtp->ulen - 10) {
00933                         dbtp->ulen *= 2;
00934                         if ((dbtp->data =
00935                             (void *)realloc(dbtp->data, dbtp->ulen)) == NULL) {
00936                                 dbenv->err(dbenv, ENOMEM, NULL);
00937                                 return (1);
00938                         }
00939                         p = (u_int8_t *)dbtp->data + len;
00940                 }
00941                 ++len;
00942                 *p++ = c1;
00943         }
00944         dbtp->size = len;
00945 
00946         return (0);
00947 }
00948 
00949 /*
00950  * dbt_rdump --
00951  *      Read a byte dump line into a DBT structure.
00952  */
00953 int
00954 dbt_rdump(DBT *dbtp)
00955 {
00956         u_int32_t len;
00957         u_int8_t *p;
00958         int c1, c2, e, first;
00959         char buf[32];
00960 
00961         ++lineno;
00962 
00963         first = 1;
00964         e = 0;
00965         for (p = (u_int8_t*)dbtp->data, len = 0; (c1 = getchar()) != '\n';) {
00966                 if (c1 == EOF) {
00967                         if (len == 0) {
00968                                 endofile = endodata = 1;
00969                                 return (0);
00970                         }
00971                         badend();
00972                         return (1);
00973                 }
00974                 if (first) {
00975                         first = 0;
00976                         if (version > 1) {
00977                                 if (c1 != ' ') {
00978                                         buf[0] = c1;
00979                                         if (fgets(buf + 1,
00980                                             sizeof(buf) - 1, stdin) == NULL ||
00981                                             strcmp(buf, "DATA=END\n") != 0) {
00982                                                 badend();
00983                                                 return (1);
00984                                         }
00985                                         endodata = 1;
00986                                         return (0);
00987                                 }
00988                                 continue;
00989                         }
00990                 }
00991                 if ((c2 = getchar()) == EOF) {
00992                         badend();
00993                         return (1);
00994                 }
00995                 if (len >= dbtp->ulen - 10) {
00996                         dbtp->ulen *= 2;
00997                         if ((dbtp->data =
00998                             (void *)realloc(dbtp->data, dbtp->ulen)) == NULL) {
00999                                 dbenv->err(dbenv, ENOMEM, NULL);
01000                                 return (1);
01001                         }
01002                         p = (u_int8_t *)dbtp->data + len;
01003                 }
01004                 ++len;
01005                 *p++ = digitize(c1, &e) << 4 | digitize(c2, &e);
01006                 if (e)
01007                         return (1);
01008         }
01009         dbtp->size = len;
01010 
01011         return (0);
01012 }
01013 
01014 /*
01015  * dbt_rrecno --
01016  *      Read a record number dump line into a DBT structure.
01017  */
01018 int
01019 dbt_rrecno(DBT *dbtp)
01020 {
01021         char buf[32];
01022 
01023         ++lineno;
01024 
01025         if (fgets(buf, sizeof(buf), stdin) == NULL) {
01026                 endofile = endodata = 1;
01027                 return (0);
01028         }
01029 
01030         if (strcmp(buf, "DATA=END\n") == 0) {
01031                 endodata = 1;
01032                 return (0);
01033         }
01034 
01035         if (buf[0] != ' ' || CDB___db_getulong(NULL,
01036             progname, buf + 1, 0, 0, (u_long *)dbtp->data)) {
01037                 badend();
01038                 return (1);
01039         }
01040 
01041         dbtp->size = sizeof(db_recno_t);
01042         return (0);
01043 }
01044 
01045 /*
01046  * digitize --
01047  *      Convert a character to an integer.
01048  */
01049 int
01050 digitize(int c, int *errorp)
01051 {
01052         switch (c) {                    /* Don't depend on ASCII ordering. */
01053         case '0': return (0);
01054         case '1': return (1);
01055         case '2': return (2);
01056         case '3': return (3);
01057         case '4': return (4);
01058         case '5': return (5);
01059         case '6': return (6);
01060         case '7': return (7);
01061         case '8': return (8);
01062         case '9': return (9);
01063         case 'a': return (10);
01064         case 'b': return (11);
01065         case 'c': return (12);
01066         case 'd': return (13);
01067         case 'e': return (14);
01068         case 'f': return (15);
01069         }
01070 
01071         dbenv->errx(dbenv, "unexpected hexadecimal value");
01072         *errorp = 1;
01073 
01074         return (0);
01075 }
01076 
01077 /*
01078  * badnum --
01079  *      Display the bad number message.
01080  */
01081 void
01082 badnum()
01083 {
01084         dbenv->errx(dbenv,
01085             "boolean name=value pairs require a value of 0 or 1");
01086 }
01087 
01088 /*
01089  * badend --
01090  *      Display the bad end to input message.
01091  */
01092 void
01093 badend()
01094 {
01095         dbenv->errx(dbenv, "unexpected end of input data or key/data pair");
01096 }
01097 
/*
 * usage --
 *      Print the command-line synopsis on standard error and terminate
 *      the process with exit status 1.  Does not return.
 */
void
usage()
{
        /* The utility is documented as htdb_load, not db_load. */
        (void)fprintf(stderr, "%s\n\t%s\n",
            "usage: htdb_load [-nTzWV]",
    "[-c name=value] [-f file] [-h home] [-C cachesize] [-t btree | hash | recno] db_file");
        exit(1);
}

Generated on Sun Jun 8 10:56:39 2008 for GNUmifluz by  doxygen 1.5.5