db.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1990, 1993, 1994, 1995, 1996
00009  *      Keith Bostic.  All rights reserved.
00010  */
00011 /*
00012  * Copyright (c) 1990, 1993, 1994, 1995
00013  *      The Regents of the University of California.  All rights reserved.
00014  *
00015  * Redistribution and use in source and binary forms, with or without
00016  * modification, are permitted provided that the following conditions
00017  * are met:
00018  * 1. Redistributions of source code must retain the above copyright
00019  *    notice, this list of conditions and the following disclaimer.
00020  * 2. Redistributions in binary form must reproduce the above copyright
00021  *    notice, this list of conditions and the following disclaimer in the
00022  *    documentation and/or other materials provided with the distribution.
00023  * 3. Neither the name of the University nor the names of its contributors
00024  *    may be used to endorse or promote products derived from this software
00025  *    without specific prior written permission.
00026  *
00027  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00028  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00029  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00030  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00031  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00032  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00033  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00034  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00035  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00036  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00037  * SUCH DAMAGE.
00038  */
00039 
00040 #include "config.h"
00041 
00042 #ifndef lint
00043 static const char revid[] = "$Id: db_8c-source.html,v 1.1 2008/06/08 10:16:30 sebdiaz Exp $";
00044 #endif /* not lint */
00045 
00046 #ifndef NO_SYSTEM_INCLUDES
00047 #include <sys/types.h>
00048 
00049 #include <errno.h>
00050 #include <stddef.h>
00051 #include <stdlib.h>
00052 #include <string.h>
00053 #endif
00054 
00055 #include "db_int.h"
00056 #include "db_page.h"
00057 #include "db_shash.h"
00058 #include "db_swap.h"
00059 #include "btree.h"
00060 #include "db_am.h"
00061 #include "hash.h"
00062 #include "lock.h"
00063 #include "log.h"
00064 #include "mp.h"
00065 #include "qam.h"
00066 #include "common_ext.h"
00067 
00068 /* Actions that __db_master_update can take: delete a subdatabase's entry (MU_REMOVE), re-key it under a new name (MU_RENAME), or look it up, creating it if DB_CREATE is set (MU_OPEN). */
00069 typedef enum { MU_REMOVE, MU_RENAME, MU_OPEN } mu_action;
00070 
00071 /* Flag values that __db_file_setup can return through its final (int *) argument.  CDB___db_dbopen adds DB_TRUNCATE for the mpool when CREATE is set, and returns before the access-method open when ZERO is set. */
00072 #define DB_FILE_SETUP_CREATE    0x01
00073 #define DB_FILE_SETUP_ZERO      0x02
00074 
00075 static int __db_file_setup __P((DB *,
00076                const char *, u_int32_t, int, db_pgno_t, int *));
00077 static int __db_master_update __P((DB *,
00078                const char *, u_int32_t,
00079                db_pgno_t *, mu_action, const char *, u_int32_t));
00080 static int __db_metabegin __P((DB *, DB_LOCK *));
00081 static int __db_metaend __P((DB *,
00082                DB_LOCK *, int, int (*)(DB *, void *), void *));
00083 static int __db_refresh __P((DB *));
00084 static int __db_remove_callback __P((DB *, void *));
00085 static int __db_set_pgsize __P((DB *, DB_FH *, char *));
00086 static int __db_subdb_remove __P((DB *, const char *, const char *));
00087 static int __db_subdb_rename __P(( DB *,
00088                 const char *, const char *, const char *));
00089 #if     CONFIG_TEST
00090 static void __db_makecopy __P((const char *, const char *));
00091 #endif
00092 
00093 /*
00094  * CDB___db_open --
00095  *      Main library interface to the DB access methods.
00096  *
00097  * PUBLIC: int CDB___db_open __P((DB *,
00098  * PUBLIC:     const char *, const char *, DBTYPE, u_int32_t, int));
00099  */
00100 int
00101 CDB___db_open(dbp, name, subdb, type, flags, mode)
00102         DB *dbp;
00103         const char *name, *subdb;
00104         DBTYPE type;
00105         u_int32_t flags;
00106         int mode;
00107 {
00108         DB_ENV *dbenv;
00109         DB_LOCK open_lock;
00110         DB *mdbp;
00111         db_pgno_t meta_pgno;
00112         u_int32_t ok_flags;
00113         int ret, t_ret;
00114 
00115         dbenv = dbp->dbenv;
00116         mdbp = NULL;
00117 
00118         /* Validate arguments. */
00119 #define OKFLAGS                                                         \
00120     (DB_CREATE | DB_EXCL | DB_FCNTL_LOCKING | DB_COMPRESS |             \
00121     DB_NOMMAP | DB_RDONLY | DB_RDWRMASTER | DB_THREAD | DB_TRUNCATE)
00122         if ((ret = CDB___db_fchk(dbenv, "DB->open", flags, OKFLAGS)) != 0)
00123                 return (ret);
00124         if (LF_ISSET(DB_EXCL) && !LF_ISSET(DB_CREATE))
00125                 return (CDB___db_ferr(dbenv, "DB->open", 1));
00126         if (LF_ISSET(DB_RDONLY) && LF_ISSET(DB_CREATE))
00127                 return (CDB___db_ferr(dbenv, "DB->open", 1));
00128 #ifdef  HAVE_VXWORKS
00129         if (LF_ISSET(DB_TRUNCATE)) {
00130                 CDB___db_err(dbenv, "DB_TRUNCATE unsupported in VxWorks");
00131                 return (CDB___db_eopnotsup(dbenv));
00132         }
00133 #endif
00134         switch (type) {
00135         case DB_UNKNOWN:
00136                 if (LF_ISSET(DB_CREATE)) {
00137                         CDB___db_err(dbenv,
00138                             "%s: DB_UNKNOWN type specified with DB_CREATE",
00139                             name);
00140                         return (EINVAL);
00141                 }
00142                 ok_flags = 0;
00143                 break;
00144         case DB_BTREE:
00145                 ok_flags = DB_OK_BTREE;
00146                 break;
00147         case DB_HASH:
00148                 ok_flags = DB_OK_HASH;
00149                 break;
00150         case DB_QUEUE:
00151                 ok_flags = DB_OK_QUEUE;
00152                 break;
00153         case DB_RECNO:
00154                 ok_flags = DB_OK_RECNO;
00155                 break;
00156         default:
00157                 CDB___db_err(dbenv, "unknown type: %lu", type);
00158                 return (EINVAL);
00159         }
00160         if (ok_flags)
00161                 DB_ILLEGAL_METHOD(dbp, ok_flags);
00162 
00163         /* The environment may have been created, but never opened. */
00164         if (!F_ISSET(dbenv, DB_ENV_DBLOCAL | DB_ENV_OPEN_CALLED)) {
00165                 CDB___db_err(dbenv, "environment not yet opened");
00166                 return (EINVAL);
00167         }
00168 
00169         /*
00170          * Historically, you could pass in an environment that didn't have a
00171          * mpool, and DB would create a private one behind the scenes.  This
00172          * no longer works.
00173          */
00174         if (!F_ISSET(dbenv, DB_ENV_DBLOCAL) && !MPOOL_ON(dbenv)) {
00175                 CDB___db_err(dbenv, "environment did not include a memory pool.");
00176                 return (EINVAL);
00177         }
00178 
00179         /*
00180          * You can't specify threads during DB->open if subsystems in the
00181          * environment weren't configured with them.
00182          */
00183         if (LF_ISSET(DB_THREAD) &&
00184             !F_ISSET(dbenv, DB_ENV_DBLOCAL | DB_ENV_THREAD)) {
00185                 CDB___db_err(dbenv, "environment not created using DB_THREAD");
00186                 return (EINVAL);
00187         }
00188 
00189         /*
00190          * If the environment was configured with threads, the DB handle
00191          * must also be free-threaded, so we force the DB_THREAD flag on.
00192          * (See SR #2033 for why this is a requirement--recovery needs
00193          * to be able to grab a dbp using __db_fileid_to_dbp, and it has
00194          * no way of knowing which dbp goes with which thread, so whichever
00195          * one it finds has to be usable in any of them.)
00196          */
00197         if (F_ISSET(dbenv, DB_ENV_THREAD))
00198                 LF_SET(DB_THREAD);
00199 
00200         /* DB_TRUNCATE is not transaction recoverable. */
00201         if (LF_ISSET(DB_TRUNCATE) && TXN_ON(dbenv)) {
00202                 CDB___db_err(dbenv,
00203             "DB_TRUNCATE illegal in a transaction protected environment");
00204                 return (EINVAL);
00205         }
00206 
00207         /* Subdatabase checks. */
00208         if (subdb != NULL) {
00209                 /* Subdatabases must be created in named files. */
00210                 if (name == NULL) {
00211                         CDB___db_err(dbenv,
00212                     "multiple databases cannot be created in temporary files");
00213                         return (EINVAL);
00214                 }
00215 
00216                 /* QAM can't be done as a subdatabase. */
00217                 if (type == DB_QUEUE) {
00218                         CDB___db_err(dbenv, "Queue databases must be one-per-file");
00219                         return (EINVAL);
00220                 }
00221         }
00222 
00223         /* Convert any DB->open flags. */
00224         if (LF_ISSET(DB_RDONLY))
00225                 F_SET(dbp, DB_AM_RDONLY);
00226         if (LF_ISSET(DB_COMPRESS))
00227                 F_SET(dbp, DB_AM_CMPR);
00228 
00229         /* Fill in the type. */
00230         dbp->type = type;
00231 
00232         /*
00233          * If we're potentially creating a database, wrap the open inside of
00234          * a transaction.
00235          */
00236         if (TXN_ON(dbenv) && LF_ISSET(DB_CREATE))
00237                 if ((ret = __db_metabegin(dbp, &open_lock)) != 0)
00238                         return (ret);
00239 
00240         /*
00241          * If we're opening a subdatabase, we have to open (and potentially
00242          * create) the CDB_main database, and then get (and potentially store)
00243          * our base page number in that database.  Then, we can finally open
00244          * the subdatabase.
00245          */
00246         if (subdb == NULL)
00247                 meta_pgno = PGNO_BASE_MD;
00248         else {
00249                 /*
00250                  * Open the master database, optionally creating or updating
00251                  * it, and retrieve the metadata page number.
00252                  */
00253                 if ((ret =
00254                     CDB___db_master_open(dbp, name, flags, mode, &mdbp)) != 0)
00255                         goto err;
00256 
00257                 /* Copy the page size and file id from the master. */
00258                 dbp->pgsize = mdbp->pgsize;
00259                 F_SET(dbp, DB_AM_SUBDB);
00260                 memcpy(dbp->fileid, mdbp->fileid, DB_FILE_ID_LEN);
00261 
00262                 if ((ret = __db_master_update(mdbp,
00263                     subdb, type, &meta_pgno, MU_OPEN, NULL, flags)) != 0)
00264                         goto err;
00265 
00266                 /*
00267                  * Clear the exclusive open and truncation flags, they only
00268                  * apply to the open of the master database.
00269                  */
00270                 LF_CLR(DB_EXCL | DB_TRUNCATE);
00271         }
00272 
00273         ret = CDB___db_dbopen(dbp, name, flags, mode, meta_pgno);
00274 
00275         /*
00276          * You can open the database that describes the subdatabases in the
00277          * rest of the file read-only.  The content of each key's data is
00278          * unspecified and applications should never be adding new records
00279          * or updating existing records.  However, during recovery, we need
00280          * to open these databases R/W so we can redo/undo changes in them.
00281          * Likewise, we need to open master databases read/write during
00282          * rename and remove so we can be sure they're fully sync'ed, so
00283          * we provide an override flag for the purpose.
00284          */
00285         if (subdb == NULL && !IS_RECOVERING(dbenv) && !LF_ISSET(DB_RDONLY) &&
00286             !LF_ISSET(DB_RDWRMASTER) && F_ISSET(dbp, DB_AM_SUBDB)) {
00287                 CDB___db_err(dbenv,
00288     "files containing multiple databases may only be opened read-only");
00289                 ret = EINVAL;
00290                 goto err;
00291         }
00292 
00293 err:    /*
00294          * End any transaction, committing if we were successful, aborting
00295          * otherwise.
00296          */
00297         if (TXN_ON(dbenv) && LF_ISSET(DB_CREATE))
00298                 if ((t_ret = __db_metaend(dbp,
00299                     &open_lock, ret == 0, NULL, NULL)) != 0 && ret == 0)
00300                         ret = t_ret;
00301 
00302         /* If we were successful, don't discard the file on close. */
00303         if (ret == 0)
00304                 F_CLR(dbp, DB_AM_DISCARD);
00305 
00306         /* If we were unsuccessful, destroy the DB handle. */
00307         if (ret != 0) {
00308                 /* In recovery we set log_fileid early. */
00309                 if (IS_RECOVERING(dbenv))
00310                         dbp->log_fileid = DB_LOGFILEID_INVALID;
00311                 __db_refresh(dbp);
00312         }
00313 
00314         if (mdbp != NULL) {
00315                 /* If we were successful, don't discard the file on close. */
00316                 if (ret == 0)
00317                         F_CLR(mdbp, DB_AM_DISCARD);
00318                 if ((t_ret = mdbp->close(mdbp, 0)) != 0 && ret == 0)
00319                         ret = t_ret;
00320         }
00321 
00322         return (ret);
00323 }
00324 
00325 /*
00326  * CDB___db_dbopen --
00327  *      Open a database.
00328  * PUBLIC: int CDB___db_dbopen __P((DB *, const char *, u_int32_t, int, db_pgno_t));
00329  */
00330 int
00331 CDB___db_dbopen(dbp, name, flags, mode, meta_pgno)
00332         DB *dbp;
00333         const char *name;
00334         u_int32_t flags;
00335         int mode;
00336         db_pgno_t meta_pgno;
00337 {
00338         DB_ENV *dbenv;
00339         int ret, retinfo;
00340 
00341         dbenv = dbp->dbenv;
00342 
00343         /* Set up the underlying file. */
00344         if ((ret = __db_file_setup(dbp,
00345             name, flags, mode, meta_pgno, &retinfo)) != 0)
00346                 return (ret);
00347 
00348         /*
00349          * If we created the file, set the truncate flag for the mpool.  This
00350          * isn't for anything we've done, it's protection against stupid user
00351          * tricks: if the user deleted a file behind Berkeley DB's back, we
00352          * may still have pages in the mpool that match the file's "unique" ID.
00353          */
00354         if (retinfo & DB_FILE_SETUP_CREATE)
00355                 flags |= DB_TRUNCATE;
00356 
00357         /* Set up the underlying environment. */
00358         if ((ret = CDB___db_dbenv_setup(dbp, name, flags)) != 0)
00359                 return (ret);
00360 
00361         /*
00362          * Do access method specific initialization.
00363          *
00364          * !!!
00365          * Set the open flag.  (The underlying access method open functions
00366          * may want to do things like acquire cursors, so the open flag has
00367          * to be set before calling them.)
00368          */
00369         F_SET(dbp, DB_OPEN_CALLED);
00370 
00371         if (retinfo & DB_FILE_SETUP_ZERO)
00372                 return (0);
00373 
00374         switch (dbp->type) {
00375         case DB_BTREE:
00376                 ret = CDB___bam_open(dbp, name, meta_pgno, flags);
00377                 break;
00378         case DB_HASH:
00379                 ret = CDB___ham_open(dbp, name, meta_pgno, flags);
00380                 break;
00381         case DB_RECNO:
00382                 ret = CDB___ram_open(dbp, name, meta_pgno, flags);
00383                 break;
00384         case DB_QUEUE:
00385                 ret = CDB___qam_open(dbp, name, meta_pgno, flags);
00386                 break;
00387         case DB_UNKNOWN:
00388                 return (CDB___db_unknown_type(dbp->dbenv,
00389                      "CDB___db_dbopen", dbp->type));
00390                 break;
00391         }
00392         return (ret);
00393 }
00394 
00395 /*
00396  * CDB___db_master_open --
00397  *      Open up a handle on a master database.
00398  *
00399  * PUBLIC: int CDB___db_master_open __P((DB *,
00400  * PUBLIC:     const char *, u_int32_t, int, DB **));
00401  */
00402 int
00403 CDB___db_master_open(subdbp, name, flags, mode, dbpp)
00404         DB *subdbp;
00405         const char *name;
00406         u_int32_t flags;
00407         int mode;
00408         DB **dbpp;
00409 {
00410         DB *dbp;
00411         int ret;
00412 
00413         /* Open up a handle on the CDB_main database. */
00414         if ((ret = CDB_db_create(&dbp, subdbp->dbenv, 0)) != 0)
00415                 return (ret);
00416 
00417         /*
00418          * It's always a btree.
00419          * Run in the transaction we've created.
00420          * Set the pagesize in case we're creating a new database.
00421          * Flag that we're creating a database with subdatabases.
00422          */
00423         dbp->type = DB_BTREE;
00424         dbp->open_txn = subdbp->open_txn;
00425         dbp->pgsize = subdbp->pgsize;
00426         F_SET(dbp, DB_AM_SUBDB);
00427         if (LF_ISSET(DB_COMPRESS)) F_SET(dbp, DB_AM_CMPR);
00428 
00429         if ((ret = CDB___db_dbopen(dbp, name, flags, mode, PGNO_BASE_MD)) != 0) {
00430                 if (!F_ISSET(dbp, DB_AM_DISCARD))
00431                         dbp->close(dbp, 0);
00432                 return (ret);
00433         }
00434 
00435         *dbpp = dbp;
00436         return (0);
00437 }
00438 
00439 /*
00440  * __db_master_update --
00441  *      Add/Remove a subdatabase from a master database.
00442  */
00443 static int
00444 __db_master_update(mdbp, subdb, type, meta_pgnop, action, newname, flags)
00445         DB *mdbp;
00446         const char *subdb;
00447         u_int32_t type;
00448         db_pgno_t *meta_pgnop;          /* may be NULL on MU_RENAME */
00449         mu_action action;
00450         const char *newname;
00451         u_int32_t flags;
00452 {
00453         DB_ENV *dbenv;
00454         DBC *dbc, *ndbc;
00455         DBT key, data, ndata;
00456         PAGE *p;
00457         db_pgno_t t_pgno;
00458         int ret, t_ret;
00459 
00460         dbenv = mdbp->dbenv;
00461         dbc = ndbc = NULL;
00462         p = NULL;
00463 
00464         memset(&key, 0, sizeof(key));
00465         memset(&data, 0, sizeof(data));
00466 
00467         /* Open up a cursor. */
00468         if ((ret = mdbp->cursor(mdbp, mdbp->open_txn, &dbc, 0)) != 0)
00469                 goto err;
00470 
00471         /*
00472          * Try to point the cursor at the record.
00473          *
00474          * If we're removing or potentially creating an entry, lock the page
00475          * with DB_RMW.
00476          *
00477          * !!!
00478          * We don't include the name's nul termination in the database.
00479          */
00480         key.data = (char *)subdb;
00481         key.size = strlen(subdb);
00482         /* In the rename case, we do multiple cursor ops, so MALLOC is safer. */
00483         F_SET(&data, DB_DBT_MALLOC);
00484         ret = dbc->c_get(dbc, &key, &data,
00485             DB_SET | (STD_LOCKING(dbc) &&
00486             (action == MU_RENAME || LF_ISSET(DB_CREATE)) ? DB_RMW : 0));
00487 
00488         /*
00489          * What we do next--whether or not we found a record for the
00490          * specified subdatabase--depends on what the specified action is.
00491          * Handle ret appropriately as the first statement of each case.
00492          */
00493         switch (action) {
00494         case MU_REMOVE:
00495                 /*
00496                  * We should have found something if we're removing it.  Note
00497                  * that in the common case where the DB we're asking to remove
00498                  * doesn't exist, we won't get this far;  __db_subdb_remove
00499                  * will already have returned an error from CDB___db_open.
00500                  */
00501                 if (ret != 0)
00502                         goto err;
00503 
00504                 /*
00505                  * Delete the subdatabase entry first;  if this fails,
00506                  * we don't want to touch the actual subdb pages.
00507                  */
00508                 if ((ret = dbc->c_del(dbc, 0)) != 0)
00509                         goto err;
00510 
00511                 /*
00512                  * We're handling actual data, not on-page meta-data,
00513                  * so it hasn't been converted to/from opposite
00514                  * endian architectures.  Do it explicitly, now.
00515                  */
00516                 memcpy(meta_pgnop, data.data, sizeof(db_pgno_t));
00517                 DB_NTOHL(meta_pgnop);
00518                 if ((ret = CDB_memp_fget(mdbp->mpf, meta_pgnop, 0, &p)) != 0)
00519                         goto err;
00520 
00521                 /* Free and put the page. */
00522                 if ((ret = CDB___db_free(dbc, p)) != 0)
00523                         goto err;
00524                 p = NULL;
00525                 break;
00526         case MU_RENAME:
00527                 /* We should have found something if we're renaming it. */
00528                 if (ret != 0)
00529                         goto err;
00530 
00531                 /*
00532                  * Before we rename, we need to make sure we're not
00533                  * overwriting another subdatabase, or else this operation
00534                  * won't be undoable.  Open a second cursor and check
00535                  * for the existence of newname;  it shouldn't appear under
00536                  * us since we hold the metadata lock.
00537                  */
00538                 if ((ret = mdbp->cursor(mdbp, mdbp->open_txn, &ndbc, 0)) != 0)
00539                         goto err;
00540                 DB_ASSERT(newname != NULL);
00541                 key.data = (void *) newname;
00542                 key.size = strlen(newname);
00543 
00544                 /*
00545                  * We don't actually care what the meta page of the potentially-
00546                  * overwritten DB is;  we just care about existence.
00547                  */
00548                 memset(&ndata, 0, sizeof(ndata));
00549                 F_SET(&ndata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
00550 
00551                 if ((ret = ndbc->c_get(ndbc, &key, &ndata, DB_SET)) == 0) {
00552                         /* A subdb called newname exists.  Bail. */
00553                         ret = EEXIST;
00554                         CDB___db_err(dbenv, "rename: database %s exists", newname);
00555                         goto err;
00556                 } else if (ret != DB_NOTFOUND)
00557                         goto err;
00558 
00559                 /*
00560                  * Now do the put first;  we don't want to lose our
00561                  * sole reference to the subdb.  Use the second cursor
00562                  * so that the first one continues to point to the old record.
00563                  */
00564                 if ((ret = ndbc->c_put(ndbc, &key, &data, DB_KEYFIRST)) != 0)
00565                         goto err;
00566                 if ((ret = dbc->c_del(dbc, 0)) != 0) {
00567                         /*
00568                          * If the delete fails, try to delete the record
00569                          * we just put, in case we're not txn-protected.
00570                          */
00571                         (void)ndbc->c_del(ndbc, 0);
00572                         goto err;
00573                 }
00574 
00575                 break;
00576         case MU_OPEN:
00577                 /*
00578                  * Get the subdatabase information.  If it already exists,
00579                  * copy out the page number and we're done.
00580                  */
00581                 switch (ret) {
00582                 case 0:
00583                         memcpy(meta_pgnop, data.data, sizeof(db_pgno_t));
00584                         DB_NTOHL(meta_pgnop);
00585                         goto done;
00586                 case DB_NOTFOUND:
00587                         if (LF_ISSET(DB_CREATE))
00588                                 break;
00589                         /*
00590                          * No db_err, it is reasonable to remove a
00591                          * nonexistent db.
00592                          */
00593                         ret = ENOENT;
00594                         goto err;
00595                 default:
00596                         goto err;
00597                 }
00598 
00599                 if ((ret = CDB___db_new(dbc,
00600                     type == DB_HASH ? P_HASHMETA : P_BTREEMETA, &p)) != 0)
00601                         goto err;
00602                 *meta_pgnop = PGNO(p);
00603 
00604                 /*
00605                  * XXX
00606                  * We're handling actual data, not on-page meta-data, so it
00607                  * hasn't been converted to/from opposite endian architectures.
00608                  * Do it explicitly, now.
00609                  */
00610                 t_pgno = PGNO(p);
00611                 DB_HTONL(&t_pgno);
00612                 memset(&ndata, 0, sizeof(ndata));
00613                 ndata.data = &t_pgno;
00614                 ndata.size = sizeof(db_pgno_t);
00615                 if ((ret = dbc->c_put(dbc, &key, &ndata, DB_KEYLAST)) != 0)
00616                         goto err;
00617                 break;
00618         }
00619 
00620 err:
00621 done:   /*
00622          * If we allocated a page: if we're successful, mark the page dirty
00623          * and return it to the cache, otherwise, discard/free it.
00624          */
00625         if (p != NULL) {
00626                 if (ret == 0) {
00627                         if ((t_ret =
00628                             CDB_memp_fput(mdbp->mpf, p, DB_MPOOL_DIRTY)) != 0)
00629                                 ret = t_ret;
00630                         /*
00631                          * Since we cannot close this file until after
00632                          * transaction commit, we need to sync the dirty
00633                          * pages, because we'll read these directly from
00634                          * disk to open.
00635                          */
00636                         if ((t_ret = mdbp->sync(mdbp, 0)) != 0 && ret == 0)
00637                                 ret = t_ret;
00638                 } else
00639                         (void)CDB___db_free(dbc, p);
00640         }
00641 
00642         /* Discard the cursor(s) and data. */
00643         if (data.data != NULL)
00644                 CDB___os_free(data.data, data.size);
00645         if (dbc != NULL && (t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
00646                 ret = t_ret;
00647         if (ndbc != NULL && (t_ret = ndbc->c_close(ndbc)) != 0 && ret == 0)
00648                 ret = t_ret;
00649 
00650         return (ret);
00651 }
00652 
00653 /*
00654  * CDB___db_dbenv_setup --
00655  *      Set up the underlying environment during a db_open.
00656  *
00657  * PUBLIC: int CDB___db_dbenv_setup __P((DB *, const char *, u_int32_t));
00658  */
00659 int
00660 CDB___db_dbenv_setup(dbp, name, flags)
00661         DB *dbp;
00662         const char *name;
00663         u_int32_t flags;
00664 {
00665         DB_ENV *dbenv;
00666         DBT pgcookie;
00667         DB_MPOOL_FINFO finfo;
00668         DB_PGINFO pginfo;
00669         int ret;
00670 
00671         dbenv = dbp->dbenv;
00672 
00673         /* If we don't yet have an environment, it's time to create it. */
00674         if (!F_ISSET(dbenv, DB_ENV_OPEN_CALLED)) {
00675                 /* Make sure we have at least DB_MINCACHE pages in our cache. */
00676                 if (dbenv->mp_gbytes == 0 &&
00677                     dbenv->mp_bytes < dbp->pgsize * DB_MINPAGECACHE &&
00678                     (ret = dbenv->set_cachesize(
00679                     dbenv, 0, dbp->pgsize * DB_MINPAGECACHE, 0)) != 0)
00680                         return (ret);
00681 
00682                 if ((ret = dbenv->open(dbenv, NULL, DB_CREATE |
00683                     DB_INIT_MPOOL | DB_PRIVATE | LF_ISSET(DB_THREAD), 0)) != 0)
00684                         return (ret);
00685         }
00686 
00687         /* Register DB's pgin/pgout functions. */
00688         if ((ret =
00689             CDB_memp_register(dbenv, DB_FTYPE_SET, CDB___db_pgin, CDB___db_pgout)) != 0)
00690                 return (ret);
00691 
00692         /*
00693          * Open a backing file in the memory pool.
00694          *
00695          * If we need to pre- or post-process a file's pages on I/O, set the
00696          * file type.  If it's a hash file, always call the pgin and pgout
00697          * routines.  This means that hash files can never be mapped into
00698          * process memory.  If it's a btree file and requires swapping, we
00699          * need to page the file in and out.  This has to be right -- we can't
00700          * mmap files that are being paged in and out.
00701          */
00702         memset(&finfo, 0, sizeof(finfo));
00703         switch (dbp->type) {
00704         case DB_BTREE:
00705         case DB_RECNO:
00706                 finfo.ftype =
00707                     F_ISSET(dbp, DB_AM_SWAP) ? DB_FTYPE_SET : DB_FTYPE_NOTSET;
00708                 finfo.clear_len = DB_PAGE_DB_LEN;
00709                 break;
00710         case DB_HASH:
00711                 finfo.ftype = DB_FTYPE_SET;
00712                 finfo.clear_len = DB_PAGE_DB_LEN;
00713                 break;
00714         case DB_QUEUE:
00715                 finfo.ftype =
00716                     F_ISSET(dbp, DB_AM_SWAP) ? DB_FTYPE_SET : DB_FTYPE_NOTSET;
00717                 finfo.clear_len = DB_PAGE_QUEUE_LEN;
00718                 break;
00719         case DB_UNKNOWN:
00720                 return (CDB___db_unknown_type(dbp->dbenv,
00721                      "CDB___db_dbenv_setup", dbp->type));
00722         }
00723         finfo.pgcookie = &pgcookie;
00724         finfo.fileid = dbp->fileid;
00725         finfo.lsn_offset = 0;
00726 
00727         pginfo.db_pagesize = dbp->pgsize;
00728         pginfo.needswap = F_ISSET(dbp, DB_AM_SWAP);
00729         pgcookie.data = &pginfo;
00730         pgcookie.size = sizeof(DB_PGINFO);
00731 
00732         if ((ret = CDB_memp_fopen(dbenv, name,
00733             LF_ISSET(DB_RDONLY | DB_NOMMAP | DB_ODDFILESIZE | DB_TRUNCATE | DB_COMPRESS),
00734             0, dbp->pgsize, &finfo, &dbp->mpf)) != 0)
00735                 return (ret);
00736 
00737         /*
00738          * We may need a per-thread mutex.  Allocate it from the environment
00739          * region, there's supposed to be extra space there for that purpose.
00740          */
00741         if (LF_ISSET(DB_THREAD)) {
00742                 if ((ret = CDB___db_mutex_alloc(
00743                     dbenv, dbenv->reginfo, (MUTEX **)&dbp->mutexp)) != 0)
00744                         return (ret);
00745                 if ((ret = __db_mutex_init(
00746                     dbenv, dbp->mutexp, 0, MUTEX_THREAD)) != 0)
00747                         return (ret);
00748         }
00749 
00750         /* Get a log file id. */
00751         if (LOGGING_ON(dbenv) && !IS_RECOVERING(dbenv) &&
00752 #if !defined(DEBUG_ROP)
00753             !F_ISSET(dbp, DB_AM_RDONLY) &&
00754 #endif
00755             (ret = CDB_log_register(dbenv, dbp, name)) != 0)
00756                 return (ret);
00757 
00758         return (0);
00759 }
00760 
00761 /*
00762  * __db_file_setup --
00763  *      Setup the file or in-memory data.
00764  *      Read the database metadata and resolve it with our arguments.
00765  */
00766 static int
00767 __db_file_setup(dbp, name, flags, mode, meta_pgno, retflags)
00768         DB *dbp;
00769         const char *name;
00770         u_int32_t flags;
00771         int mode;
00772         db_pgno_t meta_pgno;
00773         int *retflags;
00774 {
00775         DB *mdb;
00776         DBT namedbt;
00777         DB_ENV *dbenv;
00778         DB_FH *fhp, fh;
00779         DB_LSN lsn;
00780         DB_TXN *txn;
00781         size_t nr;
00782         u_int32_t magic, oflags;
00783         int ret, retry_cnt, t_ret;
00784         char *real_name, mbuf[DBMETASIZE];
00785         size_t disk_pagesize;
00786 
00787 #define IS_SUBDB_SETUP  (meta_pgno != PGNO_BASE_MD)
00788 
00789         dbenv = dbp->dbenv;
00790         dbp->meta_pgno = meta_pgno;
00791         txn = NULL;
00792         *retflags = 0;
00793 
00794         /*
00795          * If we open a file handle and our caller is doing fcntl(2) locking,
00796          * we can't close it because that would discard the caller's lock.
00797          * Save it until we close the DB handle.
00798          */
00799         if (LF_ISSET(DB_FCNTL_LOCKING)) {
00800                 if ((ret = CDB___os_malloc(dbenv, sizeof(*fhp), NULL, &fhp)) != 0)
00801                         return (ret);
00802         } else
00803                 fhp = &fh;
00804         memset(fhp, 0, sizeof(*fhp));
00805 
00806         /*
00807          * If the file is in-memory, set up is simple.  Otherwise, do the
00808          * hard work of opening and reading the file.
00809          *
00810          * If we have a file name, try and read the first page, figure out
00811          * what type of file it is, and initialize everything we can based
00812          * on that file's meta-data page.
00813          *
00814          * !!!
00815          * There's a reason we don't push this code down into the buffer cache.
00816          * The problem is that there's no information external to the file that
00817          * we can use as a unique ID.  UNIX has dev/inode pairs, but they are
00818          * not necessarily unique after reboot, if the file was mounted via NFS.
00819          * Windows has similar problems, as the FAT filesystem doesn't maintain
00820          * dev/inode numbers across reboot.  So, we must get something from the
00821          * file we can use to ensure that, even after a reboot, the file we're
00822          * joining in the cache is the right file for us to join.  The solution
00823          * we use is to maintain a file ID that's stored in the database, and
00824          * that's why we have to open and read the file before calling into the
00825          * buffer cache.
00826          *
00827          * The secondary reason is that there's additional information that
00828          * we want to have before instantiating a file in the buffer cache:
00829          * the page size, file type (btree/hash), if swapping is required,
00830          * and flags (DB_RDONLY, DB_CREATE, DB_TRUNCATE).  We could handle
00831          * needing this information by allowing it to be set for a file in
00832          * the buffer cache even after the file has been opened, and, of
00833          * course, supporting the ability to flush a file from the cache as
00834          * necessary, e.g., if we guessed wrongly about the page size.  Given
00835          * that we have to read the file anyway to get the file ID, we might
00836          * as well get the rest, too.
00837          *
00838          * Get the real file name.
00839          */
00840         if (name == NULL) {
00841                 F_SET(dbp, DB_AM_INMEM);
00842 
00843                 if (dbp->type == DB_UNKNOWN) {
00844                         CDB___db_err(dbenv,
00845                             "DBTYPE of unknown without existing file");
00846                         return (EINVAL);
00847                 }
00848                 real_name = NULL;
00849 
00850                 /* Set the page size if we don't have one yet. */
00851                 if (dbp->pgsize == 0)
00852                         dbp->pgsize = DB_DEF_IOSIZE;
00853 
00854                 /*
00855                  * If the file is a temporary file and we're doing locking,
00856                  * then we have to create a unique file ID.  We can't use our
00857                  * normal dev/inode pair (or whatever this OS uses in place of
00858                  * dev/inode pairs) because no backing file will be created
00859                  * until the mpool cache is filled forcing the buffers to disk.
00860                  * Grab a random locker ID to use as a file ID.  The created
00861                  * ID must never match a potential real file ID -- we know it
00862                  * won't because real file IDs contain a time stamp after the
00863                  * dev/inode pair, and we're simply storing a 4-byte value.
00864                  *
00865                  * !!!
00866                  * Store the locker in the file id structure -- we can get it
00867                  * from there as necessary, and it saves having two copies.
00868                  */
00869                 if (LOCKING_ON(dbenv) &&
00870                     (ret = CDB_lock_id(dbenv, (u_int32_t *)dbp->fileid)) != 0)
00871                         return (ret);
00872 
00873                 return (0);
00874         }
00875 
00876         /* Get the real backing file name. */
00877         if ((ret = CDB___db_appname(dbenv,
00878             DB_APP_DATA, NULL, name, 0, NULL, &real_name)) != 0)
00879                 return (ret);
00880 
00881         /*
00882          * Open the backing file.  We need to make sure that multiple processes
00883          * attempting to create the file at the same time are properly ordered
00884          * so that only one of them creates the "unique" file ID, so we open it
00885          * O_EXCL and O_CREAT so two simultaneous attempts to create the region
00886          * will return failure in one of the attempts.  If we're the one that
00887          * fails, simply retry without the O_CREAT flag, which will require the
00888          * meta-data page exist.
00889          */
00890 
00891         /* Fill in the default file mode. */
00892         if (mode == 0)
00893                 mode = CDB___db_omode("rwrw--");
00894 
00895         oflags = 0;
00896         if (LF_ISSET(DB_RDONLY))
00897                 oflags |= DB_OSO_RDONLY;
00898         if (LF_ISSET(DB_TRUNCATE))
00899                 oflags |= DB_OSO_TRUNC;
00900 
00901         retry_cnt = 0;
00902 open_retry:
00903         *retflags = 0;
00904         ret = 0;
00905         if (!IS_SUBDB_SETUP && LF_ISSET(DB_CREATE)) {
00906                 if (dbp->open_txn != NULL) {
00907                         /*
00908                          * Start a child transaction to wrap this individual
00909                          * create.
00910                          */
00911                         if ((ret =
00912                             CDB_txn_begin(dbenv, dbp->open_txn, &txn, 0)) != 0)
00913                                 goto err_msg;
00914 
00915                         memset(&namedbt, 0, sizeof(namedbt));
00916                         namedbt.data = (char *)name;
00917                         namedbt.size = strlen(name) + 1;
00918                         if ((ret = CDB___crdel_fileopen_log(dbenv, txn,
00919                             &lsn, DB_FLUSH, &namedbt, mode)) != 0)
00920                                 goto err_msg;
00921                 }
00922                 DB_TEST_RECOVERY(dbp, DB_TEST_PREOPEN, ret, name);
00923                 if ((ret = CDB___os_open(dbenv, real_name,
00924                     oflags | DB_OSO_CREATE | DB_OSO_EXCL, mode, fhp)) == 0) {
00925                         DB_TEST_RECOVERY(dbp, DB_TEST_POSTOPEN, ret, name);
00926 
00927                         /* Commit the file create. */
00928                         if (dbp->open_txn != NULL) {
00929                                 if ((ret = CDB_txn_commit(txn, DB_TXN_SYNC)) != 0)
00930                                         goto err_msg;
00931                                 txn = NULL;
00932                         }
00933 
00934                         /*
00935                          * We created the file.  This means that if we later
00936                          * fail, we need to delete the file and if we're going
00937                          * to do that, we need to trash any pages in the
00938                          * memory pool.  Since we only know here that we
00939                          * created the file, we're going to set the flag here
00940                          * and clear it later if we commit successfully.
00941                          */
00942                         F_SET(dbp, DB_AM_DISCARD);
00943                         *retflags |= DB_FILE_SETUP_CREATE;
00944                 } else {
00945                         /*
00946                          * Abort the file create.  If the abort fails, report
00947                          * the error returned by CDB_txn_abort(), rather than the
00948                          * open error, for no particular reason.
00949                          */
00950                         if (dbp->open_txn != NULL) {
00951                                 if ((t_ret = CDB_txn_abort(txn)) != 0) {
00952                                         ret = t_ret;
00953                                         goto err_msg;
00954                                 }
00955                                 txn = NULL;
00956                         }
00957 
00958                         /*
00959                          * If we were not doing an exclusive open, try again
00960                          * without the create flag.
00961                          */
00962                         if (ret == EEXIST && !LF_ISSET(DB_EXCL)) {
00963                                 LF_CLR(DB_CREATE);
00964                                 DB_TEST_RECOVERY(dbp,
00965                                     DB_TEST_POSTOPEN, ret, name);
00966                                 goto open_retry;
00967                         }
00968                 }
00969         } else
00970                 ret = CDB___os_open(dbenv, real_name, oflags, mode, fhp);
00971 
00972         /*
00973          * Be quiet if we couldn't open the file because it didn't exist
00974          * or we did not have permission,
00975          * the customers don't like those messages appearing in the logs.
00976          * Otherwise, complain loudly.
00977          */
00978         if (ret != 0) {
00979                 if (ret == EACCES || ret == ENOENT)
00980                         goto err;
00981                 goto err_msg;
00982         }
00983 
00984         /* Set the page size if we don't have one yet. */
00985         if (dbp->pgsize == 0) {
00986                 if (IS_SUBDB_SETUP) {
00987                         if ((ret = CDB___db_master_open(dbp,
00988                             name, flags, mode, &mdb)) != 0)
00989                                 goto err;
00990                         dbp->pgsize = mdb->pgsize;
00991                         (void) mdb->close(mdb, 0);
00992                 } else if ((ret = __db_set_pgsize(dbp, fhp, real_name)) != 0)
00993                         goto err;
00994         }
00995 
00996         disk_pagesize = F_ISSET(dbp, DB_AM_CMPR) ? DB_CMPR_DIVIDE(dbenv, dbp->pgsize) : dbp->pgsize;
00997         if(meta_pgno == PGNO_BASE_MD || !F_ISSET(dbp, DB_AM_CMPR)) {
00998 
00999           /*
01000            * Seek to the metadata offset; if it's a master database open or a
01001            * database without subdatabases, we're seeking to 0, but that's OK.
01002            */
01003           if ((ret = CDB___os_seek(dbenv, fhp,
01004                                    disk_pagesize, meta_pgno, 0, 0, DB_OS_SEEK_SET)) != 0)
01005             goto err_msg;
01006 
01007           /*
01008            * Read the metadata page.  We read DBMETASIZE bytes, which is larger
01009            * than any access method's metadata page and smaller than any disk
01010            * sector.
01011            */
01012           if ((ret = CDB___os_read(dbenv, fhp, mbuf, sizeof(mbuf), &nr)) != 0)
01013             goto err_msg;
01014         } else {
01015           if ((ret = CDB___os_seek(dbenv, fhp, disk_pagesize, 
01016                                    meta_pgno, 0, 0, DB_OS_SEEK_SET)) != 0)
01017             goto err_msg;
01018           if ((ret = CDB___memp_cmpr_read_meta(dbenv, fhp, mbuf, sizeof(mbuf), &nr)) != 0)
01019             goto err_msg;
01020         }
01021 
01022         if (nr == sizeof(mbuf)) {
01023                 /*
01024                  * Figure out what access method we're dealing with, and then
01025                  * call access method specific code to check error conditions
01026                  * based on conflicts between the found file and application
01027                  * arguments.  A found file overrides some user information --
01028                  * we don't consider it an error, for example, if the user set
01029                  * an expected byte order and the found file doesn't match it.
01030                  */
01031                 F_CLR(dbp, DB_AM_SWAP);
01032                 magic = ((DBMETA *)mbuf)->magic;
01033 
01034 swap_retry:     switch (magic) {
01035                 case DB_BTREEMAGIC:
01036                         if ((ret =
01037                             CDB___bam_metachk(dbp, name, (BTMETA *)mbuf)) != 0)
01038                                 goto err;
01039                         break;
01040                 case DB_HASHMAGIC:
01041                         if ((ret =
01042                             CDB___ham_metachk(dbp, name, (HMETA *)mbuf)) != 0)
01043                                 goto err;
01044                         break;
01045                 case DB_QAMMAGIC:
01046                         if ((ret =
01047                             CDB___qam_metachk(dbp, name, (QMETA *)mbuf)) != 0)
01048                                 goto err;
01049                         break;
01050                 case 0:
01051                         /*
01052                          * There are two ways we can get a 0 magic number.
01053                          * If we're creating a subdatabase, then the magic
01054                          * number will be 0.  We allocate a page as part of
01055                          * finding out what the base page number will be for
01056                          * the new subdatabase, but it's not initialized in
01057                          * any way.
01058                          *
01059                          * The second case happens if we are in recovery
01060                          * and we are going to recreate a database, it's
01061                          * possible that it's page was created (on systems
01062                          * where pages must be created explicitly to avoid
01063                          * holes in files) but is still 0.
01064                          */
01065                         if (IS_SUBDB_SETUP) {           /* Case 1 */
01066                                 if ((IS_RECOVERING(dbenv)
01067                                     && F_ISSET((DB_LOG *)
01068                                     dbenv->lg_handle, DBLOG_FORCE_OPEN))
01069                                     || ((DBMETA *)mbuf)->pgno != PGNO_INVALID)
01070                                         goto empty;
01071 
01072                                 ret = EINVAL;
01073                                 goto err;
01074                         }
01075                                                         /* Case 2 */
01076                         if (IS_RECOVERING(dbenv)) {
01077                                 *retflags |= DB_FILE_SETUP_ZERO;
01078                                 goto empty;
01079                         }
01080                         goto bad_format;
01081                 default:
01082                         if (F_ISSET(dbp, DB_AM_SWAP))
01083                                 goto bad_format;
01084 
01085                         M_32_SWAP(magic);
01086                         F_SET(dbp, DB_AM_SWAP);
01087                         goto swap_retry;
01088                 }
01089         } else {
01090                 /*
01091                  * Only newly created files are permitted to fail magic
01092                  * number tests.
01093                  */
01094                 if (nr != 0 || (!IS_RECOVERING(dbenv) && IS_SUBDB_SETUP))
01095                         goto bad_format;
01096 
01097                 /* Let the caller know that we had a 0-length file. */
01098                 if (!LF_ISSET(DB_CREATE | DB_TRUNCATE))
01099                         *retflags = DB_FILE_SETUP_ZERO;
01100 
01101                 /*
01102                  * The only way we can reach here with the DB_CREATE flag set
01103                  * is if we created the file.  If that's not the case, then
01104                  * either (a) someone else created the file but has not yet
01105                  * written out the metadata page, or (b) we truncated the file
01106                  * (DB_TRUNCATE) leaving it zero-length.  In the case of (a),
01107                  * we want to sleep and give the file creator time to write
01108                  * the metadata page.  In the case of (b), we want to continue.
01109                  *
01110                  * !!!
01111                  * There's a race in the case of two processes opening the file
01112                  * with the DB_TRUNCATE flag set at roughly the same time, and
01113                  * they could theoretically hurt each other.  Sure hope that's
01114                  * unlikely.
01115                  */
01116                 if (!LF_ISSET(DB_CREATE | DB_TRUNCATE) &&
01117                     !IS_RECOVERING(dbenv)) {
01118                         if (retry_cnt++ < 3) {
01119                                 CDB___os_sleep(dbenv, 1, 0);
01120                                 goto open_retry;
01121                         }
01122 bad_format:             CDB___db_err(dbenv,
01123                             "%s: unexpected file type or format", name);
01124                         ret = EINVAL;
01125                         goto err;
01126                 }
01127 
01128                 DB_ASSERT (dbp->type != DB_UNKNOWN);
01129 
01130 empty:          /*
01131                  * The file is empty, and that's OK.  If it's not a subdatabase,
01132                  * though, we do need to generate a unique file ID for it.  The
01133                  * unique file ID includes a timestamp so that we can't collide
01134                  * with any other files, even when the file IDs (dev/inode pair)
01135                  * are reused.
01136                  */
01137                 if (!IS_SUBDB_SETUP) {
01138                         if (*retflags & DB_FILE_SETUP_ZERO)
01139                                 memset(dbp->fileid, 0, DB_FILE_ID_LEN);
01140                         else {
01141                           if ((ret = CDB___os_fileid(dbenv,
01142                             real_name, 1, dbp->fileid)) != 0)
01143                                 goto err_msg;
01144                           CDB___memp_cmpr_create(dbenv, fhp, disk_pagesize, F_ISSET(dbp, DB_AM_CMPR) ? MP_CMPR : 0);
01145                         }
01146                 }
01147         }
01148 
01149         if (0) {
01150 err_msg:        CDB___db_err(dbenv, "%s: %s", name, CDB_db_strerror(ret));
01151         }
01152 
01153         /*
01154          * Abort any running transaction -- it can only exist if something
01155          * went wrong.
01156          */
01157 err:    if (txn != NULL)
01158                 (void)CDB_txn_abort(txn);
01159 
01160 DB_TEST_RECOVERY_LABEL
01161         /*
01162          * If we opened a file handle and our caller is doing fcntl(2) locking,
01163          * then we can't close it because that would discard the caller's lock.
01164          * Otherwise, close the handle.
01165          */
01166         if (F_ISSET(fhp, DB_FH_VALID)) {
01167                 if (ret == 0 && LF_ISSET(DB_FCNTL_LOCKING))
01168                         dbp->saved_open_fhp = fhp;
01169                 else
01170                         if ((t_ret = CDB___os_closehandle(fhp)) != 0 && ret == 0)
01171                                 ret = t_ret;
01172         }
01173 
01174         if (real_name != NULL)
01175                 CDB___os_freestr(real_name);
01176 
01177         return (ret);
01178 }
01179 
01180 /*
01181  * __db_set_pgsize --
01182  *      Set the page size based on file information.
01183  */
01184 static int
01185 __db_set_pgsize(dbp, fhp, name)
01186         DB *dbp;
01187         DB_FH *fhp;
01188         char *name;
01189 {
01190         DB_ENV *dbenv;
01191         u_int32_t iopsize;
01192         int ret;
01193 
01194         dbenv = dbp->dbenv;
01195 
01196         /*
01197          * Use the filesystem's optimum I/O size as the pagesize if a pagesize
01198          * not specified.  Some filesystems have 64K as their optimum I/O size,
01199          * but as that results in fairly large default caches, we limit the
01200          * default pagesize to 16K.
01201          */
01202         if ((ret = CDB___os_ioinfo(dbenv, name, fhp, NULL, NULL, &iopsize)) != 0) {
01203                 CDB___db_err(dbenv, "%s: %s", name, CDB_db_strerror(ret));
01204                 return (ret);
01205         }
01206         if (iopsize < 512)
01207                 iopsize = 512;
01208         if (iopsize > 16 * 1024)
01209                 iopsize = 16 * 1024;
01210 
01211         /*
01212          * If compression is on, the minimum page size must be multiplied
01213          * by the compression factor.
01214          */
01215         if(F_ISSET(dbp, DB_AM_CMPR)) {
01216           if(iopsize < DB_CMPR_MULTIPLY(dbenv, DB_MIN_PGSIZE))
01217             iopsize = DB_CMPR_MULTIPLY(dbenv, DB_MIN_PGSIZE);
01218         }
01219 
01220         /*
01221          * Sheer paranoia, but we don't want anything that's not a power-of-2
01222          * (we rely on that for alignment of various types on the pages), and
01223          * we want a multiple of the sector size as well.
01224          */
01225         OS_ROUNDOFF(iopsize, 512);
01226 
01227         dbp->pgsize = iopsize;
01228         F_SET(dbp, DB_AM_PGDEF);
01229 
01230         return (0);
01231 }
01232 
01233 /*
01234  * CDB___db_close --
01235  *      DB destructor.
01236  *
01237  * PUBLIC: int CDB___db_close __P((DB *, u_int32_t));
01238  */
01239 int
01240 CDB___db_close(dbp, flags)
01241         DB *dbp;
01242         u_int32_t flags;
01243 {
01244         DB_ENV *dbenv;
01245         DBC *dbc;
01246         int ret, t_ret;
01247 
01248         ret = 0;
01249 
01250         PANIC_CHECK(dbp->dbenv);
01251 
01252         /* Validate arguments. */
01253         if ((ret = CDB___db_closechk(dbp, flags)) != 0)
01254                 return (ret);
01255 
01256         /* If never opened, or not currently open, it's easy. */
01257         if (!F_ISSET(dbp, DB_OPEN_CALLED))
01258                 goto never_opened;
01259 
01260         /* Sync the underlying access method. */
01261         if (!LF_ISSET(DB_NOSYNC) && !F_ISSET(dbp, DB_AM_DISCARD) &&
01262             (t_ret = dbp->sync(dbp, 0)) != 0 && ret == 0)
01263                 ret = t_ret;
01264 
01265         /*
01266          * Go through the active cursors and call the cursor recycle routine,
01267          * which resolves pending operations and moves the cursors onto the
01268          * free list.  Then, walk the free list and call the cursor destroy
01269          * routine.
01270          */
01271         while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
01272                 if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
01273                         ret = t_ret;
01274         while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
01275                 if ((t_ret = CDB___db_c_destroy(dbc)) != 0 && ret == 0)
01276                         ret = t_ret;
01277 
01278         /*
01279          * Close any outstanding join cursors.  Join cursors destroy
01280          * themselves on close and have no separate destroy routine.
01281          */
01282         while ((dbc = TAILQ_FIRST(&dbp->join_queue)) != NULL)
01283                 if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
01284                         ret = t_ret;
01285 
01286         /* Sync the memory pool. */
01287         if (!LF_ISSET(DB_NOSYNC) && !F_ISSET(dbp, DB_AM_DISCARD) &&
01288             (t_ret = CDB_memp_fsync(dbp->mpf)) != 0 &&
01289             t_ret != DB_INCOMPLETE && ret == 0)
01290                 ret = t_ret;
01291 
01292         /* Close any handle we've been holding since the open.  */
01293         if (dbp->saved_open_fhp != NULL &&
01294             F_ISSET(dbp->saved_open_fhp, DB_FH_VALID) &&
01295             (t_ret = CDB___os_closehandle(dbp->saved_open_fhp)) != 0 && ret == 0)
01296                 ret = t_ret;
01297 
01298 never_opened:
01299         /*
01300          * Call the access specific close function.
01301          *
01302          * !!!
01303          * Because of where the function is called in the close process,
01304          * these routines can't do anything that would dirty pages or
01305          * otherwise affect closing down the database.
01306          */
01307         if ((t_ret = CDB___ham_db_close(dbp)) != 0 && ret == 0)
01308                 ret = t_ret;
01309         if ((t_ret = CDB___bam_db_close(dbp)) != 0 && ret == 0)
01310                 ret = t_ret;
01311         if ((t_ret = CDB___qam_db_close(dbp)) != 0 && ret == 0)
01312                 ret = t_ret;
01313 
01314         /* Refresh the structure and close any local environment. */
01315         dbenv = dbp->dbenv;
01316         if ((t_ret = __db_refresh(dbp)) != 0 && ret == 0)
01317                 ret = t_ret;
01318         if (F_ISSET(dbenv, DB_ENV_DBLOCAL) &&
01319             --dbenv->dblocal_ref == 0 &&
01320             (t_ret = dbenv->close(dbenv, 0)) != 0 && ret == 0)
01321                 ret = t_ret;
01322 
01323         memset(dbp, CLEAR_BYTE, sizeof(*dbp));
01324         CDB___os_free(dbp, sizeof(*dbp));
01325 
01326         return (ret);
01327 }
01328 
01329 /*
01330  * __db_refresh --
01331  *      Refresh the DB structure, releasing any allocated resources.
01332  */
01333 static int
01334 __db_refresh(dbp)
01335         DB *dbp;
01336 {
01337         DB_ENV *dbenv;
01338         DBC *dbc;
01339         int ret, t_ret;
01340 
01341         ret = 0;
01342 
01343         dbenv = dbp->dbenv;
01344 
01345         /*
01346          * Go through the active cursors and call the cursor recycle routine,
01347          * which resolves pending operations and moves the cursors onto the
01348          * free list.  Then, walk the free list and call the cursor destroy
01349          * routine.
01350          */
01351         while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
01352                 if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
01353                         ret = t_ret;
01354         while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
01355                 if ((t_ret = CDB___db_c_destroy(dbc)) != 0 && ret == 0)
01356                         ret = t_ret;
01357 
01358         dbp->type = 0;
01359 
01360         /* Close the memory pool file handle. */
01361         if (dbp->mpf != NULL) {
01362                 if (F_ISSET(dbp, DB_AM_DISCARD))
01363                         (void)CDB___memp_fremove(dbp->mpf);
01364                 if ((t_ret = CDB_memp_fclose(dbp->mpf)) != 0 && ret == 0)
01365                         ret = t_ret;
01366                 dbp->mpf = NULL;
01367         }
01368 
01369         /* Discard the thread mutex. */
01370         if (dbp->mutexp != NULL) {
01371                 CDB___db_mutex_free(dbenv, dbenv->reginfo, dbp->mutexp);
01372                 dbp->mutexp = NULL;
01373         }
01374 
01375         /* Discard the log file id. */
01376         if (!IS_RECOVERING(dbenv)
01377             && dbp->log_fileid != DB_LOGFILEID_INVALID)
01378                 (void)CDB_log_unregister(dbenv, dbp);
01379 
01380         TAILQ_INIT(&dbp->free_queue);
01381         TAILQ_INIT(&dbp->active_queue);
01382         TAILQ_INIT(&dbp->join_queue);
01383 
01384         F_CLR(dbp, DB_AM_DISCARD);
01385         F_CLR(dbp, DB_AM_INMEM);
01386         F_CLR(dbp, DB_AM_RDONLY);
01387         F_CLR(dbp, DB_AM_SWAP);
01388         F_CLR(dbp, DB_DBM_ERROR);
01389         F_CLR(dbp, DB_OPEN_CALLED);
01390 
01391         return (ret);
01392 }
01393 
01394 /*
01395  * CDB___db_remove
01396  *      Remove method for DB.
01397  *
01398  * PUBLIC: int CDB___db_remove __P((DB *, const char *, const char *, u_int32_t));
01399  */
01400 int
01401 CDB___db_remove(dbp, name, subdb, flags)
01402         DB *dbp;
01403         const char *name, *subdb;
01404         u_int32_t flags;
01405 {
01406         DBT namedbt;
01407         DB_ENV *dbenv;
01408         DB_LOCK remove_lock;
01409         DB_LSN newlsn;
01410         int ret, t_ret;
01411         char *backup, *real_back, *real_name;
01412 
01413         dbenv = dbp->dbenv;
01414         ret = 0;
01415         backup = real_back = real_name = NULL;
01416 
01417         PANIC_CHECK(dbenv);
01418         DB_ILLEGAL_AFTER_OPEN(dbp, "remove");
01419 
01420         /* Validate arguments. */
01421         if ((ret = CDB___db_removechk(dbp, flags)) != 0)
01422                 return (ret);
01423 
01424         /*
01425          * Subdatabases.
01426          */
01427         if (subdb != NULL) {
01428                 /* Subdatabases must be created in named files. */
01429                 if (name == NULL) {
01430                         CDB___db_err(dbenv,
01431                     "multiple databases cannot be created in temporary files");
01432                         return (EINVAL);
01433                 }
01434                 return (__db_subdb_remove(dbp, name, subdb));
01435         }
01436 
01437         if ((ret = dbp->open(dbp,
01438             name, NULL, DB_UNKNOWN, DB_RDWRMASTER, 0)) != 0)
01439                 return (ret);
01440 
01441         if (LOGGING_ON(dbenv) && (ret = CDB___log_file_lock(dbp)) != 0)
01442                 goto err_close;
01443 
01444         if ((ret = dbp->sync(dbp, 0)) != 0)
01445                 goto err_close;
01446 
01447         /*
01448          * On Windows, the underlying file must be closed to perform a remove.
01449          * Nothing later in CDB___db_remove requires that it be open, and the
01450          * dbp->close closes it anyway, so we just close it early.
01451          */
01452         (void)CDB___memp_fremove(dbp->mpf);
01453         if ((ret = CDB_memp_fclose(dbp->mpf)) != 0)
01454                 goto err_close;
01455         dbp->mpf = NULL;
01456 
01457         /* Start the transaction and log the delete. */
01458         if (TXN_ON(dbenv) && (ret = __db_metabegin(dbp, &remove_lock)) != 0)
01459                 goto err_close;
01460 
01461         if (LOGGING_ON(dbenv)) {
01462                 memset(&namedbt, 0, sizeof(namedbt));
01463                 namedbt.data = (char *)name;
01464                 namedbt.size = strlen(name) + 1;
01465 
01466                 if ((ret = CDB___crdel_delete_log(dbenv,
01467                     dbp->open_txn, &newlsn, DB_FLUSH,
01468                     dbp->log_fileid, &namedbt)) != 0) {
01469                         CDB___db_err(dbenv,
01470                             "%s: %s", name, CDB_db_strerror(ret));
01471                         goto err;
01472                 }
01473         }
01474 
01475         /* Find the real name of the file. */
01476         if ((ret = CDB___db_appname(dbenv,
01477             DB_APP_DATA, NULL, name, 0, NULL, &real_name)) != 0)
01478                 goto err;
01479 
01480         /*
01481          * XXX
01482          * We don't bother to open the file and call CDB___memp_fremove on the mpf.
01483          * There is a potential race here.  It is at least possible that, if
01484          * the unique filesystem ID (dev/inode pair on UNIX) is reallocated
01485          * within a second (the granularity of the fileID timestamp), a new
01486          * file open will get the same fileID as the file being "removed".
01487          * We may actually want to open the file and call CDB___memp_fremove on
01488          * the mpf to get around this.
01489          */
01490 
01491         /* Create name for backup file. */
01492         if (TXN_ON(dbenv)) {
01493                 if ((ret =
01494                     CDB___db_backup_name(dbenv, name, &backup, &newlsn)) != 0)
01495                         goto err;
01496                 if ((ret = CDB___db_appname(dbenv,
01497                     DB_APP_DATA, NULL, backup, 0, NULL, &real_back)) != 0)
01498                         goto err;
01499         }
01500 
01501         DB_TEST_RECOVERY(dbp, DB_TEST_PRERENAME, ret, name);
01502         if (TXN_ON(dbenv))
01503                 ret = CDB___os_rename(dbenv, real_name, real_back);
01504         else
01505                 ret = CDB___os_unlink(dbenv, real_name);
01506         DB_TEST_RECOVERY(dbp, DB_TEST_POSTRENAME, ret, name);
01507 
01508 err:
01509 DB_TEST_RECOVERY_LABEL
01510         /*
01511          * End the transaction, committing the transaction if we were
01512          * successful, aborting otherwise.
01513          */
01514         if (dbp->open_txn != NULL && (t_ret = __db_metaend(dbp, &remove_lock,
01515            ret == 0, __db_remove_callback, real_back)) != 0 && ret == 0)
01516                 ret = t_ret;
01517 
01518         /* FALLTHROUGH */
01519 
01520 err_close:
01521         if (real_name != NULL)
01522                 CDB___os_freestr(real_name);
01523         if (backup != NULL)
01524                 CDB___os_freestr(backup);
01525 
01526         /* We no longer have an mpool, so syncing would be disastrous. */
01527         if ((t_ret = dbp->close(dbp, DB_NOSYNC)) != 0 && ret == 0)
01528                 ret = t_ret;
01529 
01530         return (ret);
01531 }
01532 
01533 /*
01534  * __db_subdb_remove --
01535  *      Remove a subdatabase.
01536  */
01537 static int
01538 __db_subdb_remove(dbp, name, subdb)
01539         DB *dbp;
01540         const char *name, *subdb;
01541 {
01542         DB *mdbp;
01543         DBC *dbc;
01544         DB_ENV *dbenv;
01545         DB_LOCK remove_lock;
01546         db_pgno_t meta_pgno;
01547         int ret, t_ret;
01548 
01549         mdbp = NULL;
01550         dbc = NULL;
01551         dbenv = dbp->dbenv;
01552 
01553         /* Start the transaction. */
01554         if (TXN_ON(dbenv) && (ret = __db_metabegin(dbp, &remove_lock)) != 0)
01555                 return (ret);
01556 
01557         /*
01558          * Open the subdatabase.  We can use the user's DB handle for this
01559          * purpose, I think.
01560          */
01561         if ((ret = CDB___db_open(dbp, name, subdb, DB_UNKNOWN, ((dbenv->flags & DB_ENV_CMPR) ? DB_COMPRESS : 0), 0)) != 0)
01562                 goto err;
01563 
01564         /* Free up the pages in the subdatabase. */
01565         switch (dbp->type) {
01566                 case DB_BTREE:
01567                 case DB_RECNO:
01568                         if ((ret = CDB___bam_reclaim(dbp, dbp->open_txn)) != 0)
01569                                 goto err;
01570                         break;
01571                 case DB_HASH:
01572                         if ((ret = CDB___ham_reclaim(dbp, dbp->open_txn)) != 0)
01573                                 goto err;
01574                         break;
01575                 default:
01576                         ret = CDB___db_unknown_type(dbp->dbenv,
01577                              "__db_subdb_remove", dbp->type);
01578                         goto err;
01579         }
01580 
01581         /*
01582          * Remove the entry from the CDB_main database and free the subdatabase
01583          * metadata page.
01584          */
01585         if ((ret = CDB___db_master_open(dbp, name, ((dbenv->flags & DB_ENV_CMPR) ? DB_COMPRESS : 0), 0, &mdbp)) != 0)
01586                 goto err;
01587 
01588         if ((ret = __db_master_update(mdbp,
01589              subdb, dbp->type, &meta_pgno, MU_REMOVE, NULL, 0)) != 0)
01590                 goto err;
01591 
01592 err:    /*
01593          * End the transaction, committing the transaction if we were
01594          * successful, aborting otherwise.
01595          */
01596         if (dbp->open_txn != NULL && (t_ret = __db_metaend(dbp,
01597             &remove_lock, ret == 0, NULL, NULL)) != 0 && ret == 0)
01598                 ret = t_ret;
01599 
01600         /*
01601          * Close the user's DB handle -- do this LAST to avoid smashing the
01602          * the transaction information.
01603          */
01604         if ((t_ret = dbp->close(dbp, 0)) != 0 && ret == 0)
01605                 ret = t_ret;
01606 
01607         if (mdbp != NULL && (t_ret = mdbp->close(mdbp, 0)) != 0 && ret == 0)
01608                 ret = t_ret;
01609 
01610         return (ret);
01611 }
01612 
01613 /*
01614  * CDB___db_rename
01615  *      Rename method for DB.
01616  *
01617  * PUBLIC: int CDB___db_rename __P((DB *,
01618  * PUBLIC:     const char *, const char *, const char *, u_int32_t));
01619  */
01620 int
01621 CDB___db_rename(dbp, filename, subdb, newname, flags)
01622         DB *dbp;
01623         const char *filename, *subdb, *newname;
01624         u_int32_t flags;
01625 {
01626         DBT namedbt, newnamedbt;
01627         DB_ENV *dbenv;
01628         DB_LOCK remove_lock;
01629         DB_LSN newlsn;
01630         char *real_name, *real_newname;
01631         int ret, t_ret;
01632 
01633         dbenv = dbp->dbenv;
01634         ret = 0;
01635         real_name = real_newname = NULL;
01636 
01637         PANIC_CHECK(dbenv);
01638         DB_ILLEGAL_AFTER_OPEN(dbp, "rename");
01639 
01640         /* Validate arguments -- has same rules as remove. */
01641         if ((ret = CDB___db_removechk(dbp, flags)) != 0)
01642                 return (ret);
01643 
01644         /*
01645          * Subdatabases.
01646          */
01647         if (subdb != NULL) {
01648                 if (filename == NULL) {
01649                         CDB___db_err(dbenv,
01650                     "multiple databases cannot be created in temporary files");
01651                         return (EINVAL);
01652                 }
01653                 return (__db_subdb_rename(dbp, filename, subdb, newname));
01654         }
01655 
01656         if ((ret = dbp->open(dbp,
01657             filename, NULL, DB_UNKNOWN, DB_RDWRMASTER, 0)) != 0)
01658                 return (ret);
01659 
01660         if (LOGGING_ON(dbenv) && (ret = CDB___log_file_lock(dbp)) != 0)
01661                 goto err_close;
01662 
01663         if ((ret = dbp->sync(dbp, 0)) != 0)
01664                 goto err_close;
01665 
01666         /*
01667          * We have to flush the cache for a couple of reasons.  First, the
01668          * underlying MPOOLFILE maintains a "name" that unrelated processes
01669          * can use to open the file in order to flush pages, and that name
01670          * is about to be wrong.  Second, on Windows the unique file ID is
01671          * generated from the file's name, not other file information as is
01672          * the case on UNIX, and so a subsequent open of the old file name
01673          * could conceivably result in a matching "unique" file ID.
01674          */
01675         if ((ret = CDB___memp_fremove(dbp->mpf)) != 0)
01676                 goto err_close;
01677 
01678         /*
01679          * On Windows, the underlying file must be closed to perform a rename.
01680          * Nothing later in CDB___db_rename requires that it be open, and the call
01681          * to dbp->close closes it anyway, so we just close it early.
01682          */
01683         if ((ret = CDB_memp_fclose(dbp->mpf)) != 0)
01684                 goto err_close;
01685         dbp->mpf = NULL;
01686 
01687         /* Start the transaction and log the rename. */
01688         if (TXN_ON(dbenv) && (ret = __db_metabegin(dbp, &remove_lock)) != 0)
01689                 goto err_close;
01690 
01691         if (LOGGING_ON(dbenv)) {
01692                 memset(&namedbt, 0, sizeof(namedbt));
01693                 namedbt.data = (char *)filename;
01694                 namedbt.size = strlen(filename) + 1;
01695 
01696                 memset(&newnamedbt, 0, sizeof(namedbt));
01697                 newnamedbt.data = (char *)newname;
01698                 newnamedbt.size = strlen(newname) + 1;
01699 
01700                 if ((ret = CDB___crdel_rename_log(dbenv, dbp->open_txn,
01701                     &newlsn, 0, dbp->log_fileid, &namedbt, &newnamedbt)) != 0) {
01702                         CDB___db_err(dbenv, "%s: %s", filename, CDB_db_strerror(ret));
01703                         goto err;
01704                 }
01705 
01706                 if ((ret = CDB___log_filelist_update(dbenv, dbp,
01707                     dbp->log_fileid, newname, NULL)) != 0)
01708                         goto err;
01709         }
01710 
01711         /* Find the real name of the file. */
01712         if ((ret = CDB___db_appname(dbenv,
01713             DB_APP_DATA, NULL, filename, 0, NULL, &real_name)) != 0)
01714                 goto err;
01715 
01716         /* Find the real newname of the file. */
01717         if ((ret = CDB___db_appname(dbenv,
01718             DB_APP_DATA, NULL, newname, 0, NULL, &real_newname)) != 0)
01719                 goto err;
01720 
01721         /*
01722          * It is an error to rename a file over one that already exists,
01723          * as that wouldn't be transaction-safe.
01724          */
01725         if (CDB___os_exists(real_newname, NULL) == 0) {
01726                 ret = EEXIST;
01727                 CDB___db_err(dbenv, "rename: file %s exists", real_newname);
01728                 goto err;
01729         }
01730 
01731         DB_TEST_RECOVERY(dbp, DB_TEST_PRERENAME, ret, filename);
01732         ret = CDB___os_rename(dbenv, real_name, real_newname);
01733         DB_TEST_RECOVERY(dbp, DB_TEST_POSTRENAME, ret, newname);
01734 
01735 DB_TEST_RECOVERY_LABEL
01736 err:    if (dbp->open_txn != NULL && (t_ret = __db_metaend(dbp,
01737             &remove_lock, ret == 0, NULL, NULL)) != 0 && ret == 0)
01738                 ret = t_ret;
01739 
01740 err_close:
01741         /* We no longer have an mpool, so syncing would be disastrous. */
01742         dbp->close(dbp, DB_NOSYNC);
01743         if (real_name != NULL)
01744                 CDB___os_freestr(real_name);
01745         if (real_newname != NULL)
01746                 CDB___os_freestr(real_newname);
01747 
01748         return (ret);
01749 }
01750 
01751 /*
01752  * __db_subdb_rename --
01753  *      Rename a subdatabase.
01754  */
01755 static int
01756 __db_subdb_rename(dbp, name, subdb, newname)
01757         DB *dbp;
01758         const char *name, *subdb, *newname;
01759 {
01760         DB *mdbp;
01761         DBC *dbc;
01762         DB_ENV *dbenv;
01763         DB_LOCK remove_lock;
01764         int ret, t_ret;
01765 
01766         mdbp = NULL;
01767         dbc = NULL;
01768         dbenv = dbp->dbenv;
01769 
01770         /* Start the transaction. */
01771         if (TXN_ON(dbenv) && (ret = __db_metabegin(dbp, &remove_lock)) != 0)
01772                 return (ret);
01773 
01774         /*
01775          * Open the subdatabase.  We can use the user's DB handle for this
01776          * purpose, I think.
01777          */
01778         if ((ret = CDB___db_open(dbp, name, subdb, DB_UNKNOWN, 0, 0)) != 0)
01779                 goto err;
01780 
01781         /*
01782          * Rename the entry in the CDB_main database.
01783          */
01784         if ((ret = CDB___db_master_open(dbp, name, 0, 0, &mdbp)) != 0)
01785                 goto err;
01786 
01787         if ((ret = __db_master_update(mdbp,
01788              subdb, dbp->type, NULL, MU_RENAME, newname, 0)) != 0)
01789                 goto err;
01790 
01791 err:    /*
01792          * End the transaction, committing the transaction if we were
01793          * successful, aborting otherwise.
01794          */
01795         if (dbp->open_txn != NULL && (t_ret = __db_metaend(dbp,
01796             &remove_lock, ret == 0, NULL, NULL)) != 0 && ret == 0)
01797                 ret = t_ret;
01798 
01799         /*
01800          * Close the user's DB handle -- do this LAST to avoid smashing the
01801          * the transaction information.
01802          */
01803         if ((t_ret = dbp->close(dbp, 0)) != 0 && ret == 0)
01804                 ret = t_ret;
01805 
01806         if (mdbp != NULL && (t_ret = mdbp->close(mdbp, 0)) != 0 && ret == 0)
01807                 ret = t_ret;
01808 
01809         return (ret);
01810 }
01811 
01812 /*
01813  * __db_metabegin --
01814  *
01815  * Begin a meta-data operation.  This involves doing any required locking,
01816  * potentially beginning a transaction and then telling the caller if you
01817  * did or did not begin the transaction.
01818  *
01819  * The writing flag indicates if the caller is actually allowing creates
01820  * or doing deletes (i.e., if the caller is opening and not creating, then
01821  * we don't need to do any of this).
01822  */
01823 static int
01824 __db_metabegin(dbp, lockp)
01825         DB *dbp;
01826         DB_LOCK *lockp;
01827 {
01828         DB_ENV *dbenv;
01829         DBT dbplock;
01830         u_int32_t locker, lockval;
01831         int ret;
01832 
01833         dbenv = dbp->dbenv;
01834 
01835         lockp->off = LOCK_INVALID;
01836 
01837         /*
01838          * There is no single place where we can know that we are or are not
01839          * going to be creating any files and/or subdatabases, so we will
01840          * always begin a tranasaction when we start creating one.  If we later
01841          * discover that this was unnecessary, we will abort the transaction.
01842          * Recovery is written so that if we log a file create, but then
01843          * discover that we didn't have to do it, we recover correctly.  The
01844          * file recovery design document has details.
01845          *
01846          * We need to single thread all create and delete operations, so if we
01847          * are running with locking, we must obtain a lock. We use CDB_lock_id to
01848          * generate a unique locker id and use a handcrafted DBT as the object
01849          * on which we are locking.
01850          */
01851         if (LOCKING_ON(dbenv)) {
01852                 if ((ret = CDB_lock_id(dbenv, &locker)) != 0)
01853                         return (ret);
01854                 lockval = 0;
01855                 dbplock.data = &lockval;
01856                 dbplock.size = sizeof(lockval);
01857                 if ((ret = CDB_lock_get(dbenv,
01858                     locker, 0, &dbplock, DB_LOCK_WRITE, lockp)) != 0)
01859                         return(ret);
01860         }
01861 
01862         return (CDB_txn_begin(dbenv, NULL, &dbp->open_txn, 0));
01863 }
01864 
01865 /*
01866  * __db_metaend --
01867  *      End a meta-data operation.
01868  */
01869 static int
01870 __db_metaend(dbp, lockp, commit, callback, cookie)
01871         DB *dbp;
01872         DB_LOCK *lockp;
01873         int commit, (*callback) __P((DB *, void *));
01874         void *cookie;
01875 {
01876         DB_ENV *dbenv;
01877         int ret, t_ret;
01878 
01879         ret = 0;
01880         dbenv = dbp->dbenv;
01881 
01882         /* End the transaction. */
01883         if (commit) {
01884                 if ((ret = CDB_txn_commit(dbp->open_txn, DB_TXN_SYNC)) == 0) {
01885                         /*
01886                          * Unlink any underlying file, we've committed the
01887                          * transaction.
01888                          */
01889                         if (callback != NULL)
01890                                 ret = callback(dbp, cookie);
01891                 }
01892         } else if ((t_ret = CDB_txn_abort(dbp->open_txn)) && ret == 0)
01893                 ret = t_ret;
01894 
01895         /* Release our lock. */
01896         if (lockp->off != LOCK_INVALID &&
01897             (t_ret = CDB_lock_put(dbenv, lockp)) != 0 && ret == 0)
01898                 ret = t_ret;
01899 
01900         return (ret);
01901 }
01902 
01903 /*
01904  * CDB___db_log_page
01905  *      Log a meta-data or root page during a create operation.
01906  *
01907  * PUBLIC: int CDB___db_log_page __P((DB *,
01908  * PUBLIC:     const char *, DB_LSN *, db_pgno_t, PAGE *));
01909  */
01910 int
01911 CDB___db_log_page(dbp, name, lsn, pgno, page)
01912         DB *dbp;
01913         const char *name;
01914         DB_LSN *lsn;
01915         db_pgno_t pgno;
01916         PAGE *page;
01917 {
01918         DBT name_dbt, page_dbt;
01919         DB_LSN new_lsn;
01920         int ret;
01921 
01922         if (dbp->open_txn == NULL)
01923                 return (0);
01924 
01925         memset(&page_dbt, 0, sizeof(page_dbt));
01926         page_dbt.size = dbp->pgsize;
01927         page_dbt.data = page;
01928         if (pgno == PGNO_BASE_MD) {
01929                 /*
01930                  * !!!
01931                  * Make sure that we properly handle a null name.  The old
01932                  * Tcl sent us pathnames of the form ""; it may be the case
01933                  * that the new Tcl doesn't do that, so we can get rid of
01934                  * the second check here.
01935                  */
01936                 memset(&name_dbt, 0, sizeof(name_dbt));
01937                 name_dbt.data = (char *)name;
01938                 if (name == NULL || *name == '\0')
01939                         name_dbt.size = 0;
01940                 else
01941                         name_dbt.size = strlen(name) + 1;
01942 
01943                 ret = CDB___crdel_metapage_log(dbp->dbenv,
01944                     dbp->open_txn, &new_lsn, DB_FLUSH,
01945                     dbp->log_fileid, &name_dbt, pgno, &page_dbt);
01946         } else
01947                 ret = CDB___crdel_metasub_log(dbp->dbenv, dbp->open_txn,
01948                     &new_lsn, 0, dbp->log_fileid, pgno, &page_dbt, lsn);
01949 
01950         if (ret == 0)
01951                 page->lsn = new_lsn;
01952         return (ret);
01953 }
01954 
01955 /*
01956  * CDB___db_backup_name
01957  *      Create the backup file name for a given file.
01958  *
01959  * PUBLIC: int CDB___db_backup_name __P((DB_ENV *,
01960  * PUBLIC:     const char *, char **, DB_LSN *));
01961  */
01962 #undef  BACKUP_PREFIX
01963 #define BACKUP_PREFIX   "__db."
01964 
01965 #undef  MAX_LSN_TO_TEXT
01966 #define MAX_LSN_TO_TEXT 21
01967 int
01968 CDB___db_backup_name(dbenv, name, backup, lsn)
01969         DB_ENV *dbenv;
01970         const char *name;
01971         char **backup;
01972         DB_LSN *lsn;
01973 {
01974         size_t len;
01975         int plen, ret;
01976         char *p, *retp;
01977 
01978         len = strlen(name) + strlen(BACKUP_PREFIX) + MAX_LSN_TO_TEXT + 1;
01979 
01980         if ((ret = CDB___os_malloc(dbenv, len, NULL, &retp)) != 0)
01981                 return (ret);
01982 
01983         /*
01984          * Create the name.  Backup file names are of the form:
01985          *
01986          *      __db.name.0x[lsn-file].0x[lsn-offset]
01987          *
01988          * which guarantees uniqueness.
01989          *
01990          * However, name may contain an env-relative path in it.
01991          * In that case, put the __db. after the last portion of
01992          * the pathname.
01993          */
01994         if ((p = CDB___db_rpath(name)) == NULL)
01995                 snprintf(retp, len,
01996                     "%s%s.0x%x0x%x", BACKUP_PREFIX, name,
01997                     lsn->file, lsn->offset);
01998         else { 
01999                 plen = p - name + 1;
02000                 p++;
02001                 snprintf(retp, len,
02002                     "%.*s%s%s.0x%x0x%x", plen, name, BACKUP_PREFIX, p,
02003                     lsn->file, lsn->offset);
02004         }
02005 
02006         *backup = retp;
02007         return (0);
02008 }
02009 
02010 /*
02011  * __db_remove_callback --
02012  *      Callback function -- on file remove commit, it unlinks the backing
02013  *      file.
02014  */
02015 static int
02016 __db_remove_callback(dbp, cookie)
02017         DB *dbp;
02018         void *cookie;
02019 {
02020         return (CDB___os_unlink(dbp->dbenv, cookie));
02021 }
02022 
02023 #if     CONFIG_TEST
02024 /*
02025  * __db_testcopy
02026  *      Create a copy of all backup files and our "CDB_main" DB.
02027  *
02028  * PUBLIC: int __db_testcopy __P((DB *, const char *));
02029  */
02030 int
02031 __db_testcopy(dbp, name)
02032         DB *dbp;
02033         const char *name;
02034 {
02035         size_t len;
02036         int dircnt, i, ret;
02037         char **namesp, *backup, *copy, *dir, *p, *real_name;
02038 
02039         real_name = NULL;
02040         /* Get the real backing file name. */
02041         if ((ret = CDB___db_appname(dbp->dbenv,
02042             DB_APP_DATA, NULL, name, 0, NULL, &real_name)) != 0)
02043                 return (ret);
02044 
02045         /*
02046          * Maximum size of file, including adding a ".afterop".
02047          */
02048         len = strlen(real_name) + strlen(BACKUP_PREFIX) + MAX_LSN_TO_TEXT + 9;
02049 
02050         if ((ret = CDB___os_malloc(dbp->dbenv, len, NULL, &copy)) != 0)
02051                 goto out;
02052 
02053         if ((ret = CDB___os_malloc(dbp->dbenv, len, NULL, &backup)) != 0)
02054                 goto out;
02055 
02056         /*
02057          * First copy the file itself.
02058          */
02059         snprintf(copy, len, "%s.afterop", real_name);
02060         __db_makecopy(real_name, copy);
02061 
02062         if ((ret = CDB___os_strdup(dbp->dbenv, real_name, &dir)) != 0)
02063                 goto out;
02064         CDB___os_freestr(real_name);
02065         real_name = NULL;
02066         /*
02067          * Create the name.  Backup file names are of the form:
02068          *
02069          *      __db.name.0x[lsn-file].0x[lsn-offset]
02070          *
02071          * which guarantees uniqueness.  We want to look for the
02072          * backup name, followed by a '.0x' (so that if they have
02073          * files named, say, 'a' and 'abc' we won't match 'abc' when
02074          * looking for 'a'.
02075          */
02076         snprintf(backup, len, "%s%s.0x", BACKUP_PREFIX, name);
02077 
02078         /*
02079          * We need the directory path to do the CDB___os_dirlist.
02080          */
02081         p = CDB___db_rpath(dir);
02082         if (p != NULL)
02083                 *p = '\0';
02084         ret = CDB___os_dirlist(dbp->dbenv, dir, &namesp, &dircnt);
02085 #if DIAGNOSTIC
02086         /*
02087          * XXX
02088          * To get the memory guard code to work because it uses strlen and we
02089          * just moved the end of the string somewhere sooner.  This causes the
02090          * guard code to fail because it looks at one byte past the end of the
02091          * string.
02092          */
02093         *p = '/';
02094 #endif
02095         CDB___os_freestr(dir);
02096         if (ret != 0)
02097                 goto out;
02098         for (i = 0; i < dircnt; i++) {
02099                 /*
02100                  * Need to check if it is a backup file for this.
02101                  * No idea what namesp[i] may be or how long, so
02102                  * must use strncmp and not memcmp.  We don't want
02103                  * to use strcmp either because we are only matching
02104                  * the first part of the real file's name.  We don't
02105                  * know its LSN's.
02106                  */
02107                 if (strncmp(namesp[i], backup, strlen(backup)) == 0) {
02108                         if ((ret = CDB___db_appname(dbp->dbenv, DB_APP_DATA,
02109                             NULL, namesp[i], 0, NULL, &real_name)) != 0)
02110                                 goto out;
02111 
02112                         /*
02113                          * This should not happen.  Check that old
02114                          * .afterop files aren't around.
02115                          * If so, just move on.
02116                          */
02117                         if (strstr(real_name, ".afterop") != NULL) {
02118                                 CDB___os_freestr(real_name);
02119                                 real_name = NULL;
02120                                 continue;
02121                         }
02122                         snprintf(copy, len, "%s.afterop", real_name);
02123                         __db_makecopy(real_name, copy);
02124                         CDB___os_freestr(real_name);
02125                         real_name = NULL;
02126                 }
02127         }
02128 out:
02129         if (real_name)
02130                 CDB___os_freestr(real_name);
02131         return (ret);
02132 }
02133 
02134 static void
02135 __db_makecopy(src, dest)
02136         const char *src, *dest;
02137 {
02138         DB_FH rfh, wfh;
02139         size_t rcnt, wcnt;
02140         char *buf;
02141 
02142         memset(&rfh, 0, sizeof(rfh));
02143         memset(&wfh, 0, sizeof(wfh));
02144 
02145         if (CDB___os_malloc(NULL, 1024, NULL, &buf) != 0)
02146                 return;
02147 
02148         if (CDB___os_open(NULL,
02149             src, DB_OSO_RDONLY, CDB___db_omode("rw----"), &rfh) != 0)
02150                 goto err;
02151         if (CDB___os_open(NULL, dest,
02152             DB_OSO_CREATE | DB_OSO_TRUNC, CDB___db_omode("rw----"), &wfh) != 0)
02153                 goto err;
02154 
02155         for (;;)
02156                 if (CDB___os_read(NULL, &rfh, buf, 1024, &rcnt) < 0 || rcnt == 0 ||
02157                     CDB___os_write(NULL, &wfh, buf, rcnt, &wcnt) < 0 || wcnt != rcnt)
02158                         break;
02159 
02160 err:    CDB___os_free(buf, 1024);
02161         if (F_ISSET(&rfh, DB_FH_VALID))
02162                 CDB___os_closehandle(&rfh);
02163         if (F_ISSET(&wfh, DB_FH_VALID))
02164                 CDB___os_closehandle(&wfh);
02165 }
02166 #endif

Generated on Sun Jun 8 10:56:35 2008 for GNUmifluz by  doxygen 1.5.5