env_region.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 
00008 #include "config.h"
00009 
00010 #ifndef lint
00011 static const char revid[] = "$Id: env__region_8c-source.html,v 1.1 2008/06/08 10:18:46 sebdiaz Exp $";
00012 #endif /* not lint */
00013 
00014 #ifndef NO_SYSTEM_INCLUDES
00015 #include <sys/types.h>
00016 
00017 #include <ctype.h>
00018 #include <errno.h>
00019 #include <string.h>
00020 #include <unistd.h>
00021 #endif
00022 
00023 #include "db_int.h"
00024 
00025 static int __db_des_destroy __P((DB_ENV *, REGION *));
00026 static int __db_des_get __P((DB_ENV *, REGINFO *, REGINFO *, REGION **));
00027 static int __db_e_remfile __P((DB_ENV *));
00028 static int __db_faultmem __P((void *, size_t, int));
00029 
00030 /*
00031  * CDB___db_e_attach
00032  *      Join/create the environment
00033  *
00034  * PUBLIC: int CDB___db_e_attach __P((DB_ENV *));
00035  */
00036 int
00037 CDB___db_e_attach(dbenv)
00038         DB_ENV *dbenv;
00039 {
00040         REGENV *renv;
00041         REGENV_REF ref;
00042         REGINFO *infop;
00043         REGION *rp, tregion;
00044         size_t size;
00045         size_t nrw;
00046         u_int32_t mbytes, bytes;
00047         int retry_cnt, ret, segid;
00048         char buf[sizeof(DB_REGION_FMT) + 20];
00049 
00050 #if !defined(HAVE_MUTEX_THREADS)
00051         /*
00052          * !!!
00053          * If we don't have spinlocks, we need a file descriptor for fcntl(2)
00054          * locking.  We use the file handle from the REGENV file for this
00055          * purpose.
00056          *
00057          * Since we may be using shared memory regions, e.g., shmget(2), and
00058          * not a mapped-in regular file, the backing file may be only a few
00059          * bytes in length.  So, this depends on the ability to call fcntl to
00060          * lock file offsets much larger than the actual physical file.  I
00061          * think that's safe -- besides, very few systems actually need this
00062          * kind of support, SunOS is the only one still in wide use of which
00063          * I'm aware.
00064          *
00065          * The error case is if an application lacks spinlocks and wants to be
00066          * threaded.  That doesn't work because fcntl may lock the underlying
00067          * process, including all its threads.
00068          */
00069         if (F_ISSET(dbenv, DB_ENV_THREAD)) {
00070                 CDB___db_err(dbenv,
00071 "architecture lacks fast mutexes: applications cannot be threaded");
00072                 return (EINVAL);
00073         }
00074 #endif
00075 
00076         /* Initialization */
00077         retry_cnt = 0;
00078 
00079         /* Repeated initialization. */
00080 loop:   renv = NULL;
00081 
00082         /* Set up the DB_ENV's REG_INFO structure. */
00083         if ((ret = CDB___os_calloc(dbenv, 1, sizeof(REGINFO), &infop)) != 0)
00084                 return (ret);
00085         infop->id = REG_ID_ENV;
00086         infop->mode = dbenv->db_mode;
00087         if (F_ISSET(dbenv, DB_ENV_CREATE))
00088                 F_SET(infop, REGION_CREATE_OK);
00089 
00090         /*
00091          * We have to single-thread the creation of the REGENV region.  Once
00092          * it exists, we can do locking using locks in the region, but until
00093          * then we have to be the only player in the game.
00094          *
00095          * If this is a private environment, we are only called once and there
00096          * are no possible race conditions.
00097          *
00098          * If this is a public environment, we use the filesystem to ensure
00099          * the creation of the environment file is single-threaded.
00100          */
00101         if (F_ISSET(dbenv, DB_ENV_PRIVATE))
00102                 goto creation;
00103 
00104         /* Build the region name. */
00105         (void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV);
00106         if ((ret = CDB___db_appname(dbenv,
00107             DB_APP_NONE, NULL, buf, 0, NULL, &infop->name)) != 0)
00108                 goto err;
00109 
00110         /*
00111          * Try to create the file, if we have the authority.  We have to ensure
00112          * that multiple threads/processes attempting to simultaneously create
00113          * the file are properly ordered.  Open using the O_CREAT and O_EXCL
00114          * flags so that multiple attempts to create the region will return
00115          * failure in all but one.  POSIX 1003.1 requires that EEXIST be the
00116          * errno return value -- I sure hope they're right.
00117          */
00118         if (F_ISSET(dbenv, DB_ENV_CREATE)) {
00119                 if ((ret = CDB___os_open(dbenv,
00120                     infop->name, DB_OSO_CREATE | DB_OSO_EXCL,
00121                     dbenv->db_mode, dbenv->lockfhp)) == 0)
00122                         goto creation;
00123                 if (ret != EEXIST) {
00124                         CDB___db_err(dbenv,
00125                             "%s: %s", infop->name, CDB_db_strerror(ret));
00126                         goto err;
00127                 }
00128         }
00129 
00130         /*
00131          * If we couldn't create the file, try and open it.  (If that fails,
00132          * we're done.)
00133          */
00134         if ((ret = CDB___os_open(dbenv,
00135                 infop->name, 0, dbenv->db_mode, dbenv->lockfhp)) != 0)
00136                 goto err;
00137 
00138         /*
00139          * !!!
00140          * The region may be in system memory not backed by the filesystem
00141          * (more specifically, not backed by this file), and we're joining
00142          * it.  In that case, the process that created it will have written
00143          * out a REGENV_REF structure as its only contents.  We read that
00144          * structure before we do anything further, e.g., we can't just map
00145          * that file in and then figure out what's going on.
00146          *
00147          * All of this noise is because some systems don't have a coherent VM
00148          * and buffer cache, and what's worse, when you mix operations on the
00149          * VM and buffer cache, half the time you hang the system.
00150          *
00151          * If the file is the size of an REGENV_REF structure, then we know
00152          * the real region is in some other memory.  (The only way you get a
00153          * file that size is to deliberately write it, as it's smaller than
00154          * any possible disk sector created by writing a file or mapping the
00155          * file into memory.)  In which case, retrieve the structure from the
00156          * file and use it to acquire the referenced memory.
00157          *
00158          * If the structure is larger than a REGENV_REF structure, then this
00159          * file is backing the shared memory region, and we just map it into
00160          * memory.
00161          *
00162          * And yes, this makes me want to take somebody and kill them.  (I
00163          * digress -- but you have no freakin' idea.  This is unbelievably
00164          * stupid and gross, and I've probably spent six months of my life,
00165          * now, trying to make different versions of it work.)
00166          */
00167         if ((ret = CDB___os_ioinfo(dbenv, infop->name,
00168             dbenv->lockfhp, &mbytes, &bytes, NULL)) != 0) {
00169                 CDB___db_err(dbenv, "%s: %s", infop->name, CDB_db_strerror(ret));
00170                 goto err;
00171         }
00172 
00173         /*
00174          * !!!
00175          * A size_t is OK -- regions get mapped into memory, and so can't
00176          * be larger than a size_t.
00177          */
00178         size = mbytes * MEGABYTE + bytes;
00179 
00180         /*
00181          * If the size is less than the size of a REGENV_REF structure, the
00182          * region (or, possibly, the REGENV_REF structure) has not yet been
00183          * completely written.  Wait awhile and try again.
00184          *
00185          * Otherwise, if the size is the size of a REGENV_REF structure,
00186          * read it into memory and use it as a reference to the real region.
00187          */
00188         if (size <= sizeof(ref)) {
00189                 if (size != sizeof(ref))
00190                         goto retry;
00191 
00192                 if ((ret = CDB___os_read(dbenv, dbenv->lockfhp, &ref,
00193                     sizeof(ref), &nrw)) != 0 || nrw < (size_t)sizeof(ref)) {
00194                         if (ret == 0)
00195                                 ret = EIO;
00196                         CDB___db_err(dbenv,
00197                     "%s: unable to read system-memory information from: %s",
00198                             infop->name, CDB_db_strerror(ret));
00199                         goto err;
00200                 }
00201                 size = ref.size;
00202                 segid = ref.segid;
00203 
00204                 F_SET(dbenv, DB_ENV_SYSTEM_MEM);
00205         } else if (F_ISSET(dbenv, DB_ENV_SYSTEM_MEM)) {
00206                 ret = EINVAL;
00207                 CDB___db_err(dbenv,
00208                     "%s: existing environment not created in system memory: %s",
00209                     infop->name, CDB_db_strerror(ret));
00210                 goto err;
00211         } else
00212                 segid = INVALID_REGION_SEGID;
00213 
00214         /*
00215          * If not doing thread locking, we need to save the file handle for
00216          * fcntl(2) locking.  Otherwise, discard the handle, we no longer
00217          * need it, and the less contact between the buffer cache and the VM,
00218          * the better.
00219          */
00220 #ifdef HAVE_MUTEX_THREADS
00221          CDB___os_closehandle(dbenv->lockfhp);
00222 #endif
00223 
00224         /* Call the region join routine to acquire the region. */
00225         memset(&tregion, 0, sizeof(tregion));
00226         tregion.size = size;
00227         tregion.segid = segid;
00228         if ((ret = CDB___os_r_attach(dbenv, infop, &tregion)) != 0)
00229                 goto err;
00230 
00231         /*
00232          * The environment's REGENV structure has to live at offset 0 instead
00233          * of the usual shalloc information.  Set the primary reference and
00234          * correct the "addr" value to reference the shalloc region.  Note,
00235          * this means that all of our offsets (R_ADDR/R_OFFSET) get shifted
00236          * as well, but that should be fine.
00237          */
00238         infop->primary = R_ADDR(infop, 0);
00239         infop->addr = (u_int8_t *)infop->addr + sizeof(REGENV);
00240 
00241         /*
00242          * Check if the environment has had a catastrophic failure.
00243          *
00244          * Check the magic number to ensure the region is initialized.  If the
00245          * magic number isn't set, the lock may not have been initialized, and
00246          * an attempt to use it could lead to random behavior.
00247          *
00248          * The panic and magic values aren't protected by any lock, so we never
00249          * use them in any check that's more complex than set/not-set.
00250          *
00251          * !!!
00252          * I'd rather play permissions games using the underlying file, but I
00253          * can't because Windows/NT filesystems won't open files mode 0.
00254          */
00255         renv = infop->primary;
00256         if (renv->panic) {
00257                 ret = CDB___db_panic_msg(dbenv);
00258                 goto err;
00259         }
00260         if (renv->magic != DB_REGION_MAGIC)
00261                 goto retry;
00262 
00263         /* Lock the environment. */
00264         MUTEX_LOCK(&renv->mutex, dbenv->lockfhp);
00265 
00266         /*
00267          * Finally!  We own the environment now.  Repeat the panic check, it's
00268          * possible that it was set while we waited for the lock.
00269          */
00270         if (renv->panic) {
00271                 ret = CDB___db_panic_msg(dbenv);
00272                 goto err_unlock;
00273         }
00274 
00275         /*
00276          * Get a reference to the underlying REGION information for this
00277          * environment.
00278          */
00279         if ((ret = __db_des_get(dbenv,
00280             infop, infop, &rp)) != 0 || rp == NULL) {
00281                 MUTEX_UNLOCK(&renv->mutex);
00282                 goto find_err;
00283         }
00284         infop->rp = rp;
00285 
00286         /*
00287          * There's still a possibility for inconsistent data.  When we acquired
00288          * the size of the region and attached to it, it might have still been
00289          * growing as part of its creation.  We can detect this by checking the
00290          * size we originally found against the region's current size.  (The
00291          * region's current size has to be final, the creator finished growing
00292          * it before releasing the environment for us to lock.)
00293          */
00294         if (rp->size != size) {
00295 err_unlock:     MUTEX_UNLOCK(&renv->mutex);
00296                 goto retry;
00297         }
00298 
00299         /* Increment the reference count. */
00300         ++renv->refcnt;
00301 
00302         /* Discard our lock. */
00303         MUTEX_UNLOCK(&renv->mutex);
00304 
00305         /*
00306          * Fault the pages into memory.  Note, do this AFTER releasing the
00307          * lock, because we're only reading the pages, not writing them.
00308          */
00309         (void)__db_faultmem(infop->primary, rp->size, 0);
00310 
00311         /* Everything looks good, we're done. */
00312         dbenv->reginfo = infop;
00313         return (0);
00314 
00315 creation:
00316         /* Create the environment region. */
00317         F_SET(infop, REGION_CREATE);
00318 
00319         /*
00320          * Allocate room for 50 REGION structures plus overhead (we're going
00321          * to use this space for last-ditch allocation requests), although we
00322          * should never need anything close to that.
00323          */
00324         memset(&tregion, 0, sizeof(tregion));
00325         tregion.size = 50 * sizeof(REGION) + 50 * sizeof(MUTEX) + 2048;
00326         tregion.segid = INVALID_REGION_SEGID;
00327         if ((ret = CDB___os_r_attach(dbenv, infop, &tregion)) != 0)
00328                 goto err;
00329 
00330         /*
00331          * Fault the pages into memory.  Note, do this BEFORE we initialize
00332          * anything, because we're writing the pages, not just reading them.
00333          */
00334         (void)__db_faultmem(infop->addr, tregion.size, 1);
00335 
00336         /*
00337          * The first object in the region is the REGENV structure.  This is
00338          * different from the other regions, and, from everything else in
00339          * this region, where all objects are allocated from the pool, i.e.,
00340          * there aren't any fixed locations.  The remaining space is made
00341          * available for later allocation.
00342          *
00343          * The allocation space must be size_t aligned, because that's what
00344          * the initialization routine is going to store there.  To make sure
00345          * that happens, the REGENV structure was padded with a final size_t.
00346          * No other region needs to worry about it because all of them treat
00347          * the entire region as allocation space.
00348          *
00349          * Set the primary reference and correct the "addr" value to reference
00350          * the shalloc region.  Note, this requires that we "uncorrect" it at
00351          * region detach, and that all of our offsets (R_ADDR/R_OFFSET) will be
00352          * shifted as well, but that should be fine.
00353          */
00354         infop->primary = R_ADDR(infop, 0);
00355         infop->addr = (u_int8_t *)infop->addr + sizeof(REGENV);
00356         CDB___db_shalloc_init(infop->addr, tregion.size - sizeof(REGENV));
00357 
00358         /*
00359          * Initialize the rest of the REGENV structure, except for the magic
00360          * number which validates the file/environment.
00361          */
00362         renv = infop->primary;
00363         renv->panic = 0;
00364         CDB_db_version(&renv->majver, &renv->minver, &renv->patch);
00365         SH_LIST_INIT(&renv->regionq);
00366         renv->refcnt = 1;
00367 
00368         /*
00369          * Lock the environment.
00370          *
00371          * Check the lock call return.  This is the first lock we initialize
00372          * and acquire, and we have to know if it fails.  (It CAN fail, e.g.,
00373          * SunOS, when using fcntl(2) for locking and using an in-memory
00374          * filesystem as the database home.  But you knew that, I'm sure -- it
00375          * probably wasn't even worth mentioning.)
00376          */
00377         if ((ret =
00378             __db_mutex_init(dbenv, &renv->mutex, DB_FCNTL_OFF_GEN, 0)) != 0) {
00379                 CDB___db_err(dbenv, "%s: unable to initialize environment lock: %s",
00380                     infop->name, CDB_db_strerror(ret));
00381                 goto err;
00382         }
00383 
00384         if (!F_ISSET(&renv->mutex, MUTEX_IGNORE) &&
00385             (ret = __db_mutex_lock(&renv->mutex, dbenv->lockfhp)) != 0) {
00386                 CDB___db_err(dbenv, "%s: unable to acquire environment lock: %s",
00387                     infop->name, CDB_db_strerror(ret));
00388                 goto err;
00389         }
00390 
00391         /*
00392          * Get the underlying REGION structure for this environment.  Note,
00393          * we created the underlying OS region before we acquired the REGION
00394          * structure, which is backwards from the normal procedure.  Update
00395          * the REGION structure.
00396          */
00397         if ((ret = __db_des_get(dbenv, infop, infop, &rp)) != 0) {
00398 find_err:       CDB___db_err(dbenv,
00399                     "%s: unable to find environment", infop->name);
00400                 if (ret == 0)
00401                         ret = EINVAL;
00402                 goto err;
00403         }
00404         infop->rp = rp;
00405         rp->size = tregion.size;
00406         rp->segid = tregion.segid;
00407 
00408         /*
00409          * !!!
00410          * If we create an environment where regions are public and in system
00411          * memory, we have to inform processes joining the environment how to
00412          * attach to the shared memory segment.  So, we write the shared memory
00413          * identifier into the file, to be read by those other processes.
00414          *
00415          * XXX
00416          * This is really OS-layer information, but I can't see any easy way
00417          * to move it down there without passing down information that it has
00418          * no right to know, e.g., that this is the one-and-only REGENV region
00419          * and not some other random region.
00420          */
00421         if (tregion.segid != INVALID_REGION_SEGID) {
00422                 ref.size = tregion.size;
00423                 ref.segid = tregion.segid;
00424                 if ((ret = CDB___os_write(dbenv, dbenv->lockfhp,
00425                     &ref, sizeof(ref), &nrw)) != 0 || nrw != sizeof(ref)) {
00426                         CDB___db_err(dbenv,
00427                             "%s: unable to write out public environment ID: %s",
00428                             infop->name, CDB_db_strerror(ret));
00429                         goto err;
00430                 }
00431         }
00432 
00433         /*
00434          * If not doing thread locking, we need to save the file handle for
00435          * fcntl(2) locking.  Otherwise, discard the handle, we no longer
00436          * need it, and the less contact between the buffer cache and the VM,
00437          * the better.
00438          */
00439 #if defined(HAVE_MUTEX_THREADS)
00440         if (F_ISSET(dbenv->lockfhp, DB_FH_VALID))
00441                  CDB___os_closehandle(dbenv->lockfhp);
00442 #endif
00443 
00444         /* Validate the file. */
00445         renv->magic = DB_REGION_MAGIC;
00446 
00447         /* Discard our lock. */
00448         MUTEX_UNLOCK(&renv->mutex);
00449 
00450         /* Everything looks good, we're done. */
00451         dbenv->reginfo = infop;
00452         return (0);
00453 
00454 err:
00455 retry:  /* Close any open file handle. */
00456         if (F_ISSET(dbenv->lockfhp, DB_FH_VALID))
00457                 (void)CDB___os_closehandle(dbenv->lockfhp);
00458 
00459         /*
00460          * If we joined or created the region, detach from it.  If we created
00461          * it, destroy it.  Note, there's a path in the above code where we're
00462          * using a temporary REGION structure because we haven't yet allocated
00463          * the real one.  In that case the region address (addr) will be filled
00464          * in, but the REGION pointer (rp) won't.  Fix it.
00465          */
00466         if (infop->addr != NULL) {
00467                 if (infop->rp == NULL)
00468                         infop->rp = &tregion;
00469 
00470                 /* Reset the addr value that we "corrected" above. */
00471                 infop->addr = infop->primary;
00472                 (void)CDB___os_r_detach(dbenv,
00473                     infop, F_ISSET(infop, REGION_CREATE));
00474         }
00475 
00476         /* Free the allocated name and/or REGINFO structure. */
00477         if (infop->name != NULL)
00478                 CDB___os_freestr(infop->name);
00479         CDB___os_free(infop, sizeof(REGINFO));
00480 
00481         /* If we had a temporary error, wait awhile and try again. */
00482         if (ret == 0) {
00483                 if (++retry_cnt > 3) {
00484                         CDB___db_err(dbenv, "unable to join the environment");
00485                         ret = EAGAIN;
00486                 } else {
00487                         CDB___os_sleep(dbenv, retry_cnt * 3, 0);
00488                         goto loop;
00489                 }
00490         }
00491 
00492         return (ret);
00493 }
00494 
00495 /*
00496  * CDB___db_e_detach --
00497  *      Detach from the environment.
00498  *
00499  * PUBLIC: int CDB___db_e_detach __P((DB_ENV *, int));
00500  */
00501 int
00502 CDB___db_e_detach(dbenv, destroy)
00503         DB_ENV *dbenv;
00504         int destroy;
00505 {
00506         REGENV *renv;
00507         REGINFO *infop;
00508 
00509         infop = dbenv->reginfo;
00510         renv = infop->primary;
00511 
00512         /* Lock the environment. */
00513         MUTEX_LOCK(&renv->mutex, dbenv->lockfhp);
00514 
00515         /* Decrement the reference count. */
00516         if (renv->refcnt == 0) {
00517                 CDB___db_err(dbenv,
00518                     "region %lu (environment): reference count went negative",
00519                     infop->rp->id);
00520         } else
00521                 --renv->refcnt;
00522 
00523         /* Release the lock. */
00524         MUTEX_UNLOCK(&renv->mutex);
00525 
00526         /* Close the locking file handle. */
00527         if (F_ISSET(dbenv->lockfhp, DB_FH_VALID))
00528                 (void)CDB___os_closehandle(dbenv->lockfhp);
00529 
00530         /* Reset the addr value that we "corrected" above. */
00531         infop->addr = infop->primary;
00532 
00533         /*
00534          * Release the region, and kill our reference.
00535          *
00536          * We set the DBENV->reginfo field to NULL here and discard its memory.
00537          * DBENV->remove calls CDB___dbenv_remove to do the region remove, and
00538          * CDB___dbenv_remove attached and then detaches from the region.  We don't
00539          * want to return to DBENV->remove with a non-NULL DBENV->reginfo field
00540          * because it will attempt to detach again as part of its cleanup.
00541          */
00542         (void)CDB___os_r_detach(dbenv, infop, destroy);
00543 
00544         if (infop->name != NULL)
00545                 CDB___os_free(infop->name, 0);
00546         CDB___os_free(dbenv->reginfo, sizeof(REGINFO));
00547         dbenv->reginfo = NULL;
00548 
00549         return (0);
00550 }
00551 
00552 /*
00553  * CDB___db_e_remove --
00554  *      Discard an environment if it's not in use.
00555  *
00556  * PUBLIC: int CDB___db_e_remove __P((DB_ENV *, int));
00557  */
00558 int
00559 CDB___db_e_remove(dbenv, force)
00560         DB_ENV *dbenv;
00561         int force;
00562 {
00563         REGENV *renv;
00564         REGINFO *infop, reginfo;
00565         REGION *rp;
00566         int ret, saved_value;
00567 
00568         /*
00569          * This routine has to walk a nasty line between not looking into
00570          * the environment (which may be corrupted after an app or system
00571          * crash), and removing everything that needs removing.  What we
00572          * do is:
00573          *      1. Connect to the environment (so it better be OK).
00574          *      2. If the environment is in use (reference count is non-zero),
00575          *         return EBUSY.
00576          *      3. Overwrite the magic number so that any threads of control
00577          *         attempting to connect will backoff and retry.
00578          *      4. Walk the list of regions.  Connect to each region and then
00579          *         disconnect with the destroy flag set.  This shouldn't cause
00580          *         any problems, even if the region is corrupted, because we
00581          *         should never be looking inside the region.
00582          *      5. Walk the list of files in the directory, unlinking any
00583          *         files that match a region name.  Unlink the environment
00584          *         file last.
00585          *
00586          * If the force flag is set, we do not acquire any locks during this
00587          * process.
00588          */
00589         saved_value = DB_GLOBAL(db_mutexlocks);
00590         if (force)
00591                 DB_GLOBAL(db_mutexlocks) = 0;
00592 
00593         /* Join the environment. */
00594         if ((ret = CDB___db_e_attach(dbenv)) != 0) {
00595                 /*
00596                  * If we can't join it, we assume that's because it doesn't
00597                  * exist.  It would be better to know why we failed, but it
00598                  * probably isn't important.
00599                  */
00600                 ret = 0;
00601                 if (force)
00602                         goto remfiles;
00603                 goto err;
00604         }
00605 
00606         infop = dbenv->reginfo;
00607         renv = infop->primary;
00608 
00609         /* Lock the environment. */
00610         MUTEX_LOCK(&renv->mutex, dbenv->lockfhp);
00611 
00612         /* If it's in use, we're done. */
00613         if (renv->refcnt == 1 || force) {
00614                 /*
00615                  * Set the panic flag and overwrite the magic number.
00616                  *
00617                  * !!!
00618                  * From this point on, there's no going back, we pretty
00619                  * much ignore errors, and just whack on whatever we can.
00620                  */
00621                 renv->panic = 1;
00622                 renv->magic = 0;
00623 
00624                 /*
00625                  * Unlock the environment.  We should no longer need the lock
00626                  * because we've poisoned the pool, but we can't continue to
00627                  * hold it either, because other routines may want it.
00628                  */
00629                 MUTEX_UNLOCK(&renv->mutex);
00630 
00631                 /*
00632                  * Attach to each sub-region and destroy it.
00633                  *
00634                  * !!!
00635                  * The REGION_CREATE_OK flag is set for Windows/95 -- regions
00636                  * are zero'd out when the last reference to the region goes
00637                  * away, in which case the underlying OS region code requires
00638                  * callers be prepared to create the region in order to join it.
00639                  */
00640                 memset(&reginfo, 0, sizeof(reginfo));
00641 restart:        for (rp = SH_LIST_FIRST(&renv->regionq, __db_region);
00642                     rp != NULL; rp = SH_LIST_NEXT(rp, q, __db_region)) {
00643                         if (rp->id == REG_ID_ENV)
00644                                 continue;
00645 
00646                         reginfo.id = rp->id;
00647                         reginfo.flags = REGION_CREATE_OK;
00648                         if (CDB___db_r_attach(dbenv, &reginfo, 0) == 0) {
00649                                 R_UNLOCK(dbenv, &reginfo);
00650                                 (void)CDB___db_r_detach(dbenv, &reginfo, 1);
00651                         }
00652                         goto restart;
00653                 }
00654 
00655                 /* Destroy the environment's region. */
00656                 (void)CDB___db_e_detach(dbenv, 1);
00657 
00658                 /* Discard the physical files. */
00659 remfiles:       (void)__db_e_remfile(dbenv);
00660         } else {
00661                 /* Unlock the environment. */
00662                 MUTEX_UNLOCK(&renv->mutex);
00663 
00664                 /* Discard the environment. */
00665                 (void)CDB___db_e_detach(dbenv, 0);
00666 
00667                 ret = EBUSY;
00668         }
00669 
00670 err:    if (force)
00671                 DB_GLOBAL(db_mutexlocks) = saved_value;
00672 
00673         return (ret);
00674 }
00675 
00676 /*
00677  * __db_e_remfile --
00678  *      Discard any region files in the filesystem.
00679  */
00680 static int
00681 __db_e_remfile(dbenv)
00682         DB_ENV *dbenv;
00683 {
00684         static char *old_region_names[] = {
00685                 "__db_lock.share",
00686                 "__db_log.share",
00687                 "__db_mpool.share",
00688                 "__db_txn.share",
00689                 NULL,
00690         };
00691         int cnt, fcnt, lastrm, ret;
00692         u_int8_t saved_byte;
00693         const char *dir;
00694         char *p, **names, *path, buf[sizeof(DB_REGION_FMT) + 20];
00695 
00696         /* Get the full path of a file in the environment. */
00697         (void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV);
00698         if ((ret =
00699             CDB___db_appname(dbenv, DB_APP_NONE, NULL, buf, 0, NULL, &path)) != 0)
00700                 return (ret);
00701 
00702         /* Get the parent directory for the environment. */
00703         if ((p = CDB___db_rpath(path)) == NULL) {
00704                 p = path;
00705                 saved_byte = *p;
00706 
00707                 dir = PATH_DOT;
00708         } else {
00709                 saved_byte = *p;
00710                 *p = '\0';
00711 
00712                 dir = path;
00713         }
00714 
00715         /* Get the list of file names. */
00716         ret = CDB___os_dirlist(dbenv, dir, &names, &fcnt);
00717 
00718         /* Restore the path, and free it. */
00719         *p = saved_byte;
00720         CDB___os_freestr(path);
00721 
00722         if (ret != 0) {
00723                 CDB___db_err(dbenv, "%s: %s", dir, CDB_db_strerror(ret));
00724                 return (ret);
00725         }
00726 
00727         /*
00728          * Search for valid region names, and remove them.  We remove the
00729          * environment region last, because it's the key to this whole mess.
00730          */
00731         for (lastrm = -1, cnt = fcnt; --cnt >= 0;) {
00732                 if (strlen(names[cnt]) != DB_REGION_NAME_LENGTH ||
00733                     memcmp(names[cnt], DB_REGION_FMT, DB_REGION_NAME_NUM) != 0)
00734                         continue;
00735                 if (strcmp(names[cnt], DB_REGION_ENV) == 0) {
00736                         lastrm = cnt;
00737                         continue;
00738                 }
00739                 for (p = names[cnt] + DB_REGION_NAME_NUM;
00740                     *p != '\0' && isdigit((int)*p); ++p)
00741                         ;
00742                 if (*p != '\0')
00743                         continue;
00744 
00745                 if (CDB___db_appname(dbenv,
00746                     DB_APP_NONE, NULL, names[cnt], 0, NULL, &path) == 0) {
00747                         (void)CDB___os_unlink(dbenv, path);
00748                         CDB___os_freestr(path);
00749                 }
00750         }
00751 
00752         if (lastrm != -1)
00753                 if (CDB___db_appname(dbenv,
00754                     DB_APP_NONE, NULL, names[lastrm], 0, NULL, &path) == 0) {
00755                         (void)CDB___os_unlink(dbenv, path);
00756                         CDB___os_freestr(path);
00757                 }
00758         CDB___os_dirfree(names, fcnt);
00759 
00760         /*
00761          * !!!
00762          * Backward compatibility -- remove region files from releases
00763          * before 2.8.XX.
00764          */
00765         for (names = (char **)old_region_names; *names != NULL; ++names)
00766                 if (CDB___db_appname(dbenv,
00767                     DB_APP_NONE, NULL, *names, 0, NULL, &path) == 0) {
00768                         (void)CDB___os_unlink(dbenv, path);
00769                         CDB___os_freestr(path);
00770                 }
00771 
00772         return (0);
00773 }
00774 
00775 /*
00776  * CDB___db_e_stat
00777  *      Statistics for the environment.
00778  *
00779  * PUBLIC: int CDB___db_e_stat __P((DB_ENV *, REGENV *, REGION *, int *));
00780  */
00781 int
00782 CDB___db_e_stat(dbenv, arg_renv, arg_regions, arg_regions_cnt)
00783         DB_ENV *dbenv;
00784         REGENV *arg_renv;
00785         REGION *arg_regions;
00786         int *arg_regions_cnt;
00787 {
00788         REGENV *renv;
00789         REGINFO *infop;
00790         REGION *rp;
00791         int n;
00792 
00793         infop = dbenv->reginfo;
00794         renv = infop->primary;
00795         rp = infop->rp;
00796 
00797         /* Lock the environment. */
00798         MUTEX_LOCK(&rp->mutex, dbenv->lockfhp);
00799 
00800         *arg_renv = *renv;
00801 
00802         for (n = 0, rp = SH_LIST_FIRST(&renv->regionq, __db_region);
00803             n < *arg_regions_cnt && rp != NULL;
00804             ++n, rp = SH_LIST_NEXT(rp, q, __db_region))
00805                 arg_regions[n] = *rp;
00806 
00807         /* Release the lock. */
00808         rp = infop->rp;
00809         MUTEX_UNLOCK(&rp->mutex);
00810 
00811         *arg_regions_cnt = n == 0 ? n : n - 1;
00812 
00813         return (0);
00814 }
00815 
00816 /*
00817  * CDB___db_r_attach
00818  *      Join/create a region.
00819  *
00820  * PUBLIC: int CDB___db_r_attach __P((DB_ENV *, REGINFO *, size_t));
00821  */
00822 int
00823 CDB___db_r_attach(dbenv, infop, size)
00824         DB_ENV *dbenv;
00825         REGINFO *infop;
00826         size_t size;
00827 {
00828         REGENV *renv;
00829         REGION *rp;
00830         int ret;
00831         char buf[sizeof(DB_REGION_FMT) + 20];
00832 
00833         renv = ((REGINFO *)dbenv->reginfo)->primary;
00834         F_CLR(infop, REGION_CREATE);
00835 
00836         /* Lock the environment. */
00837         MUTEX_LOCK(&renv->mutex, dbenv->lockfhp);
00838 
00839         /* Find or create a REGION structure for this region. */
00840         if ((ret = __db_des_get(dbenv, dbenv->reginfo, infop, &rp)) != 0) {
00841                 MUTEX_UNLOCK(&renv->mutex);
00842                 return (ret);
00843         }
00844         infop->rp = rp;
00845         infop->id = rp->id;
00846 
00847         /* If we're creating the region, set the desired size. */
00848         if (F_ISSET(infop, REGION_CREATE))
00849                 rp->size = size;
00850 
00851         /* Join/create the underlying region. */
00852         (void)snprintf(buf, sizeof(buf), DB_REGION_FMT, infop->id);
00853         if ((ret = CDB___db_appname(dbenv,
00854             DB_APP_NONE, NULL, buf, 0, NULL, &infop->name)) != 0)
00855                 goto err;
00856         if ((ret = CDB___os_r_attach(dbenv, infop, rp)) != 0)
00857                 goto err;
00858 
00859         /*
00860          * Fault the pages into memory.  Note, do this BEFORE we initialize
00861          * anything because we're writing pages in created regions, not just
00862          * reading them.
00863          */
00864         (void)__db_faultmem(infop->addr,
00865             rp->size, F_ISSET(infop, REGION_CREATE));
00866 
00867         /*
00868          * !!!
00869          * The underlying layer may have just decided that we are going
00870          * to create the region.  There are various system issues that
00871          * can result in a useless region that requires re-initialization.
00872          *
00873          * If we created the region, initialize it for allocation.
00874          */
00875         if (F_ISSET(infop, REGION_CREATE)) {
00876                 ((REGION *)(infop->addr))->magic = DB_REGION_MAGIC;
00877 
00878                 (void)CDB___db_shalloc_init(infop->addr, rp->size);
00879         }
00880 
00881         /*
00882          * If the underlying REGION isn't the environment, acquire a lock
00883          * for it and release our lock on the environment.
00884          */
00885         if (infop->id != REG_ID_ENV) {
00886                 MUTEX_LOCK(&rp->mutex, dbenv->lockfhp);
00887                 MUTEX_UNLOCK(&renv->mutex);
00888         }
00889 
00890         return (0);
00891 
00892         /* Discard the underlying region. */
00893 err:    if (infop->addr != NULL)
00894                 (void)CDB___os_r_detach(dbenv,
00895                     infop, F_ISSET(infop, REGION_CREATE));
00896         infop->rp = NULL;
00897         infop->id = REG_ID_INVALID;
00898 
00899         /* Discard the REGION structure if we created it. */
00900         if (F_ISSET(infop, REGION_CREATE))
00901                 (void)__db_des_destroy(dbenv, rp);
00902 
00903         /* Release the environment lock. */
00904         MUTEX_UNLOCK(&renv->mutex);
00905 
00906         return (ret);
00907 }
00908 
00909 /*
00910  * CDB___db_r_detach --
00911  *      Detach from a region.
00912  *
00913  * PUBLIC: int CDB___db_r_detach __P((DB_ENV *, REGINFO *, int));
00914  */
00915 int
00916 CDB___db_r_detach(dbenv, infop, destroy)
00917         DB_ENV *dbenv;
00918         REGINFO *infop;
00919         int destroy;
00920 {
00921         REGENV *renv;
00922         REGION *rp;
00923         int ret, t_ret;
00924 
00925         renv = ((REGINFO *)dbenv->reginfo)->primary;
00926         rp = infop->rp;
00927 
00928         /* Lock the environment. */
00929         MUTEX_LOCK(&renv->mutex, dbenv->lockfhp);
00930 
00931         /* Acquire the lock for the REGION. */
00932         MUTEX_LOCK(&rp->mutex, dbenv->lockfhp);
00933 
00934         /* Detach from the underlying OS region. */
00935         ret = CDB___os_r_detach(dbenv, infop, destroy);
00936 
00937         /* Release the REGION lock. */
00938         MUTEX_UNLOCK(&rp->mutex);
00939 
00940         /* If we destroyed the region, discard the REGION structure. */
00941         if (destroy &&
00942             ((t_ret = __db_des_destroy(dbenv, rp)) != 0) && ret == 0)
00943                 ret = t_ret;
00944 
00945         /* Release the environment lock. */
00946         MUTEX_UNLOCK(&renv->mutex);
00947 
00948         /* Destroy the structure. */
00949         if (infop->name != NULL)
00950                 CDB___os_freestr(infop->name);
00951 
00952         return (ret);
00953 }
00954 
00955 /*
00956  * __db_des_get --
00957  *      Return a reference to the shared information for a REGION,
00958  *      optionally creating a new entry.
00959  */
00960 static int
00961 __db_des_get(dbenv, env_infop, infop, rpp)
00962         DB_ENV *dbenv;
00963         REGINFO *env_infop, *infop;
00964         REGION **rpp;
00965 {
00966         REGENV *renv;
00967         REGION *rp;
00968         int maxid, ret;
00969 
00970         /*
00971          * !!!
00972          * Called with the environment already locked.
00973          */
00974         *rpp = NULL;
00975         renv = env_infop->primary;
00976 
00977         maxid = REG_ID_ASSIGN;
00978         for (rp = SH_LIST_FIRST(&renv->regionq, __db_region);
00979             rp != NULL; rp = SH_LIST_NEXT(rp, q, __db_region)) {
00980                 if (rp->id == infop->id)
00981                         break;
00982                 if (rp->id > maxid)
00983                         maxid = rp->id;
00984         }
00985 
00986         /*
00987          * If we didn't find a region, or we found one needing initialization,
00988          * and we can't create the region, fail.  The caller generates
00989          * an error message.
00990          */
00991         if (!F_ISSET(infop, REGION_CREATE_OK) &&
00992             (rp == NULL || F_ISSET(rp, REG_DEAD)))
00993                 return (ENOENT);
00994 
00995         /*
00996          * If we didn't find a region, create and initialize a REGION structure
00997          * for the caller.  If id was set, use that value, otherwise we use the
00998          * next available ID.
00999          */
01000         if (rp == NULL) {
01001                 if ((ret = CDB___db_shalloc(env_infop->addr,
01002                     sizeof(REGION), MUTEX_ALIGN, &rp)) != 0)
01003                         return (ret);
01004 
01005                 /* Initialize the region. */
01006                 memset(rp, 0, sizeof(*rp));
01007                 if ((ret = __db_mutex_init(dbenv, &rp->mutex,
01008                     R_OFFSET(env_infop, &rp->mutex) + DB_FCNTL_OFF_GEN,
01009                     0)) != 0) {
01010                         CDB___db_shalloc_free(env_infop->addr, rp);
01011                         return (ret);
01012                 }
01013                 rp->segid = INVALID_REGION_SEGID;
01014                 rp->id = infop->id == REG_ID_INVALID ? maxid + 1 : infop->id;
01015 
01016                 SH_LIST_INSERT_HEAD(&renv->regionq, rp, q, __db_region);
01017                 F_SET(infop, REGION_CREATE);
01018         } else {
01019                 /*
01020                  * There is one race -- a caller created a region, was trying
01021                  * to initialize it for general use, and failed somehow.  We
01022                  * leave the region around and tell each new caller that they
01023                  * are creating it, because that's easier than dealing with
01024                  * the races involved in removing it.
01025                  */
01026                 if (F_ISSET(rp, REG_DEAD)) {
01027                         rp->primary = INVALID_ROFF;
01028 
01029                         F_CLR(rp, REG_DEAD);
01030                         F_SET(infop, REGION_CREATE);
01031                 }
01032         }
01033 
01034         *rpp = rp;
01035         return (0);
01036 }
01037 
01038 /*
01039  * __db_des_destroy --
01040  *      Destroy a reference to a REGION.
01041  */
01042 static int
01043 __db_des_destroy(dbenv, rp)
01044         DB_ENV *dbenv;
01045         REGION *rp;
01046 {
01047         REGINFO *infop;
01048 
01049         /*
01050          * !!!
01051          * Called with the environment already locked.
01052          */
01053         infop = dbenv->reginfo;
01054 
01055         SH_LIST_REMOVE(rp, q, __db_region);
01056         CDB___db_shalloc_free(infop->addr, rp);
01057 
01058         return (0);
01059 }
01060 
01061 /*
01062  * __db_faultmem --
01063  *      Fault the region into memory.
01064  */
01065 static int
01066 __db_faultmem(addr, size, created)
01067         void *addr;
01068         size_t size;
01069         int created;
01070 {
01071         int ret;
01072         u_int8_t *p, *t;
01073 
01074         /*
01075          * It's sometimes significantly faster to page-fault in all of the
01076          * region's pages before we run the application, as we see nasty
01077          * side-effects when we page-fault while holding various locks, i.e.,
01078          * the lock takes a long time to acquire because of the underlying
01079          * page fault, and the other threads convoy behind the lock holder.
01080          *
01081          * If we created the region, we write a non-zero value so that the
01082          * system can't cheat.  If we're just joining the region, we can
01083          * only read the value and try to confuse the compiler sufficiently
01084          * that it doesn't figure out that we're never really using it.
01085          */
01086         ret = 0;
01087         if (DB_GLOBAL(db_region_init)) {
01088                 if (created)
01089                         for (p = addr, t = (u_int8_t *)addr + size;
01090                             p < t; p += OS_VMPAGESIZE)
01091                                 p[0] = 0xdb;
01092                 else
01093                         for (p = addr, t = (u_int8_t *)addr + size;
01094                             p < t; p += OS_VMPAGESIZE)
01095                                 ret |= p[0];
01096         }
01097 
01098         return (ret);
01099 }

Generated on Sun Jun 8 10:56:37 2008 for GNUmifluz by  doxygen 1.5.5