region.h

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: region_8h-source.html,v 1.1 2008/06/08 10:21:52 sebdiaz Exp $
00008  */
00009 
00010 /*
00011  * The DB environment consists of some number of "regions", which are described
00012  * by the following four structures:
00013  *
00014  *      REGENV     -- shared information about the environment
00015  *      REGENV_REF -- file describing system memory version of REGENV
00016  *      REGION     -- shared information about a single region
00017  *      REGINFO    -- per-process information about a REGION
00018  *
00019  * There are three types of memory that hold regions:
00020  *      per-process heap (malloc)
00021  *      file mapped into memory (mmap, MapViewOfFile)
00022  *      system memory (shmget, CreateFileMapping)
00023  *
00024  * If the regions are private to a process, they're in malloc.  If they're
00025  * public, they're in file mapped memory, or, optionally, in system memory.
00026  * Regions in the filesystem are named "__db.001", "__db.002" and so on.  If
00027  * we're not using a private environment allocated using malloc(3), the file
00028  * "__db.001" will always exist, as we use it to synchronize on the regions,
00029  * whether they exist in file mapped memory or system memory.
00030  *
00031  * The file "__db.001" contains a REGENV structure and a linked list of some
00032  * number of REGION structures.  Each of the REGION structures describes and
00033  * locks one of the underlying shared regions used by DB.
00034  *
00035  *      __db.001
00036  *      +---------+
00037  *      |REGENV  |
00038  *      +---------+   +----------+
00039  *      |REGION   |-> | __db.002 |
00040  *      |         |   +----------+
00041  *      +---------+   +----------+
00042  *      |REGION   |-> | __db.003 |
00043  *      |         |   +----------+
00044  *      +---------+   +----------+
00045  *      |REGION   |-> | __db.004 |
00046  *      |         |   +----------+
00047  *      +---------+
00048  *
00049  * The only tricky part about manipulating the regions is correctly creating
00050  * or joining the REGENV file, i.e., __db.001.  We have to be absolutely sure
00051  * that only one process creates it, and that everyone else joins it without
00052  * seeing inconsistent data.  Once that region is created, we can use normal
00053  * shared locking procedures to do mutual exclusion for all other regions.
00054  *
00055  * One of the REGION structures in the main environment region describes the
00056  * environment region itself.
00057  *
00058  * To lock a region, locate the REGION structure that describes it and acquire
00059  * the region's mutex.  There is one exception to this rule -- the lock for the
00060  * environment region itself is in the REGENV structure, and not in the REGION
00061  * that describes the environment region.  That's so that we can acquire a lock
00062  * without walking linked lists that could potentially change underneath us.
00063  * The REGION will not be moved or removed during the life of the region, and
00064  * so long-lived references to it can be held by the process.
00065  *
00066  * All requests to create or join a region return a REGINFO structure, which
00067  * is held by the caller and used to open and subsequently close the reference
00068  * to the region.  The REGINFO structure contains the per-process information
00069  * that we need to access the region.
00070  *
00071  * The one remaining complication.  If the regions (including the environment
00072  * region) live in system memory, and the system memory isn't "named" somehow
00073  * in the filesystem name space, we need some way of finding it.  Do this by
00074  * writing the REGENV_REF structure into the "__db.001" file.  When we find
00075  * a __db.001 file that is too small to be a real, on-disk environment, we use
00076  * the information it contains to redirect to the real "__db.001" file/memory.
00077  * This currently only happens when the REGENV file is in shared system memory.
00078  *
00079  * Although DB does not currently grow regions when they run out of memory, it
00080  * would be possible to do so.  To grow a region, allocate a new region of the
00081  * appropriate size, then copy the old region over it and insert the additional
00082  * space into the already existing shalloc arena.  Callers may have to fix up
00083  * local references, but that should be easy to do.  This failed in historic
00084  * versions of DB because the region lock lived in the mapped memory, and when
00085  * it was unmapped and remapped (or copied), threads could lose track of it.
00086  * Once we moved that lock into a region that is never unmapped, growing should
00087  * work.  That all said, current versions of DB don't implement region grow
00088  * because some systems don't support mutex copying, e.g., from OSF1 V4.0:
00089  *
00090  *      The address of an msemaphore structure may be significant.  If the
00091  *      msemaphore structure contains any value copied from an msemaphore
00092  *      structure at a different address, the result is undefined.
00093  */
00094 
00095 #if defined(__cplusplus)
00096 extern "C" {
00097 #endif
00098 
00099 #define DB_REGION_FMT   "__db.%03d"     /* Region file name format; takes one int. */
00100 #define DB_REGION_NAME_NUM      5       /* Offset of the first digit in file names. */
00101 #define DB_REGION_NAME_LENGTH   8       /* Length of file names, e.g. "__db.001". */
00102 
00103 #define DB_REGION_ENV   "__db.001"      /* Primary environment file name. */
00104 
00105 #define INVALID_REGION_SEGID    -1      /* Out-of-band segment ID.  Segment IDs
00106                                          * are either shmget(2) or Win16 segment
00107                                          * identifiers; both are stored in a
00108                                          * "long" (the segid structure fields).
00109                                          */
00110 /*
00111  * roff_t is the type of a region offset.  Currently, region offsets are
00112  * limited to 32-bits.  I expect that's going to have to be fixed in the
00113  * not-too-distant future, since we won't want to split 100Gb memory pools
00114  * into that many different regions.  It's typedef'd so it won't be too
       * painful to upgrade.
00115  */
00116 typedef u_int32_t roff_t;
00117 
00118 /*
00119  * INVALID_ROFF is the out-of-band value for region offsets.  Nothing can
00120  * live at region offset 0, because, in all cases, that's where we store
00121  * *something*, and lots of code needs an out-of-band value, so we use 0.
00122  */
00123 #define INVALID_ROFF            0
00124 
00125 /*
       * Reference describing system memory version of REGENV.  Per the overview
       * comment at the top of this file, this structure is what gets written into
       * the "__db.001" file when the environment lives in shared system memory.
       */
00126 typedef struct __db_reg_env_ref {
00127         roff_t     size;                /* Region size. */
00128         long       segid;               /* UNIX shmget(2) ID. */
00129 } REGENV_REF;
00130 
00131 /* Per-environment region information, shared by every user of the environment. */
00132 typedef struct __db_reg_env {
00133         /*
00134          * !!!
00135          * The mutex must be the first entry in the structure to guarantee
00136          * correct alignment.
00137          */
00138         MUTEX      mutex;               /* Environment mutex. */
00139 
00140         /*
00141          * !!!
00142          * Note, the magic and panic fields are NOT protected by the mutex,
00143          * and for this reason cannot be anything more complicated than a
00144          * zero/non-zero value.
00145          *
00146          * !!!
00147          * Some 64-bit architectures (e.g., the OSF/1 Alpha processor) do not
00148          * support 32-bit atomic reads and writes, and so have an interesting
00149          * bug where sequential 32-bit values can be accidentally overwritten,
00150          * i.e., a variable protected by a lock gets overwritten by a thread
00151          * that doesn't hold the lock, simply because the variable sequentially
00152          * followed a variable that didn't need the lock for protection. We do
00153          * not want setting the panic value to be overwritten by another thread
00154          * unlocking the region, or vice-versa, for that matter.  As the magic
00155          * variable is written only during region creation, list it first to
00156          * ensure this cannot happen.
00157          *
00158          * !!!
00159          * The valid region magic number must appear at the same byte offset
00160          * in both the environment and each shared region, as Windows/95 uses
00161          * it to determine if the memory has been zeroed since it was last used.
00162          */
00163 #define DB_REGION_MAGIC 0x120897
00164         u_int32_t  magic;               /* Valid region magic number. */
00165 
00166         int        panic;               /* Environment is dead (tested by PANIC_CHECK). */
00167 
00168         int        majver;              /* Major DB version number. */
00169         int        minver;              /* Minor DB version number. */
00170         int        patch;               /* Patch DB version number. */
00171 
00172                                         /* List of REGION structures. */
00173         SH_LIST_HEAD(__db_regionh) regionq;
00174 
00175         u_int32_t  refcnt;              /* References to the environment. */
00176 
00177         size_t     pad;                 /* Guarantee that following memory is
00178                                          * size_t aligned.  This is necessary
00179                                          * because we're going to store the
00180                                          * allocation region information there.
00181                                          */
00182 } REGENV;
00183 
00184 /* Per-region shared region information; one REGION describes a single region. */
00185 typedef struct __db_region {
00186         /*
00187          * !!!
00188          * The mutex must be the first entry in the structure to guarantee
00189          * correct alignment.
00190          */
00191         MUTEX      mutex;               /* Region mutex. */
00192 
00193         /*
00194          * !!!
00195          * The valid region magic number must appear at the same byte offset
00196          * in both the environment and each shared region, as Windows/95 uses
00197          * it to determine if the memory has been zeroed since it was last used.
00198          */
00199         u_int32_t  magic;               /* Valid region magic number. */
00200 
00201         SH_LIST_ENTRY q;                /* Linked list of REGIONs. */
00202 
00203         roff_t     size;                /* Region size in bytes. */
00204 
00205         roff_t     primary;             /* Primary data structure offset. */
00206 
00207         long       segid;               /* UNIX shmget(2), Win16 segment ID. */
00208 
00209 #define REG_ID_INVALID  0               /* Invalid. */
00210 #define REG_ID_ENV      1               /* Environment. */
00211 #define REG_ID_LOCK     2               /* Lock region. */
00212 #define REG_ID_LOG      3               /* Log region. */
00213 #define REG_ID_MPOOL    4               /* Mpool region. */
00214 #define REG_ID_TXN      5               /* Txn region. */
00215 #define REG_ID_ASSIGN   (REG_ID_TXN + 1)/* First assignable region number. */
00216         int        id;                  /* Region id (a REG_ID_* value). */
00217 
00218 #define REG_DEAD        0x01            /* Region may be corrupted. */
00219         u_int32_t  flags;               /* Flag bits; REG_DEAD is defined above. */
00220 } REGION;
00221 
00222 /*
00223  * Per-process/per-attachment information about a single region.
       *
       * The fields are grouped by whether the comments below mark them as IN or
       * OUT parameters of CDB___db_r_attach.
       *
       * NOTE(review): PANIC_CHECK in this header casts to "REGINFO *"; presumably
       * REGINFO is a typedef of this structure declared elsewhere -- confirm.
00224  */
00225 struct __db_reginfo_t {         /* CDB___db_r_attach IN parameters. */
00226         int         id;                 /* Region id: used for naming. */
00227         int         mode;               /* File creation mode. */
00228 
00229                                 /* CDB___db_r_attach OUT parameters. */
00230         REGION     *rp;                 /* Shared region. */
00231 
00232         char       *name;               /* Region file name. */
00233 
00234         void       *addr;               /* Region allocation address. */
00235         void       *primary;            /* Primary data structure address. */
00236 
00237         void       *wnt_handle;         /* Win/NT HANDLE. */
00238 
00239 #define REGION_CREATE           0x01    /* Caller created region. */
00240 #define REGION_CREATE_OK        0x02    /* Caller willing to create region. */
00241         u_int32_t   flags;              /* REGION_CREATE* flag bits above. */
00242 };
00243 
00244 /*
00245  * R_ADDR       Return a per-process address for a shared region offset.
00246  * R_OFFSET     Return a shared region offset for a per-process address.
00247  *
       * In both macros "base" is a pointer to a structure with an "addr" member
       * holding the region's address in this process; the offset is a byte count
       * from that address.  Every macro argument is parenthesized in the
       * expansion, so an argument may be any expression (e.g. "a ? b : c").
       *
00248  * !!!
00249  * R_OFFSET should really be returning a ptrdiff_t, but that's not yet
00250  * portable.  We use u_int32_t, which restricts regions to 4Gb in size.
00251  */
00252 #define R_ADDR(base, offset)                                            \
00253         ((void *)((u_int8_t *)((base)->addr) + (offset)))
00254 #define R_OFFSET(base, p)                                               \
00255         ((u_int32_t)((u_int8_t *)(p) - (u_int8_t *)(base)->addr))
00256 
00257 /*
00258  * R_LOCK       Lock the region: MUTEX_LOCK on reginfo->rp->mutex, passing
       *              dbenv->lockfhp as the second argument.
00259  * R_UNLOCK     Unlock the same mutex; the dbenv argument is accepted but unused.
00260  */
00261 #define R_LOCK(dbenv, reginfo)                                          \
00262         MUTEX_LOCK(&(reginfo)->rp->mutex, (dbenv)->lockfhp)
00263 #define R_UNLOCK(dbenv, reginfo)                                        \
00264         MUTEX_UNLOCK(&(reginfo)->rp->mutex)
00265 
00266 /*
       * PANIC_CHECK: Check to see if the DB environment is dead.
       *
       * Returns DB_RUNRECOVERY from the *calling* function when all of these hold:
       * DB_GLOBAL(db_panic) is non-zero, dbenv->reginfo is not NULL, and the panic
       * field of the REGENV found at reginfo->primary is non-zero.
       *
       * !!!
       * The expansion is an unbraced "if" statement that already ends in its own
       * semicolon.  Use it only as a stand-alone statement inside a function whose
       * return type can carry DB_RUNRECOVERY, and never directly before an "else".
       */
00267 #define PANIC_CHECK(dbenv)                                              \
00268         if (DB_GLOBAL(db_panic) &&                                      \
00269             (dbenv)->reginfo != NULL && ((REGENV *)                     \
00270             ((REGINFO *)(dbenv)->reginfo)->primary)->panic != 0)        \
00271                 return (DB_RUNRECOVERY);
00272 
00273 /*
00274  * All regions are created on 8K boundaries out of sheer paranoia, so that
00275  * we don't make some underlying VM unhappy.
       *
       * OS_ROUNDOFF    Round i up, in place, to a multiple of s (assumes
       *                non-negative values).  i must be a modifiable lvalue.
       * OS_VMPAGESIZE  The 8K boundary, in bytes.
       * OS_VMROUNDOFF  OS_ROUNDOFF with s fixed at OS_VMPAGESIZE.
       *
       * !!!
       * OS_ROUNDOFF expands to a brace-enclosed block, not an expression, and it
       * evaluates both arguments more than once, so arguments must not have side
       * effects.
00276  */
00277 #define OS_ROUNDOFF(i, s) {                                             \
00278         (i) += (s) - 1;                                                 \
00279         (i) -= (i) % (s);                                               \
00280 }
00281 #define OS_VMPAGESIZE           (8 * 1024)
00282 #define OS_VMROUNDOFF(i)        OS_ROUNDOFF(i, OS_VMPAGESIZE)
00283 
00284 #if defined(__cplusplus)
00285 }
00286 #endif

Generated on Sun Jun 8 10:56:39 2008 for GNUmifluz by  doxygen 1.5.5