mp.h

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  *
00007  * $Id: mp_8h-source.html,v 1.1 2008/06/08 10:20:35 sebdiaz Exp $
00008  */
00009 
00010 struct __bh;            typedef struct __bh BH;                 /* Buffer header. */
00011 struct __db_mpool;      typedef struct __db_mpool DB_MPOOL;     /* Per-process memory pool. */
00012 struct __db_mpreg;      typedef struct __db_mpreg DB_MPREG;     /* pgin/pgout registry entry. */
00013 struct __mpool;         typedef struct __mpool MPOOL;           /* Shared memory pool region. */
00014 struct __mpoolfile;     typedef struct __mpoolfile MPOOLFILE;   /* Shared per-file information. */
00015 struct __cmpr;          typedef struct __cmpr CMPR;             /* Compressed page header. */
00016 
00017 /* We require at least 20KB of cache. */
00018 #define DB_CACHESIZE_MIN        (20 * 1024)
00019 
00020 /*
00021  * DB_MPOOL --
00022  *      Per-process memory pool structure.
00023  */
00024 struct __db_mpool {
00025         /* These fields need to be protected for multi-threaded support. */
00026         MUTEX      *mutexp;             /* Structure thread lock. */
00027 
00028                                         /* List of pgin/pgout routines (DB_MPREG entries). */
00029         LIST_HEAD(__db_mpregh, __db_mpreg) dbregq;
00030 
00031                                         /* List of DB_MPOOLFILE's. */
00032         TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq;
00033 
00034         /* These fields are not thread-protected. */
00035         DB_ENV     *dbenv;              /* Reference to error information. */
00036 
00037         u_int32_t   nreg;               /* Number of underlying cache regions. */
00038         REGINFO    *reginfo;            /* Underlying cache regions (see BH_TO_CACHE). */
00039 };
00040 
00041 /*
00042  * DB_MPREG --
00043  *      DB_MPOOL registry of pgin/pgout functions.
00044  */
00045 struct __db_mpreg {
00046         LIST_ENTRY(__db_mpreg) q;       /* Linkage in DB_MPOOL's dbregq list. */
00047 
00048         int ftype;                      /* File type. */
00049                                         /* Pgin, pgout routines registered with ftype. */
00050         int (*pgin) __P((DB_ENV *, db_pgno_t, void *, DBT *));
00051         int (*pgout) __P((DB_ENV *, db_pgno_t, void *, DBT *));
00052 };
00053 
00054 /*
00055  * DB_MPOOLFILE --
00056  *      Per-process DB_MPOOLFILE information.
00057  */
00058 struct __db_mpoolfile {
00059         /* These fields need to be protected for multi-threaded support. */
00060         MUTEX     *mutexp;              /* Structure thread lock. */
00061 
00062         DB_FH      fh;                  /* Underlying file handle. */
00063 
00064         u_int32_t ref;                  /* Reference count. */
00065 
00066         /*
00067          * !!!
00068          * This field is a special case -- it's protected by the region lock
00069          * NOT the thread lock.  The reason for this is that we always have
00070          * the region lock immediately before or after we modify the field,
00071          * and we don't want to use the structure lock to protect it because
00072          * then I/O (which is done with the structure lock held because of
00073          * the race between the seek and write of the file descriptor) will
00074          * block any other put/get calls using this DB_MPOOLFILE structure.
00075          */
00076         u_int32_t pinref;               /* Pinned block reference count. */
00077 
00078         /*
00079          * !!!
00080          * This field is a special case -- it's protected by the region lock
00081          * since it's manipulated only when new files are added to the list.
00082          */
00083         TAILQ_ENTRY(__db_mpoolfile) q;  /* Linkage in DB_MPOOL's dbmfq list. */
00084 
00085         /* These fields are not thread-protected. */
00086         DB_MPOOL  *dbmp;                /* Overlying DB_MPOOL. */
00087         MPOOLFILE *mfp;                 /* Underlying MPOOLFILE. */
00088 
00089         void      *addr;                /* Address of mmap'd region. */
00090         size_t     len;                 /* Length of mmap'd region. */
00091 
00092         /* These fields need to be protected for multi-threaded support. */
00093 #define MP_READONLY     0x01            /* File is readonly. */
00094 #define MP_UPGRADE      0x02            /* File descriptor is readwrite. */
00095 #define MP_UPGRADE_FAIL 0x04            /* Upgrade wasn't possible. */
00096 #define MP_CMPR         0x08            /* Transparent I/O compression. */
00097         u_int32_t  flags;               /* Bitmask of the MP_* values above. */
00098 };
00099 
00100 /*
00101  * NCACHE --
00102  *      Select a cache based on the page number.  This assumes accesses are
00103  *      uniform across pages, which is probably OK -- what we really want to
00104  *      avoid is anything that puts all the pages for any single file in the
00105  *      same cache, as we expect that file access will be bursty.
00106  *      mp is cast to MPOOL *, so it may be any pointer-valued expression. */
00107 #define NCACHE(mp, pgno)                                                \
00108         ((pgno) % ((MPOOL *)(mp))->nreg)
00109 
00110 /*
00111  * NBUCKET --
00112  *       We make the assumption that early pages of the file are more likely
00113  *       to be retrieved than the later pages, which means the top bits will
00114  *       be more interesting for hashing as they're less likely to collide.
00115  *       That said, 512 8K pages represent a 4MB file, so only reasonably
00116  *       large files will have page numbers with any bits other than the
00117  *       bottom 9 set.  We XOR in the MPOOL offset of the MPOOLFILE that backs
00118  *       the page, since that should also be unique for the page.  We don't want
00119  *       to do anything very fancy -- speed is more important to us than using
00120  *       good hashing.
00121  */
00122 #define NBUCKET(mc, mf_offset, pgno)                                    \
00123         (((pgno) ^ ((mf_offset) << 9)) % (mc)->htab_buckets)
00124 
00125 /*
00126  * MPOOL --
00127  *      Shared memory pool region.
00128  */
00129 struct __mpool {
00130         /*
00131          * The memory pool can be broken up into individual pieces/files.
00132          * Not what we would have liked, but on Solaris you can allocate
00133          * only a little more than 2GB of memory in a contiguous chunk,
00134          * and I expect to see more systems with similar issues.
00135          *
00136          * The first of these pieces/files describes the entire pool, all
00137          * subsequent ones only describe a part of the cache.
00138          *
00139          * We single-thread CDB_memp_sync and CDB_memp_fsync calls.
00140          *
00141          * This mutex is intended *only* to single-thread access to the call,
00142          * it is not used to protect the lsn and lsn_cnt fields, the region
00143          * lock is used to protect them.
00144          */
00145         MUTEX     sync_mutex;           /* Checkpoint lock. */
00146         DB_LSN    lsn;                  /* Maximum checkpoint LSN. */
00147         u_int32_t lsn_cnt;              /* Checkpoint buffers left to write. */
00148 
00149         SH_TAILQ_HEAD(__mpfq) mpfq;     /* List of MPOOLFILEs. */
00150 
00151         u_int32_t nreg;                 /* Number of underlying REGIONS (NCACHE modulus). */
00152         roff_t    regids;               /* Array of underlying REGION Ids. */
00153 
00154 #define MP_LSN_RETRY    0x01            /* Retry all BH_WRITE buffers. */
00155         u_int32_t  flags;               /* Bitmask; MP_LSN_RETRY is defined above. */
00156 
00157         /*
00158          * The following structure fields only describe the cache portion of
00159          * the region.
00160          */
00161         SH_TAILQ_HEAD(__bhq) bhq;       /* LRU list of buffer headers. */
00162 
00163         int         htab_buckets;       /* Number of hash table entries (NBUCKET modulus). */
00164         roff_t      htab;               /* Hash table offset. */
00165 
00166         DB_MPOOL_STAT stat;             /* Per-cache mpool statistics. */
00167 };
00168 
00169 /*
00170  * MPOOLFILE --
00171  *      Shared DB_MPOOLFILE information.
00172  */
00173 struct __mpoolfile {
00174         SH_TAILQ_ENTRY  q;              /* Linkage in the list of MPOOLFILEs. */
00175 
00176         db_pgno_t ref_cnt;              /* Ref count: pages or DB_MPOOLFILEs. */
00177         db_pgno_t lsn_cnt;              /* Checkpoint buffers left to write. */
00178 
00179         int       ftype;                /* File type. */
00180         int32_t   lsn_off;              /* Page's LSN offset. */
00181         u_int32_t clear_len;            /* Bytes to clear on page create. */
00182 
00183         roff_t    path_off;             /* File name location. */
00184         roff_t    fileid_off;           /* File identification location. */
00185 
00186         roff_t    pgcookie_len;         /* Pgin/pgout cookie length. */
00187         roff_t    pgcookie_off;         /* Pgin/pgout cookie location. */
00188 
00189         db_pgno_t last_pgno;            /* Last page in the file. */
00190         db_pgno_t orig_last_pgno;       /* Original last page in the file. */
00191 
00192         db_pgno_t cmpr_free;            /* Compression free list. */
00193 
00194         DB_MPOOL_FSTAT stat;            /* Per-file mpool statistics. */
00195 
00196 #define MP_CAN_MMAP     0x01            /* If the file can be mmap'd. */
00197 #define MP_DEADFILE     0x02            /* Dirty pages can simply be trashed. */
00198 #define MP_TEMP         0x04            /* Backing file is a temporary. */
00199         u_int32_t  flags;               /* Bitmask of the MP_* values above. */
00200 };
00201 
00202 /*
00203  * BH_TO_CACHE --
00204  *      Return the cache where we can find the specified buffer header:
00205  *      the reginfo[] entry picked by NCACHE from the buffer's page number. */
00206 #define BH_TO_CACHE(dbmp, bhp)                                          \
00207         (dbmp)->reginfo[NCACHE((dbmp)->reginfo[0].primary, (bhp)->pgno)].primary
00208 
00209 /*
00210  * DB_CMPR --
00211  *      Page compression information
00212  *
00213  * !!!
00214  * There is no need to keep the length of the data written
00215  * in the page since it's already encoded in the compressed
00216  * data.
00217  */
00218 
00219 /*
00220  * Convert size to expected compressed size (DIVIDE, right shift) and back
00221  * (MULTIPLY, left shift) by CDB___memp_cmpr_coefficient(dbenv) bits. */
00222 #define DB_CMPR_DIVIDE(dbenv, size) ((size) >> CDB___memp_cmpr_coefficient(dbenv) )
00223 #define DB_CMPR_MULTIPLY(dbenv, size) ((size) << CDB___memp_cmpr_coefficient(dbenv) )
00224 
00225 struct __cmpr {
00226 #define DB_CMPR_FIRST           0x01 /* Head of chain. */
00227 #define DB_CMPR_INTERNAL        0x02 /* Weak compression data. */
00228 #define DB_CMPR_CHAIN           0x04 /* More data in next page. */
00229 #define DB_CMPR_FREE            0x08 /* Not in use. */
00230 
00231   u_int16_t flags; /* Bitmask of the DB_CMPR_* values above. */
00232 
00233   /* 
00234    * Filled if DB_CMPR_CHAIN set: page number where the data continues.
00235    */
00236   db_pgno_t next;
00237 };
00238 
00239 /*
00240  * Reserved information at the beginning of each compressed page
00241  */
00242 #define DB_CMPR_OVERHEAD        (sizeof(struct __cmpr))
00243 /*
00244  * Size of IO page, without the reserved information (io is a pointer)
00245  */
00246 #define DB_CMPR_PAGESIZE(io)    ((io)->pagesize - DB_CMPR_OVERHEAD)
00247 /*
00248  * Pointer to data within raw compressed buffer (io is a pointer)
00249  */
00250 #define DB_CMPR_DATA(io)        ((io)->buf + DB_CMPR_OVERHEAD)
00251 
00252 /*
00253  * BH --
00254  *      Buffer header.
00255  */
00256 struct __bh {
00257         MUTEX           mutex;          /* Buffer thread/process lock. */
00258 
00259         u_int16_t       ref;            /* Reference count. */
00260 
00261 #define BH_CALLPGIN     0x001           /* Page needs to be reworked... */
00262 #define BH_DIRTY        0x002           /* Page was modified. */
00263 #define BH_DISCARD      0x004           /* Page is useless. */
00264 #define BH_LOCKED       0x008           /* Page is locked (I/O in progress). */
00265 #define BH_TRASH        0x010           /* Page is garbage. */
00266 #define BH_WRITE        0x020           /* Page scheduled for writing. */
00267 #define BH_CMPR         0x040           /* Chain contains valid data. */
00268 #define BH_CMPR_POOL    0x080           /* Chain allocated in pool. */
00269 #define BH_CMPR_OS      0x100           /* Chain allocated with malloc. */
00270         u_int16_t  flags;               /* Bitmask of the BH_* values above. */
00271 
00272         db_pgno_t *chain;               /* Compression chain. */
00273 
00274         SH_TAILQ_ENTRY  q;              /* LRU queue. */
00275         SH_TAILQ_ENTRY  hq;             /* MPOOL hash bucket queue. */
00276 
00277         db_pgno_t pgno;                 /* Underlying MPOOLFILE page number. */
00278         roff_t    mf_offset;            /* Associated MPOOLFILE offset. */
00279 
00280         /*
00281          * !!!
00282          * This array must be size_t aligned -- the DB access methods put PAGE
00283          * and other structures into it, and expect to be able to access them
00284          * directly.  (We guarantee size_t alignment in the documentation too.)
00285          */
00286         u_int8_t   buf[1];              /* Variable length data. */
00287 };
00288 
00289 #include "mp_ext.h"

Generated on Sun Jun 8 10:56:38 2008 for GNUmifluz by  doxygen 1.5.5