os_map.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 
00008 #include "config.h"
00009 
00010 #ifndef lint
00011 static const char revid[] = "$Id: os__map_8c-source.html,v 1.1 2008/06/08 10:21:18 sebdiaz Exp $";
00012 #endif /* not lint */
00013 
00014 #ifndef NO_SYSTEM_INCLUDES
00015 #include <sys/types.h>
00016 #ifdef HAVE_MMAP
00017 #include <sys/mman.h>
00018 #endif
00019 
00020 #ifdef HAVE_SHMGET
00021 #include <sys/ipc.h>
00022 #include <sys/shm.h>
00023 #endif
00024 
00025 #include <errno.h>
00026 #include <string.h>
00027 #endif
00028 
00029 #include "db_int.h"
00030 #include "db_page.h"
00031 #include "db_ext.h"
00032 #include "os_jump.h"
00033 
00034 #ifdef HAVE_MMAP
00035 static int __os_map __P((DB_ENV *, char *, DB_FH *, size_t, int, int, void **));
00036 #endif
00037 #ifndef HAVE_SHMGET
00038 static int __db_nosystemmem __P((DB_ENV *));
00039 #endif
00040 
00041 /*
00042  * CDB___os_r_sysattach --
00043  *      Create/join a shared memory region.
00044  *
00045  * PUBLIC: int CDB___os_r_sysattach __P((DB_ENV *, REGINFO *, REGION *));
00046  */
00047 int
00048 CDB___os_r_sysattach(dbenv, infop, rp)
00049         DB_ENV *dbenv;
00050         REGINFO *infop;
00051         REGION *rp;
00052 {
00053         if (F_ISSET(dbenv, DB_ENV_SYSTEM_MEM)) {
00054                 /*
00055                  * If the region is in system memory on UNIX, we use shmget(2).
00056                  *
00057                  * !!!
00058                  * There exist spinlocks that don't work in shmget memory, e.g.,
00059                  * the HP/UX msemaphore interface.  If we don't have locks that
00060                  * will work in shmget memory, we better be private and not be
00061                  * threaded.  If we reach this point, we know we're public, so
00062                  * it's an error.
00063                  */
00064 #if defined(MUTEX_NO_SHMGET_LOCKS)
00065                 CDB___db_err(dbenv,
00066             "architecture does not support locks inside system shared memory");
00067                 return (EINVAL);
00068 #endif
00069 #if defined(HAVE_SHMGET)
00070                 {
00071                 key_t segid;
00072                 int id, ret;
00073 
00074                 /*
00075                  * We require that the application provide use with a base
00076                  * System V IPC key value.
00077                  */
00078                 if (dbenv->shm_key == INVALID_REGION_SEGID) {
00079                         CDB___db_err(dbenv,
00080                             "no base system shared memory ID specified");
00081                         return (EINVAL);
00082                 }
00083                 segid = (key_t)dbenv->shm_key;
00084 
00085                 /*
00086                  * We could potentially create based on REGION_CREATE_OK, but
00087                  * that's dangerous -- we might get crammed in sideways if
00088                  * some of the expected regions exist but others do not.  Also,
00089                  * if the requested size differs from an existing region's
00090                  * actual size, then all sorts of nasty things can happen.
00091                  * Basing create solely on REGION_CREATE is much safer -- a
00092                  * recovery will get us straightened out.
00093                  */
00094                 if (F_ISSET(infop, REGION_CREATE)) {
00095                         /*
00096                          * If map to an existing region, assume the application
00097                          * crashed and we're restarting.  Delete the old region
00098                          * and re-try.  If that fails, return an error, the
00099                          * application will have to select a different segment
00100                          * ID or clean up some other way.
00101                          */
00102                         if ((id = shmget(segid, 0, 0)) != -1) {
00103                                 (void)shmctl(id, IPC_RMID, NULL);
00104                                 if ((id = shmget(segid, 0, 0)) != -1) {
00105                                         CDB___db_err(dbenv,
00106                 "shmget: key: %ld: shared system memory region already exists",
00107                                             (long)segid);
00108                                         return (EAGAIN);
00109                                 }
00110                         }
00111                         if ((rp->segid =
00112                             shmget(segid, rp->size, IPC_CREAT | 0600)) == -1) {
00113                                 ret = CDB___os_get_errno();
00114                                 CDB___db_err(dbenv,
00115         "shmget: key: %ld: unable to create shared system memory region: %s",
00116                                     (long)segid, strerror(ret));
00117                                 return (ret);
00118                         }
00119 
00120                         /*
00121                          * Increment the base segment value to identify a new
00122                          * segment.
00123                          */
00124                         ++dbenv->shm_key;
00125                 }
00126 
00127                 if ((infop->addr = shmat(rp->segid, NULL, 0)) == (void *)-1) {
00128                         infop->addr = NULL;
00129                         ret = CDB___os_get_errno();
00130                         CDB___db_err(dbenv,
00131         "shmat: id %ld: unable to attach to shared system memory region: %s",
00132                             rp->segid, strerror(ret));
00133                         return (ret);
00134                 }
00135 
00136                 return (0);
00137                 }
00138 #else
00139                 return (__db_nosystemmem(dbenv));
00140 #endif
00141         }
00142 
00143 #ifdef HAVE_MMAP
00144         {
00145         DB_FH fh;
00146         int ret;
00147 
00148         /*
00149          * Try to open/create the file.  We DO NOT need to ensure that multiple
00150          * threads/processes attempting to simultaneously create the region are
00151          * properly ordered, our caller has already taken care of that.
00152          */
00153         if ((ret = CDB___os_open(dbenv, infop->name,
00154             F_ISSET(infop, REGION_CREATE_OK) ? DB_OSO_CREATE: 0,
00155             infop->mode, &fh)) != 0)
00156                 CDB___db_err(dbenv, "%s: %s", infop->name, CDB_db_strerror(ret));
00157 
00158         /*
00159          * If we created the file, grow it to its full size before mapping
00160          * it in.  We really want to avoid touching the buffer cache after
00161          * mmap(2) is called, doing anything else confuses the hell out of
00162          * systems without merged VM/buffer cache systems, or, more to the
00163          * point, *badly* merged VM/buffer cache systems.
00164          */
00165         if (ret == 0 && F_ISSET(infop, REGION_CREATE))
00166                 ret = CDB___os_finit(dbenv,
00167                     &fh, rp->size, DB_GLOBAL(db_region_init));
00168 
00169         /* Map the file in. */
00170         if (ret == 0)
00171                 ret = __os_map(dbenv,
00172                     infop->name, &fh, rp->size, 1, 0, &infop->addr);
00173 
00174          (void)CDB___os_closehandle(&fh);
00175 
00176         return (ret);
00177         }
00178 #else
00179         COMPQUIET(infop, NULL);
00180         COMPQUIET(rp, NULL);
00181         CDB___db_err(dbenv,
00182             "architecture lacks mmap(2), shared environments not possible");
00183         return (CDB___db_eopnotsup(dbenv));
00184 #endif
00185 }
00186 
00187 /*
00188  * CDB___os_r_sysdetach --
00189  *      Detach from a shared memory region.
00190  *
00191  * PUBLIC: int CDB___os_r_sysdetach __P((DB_ENV *, REGINFO *, int));
00192  */
00193 int
00194 CDB___os_r_sysdetach(dbenv, infop, destroy)
00195         DB_ENV *dbenv;
00196         REGINFO *infop;
00197         int destroy;
00198 {
00199         REGION *rp;
00200 
00201         rp = infop->rp;
00202 
00203         if (F_ISSET(dbenv, DB_ENV_SYSTEM_MEM)) {
00204 #ifdef HAVE_SHMGET
00205                 int ret, segid;
00206 
00207                 /*
00208                  * We may be about to remove the memory referenced by rp,
00209                  * save the segment ID, and (optionally) wipe the original.
00210                  */
00211                 segid = rp->segid;
00212                 if (destroy)
00213                         rp->segid = INVALID_REGION_SEGID;
00214 
00215                 if (shmdt(infop->addr) != 0) {
00216                         ret = CDB___os_get_errno();
00217                         CDB___db_err(dbenv, "shmdt: %s", strerror(ret));
00218                         return (ret);
00219                 }
00220 
00221                 if (destroy && shmctl(segid, IPC_RMID,
00222                     NULL) != 0 && (ret = CDB___os_get_errno()) != EINVAL) {
00223                         CDB___db_err(dbenv,
00224             "shmctl: id %ld: unable to delete system shared memory region: %s",
00225                             segid, strerror(ret));
00226                         return (ret);
00227                 }
00228 
00229                 return (0);
00230 #else
00231                 return (__db_nosystemmem(dbenv));
00232 #endif
00233         }
00234 
00235 #ifdef HAVE_MMAP
00236 #ifdef HAVE_MUNLOCK
00237         if (F_ISSET(dbenv, DB_ENV_LOCKDOWN))
00238                 (void)munlock(infop->addr, rp->size);
00239 #endif
00240         if (munmap(infop->addr, rp->size) != 0) {
00241                 int ret;
00242 
00243                 ret = CDB___os_get_errno();
00244                 CDB___db_err(dbenv, "munmap: %s", strerror(ret));
00245                 return (ret);
00246         }
00247 
00248         if (destroy && CDB___os_unlink(dbenv, infop->name) != 0)
00249                 return (CDB___os_get_errno());
00250 
00251         return (0);
00252 #else
00253         COMPQUIET(destroy, 0);
00254         return (EINVAL);
00255 #endif
00256 }
00257 
00258 /*
00259  * CDB___os_mapfile --
00260  *      Map in a shared memory file.
00261  *
00262  * PUBLIC: int CDB___os_mapfile __P((DB_ENV *,
00263  * PUBLIC:     char *, DB_FH *, size_t, int, void **));
00264  */
00265 int
00266 CDB___os_mapfile(dbenv, path, fhp, len, is_rdonly, addrp)
00267         DB_ENV *dbenv;
00268         char *path;
00269         DB_FH *fhp;
00270         int is_rdonly;
00271         size_t len;
00272         void **addrp;
00273 {
00274 #ifdef HAVE_MMAP
00275         return (__os_map(dbenv, path, fhp, len, 0, is_rdonly, addrp));
00276 #else
00277         COMPQUIET(dbenv, NULL);
00278         COMPQUIET(path, NULL);
00279         COMPQUIET(fhp, NULL);
00280         COMPQUIET(is_rdonly, 0);
00281         COMPQUIET(len, 0);
00282         COMPQUIET(addrp, NULL);
00283         return (EINVAL);
00284 #endif
00285 }
00286 
00287 /*
00288  * CDB___os_unmapfile --
00289  *      Unmap the shared memory file.
00290  *
00291  * PUBLIC: int CDB___os_unmapfile __P((DB_ENV *, void *, size_t));
00292  */
00293 int
00294 CDB___os_unmapfile(dbenv, addr, len)
00295         DB_ENV *dbenv;
00296         void *addr;
00297         size_t len;
00298 {
00299         /* If the user replaced the map call, call through their interface. */
00300         if (CDB___db_jump.j_unmap != NULL)
00301                 return (CDB___db_jump.j_unmap(addr, len));
00302 
00303 #ifdef HAVE_MMAP
00304 #ifdef HAVE_MUNLOCK
00305         if (F_ISSET(dbenv, DB_ENV_LOCKDOWN))
00306                 (void)munlock(addr, len);
00307 #else
00308         COMPQUIET(dbenv, NULL);
00309 #endif
00310         return (munmap(addr, len) ? CDB___os_get_errno() : 0);
00311 #else
00312         COMPQUIET(dbenv, NULL);
00313 
00314         return (EINVAL);
00315 #endif
00316 }
00317 
#ifdef HAVE_MMAP
/*
 * __os_map --
 *      Call the mmap(2) function.
 *
 *      Maps len bytes of the file behind fhp starting at offset 0 and, on
 *      success, stores the mapped address in *addrp and returns 0.  On
 *      failure *addrp is left untouched and the errno value is returned.
 *      If the user installed a j_map hook, the request is passed to it
 *      instead and its return value is returned.
 */
static int
__os_map(dbenv, path, fhp, len, is_region, is_rdonly, addrp)
        DB_ENV *dbenv;
        char *path;
        DB_FH *fhp;
        int is_region, is_rdonly;
        size_t len;
        void **addrp;
{
        void *mapped;
        int flags, prot, ret;

        /* If the user replaced the map call, call through their interface. */
        if (CDB___db_jump.j_map != NULL)
                return (CDB___db_jump.j_map
                    (path, len, is_region, is_rdonly, addrp));

        /*
         * Read-only mappings are private and readable; everything else is
         * shared and read/write.  No extra parameter is needed for this.
         */
        if (is_rdonly) {
                flags = MAP_PRIVATE;
                prot = PROT_READ;
        } else {
                flags = MAP_SHARED;
                prot = PROT_READ | PROT_WRITE;
        }

#ifdef MAP_FILE
        /*
         * Historically, MAP_FILE was required for mapping regular files,
         * even though it was the default.  Some systems have it, some
         * don't, some that have it set it to 0.
         */
        flags |= MAP_FILE;
#endif

        /*
         * Where the system offers a flag announcing that the mapping holds
         * semaphores, set it for writable regions.  It makes no sense for
         * read-only files, so it is not set for them.
         */
#ifdef MAP_HASSEMAPHORE
        if (is_region && !is_rdonly)
                flags |= MAP_HASSEMAPHORE;
#else
        COMPQUIET(is_region, 0);
#endif

        /*
         * XXX
         * Work around a bug in the VMS V7.1 mmap() implementation.  The VMS
         * mmap() closes and re-opens the file to get it opened the way it
         * needs, and does not flush caches to disk before closing.  When
         * the cached data hasn't been written out, the file is too small
         * for the requested mapping and mmap() fails.  Syncing the file
         * first avoids the problem.
         */
#ifdef VMS
        if (CDB___os_fsync(dbenv, fhp) == -1)
                return(CDB___os_get_errno());
#endif

        /* MAP_FAILED was not defined in early mmap implementations. */
#ifndef MAP_FAILED
#define MAP_FAILED      -1
#endif
        mapped = mmap(NULL, len, prot, flags, fhp->fd, (off_t)0);
        if (mapped == (void *)MAP_FAILED) {
                ret = CDB___os_get_errno();
                CDB___db_err(dbenv, "mmap: %s", strerror(ret));
                return (ret);
        }

#ifdef HAVE_MLOCK
        /*
         * When the environment asks for lock-down, pin the mapping so it is
         * not paged out.  mlock may be refused (it is restricted to the
         * super-user on some systems); in that case the mapping is released
         * again and the mlock error is returned.
         */
        if (F_ISSET(dbenv, DB_ENV_LOCKDOWN) && mlock(mapped, len) != 0) {
                /* Capture errno before munmap has a chance to change it. */
                ret = CDB___os_get_errno();
                (void)munmap(mapped, len);
                CDB___db_err(dbenv, "mlock: %s", strerror(ret));
                return (ret);
        }
#else
        COMPQUIET(dbenv, NULL);
#endif

        *addrp = mapped;
        return (0);
}
#endif
00424 
00425 #ifndef HAVE_SHMGET
00426 /*
00427  * __db_nosystemmem --
00428  *      No system memory environments error message.
00429  */
00430 static int
00431 __db_nosystemmem(dbenv)
00432         DB_ENV *dbenv;
00433 {
00434         CDB___db_err(dbenv,
00435             "architecture doesn't support environments in system memory");
00436         return (CDB___db_eopnotsup(dbenv));
00437 }
00438 #endif

Generated on Sun Jun 8 10:56:38 2008 for GNUmifluz by  doxygen 1.5.5