mp_bh.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 #include "config.h"
00008 
00009 #ifndef lint
00010 static const char revid[] = "$Id: mp__bh_8c-source.html,v 1.1 2008/06/08 10:20:39 sebdiaz Exp $";
00011 #endif /* not lint */
00012 
00013 #ifndef NO_SYSTEM_INCLUDES
00014 #include <sys/types.h>
00015 
00016 #include <errno.h>
00017 #include <string.h>
00018 #include <unistd.h>
00019 #endif
00020 
00021 #include "db_int.h"
00022 #include "db_shash.h"
00023 #include "mp.h"
00024 #include "db_page.h"
00025 
00026 #ifdef DEBUG
00027 #include "WordMonitor.h"
00028 #endif /* DEBUG */
00029 
00030 static int __memp_upgrade __P((DB_MPOOL *, DB_MPOOLFILE *, MPOOLFILE *));
00031 
00032 /*
00033  * CDB___memp_bhwrite --
00034  *      Write the page associated with a given bucket header.
00035  *
00036  * PUBLIC: int CDB___memp_bhwrite
00037  * PUBLIC:     __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *));
00038  */
00039 int
00040 CDB___memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
00041         DB_MPOOL *dbmp;
00042         MPOOLFILE *mfp;
00043         BH *bhp;
00044         int *restartp, *wrotep;
00045 {
00046         DB_MPOOLFILE *dbmfp;
00047         DB_MPREG *mpreg;
00048         int incremented, ret;
00049 
00050         if (restartp != NULL)
00051                 *restartp = 0;
00052         if (wrotep != NULL)
00053                 *wrotep = 0;
00054         incremented = 0;
00055 
00056         /*
00057          * If the file has been removed or is a closed temporary file, Jump
00058          * right ahead and pretend that we've found the file we want-- the
00059          * page-write function knows how to handle the fact that we don't have
00060          * (or need!) any real file descriptor information.
00061          */
00062         if (F_ISSET(mfp, MP_DEADFILE)) {
00063                 dbmfp = NULL;
00064                 goto found;
00065         }
00066 
00067         /*
00068          * Walk the process' DB_MPOOLFILE list and find a file descriptor for
00069          * the file.  We also check that the descriptor is open for writing.
00070          * If we find a descriptor on the file that's not open for writing, we
00071          * try and upgrade it to make it writeable.  If that fails, we're done.
00072          */
00073         MUTEX_THREAD_LOCK(dbmp->mutexp);
00074         for (dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
00075             dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q))
00076                 if (dbmfp->mfp == mfp) {
00077                         if (F_ISSET(dbmfp, MP_READONLY) &&
00078                             __memp_upgrade(dbmp, dbmfp, mfp)) {
00079                                 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00080                                 return (0);
00081                         }
00082 
00083                         /*
00084                          * Increment the reference count -- see the comment in
00085                          * CDB_memp_fclose().
00086                          */
00087                         ++dbmfp->ref;
00088                         incremented = 1;
00089                         break;
00090                 }
00091         MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00092         if (dbmfp != NULL)
00093                 goto found;
00094 
00095         /*
00096          * !!!
00097          * Don't try to attach to temporary files.  There are two problems in
00098          * trying to do that.  First, if we have different privileges than the
00099          * process that "owns" the temporary file, we might create the backing
00100          * disk file such that the owning process couldn't read/write its own
00101          * buffers, e.g., CDB_memp_trickle() running as root creating a file owned
00102          * as root, mode 600.  Second, if the temporary file has already been
00103          * created, we don't have any way of finding out what its real name is,
00104          * and, even if we did, it was already unlinked (so that it won't be
00105          * left if the process dies horribly).  This decision causes a problem,
00106          * however: if the temporary file consumes the entire buffer cache,
00107          * and the owner doesn't flush the buffers to disk, we could end up
00108          * with resource starvation, and the CDB_memp_trickle() thread couldn't do
00109          * anything about it.  That's a pretty unlikely scenario, though.
00110          *
00111          * Note that we should never get here when the temporary file
00112          * in question has already been closed in another process, in which
00113          * case it should be marked MP_DEADFILE.
00114          */
00115         if (F_ISSET(mfp, MP_TEMP)) {
00116                 DB_ASSERT(!F_ISSET(mfp, MP_DEADFILE));
00117                 return (0);
00118         }
00119 
00120         /*
00121          * It's not a page from a file we've opened.  If the file requires
00122          * input/output processing, see if this process has ever registered
00123          * information as to how to write this type of file.  If not, there's
00124          * nothing we can do.
00125          */
00126         if (mfp->ftype != 0) {
00127                 MUTEX_THREAD_LOCK(dbmp->mutexp);
00128                 for (mpreg = LIST_FIRST(&dbmp->dbregq);
00129                     mpreg != NULL; mpreg = LIST_NEXT(mpreg, q))
00130                         if (mpreg->ftype == mfp->ftype)
00131                                 break;
00132                 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00133                 if (mpreg == NULL)
00134                         return (0);
00135         }
00136 
00137         /*
00138          * Try and open the file, attaching to the underlying shared area.
00139          * Ignore any error, assume it's a permissions problem.
00140          *
00141          * XXX
00142          * There's no negative cache, so we may repeatedly try and open files
00143          * that we have previously tried (and failed) to open.
00144          */
00145         if (CDB___memp_fopen(dbmp, mfp, R_ADDR(dbmp->reginfo, mfp->path_off),
00146             0, 0, mfp->stat.st_pagesize, 0, NULL, &dbmfp) != 0)
00147                 return (0);
00148 
00149 found:  ret = CDB___memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep);
00150 
00151         if (incremented) {
00152                 MUTEX_THREAD_LOCK(dbmp->mutexp);
00153                 --dbmfp->ref;
00154                 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00155         }
00156 
00157         return (ret);
00158 }
00159 
00160 /*
00161  * CDB___memp_pgread --
00162  *      Read a page from a file.
00163  *
00164  * PUBLIC: int CDB___memp_pgread __P((DB_MPOOLFILE *, BH *, int));
00165  */
00166 int
00167 CDB___memp_pgread(dbmfp, bhp, can_create)
00168         DB_MPOOLFILE *dbmfp;
00169         BH *bhp;
00170         int can_create;
00171 {
00172         DB_IO db_io;
00173         DB_ENV *dbenv;
00174         DB_MPOOL *dbmp;
00175         MPOOLFILE *mfp;
00176         size_t len, pagesize;
00177         size_t nr;
00178         int created, ret;
00179 
00180         dbmp = dbmfp->dbmp;
00181         dbenv = dbmp->dbenv;
00182         mfp = dbmfp->mfp;
00183         pagesize = mfp->stat.st_pagesize;
00184 
00185         F_SET(bhp, BH_LOCKED | BH_TRASH);
00186         MUTEX_LOCK(&bhp->mutex, dbenv->lockfhp);
00187         R_UNLOCK(dbenv, dbmp->reginfo);
00188 
00189         /*
00190          * Temporary files may not yet have been created.  We don't create
00191          * them now, we create them when the pages have to be flushed.
00192          */
00193         nr = 0;
00194         if (F_ISSET(&dbmfp->fh, DB_FH_VALID)) {
00195                 /*
00196                  * Ignore read errors if we have permission to create the page.
00197                  * Assume that the page doesn't exist, and that we'll create it
00198                  * when we write it out.
00199                  *
00200                  * XXX
00201                  * Theoretically, we could overwrite a page of data if it were
00202                  * possible for a file to be successfully opened for reading
00203                  * and then for the read to fail.  Shouldn't ever happen, but
00204                  * it might be worth checking to see if the offset is past the
00205                  * known end-of-file.
00206                  */
00207                 db_io.fhp = &dbmfp->fh;
00208                 db_io.mutexp = dbmfp->mutexp;
00209                 db_io.pagesize = db_io.bytes = pagesize;
00210                 db_io.pgno = bhp->pgno;
00211                 db_io.buf = bhp->buf;
00212 
00213                 if(F_ISSET(dbmfp, MP_CMPR)) {
00214                   ret = CDB___memp_cmpr(dbmfp, bhp, &db_io, DB_IO_READ, &nr);
00215                 } else {
00216                   ret = CDB___os_io(dbenv, &db_io, DB_IO_READ, &nr);
00217                 }
00218 #ifdef DEBUG
00219                 if(ret == 0) {
00220                   PAGE* pp = (PAGE*)db_io.buf;
00221                   word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_READ, 1);
00222                   switch(TYPE(pp)) {
00223                   case P_IBTREE:
00224                     word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_IBTREE, 1);
00225                     break;
00226                   case P_LBTREE:
00227                     word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_LBTREE, 1);
00228                     break;
00229                   default:
00230                     word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_UNKNOWN, 1);
00231                     break;
00232                   }
00233                 }
00234 #endif /* DEBUG */
00235         } else
00236                 ret = 0;
00237 
00238         created = 0;
00239         if (nr < pagesize) {
00240                 if (can_create)
00241                         created = 1;
00242                 else {
00243                         /*
00244                          * If we had a short read, ret may be 0.  This may not
00245                          * be an error -- in particular DB recovery processing
00246                          * may request pages that have never been written to
00247                          * disk, in which case we won't find the page.  So, the
00248                          * caller must know how to handle the error.
00249                          */
00250                         if (ret == 0)
00251                                 ret = EIO;
00252                         goto err;
00253                 }
00254         }
00255 
00256         /*
00257          * Clear any bytes we didn't read that need to be cleared.  If we're
00258          * running in diagnostic mode, smash any bytes on the page that are
00259          * unknown quantities for the caller.
00260          */
00261         if (nr != pagesize) {
00262                 len = mfp->clear_len == 0 ? pagesize : mfp->clear_len;
00263                 if (nr < len)
00264                         memset(bhp->buf + nr, 0, len - nr);
00265 #ifdef DIAGNOSTIC
00266                 if (nr > len)
00267                         len = nr;
00268                 if (len < pagesize)
00269                         memset(bhp->buf + len, CLEAR_BYTE, pagesize - len);
00270 #endif
00271         }
00272 
00273         /* Call any pgin function. */
00274         ret = mfp->ftype == 0 ? 0 : CDB___memp_pg(dbmfp, bhp, 1);
00275 
00276         /* Unlock the buffer and reacquire the region lock. */
00277 err:    MUTEX_UNLOCK(&bhp->mutex);
00278         R_LOCK(dbenv, dbmp->reginfo);
00279 
00280         /*
00281          * If no errors occurred, the data is now valid, clear the BH_TRASH
00282          * flag; regardless, clear the lock bit and let other threads proceed.
00283          */
00284         F_CLR(bhp, BH_LOCKED);
00285         if (ret == 0) {
00286                 F_CLR(bhp, BH_TRASH);
00287 
00288                 /* Update the statistics. */
00289                 if (created)
00290                         ++mfp->stat.st_page_create;
00291                 else
00292                         ++mfp->stat.st_page_in;
00293         }
00294 
00295         return (ret);
00296 }
00297 
00298 /*
00299  * CDB___memp_pgwrite --
00300  *      Write a page to a file.
00301  *
00302  * PUBLIC: int CDB___memp_pgwrite
00303  * PUBLIC:     __P((DB_MPOOL *, DB_MPOOLFILE *, BH *, int *, int *));
00304  */
00305 int
00306 CDB___memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep)
00307         DB_MPOOL *dbmp;
00308         DB_MPOOLFILE *dbmfp;
00309         BH *bhp;
00310         int *restartp, *wrotep;
00311 {
00312         DB_ENV *dbenv;
00313         DB_IO db_io;
00314         DB_LSN lsn;
00315         MPOOL *c_mp, *mp;
00316         MPOOLFILE *mfp;
00317         size_t nw;
00318         int callpgin, dosync, ret, syncfail;
00319         const char *fail;
00320 
00321         dbenv = dbmp->dbenv;
00322         mp = dbmp->reginfo[0].primary;
00323         mfp = dbmfp == NULL ? NULL : dbmfp->mfp;
00324 
00325         if (restartp != NULL)
00326                 *restartp = 0;
00327         if (wrotep != NULL)
00328                 *wrotep = 0;
00329         callpgin = 0;
00330 
00331         /*
00332          * Check the dirty bit -- this buffer may have been written since we
00333          * decided to write it.
00334          */
00335         if (!F_ISSET(bhp, BH_DIRTY)) {
00336                 if (wrotep != NULL)
00337                         *wrotep = 1;
00338                 return (0);
00339         }
00340 
00341         MUTEX_LOCK(&bhp->mutex, dbenv->lockfhp);
00342 
00343         /*
00344          * If there were two writers, we may have just been waiting while the
00345          * other writer completed I/O on this buffer.  Check the dirty bit one
00346          * more time.
00347          */
00348         if (!F_ISSET(bhp, BH_DIRTY)) {
00349                 MUTEX_UNLOCK(&bhp->mutex);
00350 
00351                 if (wrotep != NULL)
00352                         *wrotep = 1;
00353                 return (0);
00354         }
00355 
00356         F_SET(bhp, BH_LOCKED);
00357         R_UNLOCK(dbenv, dbmp->reginfo);
00358 
00359         if (restartp != NULL)
00360                 *restartp = 1;
00361 
00362         /*
00363          * It's possible that the underlying file doesn't exist, either
00364          * because of an outright removal or because it was a temporary
00365          * file that's been closed.
00366          *
00367          * !!!
00368          * Once we pass this point, we know that dbmfp and mfp aren't NULL,
00369          * and that we have a valid file reference.
00370          */
00371         if (mfp == NULL || F_ISSET(mfp, MP_DEADFILE))
00372                 goto file_dead;
00373 
00374         /* Copy the LSN off the page if we're going to need it. */
00375         if (LOGGING_ON(dbenv) || F_ISSET(bhp, BH_WRITE))
00376                 memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN));
00377 
00378         /* Ensure the appropriate log records are on disk. */
00379         if (LOGGING_ON(dbenv) && (ret = CDB_log_flush(dbenv, &lsn)) != 0)
00380                 goto err;
00381 
00382         /*
00383          * Call any pgout function.  We set the callpgin flag so that we flag
00384          * that the contents of the buffer will need to be passed through pgin
00385          * before they are reused.
00386          */
00387         if (mfp->ftype == 0)
00388                 ret = 0;
00389         else {
00390                 callpgin = 1;
00391                 if ((ret = CDB___memp_pg(dbmfp, bhp, 0)) != 0)
00392                         goto err;
00393         }
00394 
00395         /* Temporary files may not yet have been created. */
00396         if (!F_ISSET(&dbmfp->fh, DB_FH_VALID)) {
00397                 MUTEX_THREAD_LOCK(dbmp->mutexp);
00398                 if (!F_ISSET(&dbmfp->fh, DB_FH_VALID) &&
00399                     ((ret = CDB___db_appname(dbenv, DB_APP_TMP, NULL, NULL,
00400                     DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_TEMP,
00401                     &dbmfp->fh, NULL)) != 0 ||
00402                     !F_ISSET(&dbmfp->fh, DB_FH_VALID))) {
00403                         MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00404                         CDB___db_err(dbenv,
00405                             "unable to create temporary backing file");
00406                         goto err;
00407                 }
00408                 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00409         }
00410 
00411         /* Write the page. */
00412         db_io.fhp = &dbmfp->fh;
00413         db_io.mutexp = dbmfp->mutexp;
00414         db_io.pagesize = db_io.bytes = mfp->stat.st_pagesize;
00415         db_io.pgno = bhp->pgno;
00416         db_io.buf = bhp->buf;
00417 #ifdef DEBUG
00418         {
00419           PAGE* pp = (PAGE*)db_io.buf;
00420           word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_WRITE, 1);
00421           switch(TYPE(pp)) {
00422           case P_IBTREE:
00423             word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_IBTREE, 1);
00424             break;
00425           case P_LBTREE:
00426             word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_LBTREE, 1);
00427             break;
00428           default:
00429             word_monitor_add(DB_MONITOR(dbenv), WORD_MONITOR_PAGE_UNKNOWN, 1);
00430             break;
00431           }
00432         }
00433 #endif /* DEBUG */
00434         if(F_ISSET(dbmfp, MP_CMPR)) {
00435           ret = CDB___memp_cmpr(dbmfp, bhp, &db_io, DB_IO_WRITE, &nw);
00436         } else {
00437           if(db_io.pgno == PGNO_BASE_MD) {
00438             /*
00439              * Write a copy of the DBMETA information at 256, 512,
00440              * 1024, 2048, 4096, ... up to the actually required page
00441              * size.  This ensure that the DBMETA information will be
00442              * found without knowing the actual page size used in the
00443              * file.
00444              * !! Assume that PGNO_BASE_MD == 1
00445              */
00446             size_t required = db_io.pagesize;
00447             size_t orig_bytes = db_io.bytes;
00448             db_io.bytes = DBMETASIZE;
00449             for(db_io.pagesize = DBMETASIZE; db_io.pagesize < required; db_io.pagesize <<= 1) {
00450               ret = CDB___os_io(dbenv, &db_io, DB_IO_WRITE, &nw);
00451               if(ret != 0 || nw != DBMETASIZE)
00452                 break;
00453             }
00454             db_io.bytes = orig_bytes;
00455             db_io.pagesize = required;
00456             if(ret == 0)
00457               ret = CDB___os_io(dbenv, &db_io, DB_IO_WRITE, &nw);
00458           } else {
00459             ret = CDB___os_io(dbenv, &db_io, DB_IO_WRITE, &nw);
00460           }
00461         }
00462         if (ret != 0) {
00463                 CDB___db_panic(dbenv, ret);
00464                 fail = "write";
00465                 goto syserr;
00466         }
00467         if (nw != mfp->stat.st_pagesize) {
00468                 ret = EIO;
00469                 fail = "write";
00470                 goto syserr;
00471         }
00472 
00473 file_dead:
00474         /*
00475          * !!!
00476          * Once we pass this point, dbmfp and mfp may be NULL, we may not have
00477          * a valid file reference.
00478          *
00479          * Unlock the buffer and reacquire the region lock.
00480          */
00481         MUTEX_UNLOCK(&bhp->mutex);
00482         R_LOCK(dbenv, dbmp->reginfo);
00483 
00484         /*
00485          * Clean up the flags based on a successful write.
00486          *
00487          * If we rewrote the page, it will need processing by the pgin
00488          * routine before reuse.
00489          */
00490         if (callpgin)
00491                 F_SET(bhp, BH_CALLPGIN);
00492         F_CLR(bhp, BH_DIRTY | BH_LOCKED);
00493 
00494         /*
00495          * If we write a buffer for which a checkpoint is waiting, update
00496          * the count of pending buffers (both in the mpool as a whole and
00497          * for this file).  If the count for this file goes to zero, set a
00498          * flag so we flush the writes.
00499          */
00500         dosync = 0;
00501         if (F_ISSET(bhp, BH_WRITE)) {
00502                 F_CLR(bhp, BH_WRITE);
00503 
00504                 --mp->lsn_cnt;
00505                 if (mfp != NULL)
00506                         dosync = --mfp->lsn_cnt == 0 ? 1 : 0;
00507         }
00508 
00509         /* Update the page clean/dirty statistics. */
00510         c_mp = BH_TO_CACHE(dbmp, bhp);
00511         ++c_mp->stat.st_page_clean;
00512         --c_mp->stat.st_page_dirty;
00513 
00514         /* Update I/O statistics. */
00515         if (mfp != NULL)
00516                 ++mfp->stat.st_page_out;
00517 
00518         /*
00519          * Do the sync after everything else has been updated, so any incoming
00520          * checkpoint doesn't see inconsistent information.
00521          *
00522          * XXX:
00523          * Don't lock the region around the sync, fsync(2) has no atomicity
00524          * issues.
00525          *
00526          * XXX:
00527          * We ignore errors from the sync -- it makes no sense to return an
00528          * error to the calling process, so set a flag causing the checkpoint
00529          * to be retried later.  There is a possibility, of course, that a
00530          * subsequent checkpoint was started and that we're going to force it
00531          * to fail.  That should be unlikely, and fixing it would be difficult.
00532          */
00533         if (dosync) {
00534                 R_UNLOCK(dbenv, dbmp->reginfo);
00535                 syncfail = CDB___os_fsync(dbenv, &dbmfp->fh) != 0;
00536                 R_LOCK(dbenv, dbmp->reginfo);
00537                 if (syncfail)
00538                         F_SET(mp, MP_LSN_RETRY);
00539         }
00540 
00541         if (wrotep != NULL)
00542                 *wrotep = 1;
00543 
00544         return (0);
00545 
00546 syserr: CDB___db_err(dbenv, "%s: %s failed for page %lu",
00547             CDB___memp_fn(dbmfp), fail, (u_long)bhp->pgno);
00548 
00549 err:    /* Unlock the buffer and reacquire the region lock. */
00550         MUTEX_UNLOCK(&bhp->mutex);
00551         R_LOCK(dbenv, dbmp->reginfo);
00552 
00553         /*
00554          * Clean up the flags based on a failure.
00555          *
00556          * The page remains dirty but we remove our lock.  If we rewrote the
00557          * page, it will need processing by the pgin routine before reuse.
00558          */
00559         if (callpgin)
00560                 F_SET(bhp, BH_CALLPGIN);
00561         F_CLR(bhp, BH_LOCKED);
00562 
00563         return (ret);
00564 }
00565 
00566 /*
00567  * CDB___memp_pg --
00568  *      Call the pgin/pgout routine.
00569  *
00570  * PUBLIC: int CDB___memp_pg __P((DB_MPOOLFILE *, BH *, int));
00571  */
00572 int
00573 CDB___memp_pg(dbmfp, bhp, is_pgin)
00574         DB_MPOOLFILE *dbmfp;
00575         BH *bhp;
00576         int is_pgin;
00577 {
00578         DBT dbt, *dbtp;
00579         DB_MPOOL *dbmp;
00580         DB_MPREG *mpreg;
00581         MPOOLFILE *mfp;
00582         int ftype, ret;
00583 
00584         dbmp = dbmfp->dbmp;
00585         mfp = dbmfp->mfp;
00586 
00587         MUTEX_THREAD_LOCK(dbmp->mutexp);
00588 
00589         ftype = mfp->ftype;
00590         for (mpreg = LIST_FIRST(&dbmp->dbregq);
00591             mpreg != NULL; mpreg = LIST_NEXT(mpreg, q)) {
00592                 if (ftype != mpreg->ftype)
00593                         continue;
00594                 if (mfp->pgcookie_len == 0)
00595                         dbtp = NULL;
00596                 else {
00597                         dbt.size = mfp->pgcookie_len;
00598                         dbt.data = R_ADDR(dbmp->reginfo, mfp->pgcookie_off);
00599                         dbtp = &dbt;
00600                 }
00601                 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00602 
00603                 if (is_pgin) {
00604                         if (mpreg->pgin != NULL &&
00605                             (ret = mpreg->pgin(dbmp->dbenv,
00606                             bhp->pgno, bhp->buf, dbtp)) != 0)
00607                                 goto err;
00608                 } else
00609                         if (mpreg->pgout != NULL &&
00610                             (ret = mpreg->pgout(dbmp->dbenv,
00611                             bhp->pgno, bhp->buf, dbtp)) != 0)
00612                                 goto err;
00613                 break;
00614         }
00615 
00616         if (mpreg == NULL)
00617                 MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00618 
00619         return (0);
00620 
00621 err:    MUTEX_THREAD_UNLOCK(dbmp->mutexp);
00622         CDB___db_err(dbmp->dbenv, "%s: %s failed for page %lu",
00623             CDB___memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
00624         return (ret);
00625 }
00626 
00627 /*
00628  * CDB___memp_bhfree --
00629  *      Free a bucket header and its referenced data.
00630  *
00631  * PUBLIC: void CDB___memp_bhfree __P((DB_MPOOL *, BH *, int));
00632  */
00633 void
00634 CDB___memp_bhfree(dbmp, bhp, free_mem)
00635         DB_MPOOL *dbmp;
00636         BH *bhp;
00637         int free_mem;
00638 {
00639         DB_HASHTAB *dbht;
00640         MPOOL *c_mp, *mp;
00641         MPOOLFILE *mfp;
00642         int n_bucket, n_cache;
00643 
00644         mp = dbmp->reginfo[0].primary;
00645         c_mp = BH_TO_CACHE(dbmp, bhp);
00646         n_cache = NCACHE(mp, bhp->pgno);
00647         n_bucket = NBUCKET(c_mp, bhp->mf_offset, bhp->pgno);
00648         dbht = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab);
00649 
00650         /* Delete the buffer header from the hash bucket queue. */
00651         SH_TAILQ_REMOVE(&dbht[n_bucket], bhp, hq, __bh);
00652 
00653         /* Delete the buffer header from the LRU queue. */
00654         SH_TAILQ_REMOVE(&c_mp->bhq, bhp, q, __bh);
00655 
00656         /*
00657          * Find the underlying MPOOLFILE and decrement its reference count.
00658          * If this is its last reference, remove it.
00659          */
00660         mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
00661         if (--mfp->ref_cnt == 0)
00662                 CDB___memp_mf_discard(dbmp, mfp);
00663 
00664         /*
00665          * If we're not reusing it immediately, free the buffer header
00666          * and data for real.
00667          */
00668         CDB___memp_cmpr_free_chain(dbmp, bhp);
00669         if (free_mem) {
00670                 --c_mp->stat.st_page_clean;
00671                 CDB___db_shalloc_free(dbmp->reginfo[n_cache].addr, bhp);
00672         }
00673 }
00674 
00675 /*
00676  * __memp_upgrade --
00677  *      Upgrade a file descriptor from readonly to readwrite.
00678  */
00679 static int
00680 __memp_upgrade(dbmp, dbmfp, mfp)
00681         DB_MPOOL *dbmp;
00682         DB_MPOOLFILE *dbmfp;
00683         MPOOLFILE *mfp;
00684 {
00685         DB_FH fh;
00686         int ret;
00687         char *rpath;
00688 
00689         /*
00690          * !!!
00691          * We expect the handle to already be locked.
00692          */
00693 
00694         /* Check to see if we've already upgraded. */
00695         if (F_ISSET(dbmfp, MP_UPGRADE))
00696                 return (0);
00697 
00698         /* Check to see if we've already failed. */
00699         if (F_ISSET(dbmfp, MP_UPGRADE_FAIL))
00700                 return (1);
00701 
00702         /*
00703          * Calculate the real name for this file and try to open it read/write.
00704          * We know we have a valid pathname for the file because it's the only
00705          * way we could have gotten a file descriptor of any kind.
00706          */
00707         if ((ret = CDB___db_appname(dbmp->dbenv, DB_APP_DATA,
00708             NULL, R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) != 0)
00709                 return (ret);
00710         if (CDB___os_open(dbmp->dbenv, rpath, 0, 0, &fh) != 0) {
00711                 F_SET(dbmfp, MP_UPGRADE_FAIL);
00712                 ret = 1;
00713         } else {
00714                 /* Swap the descriptors and set the upgrade flag. */
00715                 (void)CDB___os_closehandle(&dbmfp->fh);
00716                 dbmfp->fh = fh;
00717                 F_SET(dbmfp, MP_UPGRADE);
00718                 ret = 0;
00719         }
00720         CDB___os_freestr(rpath);
00721         return (ret);
00722 }

Generated on Sun Jun 8 10:56:38 2008 for GNUmifluz by  doxygen 1.5.5