txn.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 /*
00008  * Copyright (c) 1995, 1996
00009  *      The President and Fellows of Harvard University.  All rights reserved.
00010  *
00011  * This code is derived from software contributed to Berkeley by
00012  * Margo Seltzer.
00013  *
00014  * Redistribution and use in source and binary forms, with or without
00015  * modification, are permitted provided that the following conditions
00016  * are met:
00017  * 1. Redistributions of source code must retain the above copyright
00018  *    notice, this list of conditions and the following disclaimer.
00019  * 2. Redistributions in binary form must reproduce the above copyright
00020  *    notice, this list of conditions and the following disclaimer in the
00021  *    documentation and/or other materials provided with the distribution.
00022  * 3. Neither the name of the University nor the names of its contributors
00023  *    may be used to endorse or promote products derived from this software
00024  *    without specific prior written permission.
00025  *
00026  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00027  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00029  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00030  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00031  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00032  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00033  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00034  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00035  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00036  * SUCH DAMAGE.
00037  */
00038 
00039 #include "config.h"
00040 
00041 #ifndef lint
00042 static const char revid[] = "$Id: txn_8c-source.html,v 1.1 2008/06/08 10:24:47 sebdiaz Exp $";
00043 #endif /* not lint */
00044 
00045 #ifndef NO_SYSTEM_INCLUDES
00046 #include <sys/types.h>
00047 
00048 #if TIME_WITH_SYS_TIME
00049 #include <sys/time.h>
00050 #include <time.h>
00051 #else
00052 #if HAVE_SYS_TIME_H
00053 #include <sys/time.h>
00054 #else
00055 #include <time.h>
00056 #endif
00057 #endif
00058 
00059 #include <errno.h>
00060 #include <string.h>
00061 #endif
00062 
00063 #ifdef  HAVE_RPC
00064 #include "db_server.h"
00065 #endif
00066 
00067 #include "db_int.h"
00068 #include "db_shash.h"
00069 #include "txn.h"
00070 #include "lock.h"
00071 #include "log.h"
00072 #include "db_dispatch.h"
00073 
00074 #ifdef HAVE_RPC
00075 #include "gen_client_ext.h"
00076 #include "rpc_client_ext.h"
00077 #endif
00078 
00079 static int  __txn_begin __P((DB_TXN *));
00080 static int  __txn_check_running __P((const DB_TXN *, TXN_DETAIL **));
00081 static int  __txn_count __P((DB_TXN *));
00082 static void __txn_freekids __P((DB_TXN *));
00083 static void __txn_lsn __P((DB_TXN *, DB_LSN **));
00084 static int  __txn_makefamily __P((DB_ENV *, DB_TXN *, int *, DB_LSN **));
00085 static int  __txn_undo __P((DB_TXN *));
00086 
/*
 * TXN_BUBBLE --
 *	Make a single bubble pass over the first MAX entries of the DB_LSN
 * array AP: whenever an entry compares lower than its successor (according
 * to CDB_log_compare) the two are swapped, so larger LSNs move toward the
 * front and the smallest of the range ends up in the last slot.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * like one statement (safe in un-braced if/else), and both arguments are
 * parenthesized so that expressions such as "*np - i" expand correctly.
 * AP and MAX are evaluated more than once: do not pass expressions with
 * side effects.
 */
#define	TXN_BUBBLE(AP, MAX) do {					\
	int __j;							\
	DB_LSN __tmp;							\
									\
	for (__j = 0; __j < (MAX) - 1; __j++)				\
		if (CDB_log_compare(&(AP)[__j], &(AP)[__j + 1]) < 0) {	\
			__tmp = (AP)[__j];				\
			(AP)[__j] = (AP)[__j + 1];			\
			(AP)[__j + 1] = __tmp;				\
		}							\
} while (0)
00098 
00099 /*
00100  * CDB_txn_begin --
00101  *      This is a wrapper to the actual begin process.  Normal CDB_txn_begin()
00102  * allocates a DB_TXN structure for the caller, while txn_xa_begin() does
00103  * not.  Other than that, both call into the common __txn_begin code().
00104  *
00105  * Internally, we use TXN_DETAIL structures, but the DB_TXN structure
00106  * provides access to the transaction ID and the offset in the transaction
00107  * region of the TXN_DETAIL structure.
00108  */
00109 int
00110 CDB_txn_begin(dbenv, parent, txnpp, flags)
00111         DB_ENV *dbenv;
00112         DB_TXN *parent, **txnpp;
00113         u_int32_t flags;
00114 {
00115         DB_TXN *txn;
00116         int ret;
00117 
00118 #ifdef HAVE_RPC
00119         if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
00120                 return (__dbcl_txn_begin(dbenv, parent, txnpp, flags));
00121 #endif
00122 
00123         PANIC_CHECK(dbenv);
00124         ENV_REQUIRES_CONFIG(dbenv, dbenv->tx_handle, DB_INIT_TXN);
00125 
00126         if ((ret = CDB___db_fchk(dbenv,
00127             "CDB_txn_begin", flags,
00128             DB_TXN_NOWAIT | DB_TXN_NOSYNC | DB_TXN_SYNC)) != 0)
00129                 return (ret);
00130         if ((ret = CDB___db_fcchk(dbenv,
00131             "CDB_txn_begin", flags, DB_TXN_NOSYNC, DB_TXN_SYNC)) != 0)
00132                 return (ret);
00133 
00134         if ((ret = CDB___os_calloc(dbenv, 1, sizeof(DB_TXN), &txn)) != 0)
00135                 return (ret);
00136 
00137         txn->mgrp = dbenv->tx_handle;
00138         txn->parent = parent;
00139         TAILQ_INIT(&txn->kids);
00140         txn->flags = TXN_MALLOC;
00141         if (LF_ISSET(DB_TXN_NOSYNC))
00142                 F_SET(txn, TXN_NOSYNC);
00143         if (LF_ISSET(DB_TXN_SYNC))
00144                 F_SET(txn, TXN_SYNC);
00145         if (LF_ISSET(DB_TXN_NOWAIT))
00146                 F_SET(txn, TXN_NOWAIT);
00147 
00148         if ((ret = __txn_begin(txn)) != 0) {
00149                 CDB___os_free(txn, sizeof(DB_TXN));
00150                 txn = NULL;
00151         }
00152 
00153         if (txn != NULL && parent != NULL)
00154                 TAILQ_INSERT_HEAD(&parent->kids, txn, klinks);
00155 
00156         *txnpp = txn;
00157         return (ret);
00158 }
00159 
00160 /*
00161  * CDB___txn_xa_begin --
00162  *      XA version of CDB_txn_begin.
00163  *
00164  * PUBLIC: int CDB___txn_xa_begin __P((DB_ENV *, DB_TXN *));
00165  */
00166 int
00167 CDB___txn_xa_begin(dbenv, txn)
00168         DB_ENV *dbenv;
00169         DB_TXN *txn;
00170 {
00171         PANIC_CHECK(dbenv);
00172 
00173         memset(txn, 0, sizeof(DB_TXN));
00174 
00175         txn->mgrp = dbenv->tx_handle;
00176 
00177         return (__txn_begin(txn));
00178 }
00179 
00180 /*
00181  * __txn_begin --
00182  *      Normal DB version of CDB_txn_begin.
00183  */
00184 static int
00185 __txn_begin(txn)
00186         DB_TXN *txn;
00187 {
00188         DB_ENV *dbenv;
00189         DB_LSN begin_lsn;
00190         DB_TXNMGR *mgr;
00191         DB_TXNREGION *region;
00192         TXN_DETAIL *td;
00193         size_t off;
00194         u_int32_t id;
00195         int ret;
00196 
00197         mgr = txn->mgrp;
00198         dbenv = mgr->dbenv;
00199         region = mgr->reginfo.primary;
00200 
00201         /*
00202          * We do not have to write begin records (and if we do not, then we
00203          * need never write records for read-only transactions).  However,
00204          * we do need to find the current LSN so that we can store it in the
00205          * transaction structure, so we can know where to take checkpoints.
00206          */
00207         if (LOGGING_ON(dbenv) &&
00208             (ret = CDB_log_put(dbenv, &begin_lsn, NULL, DB_CURLSN)) != 0)
00209                 goto err2;
00210 
00211         R_LOCK(dbenv, &mgr->reginfo);
00212 
00213         /* Make sure that last_txnid is not going to wrap around. */
00214         if (region->last_txnid == TXN_INVALID) {
00215                 CDB___db_err(dbenv, "CDB_txn_begin: %s  %s",
00216                     "Transaction ID wrapping.",
00217                     "Snapshot your database and start a new log.");
00218                 ret = EINVAL;
00219                 goto err1;
00220         }
00221 
00222         /* Allocate a new transaction detail structure. */
00223         if ((ret =
00224             CDB___db_shalloc(mgr->reginfo.addr, sizeof(TXN_DETAIL), 0, &td)) != 0) {
00225                 CDB___db_err(dbenv,
00226                      "Unable to allocate memory for transaction detail");
00227                 goto err1;
00228         }
00229 
00230         /* Place transaction on active transaction list. */
00231         SH_TAILQ_INSERT_HEAD(&region->active_txn, td, links, __txn_detail);
00232 
00233         id = ++region->last_txnid;
00234         ++region->nbegins;
00235         if (++region->nactive > region->maxnactive)
00236                 region->maxnactive = region->nactive;
00237 
00238         td->txnid = id;
00239         td->begin_lsn = begin_lsn;
00240         ZERO_LSN(td->last_lsn);
00241         td->status = TXN_RUNNING;
00242         if (txn->parent != NULL)
00243                 td->parent = txn->parent->off;
00244         else
00245                 td->parent = INVALID_ROFF;
00246 
00247         off = R_OFFSET(&mgr->reginfo, td);
00248         R_UNLOCK(dbenv, &mgr->reginfo);
00249 
00250         ZERO_LSN(txn->last_lsn);
00251         txn->txnid = id;
00252         txn->off = off;
00253 
00254         /*
00255          * If this is a transaction family, we must link the child to the
00256          * maximal grandparent in the lock table for deadlock detection.
00257          */
00258         if (txn->parent != NULL && LOCKING_ON(dbenv))
00259                 if ((ret = CDB___lock_addfamilylocker(dbenv,
00260                     txn->parent->txnid, txn->txnid)) != 0)
00261                         goto err2;
00262 
00263         if (F_ISSET(txn, TXN_MALLOC)) {
00264                 MUTEX_THREAD_LOCK(mgr->mutexp);
00265                 TAILQ_INSERT_TAIL(&mgr->txn_chain, txn, links);
00266                 MUTEX_THREAD_UNLOCK(mgr->mutexp);
00267         }
00268 
00269         return (0);
00270 
00271 err1:   R_UNLOCK(dbenv, &mgr->reginfo);
00272 
00273 err2:   return (ret);
00274 }
00275 
00276 /*
00277  * CDB_txn_commit --
00278  *      Commit a transaction.
00279  */
00280 int
00281 CDB_txn_commit(txnp, flags)
00282         DB_TXN *txnp;
00283         u_int32_t flags;
00284 {
00285         DB_ENV *dbenv;
00286         DB_TXN *kids;
00287         DB_TXNMGR *mgr;
00288         int ret;
00289 
00290         mgr = txnp->mgrp;
00291         dbenv = mgr->dbenv;
00292 
00293 #ifdef HAVE_RPC
00294         if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
00295                 return (__dbcl_txn_commit(txnp, flags));
00296 #endif
00297 
00298         PANIC_CHECK(dbenv);
00299         if ((ret = CDB___db_fchk(dbenv,
00300             "CDB_txn_commit", flags, DB_TXN_NOSYNC | DB_TXN_SYNC)) != 0)
00301                 return (ret);
00302 
00303         if ((ret = CDB___db_fcchk(dbenv,
00304             "CDB_txn_commit", flags, DB_TXN_NOSYNC, DB_TXN_SYNC)) != 0)
00305                 return (ret);
00306 
00307         if ((ret = __txn_check_running(txnp, NULL)) != 0)
00308                 return (ret);
00309 
00310         if (LF_ISSET(DB_TXN_NOSYNC)) {
00311                 F_CLR(txnp, TXN_SYNC);
00312                 F_SET(txnp, TXN_NOSYNC);
00313         }
00314         if (LF_ISSET(DB_TXN_SYNC)) {
00315                 F_CLR(txnp, TXN_NOSYNC);
00316                 F_SET(txnp, TXN_SYNC);
00317         }
00318 
00319         /* Commit any uncommitted children. */
00320         for (kids = TAILQ_FIRST(&txnp->kids);
00321             kids != NULL;
00322             kids = TAILQ_NEXT(kids, klinks))
00323                 if (!F_ISSET(kids, TXN_CHILDCOMMIT) &&
00324                     (ret = CDB_txn_commit(kids, flags)) != 0)
00325                         return (ret);
00326 
00327         /*
00328          * If there are any log records, write a log record and sync the log,
00329          * else do no log writes.  If the commit is for a child transaction,
00330          * we do not need to commit the child synchronously since it may still
00331          * abort (if its parent aborts), and otherwise its parent or ultimate
00332          * ancestor will write synchronously.
00333          */
00334         if (LOGGING_ON(dbenv) &&
00335             (F_ISSET(txnp, TXN_MUSTFLUSH) || !IS_ZERO_LSN(txnp->last_lsn))) {
00336                 if (txnp->parent == NULL)
00337                         ret = CDB___txn_regop_log(dbenv, txnp, &txnp->last_lsn,
00338                             (F_ISSET(mgr->dbenv, DB_ENV_TXN_NOSYNC) &&
00339                             !F_ISSET(txnp, TXN_SYNC)) ||
00340                             F_ISSET(txnp, TXN_NOSYNC) ?  0 : DB_FLUSH,
00341                             TXN_COMMIT, (int32_t)time(NULL));
00342                 else {
00343                         F_SET(txnp->parent, TXN_MUSTFLUSH);
00344                         ret = CDB___txn_child_log(dbenv, txnp, &txnp->last_lsn, 0,
00345                             TXN_COMMIT, txnp->parent->txnid);
00346                 }
00347                 if (ret != 0)
00348                         return (ret);
00349         }
00350 
00351         /*
00352          * If this is the senior ancestor (i.e., it has no parent), then we
00353          * can release all the child transactions since everyone is committing.
00354          * Then we can release this transaction.  If this is not the ultimate
00355          * ancestor, then we can neither free it or its children.
00356          */
00357         if (txnp->parent == NULL)
00358                 __txn_freekids(txnp);
00359 
00360         return (CDB___txn_end(txnp, 1));
00361 }
00362 
00363 /*
00364  * CDB_txn_abort --
00365  *      Abort a transaction.
00366  */
00367 int
00368 CDB_txn_abort(txnp)
00369         DB_TXN *txnp;
00370 {
00371         int ret;
00372 
00373 #ifdef HAVE_RPC
00374         if (F_ISSET(txnp->mgrp->dbenv, DB_ENV_RPCCLIENT))
00375                 return (__dbcl_txn_abort(txnp));
00376 #endif
00377 
00378         PANIC_CHECK(txnp->mgrp->dbenv);
00379         if ((ret = __txn_check_running(txnp, NULL)) != 0)
00380                 return (ret);
00381 
00382         if ((ret = __txn_undo(txnp)) != 0) {
00383                 return (ret);
00384         }
00385         return (CDB___txn_end(txnp, 0));
00386 }
00387 
00388 /*
00389  * CDB_txn_prepare --
00390  *      Flush the log so a future commit is guaranteed to succeed.
00391  */
00392 int
00393 CDB_txn_prepare(txnp)
00394         DB_TXN *txnp;
00395 {
00396         DBT xid;
00397         DB_ENV *dbenv;
00398         TXN_DETAIL *td;
00399         int ret;
00400 
00401         dbenv = txnp->mgrp->dbenv;
00402 #ifdef HAVE_RPC
00403         if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
00404                 return (__dbcl_txn_prepare(txnp));
00405 #endif
00406 
00407         if ((ret = __txn_check_running(txnp, &td)) != 0)
00408                 return (ret);
00409 
00410         memset(&xid, 0, sizeof(xid));
00411         xid.data = td->xid;
00412         xid.size = sizeof(td->xid);
00413         if (LOGGING_ON(dbenv) &&
00414             (ret = CDB___txn_xa_regop_log(dbenv, txnp, &txnp->last_lsn,
00415             (F_ISSET(dbenv, DB_ENV_TXN_NOSYNC) &&
00416             !F_ISSET(txnp, TXN_SYNC)) ||
00417             F_ISSET(txnp, TXN_NOSYNC) ? 0 : DB_FLUSH, TXN_PREPARE,
00418             &xid, td->format, td->gtrid, td->bqual)) != 0) {
00419                 CDB___db_err(dbenv,
00420                     "CDB_txn_prepare: log_write failed %s\n", CDB_db_strerror(ret));
00421                 return (ret);
00422         }
00423 
00424         MUTEX_THREAD_LOCK(txnp->mgrp->mutexp);
00425         td->status = TXN_PREPARED;
00426         MUTEX_THREAD_UNLOCK(txnp->mgrp->mutexp);
00427         return (ret);
00428 }
00429 
00430 /*
00431  * Return the transaction ID associated with a particular transaction
00432  */
00433 u_int32_t
00434 CDB_txn_id(txnp)
00435         DB_TXN *txnp;
00436 {
00437         return (txnp->txnid);
00438 }
00439 
00440 /* Internal routines. */
00441 
00442 /*
00443  * Return 0 if the txnp is reasonable, otherwise returns EINVAL.
00444  */
00445 static int
00446 __txn_check_running(txnp, tdp)
00447         const DB_TXN *txnp;
00448         TXN_DETAIL **tdp;
00449 {
00450         DB_TXNMGR *mgrp;
00451         TXN_DETAIL *tp;
00452 
00453         tp = NULL;
00454         mgrp = txnp->mgrp;
00455         if (txnp != NULL && mgrp != NULL && mgrp->reginfo.primary != NULL) {
00456                 tp = (TXN_DETAIL *)R_ADDR(&mgrp->reginfo, txnp->off);
00457                 /*
00458                  * Child transactions could be marked committed which is OK.
00459                  */
00460                 if (tp->status != TXN_RUNNING &&
00461                     tp->status != TXN_PREPARED && tp->status != TXN_COMMITTED)
00462                         tp = NULL;
00463                 if (tdp != NULL)
00464                         *tdp = tp;
00465         }
00466 
00467         return (tp == NULL ? EINVAL : 0);
00468 }
00469 
00470 /*
00471  * CDB___txn_end --
00472  *      Internal transaction end routine.
00473  *
00474  * PUBLIC: int CDB___txn_end __P((DB_TXN *, int));
00475  */
00476 int
00477 CDB___txn_end(txnp, is_commit)
00478         DB_TXN *txnp;
00479         int is_commit;
00480 {
00481         DB_ENV *dbenv;
00482         DB_LOCKREQ request;
00483         DB_TXN *kids;
00484         DB_TXNMGR *mgr;
00485         DB_TXNREGION *region;
00486         TXN_DETAIL *tp;
00487         int ret;
00488 
00489         mgr = txnp->mgrp;
00490         dbenv = mgr->dbenv;
00491         region = mgr->reginfo.primary;
00492 
00493         /*
00494          * On aborts, we've undone the children, but we still need
00495          * to free the up.
00496          */
00497         if (!is_commit) {
00498                 while ((kids = TAILQ_FIRST(&txnp->kids)) != NULL)
00499                         if ((ret = CDB___txn_end(kids, is_commit)) != 0)
00500                                 return (DB_RUNRECOVERY);
00501         }
00502 
00503         /* Release the locks. */
00504         request.op = txnp->parent == NULL ||
00505             is_commit == 0 ? DB_LOCK_PUT_ALL : DB_LOCK_INHERIT;
00506 
00507         if (LOCKING_ON(dbenv)) {
00508                 ret = CDB_lock_vec(dbenv, txnp->txnid, 0, &request, 1, NULL);
00509                 if (ret != 0 && (ret != DB_LOCK_DEADLOCK || is_commit)) {
00510                         CDB___db_err(dbenv, "%s: release locks failed %s",
00511                             is_commit ? "CDB_txn_commit" : "CDB_txn_abort",
00512                             CDB_db_strerror(ret));
00513                         return (ret);
00514                 }
00515         }
00516 
00517         /* End the transaction. */
00518         R_LOCK(dbenv, &mgr->reginfo);
00519 
00520         /*
00521          * Child transactions that are committing cannot be released until
00522          * the parent commits, since the parent may abort, causing the child
00523          * to abort as well.
00524          */
00525         tp = (TXN_DETAIL *)R_ADDR(&mgr->reginfo, txnp->off);
00526         if (txnp->parent == NULL || !is_commit) {
00527                 SH_TAILQ_REMOVE(&region->active_txn, tp, links, __txn_detail);
00528 
00529                 CDB___db_shalloc_free(mgr->reginfo.addr, tp);
00530         } else {
00531                 tp->status = TXN_COMMITTED;
00532                 F_SET(txnp, TXN_CHILDCOMMIT);
00533         }
00534 
00535         if (is_commit)
00536                 region->ncommits++;
00537         else
00538                 region->naborts++;
00539         --region->nactive;
00540 
00541         R_UNLOCK(dbenv, &mgr->reginfo);
00542 
00543         /*
00544          * If the transaction aborted, we can remove it from its parent links.
00545          * If it committed, then we need to leave it on, since the parent can
00546          * still abort.
00547          * The transaction cannot get more locks, remove its locker info.
00548          */
00549         if (txnp->parent != NULL) {
00550                 if (LOCKING_ON(dbenv))
00551                         CDB___lock_freefamilylocker(dbenv->lk_handle, txnp->txnid);
00552                  if (!is_commit)
00553                         TAILQ_REMOVE(&txnp->parent->kids, txnp, klinks);
00554         }
00555 
00556         /* Free the space. */
00557         if (F_ISSET(txnp, TXN_MALLOC) && (txnp->parent == NULL || !is_commit)) {
00558                 MUTEX_THREAD_LOCK(mgr->mutexp);
00559                 TAILQ_REMOVE(&mgr->txn_chain, txnp, links);
00560                 MUTEX_THREAD_UNLOCK(mgr->mutexp);
00561 
00562                 CDB___os_free(txnp, sizeof(*txnp));
00563         }
00564 
00565         return (0);
00566 }
00567 
00568 /*
00569  * __txn_undo --
00570  *      Undo the transaction with id txnid.  Returns 0 on success and
00571  *      errno on failure.
00572  */
00573 static int
00574 __txn_undo(txnp)
00575         DB_TXN *txnp;
00576 {
00577         DBT rdbt;
00578         DB_ENV *dbenv;
00579         DB_LSN *lsn_array, *key_lsnp;
00580         DB_TXNMGR *mgr;
00581         int ntxns, ret, threaded;
00582 
00583         mgr = txnp->mgrp;
00584         dbenv = mgr->dbenv;
00585         lsn_array = NULL;
00586 
00587         if (!LOGGING_ON(dbenv))
00588                 return (0);
00589 
00590         /*
00591          * This is the simplest way to code this, but if the mallocs during
00592          * recovery turn out to be a performance issue, we can do the
00593          * allocation here and use DB_DBT_USERMEM.
00594          */
00595         memset(&rdbt, 0, sizeof(rdbt));
00596         threaded = F_ISSET(dbenv, DB_ENV_THREAD) ? 1 : 0;
00597         if (threaded)
00598                 F_SET(&rdbt, DB_DBT_MALLOC);
00599 
00600         key_lsnp = &txnp->last_lsn;
00601 
00602         if (TAILQ_FIRST(&txnp->kids) != NULL) {
00603                 if ((ret = __txn_makefamily(dbenv,
00604                      txnp, &ntxns, &lsn_array)) != 0)
00605                         return (ret);
00606                 key_lsnp = &lsn_array[0];
00607         }
00608 
00609         for (ret = 0; ret == 0 && !IS_ZERO_LSN(*key_lsnp);) {
00610                 /*
00611                  * The dispatch routine returns the lsn of the record
00612                  * before the current one in the key_lsnp argument.
00613                  */
00614                 if ((ret = CDB_log_get(dbenv, key_lsnp, &rdbt, DB_SET)) == 0) {
00615                         ret = mgr->recover(dbenv,
00616                             &rdbt, key_lsnp, DB_TXN_ABORT, NULL);
00617                         if (threaded && rdbt.data != NULL) {
00618                                 CDB___os_free(rdbt.data, rdbt.size);
00619                                 rdbt.data = NULL;
00620                         }
00621                         if (lsn_array != NULL)
00622                                 TXN_BUBBLE(lsn_array, ntxns);
00623                 }
00624                 if (ret != 0) {
00625                         CDB___db_err(txnp->mgrp->dbenv,
00626                             "CDB_txn_abort: Log undo failed for LSN: %lu %lu: %s",
00627                             (u_long)key_lsnp->file, (u_long)key_lsnp->offset,
00628                             CDB_db_strerror(ret));
00629                         return (ret);
00630                 }
00631         }
00632 
00633         return (ret);
00634 }
00635 
00636 /*
00637  * Transaction checkpoint.
00638  * If either kbytes or minutes is non-zero, then we only take the checkpoint
00639  * more than "minutes" minutes have passed since the last checkpoint or if
00640  * more than "kbytes" of log data have been written since the last checkpoint.
00641  * When taking a checkpoint, find the oldest active transaction and figure out
00642  * its first LSN.  This is the lowest LSN we can checkpoint, since any record
00643  * written after since that point may be involved in a transaction and may
00644  * therefore need to be undone in the case of an abort.
00645  */
00646 int
00647 CDB_txn_checkpoint(dbenv, kbytes, minutes, flags)
00648         DB_ENV *dbenv;
00649         u_int32_t kbytes, minutes, flags;
00650 {
00651         DB_LOG *dblp;
00652         DB_LSN ckp_lsn, sync_lsn, last_ckp;
00653         DB_TXNMGR *mgr;
00654         DB_TXNREGION *region;
00655         LOG *lp;
00656         TXN_DETAIL *txnp;
00657         time_t last_ckp_time, now;
00658         u_int32_t bytes, mbytes;
00659         int ret;
00660 
00661 #ifdef HAVE_RPC
00662         if (F_ISSET(dbenv, DB_ENV_RPCCLIENT))
00663                 return (__dbcl_txn_checkpoint(dbenv, kbytes, minutes));
00664 #endif
00665         PANIC_CHECK(dbenv);
00666         ENV_REQUIRES_CONFIG(dbenv, dbenv->tx_handle, DB_INIT_TXN);
00667 
00668         mgr = dbenv->tx_handle;
00669         region = mgr->reginfo.primary;
00670         dblp = dbenv->lg_handle;
00671         lp = dblp->reginfo.primary;
00672 
00673         /*
00674          * Check if we need to checkpoint.
00675          */
00676         ZERO_LSN(ckp_lsn);
00677 
00678         if (LF_ISSET(DB_FORCE))
00679                 goto do_ckp;
00680 
00681         R_LOCK(dbenv, &dblp->reginfo);
00682         mbytes = lp->stat.st_wc_mbytes;
00683         bytes = lp->stat.st_wc_bytes;
00684         ckp_lsn = lp->lsn;
00685         R_UNLOCK(dbenv, &dblp->reginfo);
00686 
00687         /* Don't checkpoint a quiescent database. */
00688         if (bytes == 0 && mbytes == 0)
00689                 return (0);
00690 
00691         if (kbytes != 0 && mbytes * 1024 + bytes / 1024 >= (u_int32_t)kbytes)
00692                 goto do_ckp;
00693 
00694         if (minutes != 0) {
00695                 (void)time(&now);
00696 
00697                 R_LOCK(dbenv, &mgr->reginfo);
00698                 last_ckp_time = region->time_ckp;
00699                 R_UNLOCK(dbenv, &mgr->reginfo);
00700 
00701                 if (now - last_ckp_time >= (time_t)(minutes * 60))
00702                         goto do_ckp;
00703         }
00704 
00705         /*
00706          * If we checked time and data and didn't go to checkpoint,
00707          * we're done.
00708          */
00709         if (minutes != 0 || kbytes != 0)
00710                 return (0);
00711 
00712 do_ckp:
00713         if (IS_ZERO_LSN(ckp_lsn)) {
00714                 R_LOCK(dbenv, &dblp->reginfo);
00715                 ckp_lsn = lp->lsn;
00716                 R_UNLOCK(dbenv, &dblp->reginfo);
00717         }
00718 
00719         /*
00720          * We have to find an LSN such that all transactions begun
00721          * before that LSN are complete.
00722          */
00723         R_LOCK(dbenv, &mgr->reginfo);
00724 
00725         if (IS_ZERO_LSN(region->pending_ckp)) {
00726                 for (txnp =
00727                     SH_TAILQ_FIRST(&region->active_txn, __txn_detail);
00728                     txnp != NULL;
00729                     txnp = SH_TAILQ_NEXT(txnp, links, __txn_detail)) {
00730 
00731                         /*
00732                          * Look through the active transactions for the
00733                          * lowest begin lsn.
00734                          */
00735                         if (!IS_ZERO_LSN(txnp->begin_lsn) &&
00736                             CDB_log_compare(&txnp->begin_lsn, &ckp_lsn) < 0)
00737                                 ckp_lsn = txnp->begin_lsn;
00738                 }
00739                 region->pending_ckp = ckp_lsn;
00740         } else
00741                 ckp_lsn = region->pending_ckp;
00742 
00743         R_UNLOCK(dbenv, &mgr->reginfo);
00744 
00745         /*
00746          * CDB_memp_sync may change the lsn you pass it, so don't pass it
00747          * the actual ckp_lsn, pass it a temp instead.
00748          */
00749         sync_lsn = ckp_lsn;
00750         if (MPOOL_ON(dbenv) && (ret = CDB_memp_sync(dbenv, &sync_lsn)) != 0) {
00751                 /*
00752                  * ret == DB_INCOMPLETE means that there are still buffers to
00753                  * flush, the checkpoint is not complete.  Wait and try again.
00754                  */
00755                 if (ret > 0)
00756                         CDB___db_err(dbenv,
00757                             "CDB_txn_checkpoint: system failure in CDB_memp_sync %s\n",
00758                             CDB_db_strerror(ret));
00759                 return (ret);
00760         }
00761         if (LOGGING_ON(dbenv)) {
00762                 R_LOCK(dbenv, &mgr->reginfo);
00763                 last_ckp = region->last_ckp;
00764                 ZERO_LSN(region->pending_ckp);
00765                 R_UNLOCK(dbenv, &mgr->reginfo);
00766 
00767                 if ((ret = CDB___txn_ckp_log(dbenv,
00768                     NULL, &ckp_lsn, DB_CHECKPOINT, &ckp_lsn,
00769                     &last_ckp, (int32_t)time(NULL))) != 0) {
00770                         CDB___db_err(dbenv,
00771                             "CDB_txn_checkpoint: log failed at LSN [%ld %ld] %s\n",
00772                             (long)ckp_lsn.file, (long)ckp_lsn.offset,
00773                             CDB_db_strerror(ret));
00774                         return (ret);
00775                 }
00776 
00777                 R_LOCK(dbenv, &mgr->reginfo);
00778                 region->last_ckp = ckp_lsn;
00779                 (void)time(&region->time_ckp);
00780                 R_UNLOCK(dbenv, &mgr->reginfo);
00781         }
00782         return (0);
00783 }
00784 
00785 static void
00786 __txn_freekids(txnp)
00787         DB_TXN *txnp;
00788 {
00789         DB_ENV *dbenv;
00790         DB_TXN *kids;
00791         DB_TXNMGR *mgr;
00792         DB_TXNREGION *region;
00793         TXN_DETAIL *tp;
00794 
00795         mgr = txnp->mgrp;
00796         dbenv = mgr->dbenv;
00797         region = mgr->reginfo.primary;
00798 
00799         for (kids = TAILQ_FIRST(&txnp->kids);
00800             kids != NULL;
00801             kids = TAILQ_FIRST(&txnp->kids)) {
00802                 /* Free any children of this transaction. */
00803                 __txn_freekids(kids);
00804 
00805                 /* Free the transaction detail in the region. */
00806                 R_LOCK(dbenv, &mgr->reginfo);
00807                 tp = (TXN_DETAIL *)R_ADDR(&mgr->reginfo, kids->off);
00808                 SH_TAILQ_REMOVE(&region->active_txn, tp, links, __txn_detail);
00809 
00810                 CDB___db_shalloc_free(mgr->reginfo.addr, tp);
00811                 R_UNLOCK(dbenv, &mgr->reginfo);
00812 
00813                 /* Now remove from its parent. */
00814                 TAILQ_REMOVE(&txnp->kids, kids, klinks);
00815                 if (F_ISSET(txnp, TXN_MALLOC)) {
00816                         MUTEX_THREAD_LOCK(mgr->mutexp);
00817                         TAILQ_REMOVE(&mgr->txn_chain, kids, links);
00818                         MUTEX_THREAD_UNLOCK(mgr->mutexp);
00819                         CDB___os_free(kids, sizeof(*kids));
00820                 }
00821         }
00822 }
00823 
00824 /*
00825  * __txn_makefamily --
00826  *      Create an array of DB_LSNs for every member of the family being
00827  * aborted so that we can undo the records in the appropriate order.  We
00828  * allocate memory here and expect our caller to free it when they're done.
00829  */
00830 static int
00831 __txn_makefamily(dbenv, txnp, np, arrayp)
00832         DB_ENV *dbenv;
00833         DB_TXN *txnp;
00834         int *np;
00835         DB_LSN **arrayp;
00836 {
00837         DB_LSN *ap, *tmpp;
00838         int i, ret;
00839 
00840         /* Figure out how many we have. */
00841         *np = __txn_count(txnp);
00842 
00843         /* Malloc space. */
00844         if ((ret = CDB___os_malloc(dbenv, *np * sizeof(DB_LSN), NULL, arrayp)) != 0)
00845                 return (ret);
00846 
00847         /* Fill in the space. */
00848         tmpp = *arrayp;
00849         __txn_lsn(txnp, &tmpp);
00850 
00851         /* Sort the LSNs. */
00852         ap = *arrayp;
00853         for (i = 0; i < *np; i++)
00854                 TXN_BUBBLE(ap, *np - i);
00855 
00856         return (0);
00857 }
00858 
00859 /*
00860  * __txn_count --
00861  *      Routine to count the number of members in a transaction family.  We
00862  * include the incoming transaction in the count.  We assume that we never
00863  * call this routine with NULL.
00864  */
00865 static int
00866 __txn_count(txnp)
00867         DB_TXN *txnp;
00868 {
00869         DB_TXN *kids;
00870         int n;
00871 
00872         n = 1;
00873         for (kids = TAILQ_FIRST(&txnp->kids);
00874             kids != NULL;
00875             kids = TAILQ_NEXT(kids, klinks))
00876                 n += __txn_count(kids);
00877 
00878         return (n);
00879 }
00880 
00881 /*
00882  * __txn_lsn ---
00883  *      Fill in the array with the last_lsn field of every transaction
00884  * in the family.  Array is an in/out parameter that leaves you pointing
00885  * to the next space in which to place an LSN.
00886  */
00887 static void
00888 __txn_lsn(txnp, array)
00889         DB_TXN *txnp;
00890         DB_LSN **array;
00891 {
00892         DB_LSN *lsn;
00893         DB_TXN *kids;
00894 
00895         lsn = *array;
00896         lsn[0] = txnp->last_lsn;
00897         *array = &lsn[1];
00898 
00899         for (kids = TAILQ_FIRST(&txnp->kids);
00900             kids != NULL;
00901             kids = TAILQ_NEXT(kids, klinks))
00902                 __txn_lsn(kids, array);
00903 }
00904 
00905 /*
00906  * CDB___txn_activekids --
00907  *      Determine if this transaction has any active children.  Returns 1
00908  * if any active children are present; 0 otherwise.
00909  *
00910  * PUBLIC: int CDB___txn_activekids __P((DB_TXN *));
00911  */
00912 int
00913 CDB___txn_activekids(txnp)
00914         DB_TXN *txnp;
00915 {
00916         DB_TXN *kids;
00917 
00918         for (kids = TAILQ_FIRST(&txnp->kids);
00919             kids != NULL;
00920             kids = TAILQ_NEXT(kids, klinks))
00921                 if (!F_ISSET(kids, TXN_CHILDCOMMIT))
00922                         return (1);
00923         return (0);
00924 }

Generated on Sun Jun 8 10:56:39 2008 for GNUmifluz by  doxygen 1.5.5