env_recover.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1996, 1997, 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 
00008 #include "config.h"
00009 
00010 #ifndef lint
00011 static const char copyright[] =
00012     "Copyright (c) 1996-2000\nSleepycat Software Inc.  All rights reserved.\n";
00013 static const char revid[] =
00014     "$Id: env__recover_8c-source.html,v 1.1 2008/06/08 10:18:45 sebdiaz Exp $";
00015 #endif
00016 
00017 #ifndef NO_SYSTEM_INCLUDES
00018 #include <sys/types.h>
00019 
00020 #if TIME_WITH_SYS_TIME
00021 #include <sys/time.h>
00022 #include <time.h>
00023 #else
00024 #if HAVE_SYS_TIME_H
00025 #include <sys/time.h>
00026 #else
00027 #include <time.h>
00028 #endif
00029 #endif
00030 
00031 #include <errno.h>
00032 #include <string.h>
00033 #endif
00034 
00035 #include "db_int.h"
00036 #include "db_page.h"
00037 #include "db_dispatch.h"
00038 #include "db_am.h"
00039 #include "log.h"
00040 #include "txn.h"
00041 
00042 static float __lsn_diff __P((DB_LSN *, DB_LSN *, DB_LSN *, u_int32_t, int));
00043 static int   __log_earliest __P((DB_ENV *, int32_t *, DB_LSN *));
00044 
00045 /*
00046  * CDB___db_apprec --
00047  *      Perform recovery.
00048  *
00049  * PUBLIC: int CDB___db_apprec __P((DB_ENV *, u_int32_t));
00050  */
00051 int
00052 CDB___db_apprec(dbenv, flags)
00053         DB_ENV *dbenv;
00054         u_int32_t flags;
00055 {
00056         DBT data;
00057         DB_LSN ckp_lsn, first_lsn, last_lsn, lowlsn, lsn, open_lsn;
00058         DB_TXNREGION *region;
00059         __txn_ckp_args *ckp_args;
00060         time_t now, tlow;
00061         float nfiles;
00062         int32_t low;
00063         int is_thread, progress, ret;
00064         void *txninfo;
00065 
00066         COMPQUIET(nfiles, (float)0);
00067 
00068         /* Initialize the transaction list. */
00069         if ((ret = CDB___db_txnlist_init(dbenv, &txninfo)) != 0)
00070                 return (ret);
00071 
00072         /*
00073          * Save the state of the thread flag -- we don't need it on at the
00074          * moment because we're single-threaded until recovery is complete.
00075          */
00076         is_thread = F_ISSET(dbenv, DB_ENV_THREAD) ? 1 : 0;
00077         F_CLR(dbenv, DB_ENV_THREAD);
00078         F_SET((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
00079 
00080         /*
00081          * If the user is specifying recover to a particular point in time,
00082          * verify that the logs present are sufficient to do this.
00083          */
00084         ZERO_LSN(lowlsn);
00085         if (dbenv->tx_timestamp != 0) {
00086                 if ((ret = __log_earliest(dbenv, &low, &lowlsn)) != 0)
00087                         return (ret);
00088                 if ((int32_t)dbenv->tx_timestamp < low) {
00089                         tlow = (time_t)low;
00090                         CDB___db_err(dbenv, "%s (%s, %s).\n",
00091                             "Invalid recovery timestamp specified",
00092                             ctime(&tlow));
00093                         return (EINVAL);
00094                 }
00095         }
00096 
00097         /*
00098          * Recovery is done in three passes:
00099          * Pass #0:
00100          *      We need to find the position from which we will open files
00101          *      We need to open files beginning with the last to next
00102          *      checkpoint because we might have crashed after writing the
00103          *      last checkpoint record, but before having written out all
00104          *      the open file information.
00105          *
00106          * Pass #1:
00107          *      Read forward through the log from the second to last checkpoint
00108          *      opening and closing files so that at the end of the log we have
00109          *      the "current" set of files open.
00110          *
00111          * Pass #2:
00112          *      Read backward through the log undoing any uncompleted TXNs.
00113          *      There are three cases:
00114          *          1.  If doing catastrophic recovery, we read to the beginning
00115          *              of the log
00116          *          2.  If we are doing normal reovery, then we have to roll
00117          *              back to the most recent checkpoint that occurs
00118          *              before the most recent checkpoint LSN, which is
00119          *              returned by CDB___log_findckp().
00120          *          3.  If we are recovering to a point in time, then we have
00121          *              to roll back to the checkpoint whose ckp_lsn is earlier
00122          *              than the specified time.  __log_earliest will figure
00123          *              this out for us.
00124          *      In case 2, "uncompleted TXNs" include all those who commited
00125          *      after the user's specified timestamp.
00126          *
00127          * Pass #3:
00128          *      Read forward through the log from the LSN found in pass #2,
00129          *      redoing any committed TXNs (which commited after any user-
00130          *      specified rollback point).  During this pass, checkpoint
00131          *      file information is ignored, and file openings and closings
00132          *      are redone.
00133          */
00134 
00135         /*
00136          * Find out the last lsn, so that we can estimate how far along we
00137          * are in recovery.  This will help us determine how much log there
00138          * is between the first LSN that we're going to be working with and
00139          * the last one.  We assume that each of the three phases takes the
00140          * same amount of time (a false assumption) and then use the %-age
00141          * of the amount of log traversed to figure out how much of the
00142          * pass we've accomplished.
00143          */
00144         if (dbenv->db_feedback != NULL &&
00145             (ret = CDB_log_get(dbenv, &last_lsn, &data, DB_LAST)) != 0)
00146                 goto out;
00147 
00148         /*
00149          * Pass #0
00150          * Find the second to last checkpoint in the log.  This is the point
00151          * from which we want to begin pass #1 (the open files pass).
00152          */
00153         memset(&data, 0, sizeof(data));
00154         ckp_args = NULL;
00155 
00156         if (LF_ISSET(DB_RECOVER_FATAL)) {
00157                 if ((ret = CDB_log_get(dbenv, &ckp_lsn, &data, DB_FIRST)) != 0) {
00158                         if (ret == DB_NOTFOUND)
00159                                 ret = 0;
00160                         else
00161                                 CDB___db_err(dbenv, "First log record not found");
00162                         goto out;
00163                 }
00164                 open_lsn = ckp_lsn;
00165         } else if ((ret =
00166              CDB_log_get(dbenv, &ckp_lsn, &data, DB_CHECKPOINT)) != 0) {
00167                 /*
00168                  * If we don't find a checkpoint, start from the beginning.
00169                  * If that fails, we're done.  Note, we do not require that
00170                  * there be log records if we're performing recovery.
00171                  */
00172 first:          if ((ret = CDB_log_get(dbenv, &ckp_lsn, &data, DB_FIRST)) != 0) {
00173                         if (ret == DB_NOTFOUND)
00174                                 ret = 0;
00175                         else
00176                                 CDB___db_err(dbenv, "First log record not found");
00177                         goto out;
00178                 }
00179                 open_lsn = ckp_lsn;
00180         } else if ((ret = CDB___txn_ckp_read(dbenv, data.data, &ckp_args)) != 0) {
00181                 CDB___db_err(dbenv, "Invalid checkpoint record at [%ld][%ld]\n",
00182                     (u_long)ckp_lsn.file, (u_long)ckp_lsn.offset);
00183                 goto out;
00184         } else if (IS_ZERO_LSN(ckp_args->last_ckp) ||
00185             (ret = CDB_log_get(dbenv, &ckp_args->last_ckp, &data, DB_SET)) != 0)
00186                 goto first;
00187         else
00188                 open_lsn = ckp_args->last_ckp;
00189 
00190         if (dbenv->db_feedback != NULL) {
00191                 if (last_lsn.file == open_lsn.file)
00192                         nfiles = (float)(last_lsn.offset - open_lsn.offset) /
00193                             dbenv->lg_max;
00194                 else
00195                         nfiles = (float)(last_lsn.file - open_lsn.file) +
00196                             (float)(dbenv->lg_max - open_lsn.offset +
00197                             last_lsn.offset) / dbenv->lg_max;
00198                 /* We are going to divide by nfiles; make sure it isn't 0. */
00199                 if (nfiles == 0)
00200                         nfiles = (float)0.001;
00201         }
00202 
00203         /*
00204          * Pass #1
00205          * Now, ckp_lsn is either the lsn of the last checkpoint
00206          * or the lsn of the first record in the log.  Open_lsn is
00207          * the second to last checkpoint or the beinning of the log;
00208          * begin the open files pass from that lsn, and proceed to
00209          * the end of the log.
00210          */
00211         lsn = open_lsn;
00212         for (;;) {
00213                 if (dbenv->db_feedback != NULL) {
00214                         progress = (int)(33 * (__lsn_diff(&open_lsn,
00215                            &last_lsn, &lsn, dbenv->lg_max, 1) / nfiles));
00216                         dbenv->db_feedback(dbenv, DB_RECOVER, progress);
00217                 }
00218                 if (dbenv->tx_recover != NULL)
00219                         ret = dbenv->tx_recover(dbenv,
00220                             &data, &lsn, DB_TXN_OPENFILES, txninfo);
00221                 else
00222                         ret = CDB___db_dispatch(dbenv,
00223                             &data, &lsn, DB_TXN_OPENFILES, txninfo);
00224                 if (ret != 0 && ret != DB_TXN_CKP)
00225                         goto msgerr;
00226                 if ((ret = CDB_log_get(dbenv, &lsn, &data, DB_NEXT)) != 0) {
00227                         if (ret == DB_NOTFOUND)
00228                                 break;
00229                         goto out;
00230                 }
00231         }
00232 
00233         /*
00234          * Pass #2.
00235          *
00236          * Before we can begin pass #2, backward roll phase, we determine how
00237          * far back in the log to recover.  If we are doing catastrophic
00238          * recovery, then we go as far back as we have files.  If we are
00239          * doing normal recovery, we go as back to the most recent checkpoint
00240          * that occurs before the most recent checkpoint LSN.  If we are
00241          * recovering to a point in time, then rollback to the checkpoint whose
00242          * ckp_lsn precedes the first log record (and then roll forward to
00243          * the appropriate timestamp in Pass #3).
00244          */
00245         if (LF_ISSET(DB_RECOVER_FATAL)) {
00246                 ZERO_LSN(first_lsn);
00247         } else if (dbenv->tx_timestamp != 0)
00248                 first_lsn = lowlsn;
00249         else
00250                 if ((ret = CDB___log_findckp(dbenv, &first_lsn)) == DB_NOTFOUND) {
00251                         /*
00252                          * We don't require that log files exist if recovery
00253                          * was specified.
00254                          */
00255                         ret = 0;
00256                         goto out;
00257                 }
00258 
00259         if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY))
00260                 CDB___db_err(dbenv, "Recovery starting from [%lu][%lu]",
00261                     (u_long)first_lsn.file, (u_long)first_lsn.offset);
00262 
00263         for (ret = CDB_log_get(dbenv, &lsn, &data, DB_LAST);
00264             ret == 0 && CDB_log_compare(&lsn, &first_lsn) > 0;
00265             ret = CDB_log_get(dbenv, &lsn, &data, DB_PREV)) {
00266                 if (dbenv->db_feedback != NULL) {
00267                         progress = 34 + (int)(33 * (__lsn_diff(&open_lsn,
00268                             &last_lsn, &lsn, dbenv->lg_max, 0) / nfiles));
00269                         dbenv->db_feedback(dbenv, DB_RECOVER, progress);
00270                 }
00271                 if (dbenv->tx_recover != NULL)
00272                         ret = dbenv->tx_recover(dbenv,
00273                             &data, &lsn, DB_TXN_BACKWARD_ROLL, txninfo);
00274                 else
00275                         ret = CDB___db_dispatch(dbenv,
00276                             &data, &lsn, DB_TXN_BACKWARD_ROLL, txninfo);
00277                 if (ret != 0) {
00278                         if (ret != DB_TXN_CKP)
00279                                 goto msgerr;
00280                         else
00281                                 ret = 0;
00282                 }
00283         }
00284         if (ret != 0 && ret != DB_NOTFOUND)
00285                 goto out;
00286 
00287         /*
00288          * Pass #3.
00289          */
00290         for (ret = CDB_log_get(dbenv, &lsn, &data, DB_NEXT);
00291             ret == 0; ret = CDB_log_get(dbenv, &lsn, &data, DB_NEXT)) {
00292                 if (dbenv->db_feedback != NULL) {
00293                         progress = 67 + (int)(33 * (__lsn_diff(&open_lsn,
00294                             &last_lsn, &lsn, dbenv->lg_max, 1) / nfiles));
00295                         dbenv->db_feedback(dbenv, DB_RECOVER, progress);
00296                 }
00297                 if (dbenv->tx_recover != NULL)
00298                         ret = dbenv->tx_recover(dbenv,
00299                             &data, &lsn, DB_TXN_FORWARD_ROLL, txninfo);
00300                 else
00301                         ret = CDB___db_dispatch(dbenv,
00302                             &data, &lsn, DB_TXN_FORWARD_ROLL, txninfo);
00303                 if (ret != 0) {
00304                         if (ret != DB_TXN_CKP)
00305                                 goto msgerr;
00306                         else
00307                                 ret = 0;
00308                 }
00309         }
00310         if (ret != DB_NOTFOUND)
00311                 goto out;
00312 
00313         /*
00314          * Now set the last checkpoint lsn and the current time,
00315          * take a checkpoint, and reset the txnid.
00316          */
00317         (void)time(&now);
00318         region = ((DB_TXNMGR *)dbenv->tx_handle)->reginfo.primary;
00319         region->last_ckp = ckp_lsn;
00320         region->time_ckp = (u_int32_t)now;
00321 
00322         /*
00323          * Take two checkpoints so that we don't re-recover any of the
00324          * work we've already done.
00325          */
00326         if ((ret = CDB_txn_checkpoint(dbenv, 0, 0, DB_FORCE)) != 0)
00327                 goto out;
00328 
00329         /* Now close all the db files that are open. */
00330         CDB___log_close_files(dbenv);
00331 
00332         if ((ret = CDB_txn_checkpoint(dbenv, 0, 0, DB_FORCE)) != 0)
00333                 goto out;
00334         region->last_txnid = TXN_MINIMUM;
00335 
00336         if (FLD_ISSET(dbenv->verbose, DB_VERB_RECOVERY)) {
00337                 CDB___db_err(dbenv, "Recovery complete at %.24s", ctime(&now));
00338                 CDB___db_err(dbenv, "%s %lx %s [%lu][%lu]",
00339                     "Maximum transaction ID",
00340                     ((DB_TXNHEAD *)txninfo)->maxid,
00341                     "Recovery checkpoint",
00342                     (u_long)region->last_ckp.file,
00343                     (u_long)region->last_ckp.offset);
00344         }
00345 
00346         if (0) {
00347 msgerr:         CDB___db_err(dbenv, "Recovery function for LSN %lu %lu failed",
00348                     (u_long)lsn.file, (u_long)lsn.offset);
00349         }
00350 
00351 out:    if (is_thread)
00352                 F_SET(dbenv, DB_ENV_THREAD);
00353         CDB___db_txnlist_end(dbenv, txninfo);
00354         if (ckp_args != NULL)
00355                 CDB___os_free(ckp_args, sizeof(*ckp_args));
00356         F_CLR((DB_LOG *)dbenv->lg_handle, DBLOG_RECOVER);
00357 
00358         dbenv->tx_timestamp = 0;
00359         return (ret);
00360 }
00361 
00362 /*
00363  * Figure out how many logfiles we have processed.  If we are moving
00364  * forward (is_forward != 0), then we're computing current - low.  If
00365  * we are moving backward, we are computing high - current.  max is
00366  * the number of bytes per logfile.
00367  */
00368 static float
00369 __lsn_diff(low, high, current, max, is_forward)
00370         DB_LSN *low, *high, *current;
00371         u_int32_t max;
00372         int is_forward;
00373 {
00374         float nf;
00375 
00376         /*
00377          * There are three cases in each direction.  If you are in the
00378          * same file, then all you need worry about is the difference in
00379          * offsets.  If you are in different files, then either your offsets
00380          * put you either more or less than the integral difference in the
00381          * number of files -- we need to handle both of these.
00382          */
00383         if (is_forward) {
00384                 if (current->file == low->file)
00385                         nf = (float)(current->offset - low->offset) / max;
00386                 else if (current->offset < low->offset)
00387                         nf = (float)(current->file - low->file - 1) +
00388                             (float)(max - low->offset + current->offset) / max;
00389                 else
00390                         nf = (float)(current->file - low->file) +
00391                             (float)(current->offset - low->offset) / max;
00392         } else {
00393                 if (current->file == high->file)
00394                         nf = (float)(high->offset - current->offset) / max;
00395                 else if (current->offset > high->offset)
00396                         nf = (float)(high->file - current->file - 1) +
00397                             (float)(max - current->offset + high->offset) / max;
00398                 else
00399                         nf = (float)(high->file - current->file) +
00400                             (float)(high->offset - current->offset) / max;
00401         }
00402         return (nf);
00403 }
00404 
00405 /*
00406  * __log_earliest --
00407  *
00408  * Return the earliest recovery point for the log files present.  The
00409  * earliest recovery time is the time stamp of the first checkpoint record
00410  * whose checkpoint LSN is greater than the first LSN we process.
00411  */
00412 static int
00413 __log_earliest(dbenv, lowtime, lowlsn)
00414         DB_ENV *dbenv;
00415         int32_t *lowtime;
00416         DB_LSN *lowlsn;
00417 {
00418         DB_LSN first_lsn, lsn;
00419         DBT data;
00420         __txn_ckp_args *ckpargs;
00421         u_int32_t rectype;
00422         int cmp, ret;
00423 
00424         memset(&data, 0, sizeof(data));
00425         /*
00426          * Read forward through the log looking for the first checkpoint
00427          * record whose ckp_lsn is greater than first_lsn.
00428          */
00429 
00430         for (ret = CDB_log_get(dbenv, &first_lsn, &data, DB_FIRST);
00431             ret == 0; ret = CDB_log_get(dbenv, &lsn, &data, DB_NEXT)) {
00432                 if (ret != 0)
00433                         break;
00434                 memcpy(&rectype, data.data, sizeof(rectype));
00435                 if (rectype != DB_txn_ckp)
00436                         continue;
00437                 if ((ret = CDB___txn_ckp_read(dbenv, data.data, &ckpargs)) == 0) {
00438                         cmp = CDB_log_compare(&ckpargs->ckp_lsn, &first_lsn);
00439                         *lowlsn = ckpargs->ckp_lsn;
00440                         *lowtime = ckpargs->timestamp;
00441 
00442                         CDB___os_free(ckpargs, 0);
00443                         if (cmp >= 0)
00444                                 break;
00445                 }
00446         }
00447 
00448         return (ret);
00449 }

Generated on Sun Jun 8 10:56:37 2008 for GNUmifluz by  doxygen 1.5.5