db_join.c

Go to the documentation of this file.
00001 /*-
00002  * See the file LICENSE for redistribution information.
00003  *
00004  * Copyright (c) 1998, 1999, 2000
00005  *      Sleepycat Software.  All rights reserved.
00006  */
00007 
00008 #include "config.h"
00009 
00010 #ifndef lint
00011 static const char revid[] = "$Id: db__join_8c-source.html,v 1.1 2008/06/08 10:17:49 sebdiaz Exp $";
00012 #endif /* not lint */
00013 
00014 #ifndef NO_SYSTEM_INCLUDES
00015 #include <sys/types.h>
00016 
00017 #include <errno.h>
00018 #include <stdlib.h>
00019 #include <string.h>
00020 #endif
00021 
00022 #include "db_int.h"
00023 #include "db_page.h"
00024 #include "db_join.h"
00025 #include "db_am.h"
00026 #include "btree.h"
00027 
00028 static int __db_join_close __P((DBC *));
00029 static int __db_join_cmp __P((const void *, const void *));
00030 static int __db_join_del __P((DBC *, u_int32_t));
00031 static int __db_join_get __P((DBC *, DBT *, DBT *, u_int32_t));
00032 static int __db_join_getnext __P((DBC *, DBT *, DBT *, u_int32_t));
00033 static int __db_join_put __P((DBC *, DBT *, DBT *, u_int32_t));
00034 
00035 /*
00036  * Check to see if the Nth secondary cursor of join cursor jc is pointing
00037  * to a sorted duplicate set.
00038  */
00039 #define SORTED_SET(jc, n)   ((jc)->j_curslist[(n)]->dbp->dup_compare != NULL)
00040 
00041 /*
00042  * This is the duplicate-assisted join functionality.  Right now we're
00043  * going to write it such that we return one item at a time, although
00044  * I think we may need to optimize it to return them all at once.
00045  * It should be easier to get it working this way, and I believe that
00046  * changing it should be fairly straightforward.
00047  *
00048  * We optimize the join by sorting cursors from smallest to largest
00049  * cardinality.  In most cases, this is indeed optimal.  However, if
00050  * a cursor with large cardinality has very few data in common with the
00051  * first cursor, it is possible that the join will be made faster by
00052  * putting it earlier in the cursor list.  Since we have no way to detect
00053  * cases like this, we simply provide a flag, DB_JOIN_NOSORT, which retains
00054  * the sort order specified by the caller, who may know more about the
00055  * structure of the data.
00056  *
00057  * The first cursor moves sequentially through the duplicate set while
00058  * the others search explicitly for the duplicate in question.
00059  *
00060  */
00061 
00062 /*
00063  * CDB___db_join --
00064  *      This is the interface to the duplicate-assisted join functionality.
00065  * In the same way that cursors mark a position in a database, a cursor
00066  * can mark a position in a join.  While most cursors are created by the
00067  * cursor method of a DB, join cursors are created through an explicit
00068  * call to DB->join.
00069  *
00070  * The curslist is an array of existing, intialized cursors and primary
00071  * is the DB of the primary file.  The data item that joins all the
00072  * cursors in the curslist is used as the key into the primary and that
00073  * key and data are returned.  When no more items are left in the join
00074  * set, the  c_next operation off the join cursor will return DB_NOTFOUND.
00075  *
00076  * PUBLIC: int CDB___db_join __P((DB *, DBC **, DBC **, u_int32_t));
00077  */
00078 int
00079 CDB___db_join(primary, curslist, dbcp, flags)
00080         DB *primary;
00081         DBC **curslist, **dbcp;
00082         u_int32_t flags;
00083 {
00084         DB_ENV *dbenv;
00085         DBC *dbc;
00086         JOIN_CURSOR *jc;
00087         int ret;
00088         u_int32_t i, ncurs, nslots;
00089 
00090         COMPQUIET(nslots, 0);
00091 
00092         PANIC_CHECK(primary->dbenv);
00093 
00094         if ((ret = CDB___db_joinchk(primary, flags)) != 0)
00095                 return (ret);
00096 
00097         if (curslist == NULL || curslist[0] == NULL)
00098                 return (EINVAL);
00099 
00100         dbc = NULL;
00101         jc = NULL;
00102         dbenv = primary->dbenv;
00103 
00104         if ((ret = CDB___os_calloc(dbenv, 1, sizeof(DBC), &dbc)) != 0)
00105                 goto err;
00106 
00107         if ((ret = CDB___os_calloc(dbenv,
00108             1, sizeof(JOIN_CURSOR), &jc)) != 0)
00109                 goto err;
00110 
00111         if ((ret = CDB___os_malloc(dbenv, 256, NULL, &jc->j_key.data)) != 0)
00112                 goto err;
00113         jc->j_key.ulen = 256;
00114         F_SET(&jc->j_key, DB_DBT_USERMEM);
00115 
00116         for (jc->j_curslist = curslist;
00117             *jc->j_curslist != NULL; jc->j_curslist++)
00118                 ;
00119 
00120         /*
00121          * The number of cursor slots we allocate is one greater than
00122          * the number of cursors involved in the join, because the
00123          * list is NULL-terminated.
00124          */
00125         ncurs = jc->j_curslist - curslist;
00126         nslots = ncurs + 1;
00127 
00128         /*
00129          * !!! -- A note on the various lists hanging off jc.
00130          *
00131          * j_curslist is the initial NULL-terminated list of cursors passed
00132          * into CDB___db_join.  The original cursors are not modified; pristine
00133          * copies are required because, in databases with unsorted dups, we
00134          * must reset all of the secondary cursors after the first each
00135          * time the first one is incremented, or else we will lose data
00136          * which happen to be sorted differently in two different cursors.
00137          *
00138          * j_workcurs is where we put those copies that we're planning to
00139          * work with.  They're lazily c_dup'ed from j_curslist as we need
00140          * them, and closed when the join cursor is closed or when we need
00141          * to reset them to their original values (in which case we just
00142          * c_dup afresh).
00143          *
00144          * j_fdupcurs is an array of cursors which point to the first
00145          * duplicate in the duplicate set that contains the data value
00146          * we're currently interested in.  We need this to make
00147          * __db_join_get correctly return duplicate duplicates;  i.e., if a
00148          * given data value occurs twice in the set belonging to cursor #2,
00149          * and thrice in the set belonging to cursor #3, and once in all
00150          * the other cursors, successive calls to __db_join_get need to
00151          * return that data item six times.  To make this happen, each time
00152          * cursor N is allowed to advance to a new datum, all cursors M
00153          * such that M > N have to be reset to the first duplicate with
00154          * that datum, so __db_join_get will return all the dup-dups again.
00155          * We could just reset them to the original cursor from j_curslist,
00156          * but that would be a bit slower in the unsorted case and a LOT
00157          * slower in the sorted one.
00158          *
00159          * j_exhausted is a list of boolean values which represent
00160          * whether or not their corresponding cursors are "exhausted",
00161          * i.e. whether the datum under the corresponding cursor has
00162          * been found not to exist in any unreturned combinations of
00163          * later secondary cursors, in which case they are ready to be
00164          * incremented.
00165          */
00166 
00167         /* We don't want to free regions whose callocs have failed. */
00168         jc->j_curslist = NULL;
00169         jc->j_workcurs = NULL;
00170         jc->j_fdupcurs = NULL;
00171         jc->j_exhausted = NULL;
00172 
00173         if ((ret = CDB___os_calloc(dbenv, nslots, sizeof(DBC *),
00174             &jc->j_curslist)) != 0)
00175                 goto err;
00176         if ((ret = CDB___os_calloc(dbenv, nslots, sizeof(DBC *),
00177             &jc->j_workcurs)) != 0)
00178                 goto err;
00179         if ((ret = CDB___os_calloc(dbenv, nslots, sizeof(DBC *),
00180             &jc->j_fdupcurs)) != 0)
00181                 goto err;
00182         if ((ret = CDB___os_calloc(dbenv, nslots, sizeof(u_int8_t),
00183             &jc->j_exhausted)) != 0)
00184                 goto err;
00185         for (i = 0; curslist[i] != NULL; i++) {
00186                 jc->j_curslist[i] = curslist[i];
00187                 jc->j_workcurs[i] = NULL;
00188                 jc->j_fdupcurs[i] = NULL;
00189                 jc->j_exhausted[i] = 0;
00190         }
00191         jc->j_ncurs = ncurs;
00192 
00193         /*
00194          * If DB_JOIN_NOSORT is not set, optimize secondary cursors by
00195          * sorting in order of increasing cardinality.
00196          */
00197         if (!LF_ISSET(DB_JOIN_NOSORT))
00198                 qsort(jc->j_curslist, ncurs, sizeof(DBC *), __db_join_cmp);
00199 
00200         /*
00201          * We never need to reset the 0th cursor, so there's no
00202          * solid reason to use workcurs[0] rather than curslist[0] in
00203          * join_get.  Nonetheless, it feels cleaner to do it for symmetry,
00204          * and this is the most logical place to copy it.
00205          *
00206          * !!!
00207          * There's no need to close the new cursor if we goto err only
00208          * because this is the last thing that can fail.  Modifier of this
00209          * function beware!
00210          */
00211         if ((ret = jc->j_curslist[0]->c_dup(jc->j_curslist[0], jc->j_workcurs,
00212             DB_POSITIONI)) != 0)
00213                 goto err;
00214 
00215         dbc->c_close = __db_join_close;
00216         dbc->c_del = __db_join_del;
00217         dbc->c_get = __db_join_get;
00218         dbc->c_put = __db_join_put;
00219         dbc->internal = (DBC_INTERNAL *) jc;
00220         dbc->dbp = primary;
00221         jc->j_primary = primary;
00222 
00223         *dbcp = dbc;
00224 
00225         MUTEX_THREAD_LOCK(primary->mutexp);
00226         TAILQ_INSERT_TAIL(&primary->join_queue, dbc, links);
00227         MUTEX_THREAD_UNLOCK(primary->mutexp);
00228 
00229         return (0);
00230 
00231 err:    if (jc != NULL) {
00232                 if (jc->j_curslist != NULL)
00233                         CDB___os_free(jc->j_curslist, nslots * sizeof(DBC *));
00234                 if (jc->j_workcurs != NULL) {
00235                         if (jc->j_workcurs[0] != NULL)
00236                                 CDB___os_free(jc->j_workcurs[0], sizeof(DBC));
00237                         CDB___os_free(jc->j_workcurs, nslots * sizeof(DBC *));
00238                 }
00239                 if (jc->j_fdupcurs != NULL)
00240                         CDB___os_free(jc->j_fdupcurs, nslots * sizeof(DBC *));
00241                 if (jc->j_exhausted != NULL)
00242                         CDB___os_free(jc->j_exhausted, nslots * sizeof(u_int8_t));
00243                 CDB___os_free(jc, sizeof(JOIN_CURSOR));
00244         }
00245         if (dbc != NULL)
00246                 CDB___os_free(dbc, sizeof(DBC));
00247         return (ret);
00248 }
00249 
00250 static int
00251 __db_join_put(dbc, key, data, flags)
00252         DBC *dbc;
00253         DBT *key;
00254         DBT *data;
00255         u_int32_t flags;
00256 {
00257         PANIC_CHECK(dbc->dbp->dbenv);
00258 
00259         COMPQUIET(key, NULL);
00260         COMPQUIET(data, NULL);
00261         COMPQUIET(flags, 0);
00262         return (EINVAL);
00263 }
00264 
00265 static int
00266 __db_join_del(dbc, flags)
00267         DBC *dbc;
00268         u_int32_t flags;
00269 {
00270         PANIC_CHECK(dbc->dbp->dbenv);
00271 
00272         COMPQUIET(flags, 0);
00273         return (EINVAL);
00274 }
00275 
00276 static int
00277 __db_join_get(dbc, key_arg, data_arg, flags)
00278         DBC *dbc;
00279         DBT *key_arg, *data_arg;
00280         u_int32_t flags;
00281 {
00282         DBT *key_n, key_n_mem;
00283         DB *dbp;
00284         DBC *cp;
00285         JOIN_CURSOR *jc;
00286         int ret;
00287         u_int32_t i, j, operation;
00288 
00289         dbp = dbc->dbp;
00290         jc = (JOIN_CURSOR *)dbc->internal;
00291 
00292         PANIC_CHECK(dbp->dbenv);
00293 
00294         operation = LF_ISSET(DB_OPFLAGS_MASK);
00295 
00296         if ((ret = CDB___db_joingetchk(dbp, key_arg, flags)) != 0)
00297                 return (ret);
00298 
00299         /*
00300          * Since we are fetching the key as a datum in the secondary indices,
00301          * we must be careful of caller-specified DB_DBT_* memory
00302          * management flags.  If necessary, use a stack-allocated DBT;
00303          * we'll appropriately copy and/or allocate the data later.
00304          */
00305         if (F_ISSET(key_arg, DB_DBT_USERMEM) ||
00306             F_ISSET(key_arg, DB_DBT_MALLOC)) {
00307                 /* We just use the default buffer;  no need to go malloc. */
00308                 key_n = &key_n_mem;
00309                 memset(key_n, 0, sizeof(DBT));
00310         } else {
00311                 /*
00312                  * Either DB_DBT_REALLOC or the default buffer will work
00313                  * fine if we have to reuse it, as we do.
00314                  */
00315                 key_n = key_arg;
00316         }
00317 
00318         /*
00319          * If our last attempt to do a get on the primary key failed,
00320          * short-circuit the join and try again with the same key.
00321          */
00322         if (F_ISSET(jc, JOIN_RETRY))
00323                 goto samekey;
00324         F_CLR(jc, JOIN_RETRY);
00325 
00326 retry:  ret = jc->j_workcurs[0]->c_get(jc->j_workcurs[0],
00327             &jc->j_key, key_n, jc->j_exhausted[0] ? DB_NEXT_DUP : DB_CURRENT);
00328 
00329         if (ret == ENOMEM) {
00330                 jc->j_key.ulen <<= 1;
00331                 if ((ret = CDB___os_realloc(dbp->dbenv,
00332                     jc->j_key.ulen, NULL, &jc->j_key.data)) != 0)
00333                         goto mem_err;
00334                 goto retry;
00335         }
00336 
00337         /*
00338          * If ret == DB_NOTFOUND, we're out of elements of the first
00339          * secondary cursor.  This is how we finally finish the join
00340          * if all goes well.
00341          */
00342         if (ret != 0)
00343                 goto err;
00344 
00345         /*
00346          * If jc->j_exhausted[0] == 1, we've just advanced the first cursor,
00347          * and we're going to want to advance all the cursors that point to
00348          * the first member of a duplicate duplicate set (j_fdupcurs[1..N]).
00349          * Close all the cursors in j_fdupcurs;  we'll reopen them the
00350          * first time through the upcoming loop.
00351          */
00352         for (i = 1; i < jc->j_ncurs; i++) {
00353                 if (jc->j_fdupcurs[i] != NULL &&
00354                     (ret = jc->j_fdupcurs[i]->c_close(jc->j_fdupcurs[i])) != 0)
00355                         goto err;
00356                 jc->j_fdupcurs[i] = NULL;
00357         }
00358 
00359         /*
00360          * If jc->j_curslist[1] == NULL, we have only one cursor in the join.
00361          * Thus, we can safely increment that one cursor on each call
00362          * to __db_join_get, and we signal this by setting jc->j_exhausted[0]
00363          * right away.
00364          *
00365          * Otherwise, reset jc->j_exhausted[0] to 0, so that we don't
00366          * increment it until we know we're ready to.
00367          */
00368         if (jc->j_curslist[1] == NULL)
00369                 jc->j_exhausted[0] = 1;
00370         else
00371                 jc->j_exhausted[0] = 0;
00372 
00373         /* We have the first element; now look for it in the other cursors. */
00374         for (i = 1; i < jc->j_ncurs; i++) {
00375                 DB_ASSERT(jc->j_curslist[i] != NULL);
00376                 if (jc->j_workcurs[i] == NULL)
00377                         /* If this is NULL, we need to dup curslist into it. */
00378                         if ((ret = jc->j_curslist[i]->c_dup(
00379                             jc->j_curslist[i], jc->j_workcurs + i,
00380                             DB_POSITIONI)) != 0)
00381                                 goto err;
00382 
00383 retry2:         cp = jc->j_workcurs[i];
00384 
00385                 if ((ret = __db_join_getnext(cp, &jc->j_key, key_n,
00386                             jc->j_exhausted[i])) == DB_NOTFOUND) {
00387                         /*
00388                          * jc->j_workcurs[i] has no more of the datum we're
00389                          * interested in.  Go back one cursor and get
00390                          * a new dup.  We can't just move to a new
00391                          * element of the outer relation, because that way
00392                          * we might miss duplicate duplicates in cursor i-1.
00393                          *
00394                          * If this takes us back to the first cursor,
00395                          * -then- we can move to a new element of the outer
00396                          * relation.
00397                          */
00398                         --i;
00399                         jc->j_exhausted[i] = 1;
00400 
00401                         if (i == 0) {
00402                                 for (j = 1; jc->j_workcurs[j] != NULL; j++) {
00403                                         /*
00404                                          * We're moving to a new element of
00405                                          * the first secondary cursor.  If
00406                                          * that cursor is sorted, then any
00407                                          * other sorted cursors can be safely
00408                                          * reset to the first duplicate
00409                                          * duplicate in the current set if we
00410                                          * have a pointer to it (we can't just
00411                                          * leave them be, or we'll miss
00412                                          * duplicate duplicates in the outer
00413                                          * relation).
00414                                          *
00415                                          * If the first cursor is unsorted, or
00416                                          * if cursor j is unsorted, we can
00417                                          * make no assumptions about what
00418                                          * we're looking for next or where it
00419                                          * will be, so we reset to the very
00420                                          * beginning (setting workcurs NULL
00421                                          * will achieve this next go-round).
00422                                          *
00423                                          * XXX: This is likely to break
00424                                          * horribly if any two cursors are
00425                                          * both sorted, but have different
00426                                          * specified sort functions.  For,
00427                                          * now, we dismiss this as pathology
00428                                          * and let strange things happen--we
00429                                          * can't make rope childproof.
00430                                          */
00431                                         if ((ret = jc->j_workcurs[j]->c_close(
00432                                             jc->j_workcurs[j])) != 0)
00433                                                 goto err;
00434                                         if (!SORTED_SET(jc, 0) ||
00435                                             !SORTED_SET(jc, j) ||
00436                                             jc->j_fdupcurs[j] == NULL)
00437                                                 /*
00438                                                  * Unsafe conditions;
00439                                                  * reset fully.
00440                                                  */
00441                                                 jc->j_workcurs[j] = NULL;
00442                                         else
00443                                                 /* Partial reset suffices. */
00444                                                 if ((jc->j_fdupcurs[j]->c_dup(
00445                                                     jc->j_fdupcurs[j],
00446                                                     &jc->j_workcurs[j],
00447                                                     DB_POSITIONI)) != 0)
00448                                                         goto err;
00449                                         jc->j_exhausted[j] = 0;
00450                                 }
00451                                 goto retry;
00452                                 /* NOTREACHED */
00453                         }
00454 
00455                         /*
00456                          * We're about to advance the cursor and need to
00457                          * reset all of the workcurs[j] where j>i, so that
00458                          * we don't miss any duplicate duplicates.
00459                          */
00460                         for (j = i + 1;
00461                             jc->j_workcurs[j] != NULL;
00462                             j++) {
00463                                 if ((ret = jc->j_workcurs[j]->c_close(
00464                                     jc->j_workcurs[j])) != 0)
00465                                         goto err;
00466                                 jc->j_exhausted[j] = 0;
00467                                 if (jc->j_fdupcurs[j] != NULL &&
00468                                     (ret = jc->j_fdupcurs[j]->c_dup(
00469                                     jc->j_fdupcurs[j], &jc->j_workcurs[j],
00470                                     DB_POSITIONI)) != 0)
00471                                         goto err;
00472                                 else
00473                                         jc->j_workcurs[j] = NULL;
00474                         }
00475                         goto retry2;
00476                         /* NOTREACHED */
00477                 }
00478 
00479                 if (ret == ENOMEM) {
00480                         jc->j_key.ulen <<= 1;
00481                         if ((ret = CDB___os_realloc(dbp->dbenv, jc->j_key.ulen,
00482                             NULL, &jc->j_key.data)) != 0) {
00483 mem_err:                        CDB___db_err(dbp->dbenv,
00484                                     "Allocation failed for join key, len = %lu",
00485                                     (u_long)jc->j_key.ulen);
00486                                 goto err;
00487                         }
00488                         goto retry2;
00489                 }
00490 
00491                 if (ret != 0)
00492                         goto err;
00493 
00494                 /*
00495                  * If we made it this far, we've found a matching
00496                  * datum in cursor i.  Mark the current cursor
00497                  * unexhausted, so we don't miss any duplicate
00498                  * duplicates the next go-round--unless this is the
00499                  * very last cursor, in which case there are none to
00500                  * miss, and we'll need that exhausted flag to finally
00501                  * get a DB_NOTFOUND and move on to the next datum in
00502                  * the outermost cursor.
00503                  */
00504                 if (i + 1 != jc->j_ncurs)
00505                         jc->j_exhausted[i] = 0;
00506                 else
00507                         jc->j_exhausted[i] = 1;
00508 
00509                 /*
00510                  * If jc->j_fdupcurs[i] is NULL and the ith cursor's dups are
00511                  * sorted, then we're here for the first time since advancing
00512                  * cursor 0, and we have a new datum of interest.
00513                  * jc->j_workcurs[i] points to the beginning of a set of
00514                  * duplicate duplicates;  store this into jc->j_fdupcurs[i].
00515                  */
00516                 if (SORTED_SET(jc, i) && jc->j_fdupcurs[i] == NULL && (ret =
00517                     cp->c_dup(cp, &jc->j_fdupcurs[i], DB_POSITIONI)) != 0)
00518                         goto err;
00519 
00520         }
00521 
00522 err:    if (ret != 0)
00523                 return (ret);
00524 
00525         if (0) {
00526 samekey:        /*
00527                  * Get the key we tried and failed to return last time;
00528                  * it should be the current datum of all the secondary cursors.
00529                  */
00530                 if ((ret = jc->j_workcurs[0]->c_get(jc->j_workcurs[0],
00531                     &jc->j_key, key_n, DB_CURRENT)) != 0)
00532                         return (ret);
00533                 F_CLR(jc, JOIN_RETRY);
00534         }
00535 
00536         /*
00537          * ret == 0;  we have a key to return.
00538          *
00539          * If DB_DBT_USERMEM or DB_DBT_MALLOC is set, we need to
00540          * copy it back into the dbt we were given for the key;
00541          * call CDB___db_retcopy.
00542          *
00543          * Otherwise, assert that we do not in fact need to copy anything
00544          * and simply proceed.
00545          */
00546         if (F_ISSET(key_arg, DB_DBT_USERMEM) ||
00547             F_ISSET(key_arg, DB_DBT_MALLOC)) {
00548                 /*
00549                  * We need to copy the key back into our original
00550                  * datum.  Do so.
00551                  */
00552                 if ((ret = CDB___db_retcopy(dbp,
00553                     key_arg, key_n->data, key_n->size, NULL, NULL)) != 0) {
00554                         /*
00555                          * The retcopy failed, most commonly because we
00556                          * have a user buffer for the key which is too small.
00557                          * Set things up to retry next time, and return.
00558                          */
00559                         F_SET(jc, JOIN_RETRY);
00560                         return (ret);
00561                 }
00562         } else
00563                 DB_ASSERT(key_n == key_arg);
00564 
00565         /*
00566          * If DB_JOIN_ITEM is
00567          * set, we return it;  otherwise we do the lookup in the
00568          * primary and then return.
00569          *
00570          * Note that we use key_arg here;  it is safe (and appropriate)
00571          * to do so.
00572          */
00573         if (operation == DB_JOIN_ITEM)
00574                 return (0);
00575 
00576         if ((ret = jc->j_primary->get(jc->j_primary,
00577             jc->j_curslist[0]->txn, key_arg, data_arg, 0)) != 0)
00578                 /*
00579                  * The get on the primary failed, most commonly because we're
00580                  * using a user buffer that's not big enough.  Flag our
00581                  * failure so we can return the same key next time.
00582                  */
00583                 F_SET(jc, JOIN_RETRY);
00584 
00585         return (ret);
00586 }
00587 
00588 static int
00589 __db_join_close(dbc)
00590         DBC *dbc;
00591 {
00592         DB *dbp;
00593         JOIN_CURSOR *jc;
00594         int ret, t_ret;
00595         u_int32_t i;
00596 
00597         jc = (JOIN_CURSOR *)dbc->internal;
00598         dbp = dbc->dbp;
00599         ret = t_ret = 0;
00600 
00601         /*
00602          * Remove from active list of join cursors.  Note that this
00603          * must happen before any action that can fail and return, or else
00604          * CDB___db_close may loop indefinitely.
00605          */
00606         MUTEX_THREAD_LOCK(dbp->mutexp);
00607         TAILQ_REMOVE(&dbp->join_queue, dbc, links);
00608         MUTEX_THREAD_UNLOCK(dbp->mutexp);
00609 
00610         PANIC_CHECK(dbc->dbp->dbenv);
00611 
00612         /*
00613          * Close any open scratch cursors.  In each case, there may
00614          * not be as many outstanding as there are cursors in
00615          * curslist, but we want to close whatever's there.
00616          *
00617          * If any close fails, there's no reason not to close everything else;
00618          * we'll just return the error code of the last one to fail.  There's
00619          * not much the caller can do anyway, since these cursors only exist
00620          * hanging off a db-internal data structure that they shouldn't be
00621          * mucking with.
00622          */
00623         for (i = 0; i < jc->j_ncurs; i++) {
00624                 if (jc->j_workcurs[i] != NULL && (t_ret =
00625                     jc->j_workcurs[i]->c_close(jc->j_workcurs[i])) != 0)
00626                         ret = t_ret;
00627                 if (jc->j_fdupcurs[i] != NULL && (t_ret =
00628                     jc->j_fdupcurs[i]->c_close(jc->j_fdupcurs[i])) != 0)
00629                         ret = t_ret;
00630         }
00631 
00632         CDB___os_free(jc->j_exhausted, 0);
00633         CDB___os_free(jc->j_curslist, 0);
00634         CDB___os_free(jc->j_workcurs, 0);
00635         CDB___os_free(jc->j_fdupcurs, 0);
00636         CDB___os_free(jc->j_key.data, jc->j_key.ulen);
00637         CDB___os_free(jc, sizeof(JOIN_CURSOR));
00638         CDB___os_free(dbc, sizeof(DBC));
00639 
00640         return (ret);
00641 }
00642 
00643 /*
00644  * __db_join_getnext --
00645  *      This function replaces the DBC_CONTINUE and DBC_KEYSET
00646  *      functionality inside the various cursor get routines.
00647  *
00648  *      If exhausted == 0, we're not done with the current datum;
00649  *      return it if it matches "matching", otherwise search
00650  *      using DB_GET_BOTHC (which is faster than iteratively doing
00651  *      DB_NEXT_DUP) forward until we find one that does.
00652  *
00653  *      If exhausted == 1, we are done with the current datum, so just
00654  *      leap forward to searching NEXT_DUPs.
00655  *
00656  *      If no matching datum exists, returns DB_NOTFOUND, else 0.
00657  */
00658 static int
00659 __db_join_getnext(dbc, key, data, exhausted)
00660         DBC *dbc;
00661         DBT *key, *data;
00662         u_int32_t exhausted;
00663 {
00664         int ret, cmp;
00665         DB *dbp;
00666         DBT ldata;
00667         int (*func) __P((const DBT *, const DBT *));
00668 
00669         dbp = dbc->dbp;
00670 
00671         func = (dbp->dup_compare == NULL) ? CDB___bam_defcmp : dbp->dup_compare;
00672 
00673         switch (exhausted) {
00674         case 0:
00675                 memset(&ldata, 0, sizeof(DBT));
00676                 /* We don't want to step on data->data;  malloc. */
00677                 F_SET(&ldata, DB_DBT_MALLOC);
00678                 if ((ret = dbc->c_get(dbc, key, &ldata, DB_CURRENT)) != 0)
00679                         break;
00680                 cmp = func(data, &ldata);
00681                 if (cmp == 0) {
00682                         /*
00683                          * We have to return the real data value.  Copy
00684                          * it into data, then free the buffer we malloc'ed
00685                          * above.
00686                          */
00687                         if ((ret = CDB___db_retcopy(dbp, data, ldata.data,
00688                             ldata.size, &data->data, &data->size)) != 0)
00689                                 return (ret);
00690                         CDB___os_free(ldata.data, 0);
00691                         return (0);
00692                 }
00693 
00694                 /*
00695                  * Didn't match--we want to fall through and search future
00696                  * dups.  We just forget about ldata and free
00697                  * its buffer--data contains the value we're searching for.
00698                  */
00699                 CDB___os_free(ldata.data, 0);
00700                 /* FALLTHROUGH */
00701         case 1:
00702                 ret = dbc->c_get(dbc, key, data, DB_GET_BOTHC);
00703                 break;
00704         default:
00705                 ret = EINVAL;
00706                 break;
00707         }
00708 
00709         return (ret);
00710 }
00711 
00712 /*
00713  * __db_join_cmp --
00714  *      Comparison function for sorting DBCs in cardinality order.
00715  */
00716 
00717 static int
00718 __db_join_cmp(a, b)
00719         const void *a, *b;
00720 {
00721         DBC *dbca, *dbcb;
00722         db_recno_t counta, countb;
00723 
00724         /* In case c_count fails, pretend cursors are equal. */
00725         counta = countb = 0;
00726 
00727         dbca = *((DBC * const *)a);
00728         dbcb = *((DBC * const *)b);
00729 
00730         if (dbca->c_count(dbca, &counta, 0) != 0 ||
00731             dbcb->c_count(dbcb, &countb, 0) != 0)
00732                 return (0);
00733 
00734         return (counta - countb);
00735 }

Generated on Sun Jun 8 10:56:36 2008 for GNUmifluz by  doxygen 1.5.5