LCOV - code coverage report
Current view: top level - gdk - gdk_join.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1430 2238 63.9 %
Date: 2021-10-13 02:24:04 Functions: 23 28 82.1 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : #include "monetdb_config.h"
      10             : #include "gdk.h"
      11             : #include "gdk_private.h"
      12             : #include "gdk_calc_private.h"
      13             : 
      14             : /*
      15             :  * All join variants produce some sort of join on two input BATs,
      16             :  * optionally subject to up to two candidate lists.  Only values in
      17             :  * the input BATs that are mentioned in the associated candidate list
      18             :  * (if provided) are eligible.  They all return two output BATs in the
      19             :  * first two arguments.  The join operations differ in the way in
      20             :  * which tuples from the two inputs are matched.
      21             :  *
      22             :  * The outputs consist of two aligned BATs (i.e. same length and same
      23             :  * hseqbase (0@0)) that contain the OIDs of the input BATs that match.
      24             :  * The candidate lists, if given, contain the OIDs of the associated
      25             :  * input BAT which must be considered for matching.  The input BATs
      26             :  * must have the same type.
      27             :  *
      28             :  * All functions also have a parameter nil_matches which indicates
      29             :  * whether NIL must be considered an ordinary value that can match, or
      30             :  * whether NIL must be considered to never match.
      31             :  *
      32             :  * The join functions that are provided here are:
      33             :  * BATjoin
      34             :  *      normal equi-join
      35             :  * BATleftjoin
      36             :  *      normal equi-join, but the left output is sorted
      37             :  * BATouterjoin
      38             :  *      equi-join, but the left output is sorted, and if there is no
      39             :  *      match for a value in the left input, there is still an output
      40             :  *      with NIL in the right output
      41             :  * BATsemijoin
      42             :  *      equi-join, but the left output is sorted, and if there are
      43             :  *      multiple matches, only one is returned (i.e., the left output
      44             :  *      is also key)
      45             :  * BATthetajoin
      46             :  *      theta-join: an extra operator must be provided encoded as an
      47             :  *      integer (macros JOIN_EQ, JOIN_NE, JOIN_LT, JOIN_LE, JOIN_GT,
      48             :  *      JOIN_GE); values match if the left input has the given
      49             :  *      relationship with the right input; order of the outputs is not
      50             :  *      guaranteed
      51             :  * BATbandjoin
      52             :  *      band-join: two extra input values (c1, c2) must be provided as
      53             :  *      well as Booleans (li, hi) that indicate whether the value
      54             :  *      ranges are inclusive or not; values in the left and right
      55             :  *      inputs match if right - c1 <[=] left <[=] right + c2; if c1 or
      56             :  *      c2 is NIL, there are no matches
      57             :  * BATrangejoin
      58             :  *      range-join: the right input consists of two aligned BATs,
      59             :  *      values match if the left value is between two corresponding
      60             :  *      right values; two extra Boolean parameters, li and hi,
      61             :  *      indicate whether equal values match
      62             :  *
      63             :  * In addition to these functions, there are two more functions that
      64             :  * are closely related:
      65             :  * BATintersect
      66             :  *      intersection: return a candidate list with OIDs of tuples in
      67             :  *      the left input whose value occurs in the right input
      68             :  * BATdiff
      69             :  *      difference: return a candidate list with OIDs of tuples in the
      70             :  *      left input whose value does not occur in the right input
      71             :  */
      72             : 
      73             : /* Perform a bunch of sanity checks on the inputs to a join. */
      74             : static gdk_return
      75      438986 : joinparamcheck(BAT *l, BAT *r1, BAT *r2, BAT *sl, BAT *sr, const char *func)
      76             : {
      77     1157359 :         if (ATOMtype(l->ttype) != ATOMtype(r1->ttype) ||
      78         268 :             (r2 && ATOMtype(l->ttype) != ATOMtype(r2->ttype))) {
      79          17 :                 GDKerror("%s: inputs not compatible.\n", func);
      80           0 :                 return GDK_FAIL;
      81             :         }
      82      438969 :         if (r2 &&
      83         134 :             (BATcount(r1) != BATcount(r2) || r1->hseqbase != r2->hseqbase)) {
      84           0 :                 GDKerror("%s: right inputs not aligned.\n", func);
      85           0 :                 return GDK_FAIL;
      86             :         }
      87      438969 :         if ((sl && !BATiscand(sl)) || (sr && !BATiscand(sr))) {
      88           0 :                 GDKerror("%s: argument not a candidate list.\n", func);
      89           0 :                 return GDK_FAIL;
      90             :         }
      91             :         return GDK_SUCCEED;
      92             : }
      93             : 
      94             : #define INCRSIZELOG     (8 + (SIZEOF_OID / 2))
      95             : #define INCRSIZE        (1 << INCRSIZELOG)
      96             : 
      97             : /* Create the result bats for a join, returns the absolute maximum
      98             :  * number of outputs that could possibly be generated. */
      99             : static BUN
     100       46124 : joininitresults(BAT **r1p, BAT **r2p, BUN lcnt, BUN rcnt, bool lkey, bool rkey,
     101             :                 bool semi, bool nil_on_miss, bool only_misses, bool min_one,
     102             :                 BUN estimate)
     103             : {
     104             :         BAT *r1, *r2;
     105             :         BUN maxsize, size;
     106             : 
     107             :         /* if nil_on_miss is set, we really need a right output */
     108       46124 :         assert(!nil_on_miss || r2p != NULL);
     109             : 
     110       46124 :         lkey |= lcnt <= 1;
     111       46124 :         rkey |= rcnt <= 1;
     112             : 
     113       46124 :         *r1p = NULL;
     114       46124 :         if (r2p)
     115       21134 :                 *r2p = NULL;
     116       46124 :         if (lcnt == 0) {
     117             :                 /* there is nothing to match */
     118             :                 maxsize = 0;
     119       44063 :         } else if (!only_misses && !nil_on_miss && rcnt == 0) {
     120             :                 /* if right is empty, we have no hits, so if we don't
     121             :                  * want misses, the result is empty */
     122             :                 maxsize = 0;
     123       44062 :         } else if (rkey | semi | only_misses) {
     124             :                 /* each entry left matches at most one on right, in
     125             :                  * case nil_on_miss is also set, each entry matches
     126             :                  * exactly one (see below) */
     127             :                 maxsize = lcnt;
     128       25560 :         } else if (lkey) {
     129             :                 /* each entry on right is matched at most once */
     130        3990 :                 if (nil_on_miss) {
     131             :                         /* one entry left could match all right, and
     132             :                          * all other entries left match nil */
     133           0 :                         maxsize = lcnt + rcnt - 1;
     134             :                 } else {
     135             :                         maxsize = rcnt;
     136             :                 }
     137       21570 :         } else if (rcnt == 0) {
     138             :                 /* nil_on_miss must be true due to previous checks, so
     139             :                  * all values on left miss */
     140             :                 maxsize = lcnt;
     141       21569 :         } else if (BUN_MAX / lcnt >= rcnt) {
     142             :                 /* in the worst case we have a full cross product */
     143       21569 :                 maxsize = lcnt * rcnt;
     144             :         } else {
     145             :                 /* a BAT cannot grow larger than BUN_MAX */
     146             :                 maxsize = BUN_MAX;
     147             :         }
     148       46124 :         size = estimate == BUN_NONE ? lcnt < rcnt ? lcnt : rcnt : estimate;
     149             :         if (size < INCRSIZE)
     150             :                 size = INCRSIZE;
     151             :         if (size > maxsize)
     152             :                 size = maxsize;
     153       46124 :         if ((rkey | semi | only_misses) & nil_on_miss) {
     154             :                 /* see comment above: each entry left matches exactly
     155             :                  * once */
     156             :                 size = maxsize;
     157             :         }
     158       46124 :         if (min_one && size < lcnt)
     159             :                 size = lcnt;
     160             : 
     161       46124 :         if (maxsize == 0) {
     162        2062 :                 r1 = BATdense(0, 0, 0);
     163        2062 :                 if (r1 == NULL) {
     164             :                         return BUN_NONE;
     165             :                 }
     166        2062 :                 if (r2p) {
     167         242 :                         r2 = BATdense(0, 0, 0);
     168         242 :                         if (r2 == NULL) {
     169           0 :                                 BBPreclaim(r1);
     170           0 :                                 return BUN_NONE;
     171             :                         }
     172         242 :                         *r2p = r2;
     173             :                 }
     174        2062 :                 *r1p = r1;
     175        2062 :                 return 0;
     176             :         }
     177             : 
     178       44062 :         r1 = COLnew(0, TYPE_oid, size, TRANSIENT);
     179       44059 :         if (r1 == NULL) {
     180             :                 return BUN_NONE;
     181             :         }
     182       44059 :         r1->tnil = false;
     183       44059 :         r1->tnonil = true;
     184       44059 :         r1->tkey = true;
     185       44059 :         r1->tsorted = true;
     186       44059 :         r1->trevsorted = true;
     187       44059 :         r1->tseqbase = 0;
     188       44059 :         r1->theap->dirty = true;
     189       44059 :         *r1p = r1;
     190       44059 :         if (r2p) {
     191       20890 :                 r2 = COLnew(0, TYPE_oid, size, TRANSIENT);
     192       20890 :                 if (r2 == NULL) {
     193           0 :                         BBPreclaim(r1);
     194           0 :                         return BUN_NONE;
     195             :                 }
     196       20890 :                 r2->tnil = false;
     197       20890 :                 r2->tnonil = true;
     198       20890 :                 r2->tkey = true;
     199       20890 :                 r2->tsorted = true;
     200       20890 :                 r2->trevsorted = true;
     201       20890 :                 r2->tseqbase = 0;
     202       20890 :                 r2->theap->dirty = true;
     203       20890 :                 *r2p = r2;
     204             :         }
     205             :         return maxsize;
     206             : }
     207             : 
     208             : #define VALUE(s, x)     (s##vars ?                                      \
     209             :                          s##vars + VarHeapVal(s##vals, (x), s##i.width) : \
     210             :                          s##vals ? (const char *) s##vals + ((x) * s##i.width) : \
     211             :                          (s##val = BUNtoid(s, (x)), (const char *) &s##val))
     212             : #define FVALUE(s, x)    ((const char *) s##vals + ((x) * s##i.width))
     213             : 
     214             : #define APPEND(b, o)            (((oid *) b->theap->base)[b->batCount++] = (o))
     215             : 
     216             : static inline gdk_return
     217   162750577 : maybeextend(BAT *restrict r1, BAT *restrict r2,
     218             :             BUN cnt, BUN lcur, BUN lcnt, BUN maxsize)
     219             : {
     220   162750577 :         if (BATcount(r1) + cnt > BATcapacity(r1)) {
     221             :                 /* make some extra space by extrapolating how much more
     222             :                  * we need (fraction of l we've seen so far is used to
     223             :                  * estimate a new size but with a shallow slope so that
     224             :                  * a skewed join doesn't overwhelm, whilst making sure
     225             :                  * there is somewhat significant progress) */
     226        2444 :                 BUN newcap = (BUN) (lcnt / (lcnt / 4.0 + lcur * .75) * (BATcount(r1) + cnt));
     227        2444 :                 newcap = (newcap + INCRSIZE - 1) & ~(((BUN) 1 << INCRSIZELOG) - 1);
     228        2444 :                 if (newcap < cnt + BATcount(r1))
     229           0 :                         newcap = cnt + BATcount(r1) + INCRSIZE;
     230             :                 /* if close to maxsize, then just use maxsize */
     231        2444 :                 if (newcap + INCRSIZE > maxsize)
     232             :                         newcap = maxsize;
     233             :                 /* make sure heap.free is set properly before
     234             :                  * extending */
     235        2444 :                 BATsetcount(r1, BATcount(r1));
     236        2445 :                 if (BATextend(r1, newcap) != GDK_SUCCEED)
     237             :                         return GDK_FAIL;
     238        2445 :                 if (r2) {
     239        1689 :                         BATsetcount(r2, BATcount(r2));
     240        1689 :                         if (BATextend(r2, newcap) != GDK_SUCCEED)
     241             :                                 return GDK_FAIL;
     242        1689 :                         assert(BATcapacity(r1) == BATcapacity(r2));
     243             :                 }
     244             :         }
     245             :         return GDK_SUCCEED;
     246             : }
     247             : 
     248             : /* Return BATs through r1p and r2p for the case that there is no
     249             :  * match between l and r, taking all flags into consideration.
     250             :  *
     251             :  * This means, if nil_on_miss is set or only_misses is set, *r1p is a
     252             :  * copy of the left candidate list or a dense list of all "head"
     253             :  * values of l, and *r2p (if r2p is not NULL) is all nil.  If neither
     254             :  * of those flags is set, the result is two empty BATs. */
     255             : static gdk_return
     256      311608 : nomatch(BAT **r1p, BAT **r2p, BAT *l, BAT *r, struct canditer *restrict lci,
     257             :         bool nil_on_miss, bool only_misses, const char *func, lng t0)
     258             : {
     259             :         BAT *r1, *r2 = NULL;
     260             : 
     261      311608 :         MT_thread_setalgorithm(__func__);
     262      311473 :         if (lci->ncand == 0 || !(nil_on_miss | only_misses)) {
     263             :                 /* return empty BATs */
     264      296353 :                 if ((r1 = BATdense(0, 0, 0)) == NULL)
     265             :                         return GDK_FAIL;
     266      296531 :                 if (r2p) {
     267      222980 :                         if ((r2 = BATdense(0, 0, 0)) == NULL) {
     268           0 :                                 BBPreclaim(r1);
     269           0 :                                 return GDK_FAIL;
     270             :                         }
     271      222963 :                         *r2p = r2;
     272             :                 }
     273             :         } else {
     274       15120 :                 r1 = canditer_slice(lci, 0, lci->ncand);
     275       15122 :                 if (r2p) {
     276           0 :                         if ((r2 = BATconstant(0, TYPE_void, &oid_nil, lci->ncand, TRANSIENT)) == NULL) {
     277           0 :                                 BBPreclaim(r1);
     278           0 :                                 return GDK_FAIL;
     279             :                         }
     280           0 :                         *r2p = r2;
     281             :                 }
     282             :         }
     283      311636 :         *r1p = r1;
     284      311636 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ",r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT
     285             :                   ",nil_on_miss=%s,only_misses=%s"
     286             :                   " - > " ALGOBATFMT "," ALGOOPTBATFMT
     287             :                   " (%s -- " LLFMT "usec)\n",
     288             :                   ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(lci->s),
     289             :                   nil_on_miss ? "true" : "false",
     290             :                   only_misses ? "true" : "false",
     291             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
     292             :                   func, GDKusec() - t0);
     293             :         return GDK_SUCCEED;
     294             : }
     295             : 
     296             : /* Implementation of join where there is a single value (possibly
     297             :  * repeated multiple times) on the left.  This means we can use a
     298             :  * point select to find matches in the right column. */
     299             : static gdk_return
     300       58046 : selectjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
     301             :            struct canditer *lci, struct canditer *rci,
     302             :            bool nil_matches, lng t0, bool swapped, const char *reason)
     303             : {
     304       58046 :         BATiter li = bat_iterator(l);
     305             :         const void *v;
     306             :         BAT *bn = NULL;
     307             : 
     308       58046 :         assert(lci->ncand > 0);
     309       58046 :         assert(lci->ncand == 1 || (l->tsorted && l->trevsorted));
     310             : 
     311             :         size_t counter = 0;
     312             :         lng timeoffset = 0;
     313       58046 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     314       58045 :         if (qry_ctx != NULL) {
     315       58025 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
     316             :         }
     317             : 
     318       58045 :         MT_thread_setalgorithm(__func__);
     319       58045 :         oid o = canditer_next(lci);
     320       58046 :         v = BUNtail(li, o - l->hseqbase);
     321             : 
     322      114487 :         if (!nil_matches &&
     323       56441 :             (*ATOMcompare(l->ttype))(v, ATOMnilptr(l->ttype)) == 0) {
     324             :                 /* NIL doesn't match anything */
     325          88 :                 bat_iterator_end(&li);
     326          88 :                 return nomatch(r1p, r2p, l, r, lci, false, false,
     327             :                                reason, t0);
     328             :         }
     329             : 
     330       57958 :         bn = BATselect(r, rci->s, v, NULL, true, true, false);
     331       57957 :         bat_iterator_end(&li);
     332       57957 :         if (bn == NULL) {
     333             :                 return GDK_FAIL;
     334             :         }
     335       57957 :         if (BATcount(bn) == 0) {
     336        8364 :                 BBPunfix(bn->batCacheid);
     337        8365 :                 return nomatch(r1p, r2p, l, r, lci, false, false,
     338             :                                reason, t0);
     339             :         }
     340       49593 :         BAT *r1 = COLnew(0, TYPE_oid, lci->ncand * BATcount(bn), TRANSIENT);
     341       49593 :         if (r1 == NULL) {
     342           0 :                 BBPunfix(bn->batCacheid);
     343           0 :                 return GDK_FAIL;
     344             :         }
     345       49593 :         r1->tsorted = true;
     346       49593 :         r1->trevsorted = lci->ncand == 1;
     347       49593 :         r1->tseqbase = BATcount(bn) == 1 && lci->tpe == cand_dense ? o : oid_nil;
     348       49593 :         r1->tkey = BATcount(bn) == 1;
     349       49593 :         r1->tnil = false;
     350       49593 :         r1->tnonil = true;
     351             :         BAT *r2 = NULL;
     352       49593 :         if (r2p) {
     353       45876 :                 r2 = COLnew(0, TYPE_oid, lci->ncand * BATcount(bn), TRANSIENT);
     354       45876 :                 if (r2 == NULL) {
     355           0 :                         BBPunfix(bn->batCacheid);
     356           0 :                         BBPreclaim(r1);
     357           0 :                         return GDK_FAIL;
     358             :                 }
     359       45876 :                 r2->tsorted = lci->ncand == 1 || BATcount(bn) == 1;
     360       45876 :                 r2->trevsorted = BATcount(bn) == 1;
     361       45876 :                 r2->tseqbase = lci->ncand == 1 && BATtdense(bn) ? bn->tseqbase : oid_nil;
     362       45876 :                 r2->tkey = lci->ncand == 1;
     363       45876 :                 r2->tnil = false;
     364       45876 :                 r2->tnonil = true;
     365             :         }
     366       49593 :         if (BATtdense(bn)) {
     367       49281 :                 oid *o1p = (oid *) Tloc(r1, 0);
     368       49281 :                 oid *o2p = r2 ? (oid *) Tloc(r2, 0) : NULL;
     369             :                 oid bno = bn->tseqbase;
     370       49281 :                 BUN p, q = BATcount(bn);
     371             : 
     372             :                 do {
     373      244202 :                         GDK_CHECK_TIMEOUT(timeoffset, counter,
     374             :                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     375      654734 :                         for (p = 0; p < q; p++) {
     376      410532 :                                 *o1p++ = o;
     377             :                         }
     378      244202 :                         if (o2p) {
     379      447792 :                                 for (p = 0; p < q; p++) {
     380      306910 :                                         *o2p++ = bno + p;
     381             :                                 }
     382             :                         }
     383      244202 :                         o = canditer_next(lci);
     384      244202 :                 } while (!is_oid_nil(o));
     385             :         } else {
     386         312 :                 oid *o1p = (oid *) Tloc(r1, 0);
     387         312 :                 oid *o2p = r2 ? (oid *) Tloc(r2, 0) : NULL;
     388         312 :                 const oid *bnp = (const oid *) Tloc(bn, 0);
     389         312 :                 BUN p, q = BATcount(bn);
     390             : 
     391             :                 do {
     392        1941 :                         GDK_CHECK_TIMEOUT(timeoffset, counter,
     393             :                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     394    29359039 :                         for (p = 0; p < q; p++) {
     395    29357098 :                                 *o1p++ = o;
     396             :                         }
     397        1941 :                         if (o2p) {
     398    29356604 :                                 for (p = 0; p < q; p++) {
     399    29354677 :                                         *o2p++ = bnp[p];
     400             :                                 }
     401             :                         }
     402        1941 :                         o = canditer_next(lci);
     403        1941 :                 } while (!is_oid_nil(o));
     404             :         }
     405       49593 :         BATsetcount(r1, lci->ncand * BATcount(bn));
     406       49593 :         *r1p = r1;
     407       49593 :         if (r2p) {
     408       45876 :                 BATsetcount(r2, lci->ncand * BATcount(bn));
     409       45875 :                 *r2p = r2;
     410             :         }
     411       49592 :         BBPunfix(bn->batCacheid);
     412       49593 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
     413             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
     414             :                   "sr=" ALGOOPTBATFMT ",nil_matches=%s;%s %s "
     415             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
     416             :                   ALGOBATPAR(l), ALGOBATPAR(r),
     417             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
     418             :                   nil_matches ? "true" : "false",
     419             :                   swapped ? " swapped" : "", reason,
     420             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
     421             :                   GDKusec() - t0);
     422             : 
     423             :         return GDK_SUCCEED;
     424             : 
     425           0 :   bailout:
     426           0 :         BBPreclaim(r1);
     427           0 :         BBPreclaim(r2);
     428           0 :         return GDK_FAIL;
     429             : }
     430             : 
     431             : #if SIZEOF_OID == SIZEOF_INT
     432             : #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_int(indir, offset, (const int *) vals, lo, hi, (int) (v), ordering, last)
     433             : #endif
     434             : #if SIZEOF_OID == SIZEOF_LNG
     435             : #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_lng(indir, offset, (const lng *) vals, lo, hi, (lng) (v), ordering, last)
     436             : #endif
     437             : 
     438             : /* Implementation of join where the right-hand side is dense, and if
     439             :  * there is a right candidate list, it too is dense.  In case
     440             :  * nil_on_miss is not set, we use a range select (BATselect) to find
     441             :  * the matching values in the left column and then calculate the
     442             :  * corresponding matches from the right.  If nil_on_miss is set, we
     443             :  * need to do some more work. */
     444             : static gdk_return
     445       24881 : mergejoin_void(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
     446             :                struct canditer *restrict lci, struct canditer *restrict rci,
     447             :                bool nil_on_miss, bool only_misses, lng t0, bool swapped,
     448             :                const char *reason)
     449             : {
     450             :         oid lo, hi;
     451             :         BUN i;
     452             :         oid o, *o1p = NULL, *o2p = NULL;
     453             :         BAT *r1 = NULL, *r2 = NULL;
     454             : 
     455             :         /* r is dense, and if there is a candidate list, it too is
     456             :          * dense.  This means we don't have to do any searches, we
     457             :          * only need to compare ranges to know whether a value from l
     458             :          * has a match in r */
     459       44015 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
     460       24881 :         assert(r->tsorted || r->trevsorted);
     461       24881 :         assert(BATcount(l) > 0);
     462       24881 :         assert(rci->tpe == cand_dense);
     463       24881 :         assert(BATcount(r) > 0);
     464             : 
     465             :         lng timeoffset = 0;
     466       24881 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     467       24879 :         if (qry_ctx != NULL) {
     468       22797 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
     469             :         }
     470             : 
     471       24879 :         MT_thread_setalgorithm(__func__);
     472             :         /* figure out range [lo..hi) of values in r that we need to match */
     473       24880 :         lo = r->tseqbase;
     474       24880 :         hi = lo + BATcount(r);
     475             :         /* restrict [lo..hi) range further using candidate list */
     476       24880 :         if (rci->seq > r->hseqbase)
     477           0 :                 lo += rci->seq - r->hseqbase;
     478       24880 :         if (rci->seq + rci->ncand < r->hseqbase + BATcount(r))
     479           0 :                 hi -= r->hseqbase + BATcount(r) - rci->seq - rci->ncand;
     480             : 
     481             :         /* at this point, the matchable values in r are [lo..hi) */
     482       24880 :         if (!nil_on_miss) {
     483       24880 :                 r1 = BATselect(l, lci->s, &lo, &hi, true, false, only_misses);
     484       24883 :                 if (r1 == NULL)
     485             :                         return GDK_FAIL;
     486       24883 :                 if (only_misses && !l->tnonil) {
     487             :                         /* also look for NILs */
     488           0 :                         r2 = BATselect(l, lci->s, &oid_nil, NULL, true, false, false);
     489           0 :                         if (r2 == NULL) {
     490           0 :                                 BBPreclaim(r1);
     491           0 :                                 return GDK_FAIL;
     492             :                         }
     493           0 :                         if (BATcount(r2) > 0) {
     494           0 :                                 BAT *mg = BATmergecand(r1, r2);
     495           0 :                                 BBPunfix(r1->batCacheid);
     496           0 :                                 BBPunfix(r2->batCacheid);
     497             :                                 r1 = mg;
     498           0 :                                 if (r1 == NULL)
     499             :                                         return GDK_FAIL;
     500             :                         } else {
     501           0 :                                 BBPunfix(r2->batCacheid);
     502             :                         }
     503             :                         r2 = NULL;
     504             :                 }
     505       24883 :                 *r1p = r1;
     506       24883 :                 if (r2p == NULL)
     507       23076 :                         goto doreturn2;
     508        1807 :                 if (BATcount(r1) == 0) {
     509          46 :                         r2 = BATdense(0, 0, 0);
     510          46 :                         if (r2 == NULL) {
     511           0 :                                 BBPreclaim(r1);
     512           0 :                                 return GDK_FAIL;
     513             :                         }
     514        1761 :                 } else if (BATtdense(r1) && BATtdense(l)) {
     515         124 :                         r2 = BATdense(0, l->tseqbase + r1->tseqbase - l->hseqbase + r->hseqbase - r->tseqbase, BATcount(r1));
     516         124 :                         if (r2 == NULL) {
     517           0 :                                 BBPreclaim(r1);
     518           0 :                                 return GDK_FAIL;
     519             :                         }
     520             :                 } else {
     521        1637 :                         r2 = COLnew(0, TYPE_oid, BATcount(r1), TRANSIENT);
     522        1636 :                         if (r2 == NULL) {
     523           0 :                                 BBPreclaim(r1);
     524           0 :                                 return GDK_FAIL;
     525             :                         }
     526        1636 :                         BATiter li = bat_iterator(l);
     527        1636 :                         const oid *lp = (const oid *) li.base;
     528        1636 :                         const oid *o1p = (const oid *) Tloc(r1, 0);
     529        1636 :                         oid *o2p = (oid *) Tloc(r2, 0);
     530        1636 :                         hi = BATcount(r1);
     531        1636 :                         if (complex_cand(l)) {
     532             :                                 /* this is actually generic code */
     533           0 :                                 for (o = 0; o < hi; o++)
     534           0 :                                         o2p[o] = BUNtoid(l, BUNtoid(r1, o) - l->hseqbase) - r->tseqbase + r->hseqbase;
     535        1636 :                         } else if (BATtdense(r1)) {
     536         608 :                                 lo = r1->tseqbase - l->hseqbase;
     537         608 :                                 if (r->tseqbase == r->hseqbase) {
     538         591 :                                         memcpy(o2p, lp + lo, hi * SIZEOF_OID);
     539             :                                 } else {
     540          17 :                                         hi += lo;
     541     5085032 :                                         for (o = 0; lo < hi; o++, lo++) {
     542     5085015 :                                                 o2p[o] = lp[lo] - r->tseqbase + r->hseqbase;
     543             :                                         }
     544             :                                 }
     545        1028 :                         } else if (BATtdense(l)) {
     546           0 :                                 for (o = 0; o < hi; o++) {
     547           0 :                                         o2p[o] = o1p[o] - l->hseqbase + l->tseqbase - r->tseqbase + r->hseqbase;
     548             :                                 }
     549             :                         } else {
     550    63561199 :                                 for (o = 0; o < hi; o++) {
     551    63560171 :                                         o2p[o] = lp[o1p[o] - l->hseqbase] - r->tseqbase + r->hseqbase;
     552             :                                 }
     553             :                         }
     554        1636 :                         bat_iterator_end(&li);
     555        1637 :                         r2->tkey = l->tkey;
     556        1637 :                         r2->tsorted = l->tsorted;
     557        1637 :                         r2->trevsorted = l->trevsorted;
     558        1637 :                         r2->tnil = false;
     559        1637 :                         r2->tnonil = true;
     560        1637 :                         BATsetcount(r2, BATcount(r1));
     561             :                 }
     562        1807 :                 *r2p = r2;
     563        1807 :                 goto doreturn2;
     564             :         }
     565             :         /* nil_on_miss is set, this means we must have a second output */
     566           0 :         assert(r2p);
     567           0 :         if (BATtdense(l)) {
     568             :                 /* if l is dense, we can further restrict the [lo..hi)
     569             :                  * range to values in l that match with values in r */
     570           0 :                 o = lo;
     571           0 :                 i = lci->seq - l->hseqbase;
     572           0 :                 if (l->tseqbase + i > lo)
     573           0 :                         lo = l->tseqbase + i;
     574           0 :                 i = canditer_last(lci) + 1 - l->hseqbase;
     575           0 :                 if (l->tseqbase + i < hi)
     576           0 :                         hi = l->tseqbase + i;
     577           0 :                 if (lci->tpe == cand_dense) {
     578             :                         /* l is dense, and so is the left candidate
     579             :                          * list (if it exists); this means we don't
     580             :                          * have to actually look at any values in l:
     581             :                          * we can just do some arithmetic; it also
     582             :                          * means that r1 will be dense, and if
     583             :                          * nil_on_miss is not set, or if all values in
     584             :                          * l match, r2 will too */
     585           0 :                         if (hi <= lo) {
     586           0 :                                 return nomatch(r1p, r2p, l, r, lci,
     587             :                                                nil_on_miss, only_misses,
     588             :                                                "mergejoin_void", t0);
     589             :                         }
     590             : 
     591             :                         /* at this point, the matched values in l and
     592             :                          * r (taking candidate lists into account) are
     593             :                          * [lo..hi) which we can translate back to the
     594             :                          * respective OID values that we can store in
     595             :                          * r1 and r2; note that r1 will be dense since
     596             :                          * all values in l will match something (even
     597             :                          * if nil since nil_on_miss is set) */
     598           0 :                         *r1p = r1 = BATdense(0, lci->seq, lci->ncand);
     599           0 :                         if (r1 == NULL)
     600             :                                 return GDK_FAIL;
     601           0 :                         if (hi - lo < lci->ncand) {
     602             :                                 /* we need to fill in nils in r2 for
     603             :                                  * missing values */
     604           0 :                                 *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     605           0 :                                 if (r2 == NULL) {
     606           0 :                                         BBPreclaim(*r1p);
     607           0 :                                         return GDK_FAIL;
     608             :                                 }
     609           0 :                                 o2p = (oid *) Tloc(r2, 0);
     610           0 :                                 i = l->tseqbase + lci->seq - l->hseqbase;
     611           0 :                                 lo -= i;
     612           0 :                                 hi -= i;
     613           0 :                                 i += r->hseqbase - r->tseqbase;
     614           0 :                                 for (o = 0; o < lo; o++)
     615           0 :                                         *o2p++ = oid_nil;
     616           0 :                                 for (o = lo; o < hi; o++)
     617           0 :                                         *o2p++ = o + i;
     618           0 :                                 for (o = hi; o < lci->ncand; o++)
     619           0 :                                         *o2p++ = oid_nil;
     620           0 :                                 r2->tnonil = false;
     621           0 :                                 r2->tnil = true;
     622             :                                 /* sorted of no nils at end */
     623           0 :                                 r2->tsorted = hi == lci->ncand;
     624             :                                 /* reverse sorted if single non-nil at start */
     625           0 :                                 r2->trevsorted = lo == 0 && hi == 1;
     626           0 :                                 r2->tseqbase = oid_nil;
     627             :                                 /* (hi - lo) different OIDs in r2,
     628             :                                  * plus one for nil */
     629           0 :                                 r2->tkey = hi - lo + 1 == lci->ncand;
     630           0 :                                 BATsetcount(r2, lci->ncand);
     631             :                         } else {
     632             :                                 /* no missing values */
     633           0 :                                 *r2p = r2 = BATdense(0, r->hseqbase + lo - r->tseqbase, lci->ncand);
     634           0 :                                 if (r2 == NULL) {
     635           0 :                                         BBPreclaim(*r1p);
     636           0 :                                         return GDK_FAIL;
     637             :                                 }
     638             :                         }
     639           0 :                         goto doreturn;
     640             :                 }
     641             :                 /* l is dense, but the candidate list exists and is
     642             :                  * not dense; we can, by manipulating the range
     643             :                  * [lo..hi), just look at the candidate list values */
     644             : 
     645             :                 /* translate lo and hi to l's OID values that now need
     646             :                  * to match */
     647           0 :                 lo = lo - l->tseqbase + l->hseqbase;
     648           0 :                 hi = hi - l->tseqbase + l->hseqbase;
     649             : 
     650           0 :                 *r1p = r1 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     651           0 :                 *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     652           0 :                 if (r1 == NULL || r2 == NULL) {
     653           0 :                         BBPreclaim(r1);
     654           0 :                         BBPreclaim(r2);
     655           0 :                         return GDK_FAIL;
     656             :                 }
     657           0 :                 o1p = (oid *) Tloc(r1, 0);
     658           0 :                 o2p = (oid *) Tloc(r2, 0);
     659           0 :                 r2->tnil = false;
     660           0 :                 r2->tnonil = true;
     661           0 :                 r2->tkey = true;
     662           0 :                 r2->tsorted = true;
     663           0 :                 o = canditer_next(lci);
     664           0 :                 for (i = 0; i < lci->ncand && o < lo; i++) {
     665           0 :                         *o1p++ = o;
     666           0 :                         *o2p++ = oid_nil;
     667           0 :                         o = canditer_next(lci);
     668             :                 }
     669           0 :                 if (i > 0) {
     670           0 :                         r2->tnil = true;
     671           0 :                         r2->tnonil = false;
     672           0 :                         r2->tkey = i == 1;
     673             :                 }
     674           0 :                 for (; i < lci->ncand && o < hi; i++) {
     675           0 :                         *o1p++ = o;
     676           0 :                         *o2p++ = o - l->hseqbase + l->tseqbase - r->tseqbase + r->hseqbase;
     677           0 :                         o = canditer_next(lci);
     678             :                 }
     679           0 :                 if (i < lci->ncand) {
     680           0 :                         r2->tkey = !r2->tnil && lci->ncand - i == 1;
     681           0 :                         r2->tnil = true;
     682           0 :                         r2->tnonil = false;
     683           0 :                         r2->tsorted = false;
     684           0 :                         for (; i < lci->ncand; i++) {
     685           0 :                                 *o1p++ = o;
     686           0 :                                 *o2p++ = oid_nil;
     687           0 :                                 o = canditer_next(lci);
     688             :                         }
     689             :                 }
     690           0 :                 BATsetcount(r1, lci->ncand);
     691           0 :                 r1->tseqbase = BATcount(r1) == 1 ? *(oid*)Tloc(r1, 0) : oid_nil;
     692           0 :                 r1->tsorted = true;
     693           0 :                 r1->trevsorted = BATcount(r1) <= 1;
     694           0 :                 r1->tnil = false;
     695           0 :                 r1->tnonil = true;
     696           0 :                 r1->tkey = true;
     697           0 :                 BATsetcount(r2, BATcount(r1));
     698           0 :                 r2->tseqbase = r2->tnil || BATcount(r2) > 1 ? oid_nil : BATcount(r2) == 1 ? *(oid*)Tloc(r2, 0) : 0;
     699           0 :                 r2->trevsorted = BATcount(r2) <= 1;
     700           0 :                 goto doreturn;
     701             :         }
     702             :         /* l is not dense, so we need to look at the values and check
     703             :          * whether they are in the range [lo..hi) */
     704             : 
     705             :         /* do indirection through the candidate list to look at the
     706             :          * value */
     707             : 
     708           0 :         *r1p = r1 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     709           0 :         *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     710           0 :         if (r1 == NULL || r2 == NULL) {
     711           0 :                 BBPreclaim(r1);
     712           0 :                 BBPreclaim(r2);
     713           0 :                 return GDK_FAIL;
     714             :         }
     715           0 :         o1p = (oid *) Tloc(r1, 0);
     716           0 :         o2p = (oid *) Tloc(r2, 0);
     717           0 :         r2->tnil = false;
     718           0 :         r2->tnonil = true;
     719           0 :         if (complex_cand(l)) {
     720           0 :                 TIMEOUT_LOOP(lci->ncand, timeoffset) {
     721           0 :                         oid c = canditer_next(lci);
     722             : 
     723           0 :                         o = BUNtoid(l, c - l->hseqbase);
     724           0 :                         *o1p++ = c;
     725           0 :                         if (o >= lo && o < hi) {
     726           0 :                                 *o2p++ = o - r->tseqbase + r->hseqbase;
     727             :                         } else {
     728           0 :                                 *o2p++ = oid_nil;
     729           0 :                                 r2->tnil = true;
     730           0 :                                 r2->tnonil = false;
     731             :                         }
     732             :                 }
     733           0 :                 TIMEOUT_CHECK(timeoffset,
     734             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     735             :         } else {
     736           0 :                 BATiter li = bat_iterator(l);
     737           0 :                 const oid *lvals = (const oid *) li.base;
     738           0 :                 TIMEOUT_LOOP(lci->ncand, timeoffset) {
     739           0 :                         oid c = canditer_next(lci);
     740             : 
     741           0 :                         o = lvals[c - l->hseqbase];
     742           0 :                         *o1p++ = c;
     743           0 :                         if (o >= lo && o < hi) {
     744           0 :                                 *o2p++ = o - r->tseqbase + r->hseqbase;
     745             :                         } else {
     746           0 :                                 *o2p++ = oid_nil;
     747           0 :                                 r2->tnil = true;
     748           0 :                                 r2->tnonil = false;
     749             :                         }
     750             :                 }
     751           0 :                 bat_iterator_end(&li);
     752           0 :                 TIMEOUT_CHECK(timeoffset,
     753             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     754             :         }
     755           0 :         r1->tsorted = true;
     756           0 :         r1->trevsorted = BATcount(r1) <= 1;
     757           0 :         r1->tkey = true;
     758           0 :         r1->tseqbase = oid_nil;
     759           0 :         r1->tnil = false;
     760           0 :         r1->tnonil = true;
     761           0 :         BATsetcount(r1, lci->ncand);
     762           0 :         BATsetcount(r2, lci->ncand);
     763           0 :         r2->tsorted = l->tsorted || BATcount(r2) <= 1;
     764           0 :         r2->trevsorted = l->trevsorted || BATcount(r2) <= 1;
     765           0 :         r2->tkey = l->tkey || BATcount(r2) <= 1;
     766           0 :         r2->tseqbase = oid_nil;
     767             : 
     768           0 :   doreturn:
     769           0 :         if (r1->tkey)
     770           0 :                 virtualize(r1);
     771           0 :         if (r2->tkey && r2->tsorted)
     772           0 :                 virtualize(r2);
     773           0 :   doreturn2:
     774       24883 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
     775             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
     776             :                   "sr=" ALGOOPTBATFMT ","
     777             :                   "nil_on_miss=%s,only_misses=%s;%s %s "
     778             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
     779             :                   ALGOBATPAR(l), ALGOBATPAR(r),
     780             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
     781             :                   nil_on_miss ? "true" : "false",
     782             :                   only_misses ? "true" : "false",
     783             :                   swapped ? " swapped" : "", reason,
     784             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
     785             :                   GDKusec() - t0);
     786             : 
     787             :         return GDK_SUCCEED;
     788             : 
     789           0 :   bailout:
     790           0 :         BBPreclaim(r1);
     791           0 :         BBPreclaim(r2);
     792           0 :         return GDK_FAIL;
     793             : }
     794             : 
     795             : /* Implementation of mergejoin (see below) for the special case that
     796             :  * the values are of type int, and some more conditions are met. */
     797             : static gdk_return
     798        5996 : mergejoin_int(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
     799             :               bool nil_matches, BUN estimate, lng t0, bool swapped,
     800             :               const char *reason)
     801             : {
     802             :         BAT *r1, *r2;
     803             :         BUN lstart, lend, lcnt;
     804             :         BUN rstart, rend;
     805             :         BUN lscan, rscan;       /* opportunistic scan window */
     806             :         BUN maxsize;
     807             :         const int *lvals, *rvals;
     808             :         int v;
     809             :         BUN nl, nr;
     810             :         oid lv;
     811             :         BUN i;
     812        5996 :         BATiter li = bat_iterator(l);
     813        5996 :         BATiter ri = bat_iterator(r);
     814             : 
     815       17988 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
     816        5996 :         assert(r->tsorted || r->trevsorted);
     817             : 
     818        5996 :         MT_thread_setalgorithm(__func__);
     819             :         lstart = rstart = 0;
     820        5996 :         lend = BATcount(l);
     821             :         lcnt = lend - lstart;
     822        5996 :         rend = BATcount(r);
     823        5996 :         lvals = (const int *) li.base;
     824        5996 :         rvals = (const int *) ri.base;
     825        5996 :         assert(!r->tvarsized || !r->ttype);
     826             :         size_t counter = 0;
     827             :         lng timeoffset = 0;
     828        5996 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     829        5996 :         if (qry_ctx != NULL) {
     830        5689 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
     831             :         }
     832             : 
     833             :         /* basic properties will be adjusted if necessary later on,
     834             :          * they were initially set by joininitresults() */
     835             : 
     836        5996 :         if (lend == 0 || rend == 0) {
     837             :                 /* there are no matches */
     838           0 :                 bat_iterator_end(&li);
     839           0 :                 bat_iterator_end(&ri);
     840           0 :                 return nomatch(r1p, r2p, l, r,
     841           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
     842             :                                false, false, __func__, t0);
     843             :         }
     844             : 
     845        5996 :         if ((maxsize = joininitresults(r1p, r2p, BATcount(l), BATcount(r),
     846        5996 :                                        l->tkey, r->tkey, false, false,
     847             :                                        false, false, estimate)) == BUN_NONE) {
     848           0 :                 bat_iterator_end(&li);
     849           0 :                 bat_iterator_end(&ri);
     850           0 :                 return GDK_FAIL;
     851             :         }
     852        5996 :         r1 = *r1p;
     853        5996 :         r2 = r2p ? *r2p : NULL;
     854             : 
     855             :         /* determine opportunistic scan window for l and r */
     856       37259 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
     857       31263 :                 nl >>= 1;
     858       54845 :         for (nr = rend - rstart, rscan = 4; nr > 0; rscan++)
     859       48849 :                 nr >>= 1;
     860             : 
     861        5996 :         if (!nil_matches) {
     862             :                 /* skip over nils at the start of the columns */
     863        4196 :                 if (lscan < lend - lstart && is_int_nil(lvals[lstart + lscan])) {
     864           0 :                         lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
     865             :                                                lend - 1, int_nil, 1, 1);
     866             :                 } else {
     867        4199 :                         while (is_int_nil(lvals[lstart]))
     868           3 :                                 lstart++;
     869             :                 }
     870        4196 :                 if (rscan < rend - rstart && is_int_nil(rvals[rstart + rscan])) {
     871           0 :                         rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
     872             :                                                rend - 1, int_nil, 1, 1);
     873             :                 } else {
     874        4196 :                         while (is_int_nil(rvals[rstart]))
     875           0 :                                 rstart++;
     876             :                 }
     877             :         }
     878             :         /* from here on we don't have to worry about nil values */
     879             : 
     880      489046 :         while (lstart < lend && rstart < rend) {
     881      484918 :                 GDK_CHECK_TIMEOUT(timeoffset, counter,
     882             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     883             : 
     884      484918 :                 v = rvals[rstart];
     885             : 
     886      484918 :                 if (lscan < lend - lstart && lvals[lstart + lscan] < v) {
     887       18977 :                         lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
     888             :                                                lend - 1, v, 1, 0);
     889             :                 } else {
     890             :                         /* scan l for v */
     891      549213 :                         while (lstart < lend && lvals[lstart] < v)
     892       83272 :                                 lstart++;
     893             :                 }
     894      487037 :                 if (lstart >= lend) {
     895             :                         /* nothing found */
     896             :                         break;
     897             :                 }
     898             : 
     899             :                 /* Here we determine the next value in l that we are
     900             :                  * going to try to match in r.  We will also count the
     901             :                  * number of occurrences in l of that value.
     902             :                  * Afterwards, v points to the value and nl is the
     903             :                  * number of times it occurs.  Also, lstart will
     904             :                  * point to the next value to be considered (ready for
     905             :                  * the next iteration).
     906             :                  * If there are many equal values in l (more than
     907             :                  * lscan), we will use binary search to find the end
     908             :                  * of the sequence.  Obviously, we can do this only if
     909             :                  * l is actually sorted (lscan > 0). */
     910             :                 nl = 1;         /* we'll match (at least) one in l */
     911             :                 nr = 0;         /* maybe we won't match anything in r */
     912      486267 :                 v = lvals[lstart];
     913      486267 :                 if (l->tkey) {
     914             :                         /* if l is key, there is a single value */
     915       90465 :                         lstart++;
     916      395802 :                 } else if (lscan < lend - lstart &&
     917      390258 :                            v == lvals[lstart + lscan]) {
     918             :                         /* lots of equal values: use binary search to
     919             :                          * find end */
     920       22851 :                         nl = binsearch_int(NULL, 0, lvals, lstart + lscan,
     921             :                                            lend - 1, v, 1, 1);
     922       22851 :                         nl -= lstart;
     923       22851 :                         lstart += nl;
     924             :                 } else {
     925             :                         /* just scan */
     926     1663229 :                         while (++lstart < lend && v == lvals[lstart])
     927     1290278 :                                 nl++;
     928             :                 }
     929             :                 /* lstart points one beyond the value we're
     930             :                  * going to match: ready for the next iteration. */
     931             : 
     932             :                 /* First we find the first value in r that is at
     933             :                  * least as large as v, then we find the first
     934             :                  * value in r that is larger than v.  The difference
     935             :                  * is the number of values equal to v and is stored in
     936             :                  * nr.
     937             :                  * We will use binary search on r to find both ends of
     938             :                  * the sequence of values that are equal to v in case
     939             :                  * the position is "too far" (more than rscan
     940             :                  * away). */
     941             : 
     942             :                 /* first find the location of the first value in r
     943             :                  * that is >= v, then find the location of the first
     944             :                  * value in r that is > v; the difference is the
     945             :                  * number of values equal to v */
     946             : 
     947             :                 /* look ahead a little (rscan) in r to see whether
     948             :                  * we're better off doing a binary search */
     949      486267 :                 if (rscan < rend - rstart && rvals[rstart + rscan] < v) {
     950             :                         /* value too far away in r: use binary
     951             :                          * search */
     952       17916 :                         rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
     953             :                                                rend - 1, v, 1, 0);
     954             :                 } else {
     955             :                         /* scan r for v */
     956      496239 :                         while (rstart < rend && rvals[rstart] < v)
     957       27888 :                                 rstart++;
     958             :                 }
     959      486511 :                 if (rstart == rend) {
     960             :                         /* nothing found */
     961             :                         break;
     962             :                 }
     963             : 
     964             :                 /* now find the end of the sequence of equal values v */
     965             : 
     966             :                 /* if r is key, there is zero or one match, otherwise
     967             :                  * look ahead a little (rscan) in r to see whether
     968             :                  * we're better off doing a binary search */
     969      485415 :                 if (r->tkey) {
     970      245259 :                         if (rstart < rend && v == rvals[rstart]) {
     971             :                                 nr = 1;
     972      243894 :                                 rstart++;
     973             :                         }
     974      240156 :                 } else if (rscan < rend - rstart &&
     975      238875 :                            v == rvals[rstart + rscan]) {
     976             :                         /* range too large: use binary search */
     977       70881 :                         nr = binsearch_int(NULL, 0, rvals, rstart + rscan,
     978             :                                            rend - 1, v, 1, 1);
     979       71636 :                         nr -= rstart;
     980       71636 :                         rstart += nr;
     981             :                 } else {
     982             :                         /* scan r for end of range */
     983     1457638 :                         while (rstart < rend && v == rvals[rstart]) {
     984     1288363 :                                 nr++;
     985     1288363 :                                 rstart++;
     986             :                         }
     987             :                 }
     988             :                 /* rstart points to first value > v or end of
     989             :                  * r, and nr is the number of values in r that
     990             :                  * are equal to v */
     991      240911 :                 if (nr == 0) {
     992             :                         /* no entries in r found */
     993        1453 :                         continue;
     994             :                 }
     995             :                 /* make space: nl values in l match nr values in r, so
     996             :                  * we need to add nl * nr values in the results */
     997      484717 :                 if (maybeextend(r1, r2, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
     998           0 :                         goto bailout;
     999             : 
    1000             :                 /* maintain properties */
    1001      481597 :                 if (nl > 1) {
    1002             :                         /* value occurs multiple times in l, so entry
    1003             :                          * in r will be repeated multiple times: hence
    1004             :                          * r2 is not key and not dense */
    1005      327942 :                         if (r2) {
    1006      291065 :                                 r2->tkey = false;
    1007      291065 :                                 r2->tseqbase = oid_nil;
    1008             :                         }
    1009             :                         /* multiple different values will be inserted
    1010             :                          * in r1 (always in order), so not reverse
    1011             :                          * ordered anymore */
    1012      327942 :                         r1->trevsorted = false;
    1013             :                 }
    1014      481597 :                 if (nr > 1) {
    1015             :                         /* value occurs multiple times in r, so entry
    1016             :                          * in l will be repeated multiple times: hence
    1017             :                          * r1 is not key and not dense */
    1018      204032 :                         r1->tkey = false;
    1019      204032 :                         r1->tseqbase = oid_nil;
    1020             :                         /* multiple different values will be inserted
    1021             :                          * in r2 (in order), so not reverse ordered
    1022             :                          * anymore */
    1023      204032 :                         if (r2) {
    1024      128075 :                                 r2->trevsorted = false;
    1025      128075 :                                 if (nl > 1) {
    1026             :                                         /* multiple values in l match
    1027             :                                          * multiple values in r, so an
    1028             :                                          * ordered sequence will be
    1029             :                                          * inserted multiple times in
    1030             :                                          * r2, so r2 is not ordered
    1031             :                                          * anymore */
    1032       88215 :                                         r2->tsorted = false;
    1033             :                                 }
    1034             :                         }
    1035             :                 }
    1036      481597 :                 if (BATcount(r1) > 0) {
    1037             :                         /* a new, higher value will be inserted into
    1038             :                          * r1, so r1 is not reverse ordered anymore */
    1039      479498 :                         r1->trevsorted = false;
    1040             :                         /* a new higher value will be added to r2 */
    1041      479498 :                         if (r2) {
    1042      387425 :                                 r2->trevsorted = false;
    1043             :                         }
    1044      479498 :                         if (BATtdense(r1) &&
    1045      269871 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1046         104 :                                 r1->tseqbase = oid_nil;
    1047             :                         }
    1048             :                 }
    1049             : 
    1050      481597 :                 if (r2 &&
    1051      388681 :                     BATcount(r2) > 0 &&
    1052      385294 :                     BATtdense(r2) &&
    1053       84103 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rstart - nr) {
    1054         378 :                         r2->tseqbase = oid_nil;
    1055             :                 }
    1056             : 
    1057             :                 /* insert values */
    1058      481597 :                 lv = l->hseqbase + lstart - nl;
    1059    16305380 :                 for (i = 0; i < nl; i++) {
    1060             :                         BUN j;
    1061             : 
    1062   121316393 :                         for (j = 0; j < nr; j++) {
    1063   105492610 :                                 APPEND(r1, lv);
    1064             :                         }
    1065    15823783 :                         if (r2) {
    1066    15626990 :                                 oid rv = r->hseqbase + rstart - nr;
    1067             : 
    1068   114288480 :                                 for (j = 0; j < nr; j++) {
    1069    98661490 :                                         APPEND(r2, rv);
    1070    98661490 :                                         rv++;
    1071             :                                 }
    1072             :                         }
    1073    15823783 :                         lv++;
    1074             :                 }
    1075             :         }
    1076             :         /* also set other bits of heap to correct value to indicate size */
    1077        5994 :         BATsetcount(r1, BATcount(r1));
    1078        5995 :         if (r2) {
    1079        4471 :                 BATsetcount(r2, BATcount(r2));
    1080        4471 :                 assert(BATcount(r1) == BATcount(r2));
    1081             :         }
    1082        5995 :         if (BATcount(r1) > 0) {
    1083        4370 :                 if (BATtdense(r1))
    1084        3484 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1085        4370 :                 if (r2 && BATtdense(r2))
    1086        2325 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1087             :         } else {
    1088        1625 :                 r1->tseqbase = 0;
    1089        1625 :                 if (r2) {
    1090        1014 :                         r2->tseqbase = 0;
    1091             :                 }
    1092             :         }
    1093        5995 :         bat_iterator_end(&li);
    1094        5996 :         bat_iterator_end(&ri);
    1095        5996 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1096             :                   "nil_matches=%s;%s %s "
    1097             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1098             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1099             :                   nil_matches ? "true" : "false",
    1100             :                   swapped ? " swapped" : "", reason,
    1101             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1102             :                   GDKusec() - t0);
    1103             : 
    1104             :         return GDK_SUCCEED;
    1105             : 
    1106           0 :   bailout:
    1107           0 :         bat_iterator_end(&li);
    1108           0 :         bat_iterator_end(&ri);
    1109           0 :         BBPreclaim(r1);
    1110           0 :         BBPreclaim(r2);
    1111           0 :         return GDK_FAIL;
    1112             : }
    1113             : 
    1114             : /* Implementation of mergejoin (see below) for the special case that
    1115             :  * the values are of type lng, and some more conditions are met. */
    1116             : static gdk_return
    1117         172 : mergejoin_lng(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1118             :               bool nil_matches, BUN estimate, lng t0, bool swapped,
    1119             :               const char *reason)
    1120             : {
    1121             :         BAT *r1, *r2;
    1122             :         BUN lstart, lend, lcnt;
    1123             :         BUN rstart, rend;
    1124             :         BUN lscan, rscan;       /* opportunistic scan window */
    1125             :         BUN maxsize;
    1126             :         const lng *lvals, *rvals;
    1127             :         lng v;
    1128             :         BUN nl, nr;
    1129             :         oid lv;
    1130             :         BUN i;
    1131         172 :         BATiter li = bat_iterator(l);
    1132         172 :         BATiter ri = bat_iterator(r);
    1133             : 
    1134         516 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    1135         172 :         assert(r->tsorted || r->trevsorted);
    1136             : 
    1137         172 :         MT_thread_setalgorithm(__func__);
    1138             :         lstart = rstart = 0;
    1139         172 :         lend = BATcount(l);
    1140             :         lcnt = lend - lstart;
    1141         172 :         rend = BATcount(r);
    1142         172 :         lvals = (const lng *) li.base;
    1143         172 :         rvals = (const lng *) ri.base;
    1144         172 :         assert(!r->tvarsized || !r->ttype);
    1145             :         size_t counter = 0;
    1146             :         lng timeoffset = 0;
    1147         172 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1148         172 :         if (qry_ctx != NULL) {
    1149         172 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    1150             :         }
    1151             : 
    1152             :         /* basic properties will be adjusted if necessary later on,
    1153             :          * they were initially set by joininitresults() */
    1154             : 
    1155         172 :         if (lend == 0 || rend == 0) {
    1156             :                 /* there are no matches */
    1157           0 :                 bat_iterator_end(&li);
    1158           0 :                 bat_iterator_end(&ri);
    1159           0 :                 return nomatch(r1p, r2p, l, r,
    1160           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1161             :                                false, false, __func__, t0);
    1162             :         }
    1163             : 
    1164         172 :         if ((maxsize = joininitresults(r1p, r2p, BATcount(l), BATcount(r),
    1165         172 :                                        l->tkey, r->tkey, false, false,
    1166             :                                        false, false, estimate)) == BUN_NONE) {
    1167           0 :                 bat_iterator_end(&li);
    1168           0 :                 bat_iterator_end(&ri);
    1169           0 :                 return GDK_FAIL;
    1170             :         }
    1171         172 :         r1 = *r1p;
    1172         172 :         r2 = r2p ? *r2p : NULL;
    1173             : 
    1174             :         /* determine opportunistic scan window for l and r */
    1175        1249 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1176        1077 :                 nl >>= 1;
    1177        1201 :         for (nr = rend - rstart, rscan = 4; nr > 0; rscan++)
    1178        1029 :                 nr >>= 1;
    1179             : 
    1180         172 :         if (!nil_matches) {
    1181             :                 /* skip over nils at the start of the columns */
    1182          95 :                 if (lscan < lend - lstart && is_lng_nil(lvals[lstart + lscan])) {
    1183           0 :                         lstart = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1184             :                                                lend - 1, lng_nil, 1, 1);
    1185             :                 } else {
    1186          95 :                         while (is_lng_nil(lvals[lstart]))
    1187           0 :                                 lstart++;
    1188             :                 }
    1189          95 :                 if (rscan < rend - rstart && is_lng_nil(rvals[rstart + rscan])) {
    1190           0 :                         rstart = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1191             :                                                rend - 1, lng_nil, 1, 1);
    1192             :                 } else {
    1193          95 :                         while (is_lng_nil(rvals[rstart]))
    1194           0 :                                 rstart++;
    1195             :                 }
    1196             :         }
    1197             :         /* from here on we don't have to worry about nil values */
    1198             : 
    1199      417033 :         while (lstart < lend && rstart < rend) {
    1200      416903 :                 GDK_CHECK_TIMEOUT(timeoffset, counter,
    1201             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    1202      416902 :                 v = rvals[rstart];
    1203             : 
    1204      416902 :                 if (lscan < lend - lstart && lvals[lstart + lscan] < v) {
    1205        1628 :                         lstart = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1206             :                                                lend - 1, v, 1, 0);
    1207             :                 } else {
    1208             :                         /* scan l for v */
    1209      517372 :                         while (lstart < lend && lvals[lstart] < v)
    1210      102098 :                                 lstart++;
    1211             :                 }
    1212      416972 :                 if (lstart >= lend) {
    1213             :                         /* nothing found */
    1214             :                         break;
    1215             :                 }
    1216             : 
    1217             :                 /* Here we determine the next value in l that we are
    1218             :                  * going to try to match in r.  We will also count the
    1219             :                  * number of occurrences in l of that value.
    1220             :                  * Afterwards, v points to the value and nl is the
    1221             :                  * number of times it occurs.  Also, lstart will
    1222             :                  * point to the next value to be considered (ready for
    1223             :                  * the next iteration).
    1224             :                  * If there are many equal values in l (more than
    1225             :                  * lscan), we will use binary search to find the end
    1226             :                  * of the sequence.  Obviously, we can do this only if
    1227             :                  * l is actually sorted (lscan > 0). */
    1228             :                 nl = 1;         /* we'll match (at least) one in l */
    1229             :                 nr = 0;         /* maybe we won't match anything in r */
    1230      416947 :                 v = lvals[lstart];
    1231      416947 :                 if (l->tkey) {
    1232             :                         /* if l is key, there is a single value */
    1233      367121 :                         lstart++;
    1234       49826 :                 } else if (lscan < lend - lstart &&
    1235       49740 :                            v == lvals[lstart + lscan]) {
    1236             :                         /* lots of equal values: use binary search to
    1237             :                          * find end */
    1238         395 :                         nl = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1239             :                                            lend - 1, v, 1, 1);
    1240         395 :                         nl -= lstart;
    1241         395 :                         lstart += nl;
    1242             :                 } else {
    1243             :                         /* just scan */
    1244       89844 :                         while (++lstart < lend && v == lvals[lstart])
    1245       40413 :                                 nl++;
    1246             :                 }
    1247             :                 /* lstart points one beyond the value we're
    1248             :                  * going to match: ready for the next iteration. */
    1249             : 
    1250             :                 /* First we find the first value in r that is at
    1251             :                  * least as large as v, then we find the first
    1252             :                  * value in r that is larger than v.  The difference
    1253             :                  * is the number of values equal to v and is stored in
    1254             :                  * nr.
    1255             :                  * We will use binary search on r to find both ends of
    1256             :                  * the sequence of values that are equal to v in case
    1257             :                  * the position is "too far" (more than rscan
    1258             :                  * away). */
    1259             : 
    1260             :                 /* first find the location of the first value in r
    1261             :                  * that is >= v, then find the location of the first
    1262             :                  * value in r that is > v; the difference is the
    1263             :                  * number of values equal to v */
    1264             : 
    1265             :                 /* look ahead a little (rscan) in r to see whether
    1266             :                  * we're better off doing a binary search */
    1267      416947 :                 if (rscan < rend - rstart && rvals[rstart + rscan] < v) {
    1268             :                         /* value too far away in r: use binary
    1269             :                          * search */
    1270        1555 :                         rstart = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1271             :                                                rend - 1, v, 1, 0);
    1272             :                 } else {
    1273             :                         /* scan r for v */
    1274     1480436 :                         while (rstart < rend && rvals[rstart] < v)
    1275     1065044 :                                 rstart++;
    1276             :                 }
    1277      416899 :                 if (rstart == rend) {
    1278             :                         /* nothing found */
    1279             :                         break;
    1280             :                 }
    1281             : 
    1282             :                 /* now find the end of the sequence of equal values v */
    1283             : 
    1284             :                 /* if r is key, there is zero or one match, otherwise
    1285             :                  * look ahead a little (rscan) in r to see whether
    1286             :                  * we're better off doing a binary search */
    1287      416883 :                 if (r->tkey) {
    1288      382096 :                         if (rstart < rend && v == rvals[rstart]) {
    1289             :                                 nr = 1;
    1290       86697 :                                 rstart++;
    1291             :                         }
    1292       34787 :                 } else if (rscan < rend - rstart &&
    1293       34746 :                            v == rvals[rstart + rscan]) {
    1294             :                         /* range too large: use binary search */
    1295           0 :                         nr = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1296             :                                            rend - 1, v, 1, 1);
    1297           0 :                         nr -= rstart;
    1298           0 :                         rstart += nr;
    1299             :                 } else {
    1300             :                         /* scan r for end of range */
    1301       76277 :                         while (rstart < rend && v == rvals[rstart]) {
    1302       41490 :                                 nr++;
    1303       41490 :                                 rstart++;
    1304             :                         }
    1305             :                 }
    1306             :                 /* rstart points to first value > v or end of
    1307             :                  * r, and nr is the number of values in r that
    1308             :                  * are equal to v */
    1309       34787 :                 if (nr == 0) {
    1310             :                         /* no entries in r found */
    1311      295317 :                         continue;
    1312             :                 }
    1313             :                 /* make space: nl values in l match nr values in r, so
    1314             :                  * we need to add nl * nr values in the results */
    1315      121566 :                 if (maybeextend(r1, r2, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1316           0 :                         goto bailout;
    1317             : 
    1318             :                 /* maintain properties */
    1319      121544 :                 if (nl > 1) {
    1320             :                         /* value occurs multiple times in l, so entry
    1321             :                          * in r will be repeated multiple times: hence
    1322             :                          * r2 is not key and not dense */
    1323       10651 :                         if (r2) {
    1324        5648 :                                 r2->tkey = false;
    1325        5648 :                                 r2->tseqbase = oid_nil;
    1326             :                         }
    1327             :                         /* multiple different values will be inserted
    1328             :                          * in r1 (always in order), so not reverse
    1329             :                          * ordered anymore */
    1330       10651 :                         r1->trevsorted = false;
    1331             :                 }
    1332      121544 :                 if (nr > 1) {
    1333             :                         /* value occurs multiple times in r, so entry
    1334             :                          * in l will be repeated multiple times: hence
    1335             :                          * r1 is not key and not dense */
    1336        1969 :                         r1->tkey = false;
    1337        1969 :                         r1->tseqbase = oid_nil;
    1338             :                         /* multiple different values will be inserted
    1339             :                          * in r2 (in order), so not reverse ordered
    1340             :                          * anymore */
    1341        1969 :                         if (r2) {
    1342        1969 :                                 r2->trevsorted = false;
    1343        1969 :                                 if (nl > 1) {
    1344             :                                         /* multiple values in l match
    1345             :                                          * multiple values in r, so an
    1346             :                                          * ordered sequence will be
    1347             :                                          * inserted multiple times in
    1348             :                                          * r2, so r2 is not ordered
    1349             :                                          * anymore */
    1350          51 :                                         r2->tsorted = false;
    1351             :                                 }
    1352             :                         }
    1353             :                 }
    1354      121544 :                 if (BATcount(r1) > 0) {
    1355             :                         /* a new, higher value will be inserted into
    1356             :                          * r1, so r1 is not reverse ordered anymore */
    1357      121390 :                         r1->trevsorted = false;
    1358             :                         /* a new higher value will be added to r2 */
    1359      121390 :                         if (r2) {
    1360      114683 :                                 r2->trevsorted = false;
    1361             :                         }
    1362      121390 :                         if (BATtdense(r1) &&
    1363       52797 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1364          28 :                                 r1->tseqbase = oid_nil;
    1365             :                         }
    1366             :                 }
    1367             : 
    1368      121544 :                 if (r2 &&
    1369      114829 :                     BATcount(r2) > 0 &&
    1370      114683 :                     BATtdense(r2) &&
    1371       51665 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rstart - nr) {
    1372          22 :                         r2->tseqbase = oid_nil;
    1373             :                 }
    1374             : 
    1375             :                 /* insert values */
    1376      121544 :                 lv = l->hseqbase + lstart - nl;
    1377      286764 :                 for (i = 0; i < nl; i++) {
    1378             :                         BUN j;
    1379             : 
    1380      338790 :                         for (j = 0; j < nr; j++) {
    1381      173570 :                                 APPEND(r1, lv);
    1382             :                         }
    1383      165220 :                         if (r2) {
    1384      146838 :                                 oid rv = r->hseqbase + rstart - nr;
    1385             : 
    1386      301953 :                                 for (j = 0; j < nr; j++) {
    1387      155115 :                                         APPEND(r2, rv);
    1388      155115 :                                         rv++;
    1389             :                                 }
    1390             :                         }
    1391      165220 :                         lv++;
    1392             :                 }
    1393             :         }
    1394             :         /* also set other bits of heap to correct value to indicate size */
    1395         171 :         BATsetcount(r1, BATcount(r1));
    1396         171 :         if (r2) {
    1397         158 :                 BATsetcount(r2, BATcount(r2));
    1398         158 :                 assert(BATcount(r1) == BATcount(r2));
    1399             :         }
    1400         171 :         if (BATcount(r1) > 0) {
    1401         154 :                 if (BATtdense(r1))
    1402         110 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1403         154 :                 if (r2 && BATtdense(r2))
    1404          86 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1405             :         } else {
    1406          17 :                 r1->tseqbase = 0;
    1407          17 :                 if (r2) {
    1408          12 :                         r2->tseqbase = 0;
    1409             :                 }
    1410             :         }
    1411         171 :         bat_iterator_end(&li);
    1412         171 :         bat_iterator_end(&ri);
    1413         171 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1414             :                   "nil_matches=%s;%s %s "
    1415             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1416             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1417             :                   nil_matches ? "true" : "false",
    1418             :                   swapped ? " swapped" : "", reason,
    1419             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1420             :                   GDKusec() - t0);
    1421             : 
    1422             :         return GDK_SUCCEED;
    1423             : 
    1424           1 :   bailout:
    1425           1 :         bat_iterator_end(&li);
    1426           1 :         bat_iterator_end(&ri);
    1427           1 :         BBPreclaim(r1);
    1428           1 :         BBPreclaim(r2);
    1429           1 :         return GDK_FAIL;
    1430             : }
    1431             : 
    1432             : /* Implementation of mergejoin (see below) for the special case that
    1433             :  * the values are of type oid, and the right-hand side is a candidate
    1434             :  * list with exception, and some more conditions are met. */
    1435             : static gdk_return
    1436           0 : mergejoin_cand(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1437             :                bool nil_matches, BUN estimate, lng t0, bool swapped,
    1438             :                const char *reason)
    1439             : {
    1440             : /* the comments in this function have not been checked after making a
    1441             :  * copy of mergejoin below and adapting it to a mask right-hand side */
    1442             :         BAT *r1, *r2;
    1443             :         BUN lstart, lend, lcnt;
    1444             :         struct canditer lci, rci;
    1445             :         BUN lscan;              /* opportunistic scan window */
    1446             :         BUN maxsize;
    1447             :         const oid *lvals;
    1448             :         oid v;
    1449             :         BUN nl, nr;
    1450             :         oid lv;
    1451             :         BUN i;
    1452           0 :         BATiter li = bat_iterator(l);
    1453             : 
    1454           0 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    1455             : 
    1456           0 :         MT_thread_setalgorithm(__func__);
    1457             :         lstart = 0;
    1458           0 :         lend = BATcount(l);
    1459             :         lcnt = lend - lstart;
    1460           0 :         if (l->ttype == TYPE_void) {
    1461           0 :                 assert(!is_oid_nil(l->tseqbase));
    1462           0 :                 lcnt = canditer_init(&lci, NULL, l);
    1463             :                 lvals = NULL;
    1464             :         } else {
    1465           0 :                 lci = (struct canditer) {.tpe = cand_dense}; /* not used */
    1466           0 :                 lvals = (const oid *) li.base;
    1467           0 :                 assert(lvals != NULL);
    1468             :         }
    1469             : 
    1470           0 :         assert(complex_cand(r));
    1471           0 :         canditer_init(&rci, NULL, r);
    1472             :         size_t counter = 0;
    1473             :         lng timeoffset = 0;
    1474           0 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1475           0 :         if (qry_ctx != NULL) {
    1476           0 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    1477             :         }
    1478             : 
    1479             :         /* basic properties will be adjusted if necessary later on,
    1480             :          * they were initially set by joininitresults() */
    1481             : 
    1482           0 :         if (lend == 0 || rci.ncand == 0) {
    1483             :                 /* there are no matches */
    1484           0 :                 bat_iterator_end(&li);
    1485           0 :                 return nomatch(r1p, r2p, l, r,
    1486           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1487             :                                false, false, __func__, t0);
    1488             :         }
    1489             : 
    1490           0 :         if ((maxsize = joininitresults(r1p, r2p, BATcount(l), BATcount(r),
    1491           0 :                                        l->tkey, r->tkey, false, false,
    1492             :                                        false, false, estimate)) == BUN_NONE) {
    1493           0 :                 bat_iterator_end(&li);
    1494           0 :                 return GDK_FAIL;
    1495             :         }
    1496           0 :         r1 = *r1p;
    1497           0 :         r2 = r2p ? *r2p : NULL;
    1498             : 
    1499             :         /* determine opportunistic scan window for l and r */
    1500           0 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1501           0 :                 nl >>= 1;
    1502             : 
    1503           0 :         if (!nil_matches) {
    1504             :                 /* skip over nils at the start of the columns */
    1505           0 :                 if (lscan < lend - lstart && lvals && is_oid_nil(lvals[lstart + lscan])) {
    1506           0 :                         lstart = binsearch_oid(NULL, 0, lvals, lstart + lscan,
    1507             :                                                lend - 1, oid_nil, 1, 1);
    1508           0 :                 } else if (lvals) {
    1509           0 :                         while (is_oid_nil(lvals[lstart]))
    1510           0 :                                 lstart++;
    1511             :                 } /* else l is candidate list: no nils */
    1512             :         }
    1513             :         /* from here on we don't have to worry about nil values */
    1514             : 
    1515           0 :         while (lstart < lend && rci.next < rci.ncand) {
    1516           0 :                 GDK_CHECK_TIMEOUT(timeoffset, counter,
    1517             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    1518           0 :                 v = canditer_peek(&rci);
    1519             : 
    1520           0 :                 if (lvals) {
    1521           0 :                         if (lscan < lend - lstart &&
    1522           0 :                             lvals[lstart + lscan] < v) {
    1523           0 :                                 lstart = binsearch_oid(NULL, 0, lvals,
    1524             :                                                        lstart + lscan,
    1525             :                                                        lend - 1, v, 1, 0);
    1526             :                         } else {
    1527             :                                 /* scan l for v */
    1528           0 :                                 while (lstart < lend && lvals[lstart] < v)
    1529           0 :                                         lstart++;
    1530             :                         }
    1531             :                 } else {
    1532           0 :                         lstart = canditer_search(&lci, v, true);
    1533           0 :                         canditer_setidx(&lci, lstart);
    1534             :                 }
    1535           0 :                 if (lstart >= lend) {
    1536             :                         /* nothing found */
    1537             :                         break;
    1538             :                 }
    1539             : 
    1540             :                 /* Here we determine the next value in l that we are
    1541             :                  * going to try to match in r.  We will also count the
    1542             :                  * number of occurrences in l of that value.
    1543             :                  * Afterwards, v points to the value and nl is the
    1544             :                  * number of times it occurs.  Also, lstart will
    1545             :                  * point to the next value to be considered (ready for
    1546             :                  * the next iteration).
    1547             :                  * If there are many equal values in l (more than
    1548             :                  * lscan), we will use binary search to find the end
    1549             :                  * of the sequence.  Obviously, we can do this only if
    1550             :                  * l is actually sorted (lscan > 0). */
    1551             :                 nl = 1;         /* we'll match (at least) one in l */
    1552             :                 nr = 0;         /* maybe we won't match anything in r */
    1553           0 :                 v = lvals ? lvals[lstart] : canditer_next(&lci);
    1554           0 :                 if (l->tkey || lvals == NULL) {
    1555             :                         /* if l is key, there is a single value */
    1556           0 :                         lstart++;
    1557           0 :                 } else if (lscan < lend - lstart &&
    1558           0 :                            v == lvals[lstart + lscan]) {
    1559             :                         /* lots of equal values: use binary search to
    1560             :                          * find end */
    1561           0 :                         nl = binsearch_oid(NULL, 0, lvals, lstart + lscan,
    1562             :                                            lend - 1, v, 1, 1);
    1563           0 :                         nl -= lstart;
    1564           0 :                         lstart += nl;
    1565             :                 } else {
    1566             :                         /* just scan */
    1567           0 :                         while (++lstart < lend && v == lvals[lstart])
    1568           0 :                                 nl++;
    1569             :                 }
    1570             :                 /* lstart points one beyond the value we're
    1571             :                  * going to match: ready for the next iteration. */
    1572             : 
    1573             :                 /* First we find the first value in r that is at
    1574             :                  * least as large as v, then we find the first
    1575             :                  * value in r that is larger than v.  The difference
    1576             :                  * is the number of values equal to v and is stored in
    1577             :                  * nr.
    1578             :                  * We will use binary search on r to find both ends of
    1579             :                  * the sequence of values that are equal to v in case
    1580             :                  * the position is "too far" (more than rscan
    1581             :                  * away). */
    1582             : 
    1583             :                 /* first find the location of the first value in r
    1584             :                  * that is >= v, then find the location of the first
    1585             :                  * value in r that is > v; the difference is the
    1586             :                  * number of values equal to v */
    1587           0 :                 nr = canditer_search(&rci, v, true);
    1588           0 :                 canditer_setidx(&rci, nr);
    1589           0 :                 if (nr == rci.ncand) {
    1590             :                         /* nothing found */
    1591             :                         break;
    1592             :                 }
    1593             : 
    1594             :                 /* now find the end of the sequence of equal values v */
    1595             : 
    1596             :                 /* if r is key, there is zero or one match, otherwise
    1597             :                  * look ahead a little (rscan) in r to see whether
    1598             :                  * we're better off doing a binary search */
    1599           0 :                 if (canditer_peek(&rci) == v) {
    1600             :                         nr = 1;
    1601           0 :                         canditer_next(&rci);
    1602             :                 } else {
    1603             :                         /* rci points to first value > v or end of
    1604             :                          * r, and nr is the number of values in r that
    1605             :                          * are equal to v */
    1606             :                         /* no entries in r found */
    1607           0 :                         continue;
    1608             :                 }
    1609             :                 /* make space: nl values in l match nr values in r, so
    1610             :                  * we need to add nl * nr values in the results */
    1611           0 :                 if (maybeextend(r1, r2, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1612           0 :                         goto bailout;
    1613             : 
    1614             :                 /* maintain properties */
    1615           0 :                 if (nl > 1) {
    1616             :                         /* value occurs multiple times in l, so entry
    1617             :                          * in r will be repeated multiple times: hence
    1618             :                          * r2 is not key and not dense */
    1619           0 :                         if (r2) {
    1620           0 :                                 r2->tkey = false;
    1621           0 :                                 r2->tseqbase = oid_nil;
    1622             :                         }
    1623             :                         /* multiple different values will be inserted
    1624             :                          * in r1 (always in order), so not reverse
    1625             :                          * ordered anymore */
    1626           0 :                         r1->trevsorted = false;
    1627             :                 }
    1628           0 :                 if (BATcount(r1) > 0) {
    1629             :                         /* a new, higher value will be inserted into
    1630             :                          * r1, so r1 is not reverse ordered anymore */
    1631           0 :                         r1->trevsorted = false;
    1632             :                         /* a new higher value will be added to r2 */
    1633           0 :                         if (r2) {
    1634           0 :                                 r2->trevsorted = false;
    1635             :                         }
    1636           0 :                         if (BATtdense(r1) &&
    1637           0 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1638           0 :                                 r1->tseqbase = oid_nil;
    1639             :                         }
    1640             :                 }
    1641             : 
    1642           0 :                 if (r2 &&
    1643           0 :                     BATcount(r2) > 0 &&
    1644           0 :                     BATtdense(r2) &&
    1645           0 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rci.next - nr) {
    1646           0 :                         r2->tseqbase = oid_nil;
    1647             :                 }
    1648             : 
    1649             :                 /* insert values */
    1650           0 :                 lv = l->hseqbase + lstart - nl;
    1651           0 :                 for (i = 0; i < nl; i++) {
    1652             :                         BUN j;
    1653             : 
    1654           0 :                         for (j = 0; j < nr; j++) {
    1655           0 :                                 APPEND(r1, lv);
    1656             :                         }
    1657           0 :                         if (r2) {
    1658           0 :                                 oid rv = r->hseqbase + rci.next - nr;
    1659             : 
    1660           0 :                                 for (j = 0; j < nr; j++) {
    1661           0 :                                         APPEND(r2, rv);
    1662           0 :                                         rv++;
    1663             :                                 }
    1664             :                         }
    1665           0 :                         lv++;
    1666             :                 }
    1667             :         }
    1668             :         /* also set other bits of heap to correct value to indicate size */
    1669           0 :         BATsetcount(r1, BATcount(r1));
    1670           0 :         if (r2) {
    1671           0 :                 BATsetcount(r2, BATcount(r2));
    1672           0 :                 assert(BATcount(r1) == BATcount(r2));
    1673             :         }
    1674           0 :         if (BATcount(r1) > 0) {
    1675           0 :                 if (BATtdense(r1))
    1676           0 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1677           0 :                 if (r2 && BATtdense(r2))
    1678           0 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1679             :         } else {
    1680           0 :                 r1->tseqbase = 0;
    1681           0 :                 if (r2) {
    1682           0 :                         r2->tseqbase = 0;
    1683             :                 }
    1684             :         }
    1685           0 :         bat_iterator_end(&li);
    1686           0 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1687             :                   "nil_matches=%s;%s %s "
    1688             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1689             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1690             :                   nil_matches ? "true" : "false",
    1691             :                   swapped ? " swapped" : "", reason,
    1692             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1693             :                   GDKusec() - t0);
    1694             : 
    1695             :         return GDK_SUCCEED;
    1696             : 
    1697           0 :   bailout:
    1698           0 :         bat_iterator_end(&li);
    1699           0 :         BBPreclaim(r1);
    1700           0 :         BBPreclaim(r2);
    1701           0 :         return GDK_FAIL;
    1702             : }
    1703             : 
    1704             : /* Perform a "merge" join on l and r (if both are sorted) with
    1705             :  * optional candidate lists, or join using binary search on r if l is
    1706             :  * not sorted.  The return BATs have already been created by the
    1707             :  * caller.
    1708             :  *
    1709             :  * If nil_matches is set, nil values are treated as ordinary values
    1710             :  * that can match; otherwise nil values never match.
    1711             :  *
    1712             :  * If nil_on_miss is set, a nil value is returned in r2 if there is no
    1713             :  * match in r for a particular value in l (left outer join).
    1714             :  *
    1715             :  * If semi is set, only a single set of values in r1/r2 is returned if
    1716             :  * there is a match of l in r, no matter how many matches there are in
    1717             :  * r; otherwise all matches are returned.
    1718             :  *
    1719             :  * If max_one is set, only a single match is allowed.  This is like
    1720             :  * semi, but enforces the single match.
    1721             :  *
    1722             :  * t0 and swapped are only for debugging (ALGOMASK set in GDKdebug).
    1723             :  */
    1724             : static gdk_return
    1725       12749 : mergejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1726             :           struct canditer *restrict lci, struct canditer *restrict rci,
    1727             :           bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    1728             :           bool not_in, bool max_one, bool min_one, BUN estimate,
    1729             :           lng t0, bool swapped,
    1730             :           const char *reason)
    1731             : {
    1732             :         /* [lr]scan determine how far we look ahead in l/r in order to
    1733             :          * decide whether we want to do a binary search or a scan */
    1734             :         BUN lscan, rscan;
    1735             :         const void *lvals, *rvals; /* the values of l/r (NULL if dense) */
    1736             :         const char *lvars, *rvars; /* the indirect values (NULL if fixed size) */
    1737       12749 :         const void *nil = ATOMnilptr(l->ttype);
    1738       12749 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    1739             :         const void *v;          /* points to value under consideration */
    1740             :         const void *prev = NULL;
    1741             :         BUN nl, nr;
    1742             :         bool insert_nil;
    1743             :         /* equal_order is set if we can scan both BATs in the same
    1744             :          * order, so when both are sorted or both are reverse sorted
    1745             :          * -- important to know in order to skip over values; if l is
    1746             :          * not sorted, this must be set to true and we will always do a
    1747             :          * binary search on all of r */
    1748             :         bool equal_order;
    1749             :         /* [lr]ordering is either 1 or -1 depending on the order of
    1750             :          * l/r: it determines the comparison function used */
    1751             :         int lordering, rordering;
    1752             :         oid lv;
    1753             :         BUN i, j;               /* counters */
    1754       12749 :         oid lval = oid_nil, rval = oid_nil; /* temporary space to point v to */
    1755             :         struct canditer llci, rrci;
    1756             :         struct canditer *mlci, xlci;
    1757             :         struct canditer *mrci, xrci;
    1758             : 
    1759       12749 :         if (lci->tpe == cand_dense && lci->ncand == BATcount(l) &&
    1760       12680 :             rci->tpe == cand_dense && rci->ncand == BATcount(r) &&
    1761       12040 :             !nil_on_miss && !semi && !max_one && !min_one && !only_misses &&
    1762        7339 :             !not_in &&
    1763        6262 :             l->tsorted && r->tsorted) {
    1764             :                 /* special cases with far fewer options */
    1765        6253 :                 if (complex_cand(r))
    1766           0 :                         return mergejoin_cand(r1p, r2p, l, r, nil_matches,
    1767             :                                               estimate, t0, swapped, __func__);
    1768       12459 :                 switch (ATOMbasetype(l->ttype)) {
    1769        5995 :                 case TYPE_int:
    1770        5995 :                         return mergejoin_int(r1p, r2p, l, r, nil_matches,
    1771             :                                              estimate, t0, swapped, __func__);
    1772         172 :                 case TYPE_lng:
    1773         172 :                         return mergejoin_lng(r1p, r2p, l, r, nil_matches,
    1774             :                                              estimate, t0, swapped, __func__);
    1775             :                 }
    1776             :         }
    1777             : 
    1778       19667 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    1779        6582 :         assert(r->tsorted || r->trevsorted);
    1780             : 
    1781             :         size_t counter = 0;
    1782             :         lng timeoffset = 0;
    1783        6582 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1784        6582 :         if (qry_ctx != NULL) {
    1785        5390 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    1786             :         }
    1787             : 
    1788        6582 :         BATiter li = bat_iterator(l);
    1789        6582 :         BATiter ri = bat_iterator(r);
    1790        6582 :         MT_thread_setalgorithm(__func__);
    1791        6582 :         if (BATtvoid(l)) {
    1792             :                 /* l->ttype == TYPE_void && is_oid_nil(l->tseqbase) is
    1793             :                  * handled by selectjoin */
    1794         103 :                 assert(!is_oid_nil(l->tseqbase));
    1795         103 :                 canditer_init(&llci, NULL, l);
    1796         103 :                 lvals = NULL;
    1797             :         } else {
    1798        6479 :                 lvals = li.base;                              /* non NULL */
    1799        6479 :                 llci = (struct canditer) {.tpe = cand_dense}; /* not used */
    1800             :         }
    1801        6582 :         rrci = (struct canditer) {.tpe = cand_dense};
    1802        6582 :         if (BATtvoid(r)) {
    1803           0 :                 if (!is_oid_nil(r->tseqbase))
    1804           0 :                         canditer_init(&rrci, NULL, r);
    1805             :                 rvals = NULL;
    1806             :         } else {
    1807        6582 :                 rvals = ri.base;
    1808             :         }
    1809        6582 :         if (l->tvarsized && l->ttype) {
    1810         232 :                 assert(r->tvarsized && r->ttype);
    1811         232 :                 lvars = li.vh->base;
    1812         232 :                 rvars = ri.vh->base;
    1813             :         } else {
    1814        6350 :                 assert(!r->tvarsized || !r->ttype);
    1815             :                 lvars = rvars = NULL;
    1816             :         }
    1817             : 
    1818        6582 :         if (not_in && rci->ncand > 0 && !r->tnonil &&
    1819        1005 :             ((BATtvoid(l) && l->tseqbase == oid_nil) ||
    1820        1005 :              (BATtvoid(r) && r->tseqbase == oid_nil) ||
    1821        1005 :              (rvals && cmp(nil, VALUE(r, (r->tsorted ? rci->seq : canditer_last(rci)) - r->hseqbase)) == 0))) {
    1822           0 :                 bat_iterator_end(&li);
    1823           0 :                 bat_iterator_end(&ri);
    1824           0 :                 return nomatch(r1p, r2p, l, r, lci, false, false,
    1825             :                                __func__, t0);
    1826             :         }
    1827             : 
    1828        6582 :         if (lci->ncand == 0 ||
    1829        6582 :             rci->ncand == 0 ||
    1830        6582 :             (!nil_matches &&
    1831        6195 :              ((l->ttype == TYPE_void && is_oid_nil(l->tseqbase)) ||
    1832        6195 :               (r->ttype == TYPE_void && is_oid_nil(r->tseqbase)))) ||
    1833        6582 :             (l->ttype == TYPE_void && is_oid_nil(l->tseqbase) &&
    1834           0 :              (r->tnonil ||
    1835           0 :               (r->ttype == TYPE_void && !is_oid_nil(r->tseqbase)))) ||
    1836        6582 :             (r->ttype == TYPE_void && is_oid_nil(r->tseqbase) &&
    1837           0 :              (l->tnonil ||
    1838           0 :               (l->ttype == TYPE_void && !is_oid_nil(l->tseqbase))))) {
    1839             :                 /* there are no matches */
    1840           0 :                 bat_iterator_end(&li);
    1841           0 :                 bat_iterator_end(&ri);
    1842           0 :                 return nomatch(r1p, r2p, l, r, lci, nil_on_miss, only_misses,
    1843             :                                __func__, t0);
    1844             :         }
    1845             : 
    1846        6582 :         BUN maxsize = joininitresults(r1p, r2p, lci->ncand, rci->ncand,
    1847        6582 :                                       l->tkey, r->tkey, semi | max_one,
    1848             :                                       nil_on_miss, only_misses, min_one,
    1849             :                                       estimate);
    1850        6582 :         if (maxsize == BUN_NONE) {
    1851           0 :                 bat_iterator_end(&li);
    1852           0 :                 bat_iterator_end(&ri);
    1853           0 :                 return GDK_FAIL;
    1854             :         }
    1855        6582 :         BAT *r1 = *r1p;
    1856        6582 :         BAT *r2 = r2p ? *r2p : NULL;
    1857             : 
    1858        6582 :         if (lci->tpe == cand_mask) {
    1859             :                 mlci = lci;
    1860           0 :                 canditer_init(&xlci, l, NULL);
    1861             :                 lci = &xlci;
    1862             :         } else {
    1863             :                 mlci = NULL;
    1864        6582 :                 xlci = (struct canditer) {.tpe = cand_dense}; /* not used */
    1865             :         }
    1866        6582 :         if (rci->tpe == cand_mask) {
    1867             :                 mrci = rci;
    1868           0 :                 canditer_init(&xrci, r, NULL);
    1869             :                 rci = &xrci;
    1870             :         } else {
    1871             :                 mrci = NULL;
    1872        6582 :                 xrci = (struct canditer) {.tpe = cand_dense}; /* not used */
    1873             :         }
    1874             : 
    1875        6582 :         if (l->tsorted || l->trevsorted) {
    1876        5280 :                 equal_order = (l->tsorted && r->tsorted) ||
    1877         233 :                         (l->trevsorted && r->trevsorted &&
    1878         190 :                          !BATtvoid(l) && !BATtvoid(r));
    1879        5280 :                 lordering = l->tsorted && (r->tsorted || !equal_order) ? 1 : -1;
    1880        5280 :                 rordering = equal_order ? lordering : -lordering;
    1881        5280 :                 if (!l->tnonil && !nil_matches && !nil_on_miss) {
    1882        2129 :                         nl = binsearch(NULL, 0, l->ttype, lvals, lvars, li.width, 0, BATcount(l), nil, l->tsorted ? 1 : -1, l->tsorted ? 1 : 0);
    1883        2050 :                         nl = canditer_search(lci, nl + l->hseqbase, true);
    1884        2050 :                         if (l->tsorted) {
    1885        1971 :                                 canditer_setidx(lci, nl);
    1886          79 :                         } else if (l->trevsorted) {
    1887          79 :                                 lci->ncand = nl;
    1888             :                         }
    1889             :                 }
    1890             :                 /* determine opportunistic scan window for l */
    1891       10560 :                 lscan = 4 + ilog2(lci->ncand);
    1892             :         } else {
    1893             :                 /* if l not sorted, we will always use binary search
    1894             :                  * on r */
    1895        1302 :                 assert(!BATtvoid(l)); /* void is always sorted */
    1896             :                 lscan = 0;
    1897             :                 equal_order = true;
    1898             :                 lordering = 1;
    1899        1302 :                 rordering = r->tsorted ? 1 : -1;
    1900             :         }
    1901             :         /* determine opportunistic scan window for r; if l is not
    1902             :          * sorted this is only used to find range of equal values */
    1903        6582 :         rscan = 4 + ilog2(rci->ncand);
    1904             : 
    1905        6582 :         if (!equal_order) {
    1906             :                 /* we go through r backwards */
    1907          43 :                 canditer_setidx(rci, rci->ncand);
    1908             :         }
    1909             :         /* At this point the various variables that help us through
    1910             :          * the algorithm have been set.  The table explains them.  The
    1911             :          * first two columns are the inputs, the next three columns
    1912             :          * are the variables, the final two columns indicate how the
    1913             :          * variables can be used.
    1914             :          *
    1915             :          * l/r    sl/sr | vals  cand  off | result   value being matched
    1916             :          * -------------+-----------------+----------------------------------
    1917             :          * dense  NULL  | NULL  NULL  set | i        off==nil?nil:i+off
    1918             :          * dense  dense | NULL  NULL  set | i        off==nil?nil:i+off
    1919             :          * dense  set   | NULL  set   set | cand[i]  off==nil?nil:cand[i]+off
    1920             :          * set    NULL  | set   NULL  0   | i        vals[i]
    1921             :          * set    dense | set   NULL  0   | i        vals[i]
    1922             :          * set    set   | set   set   0   | cand[i]  vals[cand[i]]
    1923             :          *
    1924             :          * If {l,r}off is lng_nil, all values in the corresponding bat
    1925             :          * are oid_nil because the bat has type VOID and the tseqbase
    1926             :          * is nil.
    1927             :          */
    1928             : 
    1929             : 
    1930             :         /* Before we start adding values to r1 and r2, the properties
    1931             :          * are as follows:
    1932             :          * tseqbase - 0
    1933             :          * tkey - true
    1934             :          * tsorted - true
    1935             :          * trevsorted - true
    1936             :          * tnil - false
    1937             :          * tnonil - true
    1938             :          * We will modify these as we go along.
    1939             :          */
    1940      352980 :         while (lci->next < lci->ncand) {
    1941      349094 :                 GDK_CHECK_TIMEOUT(timeoffset, counter,
    1942             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    1943      349094 :                 if (lscan == 0) {
    1944             :                         /* always search r completely */
    1945      192235 :                         assert(equal_order);
    1946      192235 :                         canditer_reset(rci);
    1947             :                 } else {
    1948             :                         /* If l is sorted (lscan > 0), we look at the
    1949             :                          * next value in r to see whether we can jump
    1950             :                          * over a large section of l using binary
    1951             :                          * search.  We do this by looking ahead in l
    1952             :                          * (lscan far, to be precise) and seeing if
    1953             :                          * the value there is still too "small"
    1954             :                          * (definition depends on sort order of l).
    1955             :                          * If it is, we use binary search on l,
    1956             :                          * otherwise we scan l for the next position
    1957             :                          * with a value greater than or equal to the
    1958             :                          * value in r.
    1959             :                          * The next value to match in r is the first
    1960             :                          * if equal_order is set, the last
    1961             :                          * otherwise.
    1962             :                          * When skipping over values in l, we count
    1963             :                          * how many we skip in nlx.  We need this in
    1964             :                          * case only_misses or nil_on_miss is set, and
    1965             :                          * to properly set the dense property in the
    1966             :                          * first output BAT. */
    1967             :                         BUN nlx = 0; /* number of non-matching values in l */
    1968             : 
    1969      156859 :                         if (equal_order) {
    1970      156740 :                                 if (rci->next == rci->ncand)
    1971             :                                         v = NULL; /* no more values */
    1972      154489 :                                 else if (mrci) {
    1973           0 :                                         oid rv = canditer_mask_next(mrci, canditer_peek(rci), true);
    1974           0 :                                         v = rv == oid_nil ? NULL : VALUE(r, rv - r->hseqbase);
    1975             :                                 } else
    1976      154489 :                                         v = VALUE(r, canditer_peek(rci) - r->hseqbase);
    1977             :                         } else {
    1978         119 :                                 if (rci->next == 0)
    1979             :                                         v = NULL; /* no more values */
    1980         116 :                                 else if (mrci) {
    1981           0 :                                         oid rv = canditer_mask_next(mrci, canditer_peekprev(rci), false);
    1982           0 :                                         v = rv == oid_nil ? NULL : VALUE(r, rv - r->hseqbase);
    1983             :                                 } else
    1984         116 :                                         v = VALUE(r, canditer_peekprev(rci) - r->hseqbase);
    1985             :                         }
    1986             :                         /* here, v points to next value in r, or if
    1987             :                          * we're at the end of r, v is NULL */
    1988             :                         if (v == NULL) {
    1989        2254 :                                 nlx = lci->ncand - lci->next;
    1990             :                         } else {
    1991      154545 :                                 if (lscan < lci->ncand - lci->next) {
    1992      132514 :                                         lv = canditer_idx(lci, lci->next + lscan);
    1993      133085 :                                         lv -= l->hseqbase;
    1994      133085 :                                         if (lvals) {
    1995      131478 :                                                 if (lordering * cmp(VALUE(l, lv), v) < 0) {
    1996        1674 :                                                         nlx = binsearch(NULL, 0, l->ttype, lvals, lvars, li.width, lv, BATcount(l), v, lordering, 0);
    1997        1674 :                                                         nlx = canditer_search(lci, nlx + l->hseqbase, true);
    1998        1674 :                                                         nlx -= lci->next;
    1999             :                                                 }
    2000             :                                         } else {
    2001        1607 :                                                 assert(lordering == 1);
    2002        1607 :                                                 if (canditer_idx(&llci, lv) < *(const oid *)v) {
    2003           9 :                                                         nlx = canditer_search(&llci, *(const oid *)v, true);
    2004           9 :                                                         nlx = canditer_search(lci, nlx + l->hseqbase, true);
    2005           9 :                                                         nlx -= lci->next;
    2006             :                                                 }
    2007             :                                         }
    2008      133209 :                                         if (mlci) {
    2009           0 :                                                 lv = canditer_mask_next(mlci, lci->seq + lci->next + nlx, true);
    2010           0 :                                                 if (lv == oid_nil)
    2011           0 :                                                         nlx = lci->ncand - lci->next;
    2012             :                                                 else
    2013           0 :                                                         nlx = lv - lci->seq - lci->next;
    2014             :                                         }
    2015      133209 :                                         if (lci->next + nlx == lci->ncand)
    2016             :                                                 v = NULL;
    2017             :                                 }
    2018             :                         }
    2019      135463 :                         if (nlx > 0) {
    2020        3937 :                                 if (only_misses) {
    2021        2365 :                                         if (maybeextend(r1, r2, nlx, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2022           0 :                                                 goto bailout;
    2023      347368 :                                         while (nlx > 0) {
    2024      345003 :                                                 lv = canditer_next(lci);
    2025      345003 :                                                 if (mlci == NULL || canditer_contains(mlci, lv))
    2026      345003 :                                                         APPEND(r1, lv);
    2027      345003 :                                                 nlx--;
    2028             :                                         }
    2029        2365 :                                         if (r1->trevsorted && BATcount(r1) > 1)
    2030         936 :                                                 r1->trevsorted = false;
    2031        1572 :                                 } else if (nil_on_miss) {
    2032           0 :                                         if (r2->tnonil) {
    2033           0 :                                                 r2->tnil = true;
    2034           0 :                                                 r2->tnonil = false;
    2035           0 :                                                 r2->tseqbase = oid_nil;
    2036           0 :                                                 r2->tsorted = false;
    2037           0 :                                                 r2->trevsorted = false;
    2038           0 :                                                 r2->tkey = false;
    2039             :                                         }
    2040           0 :                                         if (maybeextend(r1, r2, nlx, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2041           0 :                                                 goto bailout;
    2042           0 :                                         while (nlx > 0) {
    2043           0 :                                                 lv = canditer_next(lci);
    2044           0 :                                                 if (mlci == NULL || canditer_contains(mlci, lv)) {
    2045           0 :                                                         APPEND(r1, lv);
    2046           0 :                                                         APPEND(r2, oid_nil);
    2047             :                                                 }
    2048           0 :                                                 nlx--;
    2049             :                                         }
    2050           0 :                                         if (r1->trevsorted && BATcount(r1) > 1)
    2051           0 :                                                 r1->trevsorted = false;
    2052             :                                 } else {
    2053        1572 :                                         canditer_setidx(lci, lci->next + nlx);
    2054             :                                 }
    2055             :                         }
    2056      157494 :                         if (v == NULL) {
    2057             :                                 /* we have exhausted the inputs */
    2058             :                                 break;
    2059             :                         }
    2060             :                 }
    2061             : 
    2062             :                 /* Here we determine the next value in l that we are
    2063             :                  * going to try to match in r.  We will also count the
    2064             :                  * number of occurrences in l of that value.
    2065             :                  * Afterwards, v points to the value and nl is the
    2066             :                  * number of times it occurs.  Also, lci will point to
    2067             :                  * the next value to be considered (ready for the next
    2068             :                  * iteration).
    2069             :                  * If there are many equal values in l (more than
    2070             :                  * lscan), we will use binary search to find the end
    2071             :                  * of the sequence.  Obviously, we can do this only if
    2072             :                  * l is actually sorted (lscan > 0). */
    2073             :                 nl = 1;         /* we'll match (at least) one in l */
    2074             :                 nr = 0;         /* maybe we won't match anything in r */
    2075      343925 :                 lv = canditer_peek(lci);
    2076      343497 :                 if (mlci) {
    2077           0 :                         lv = canditer_mask_next(mlci, lv, true);
    2078           0 :                         if (lv == oid_nil)
    2079             :                                 break;
    2080           0 :                         canditer_setidx(lci, canditer_search(lci, lv, true));
    2081             :                 }
    2082      344654 :                 v = VALUE(l, lv - l->hseqbase);
    2083      344655 :                 if (l->tkey) {
    2084             :                         /* if l is key, there is a single value */
    2085      227268 :                 } else if (lscan > 0 &&
    2086       86020 :                            lscan < lci->ncand - lci->next &&
    2087       42461 :                            cmp(v, VALUE(l, canditer_idx(lci, lci->next + lscan) - l->hseqbase)) == 0) {
    2088             :                         /* lots of equal values: use binary search to
    2089             :                          * find end */
    2090         543 :                         assert(lvals != NULL);
    2091         543 :                         nl = binsearch(NULL, 0,
    2092         543 :                                        l->ttype, lvals, lvars,
    2093         543 :                                        li.width, lci->next + lscan,
    2094             :                                        BATcount(l),
    2095             :                                        v, lordering, 1);
    2096         543 :                         nl = canditer_search(lci, nl + l->hseqbase, true);
    2097         543 :                         nl -= lci->next;
    2098             :                 } else {
    2099      226808 :                         struct canditer ci = *lci; /* work on copy */
    2100             :                         nl = 0; /* it will be incremented again */
    2101             :                         do {
    2102      453911 :                                 canditer_next(&ci);
    2103      452755 :                                 nl++;
    2104      447445 :                         } while (ci.next < ci.ncand &&
    2105      452755 :                                  cmp(v, VALUE(l, canditer_peek(&ci) - l->hseqbase)) == 0);
    2106             :                 }
    2107             :                 /* lci->next + nl is the position for the next iteration */
    2108             : 
    2109      340196 :                 if ((!nil_matches || not_in) && !l->tnonil && cmp(v, nil) == 0) {
    2110        2129 :                         if (not_in) {
    2111             :                                 /* just skip the whole thing: nils
    2112             :                                  * don't cause any output */
    2113           1 :                                 canditer_setidx(lci, lci->next + nl);
    2114           1 :                                 continue;
    2115             :                         }
    2116             :                         /* v is nil and nils don't match anything, set
    2117             :                          * to NULL to indicate nil */
    2118             :                         v = NULL;
    2119             :                 }
    2120             : 
    2121             :                 /* First we find the "first" value in r that is "at
    2122             :                  * least as large" as v, then we find the "first"
    2123             :                  * value in r that is "larger" than v.  The difference
    2124             :                  * is the number of values equal to v and is stored in
    2125             :                  * nr.  The definitions of "larger" and "first" depend
    2126             :                  * on the orderings of l and r.  If equal_order is
    2127             :                  * set, we go through r from low to high (this
    2128             :                  * includes the case that l is not sorted); otherwise
    2129             :                  * we go through r from high to low.
    2130             :                  * In either case, we will use binary search on r to
    2131             :                  * find both ends of the sequence of values that are
    2132             :                  * equal to v in case the position is "too far" (more
    2133             :                  * than rscan away). */
    2134             :                 if (v == NULL) {
    2135             :                         nr = 0; /* nils don't match anything */
    2136      336936 :                 } else if (r->ttype == TYPE_void && is_oid_nil(r->tseqbase)) {
    2137           0 :                         if (is_oid_nil(*(const oid *) v)) {
    2138             :                                 /* all values in r match */
    2139           0 :                                 nr = rci->ncand;
    2140             :                         } else {
    2141             :                                 /* no value in r matches */
    2142             :                                 nr = 0;
    2143             :                         }
    2144             :                         /* in either case, we're done after this */
    2145           0 :                         canditer_setidx(rci, equal_order ? rci->ncand : 0);
    2146      336936 :                 } else if (equal_order) {
    2147             :                         /* first find the location of the first value
    2148             :                          * in r that is >= v, then find the location
    2149             :                          * of the first value in r that is > v; the
    2150             :                          * difference is the number of values equal
    2151             :                          * v; we change rci */
    2152             : 
    2153             :                         /* look ahead a little (rscan) in r to
    2154             :                          * see whether we're better off doing
    2155             :                          * a binary search */
    2156      336820 :                         if (rvals) {
    2157      336820 :                                 if (rscan < rci->ncand - rci->next &&
    2158      288040 :                                     rordering * cmp(v, VALUE(r, canditer_idx(rci, rci->next + rscan) - r->hseqbase)) > 0) {
    2159             :                                         /* value too far away in r:
    2160             :                                          * use binary search */
    2161      124622 :                                         lv = binsearch(NULL, 0, r->ttype, rvals, rvars, ri.width, rci->next + rscan, BATcount(r), v, rordering, 0);
    2162      136117 :                                         lv = canditer_search(rci, lv + r->hseqbase, true);
    2163      135232 :                                         canditer_setidx(rci, lv);
    2164             :                                 } else {
    2165             :                                         /* scan r for v */
    2166      242359 :                                         while (rci->next < rci->ncand) {
    2167      240906 :                                                 if (rordering * cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) <= 0)
    2168             :                                                         break;
    2169       30526 :                                                 canditer_next(rci);
    2170             :                                         }
    2171             :                                 }
    2172      669952 :                                 if (rci->next < rci->ncand &&
    2173      327120 :                                     cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) == 0) {
    2174             :                                         /* if we found an equal value,
    2175             :                                          * look for the last equal
    2176             :                                          * value */
    2177      215068 :                                         if (r->tkey) {
    2178             :                                                 /* r is key, there can
    2179             :                                                  * only be a single
    2180             :                                                  * equal value */
    2181             :                                                 nr = 1;
    2182      136384 :                                                 canditer_next(rci);
    2183      156030 :                                         } else if (rscan < rci->ncand - rci->next &&
    2184       77347 :                                                    cmp(v, VALUE(r, canditer_idx(rci, rci->next + rscan) - r->hseqbase)) == 0) {
    2185             :                                                 /* many equal values:
    2186             :                                                  * use binary search
    2187             :                                                  * to find the end */
    2188       42967 :                                                 nr = binsearch(NULL, 0, r->ttype, rvals, rvars, ri.width, rci->next + rscan, BATcount(r), v, rordering, 1);
    2189       42969 :                                                 nr = canditer_search(rci, nr + r->hseqbase, true);
    2190       42969 :                                                 nr -= rci->next;
    2191       42969 :                                                 canditer_setidx(rci, rci->next + nr);
    2192             :                                         } else {
    2193             :                                                 /* scan r for end of
    2194             :                                                  * range */
    2195             :                                                 do {
    2196       94787 :                                                         nr++;
    2197       94787 :                                                         canditer_next(rci);
    2198       94475 :                                                 } while (rci->next < rci->ncand &&
    2199       94787 :                                                          cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) == 0);
    2200             :                                         }
    2201             :                                 }
    2202             :                         } else {
    2203           0 :                                 assert(rordering == 1);
    2204           0 :                                 rval = canditer_search(&rrci, *(const oid*)v, true) + r->hseqbase;
    2205           0 :                                 lv = canditer_search(rci, rval, true);
    2206           0 :                                 canditer_setidx(rci, lv);
    2207           0 :                                 nr = (canditer_idx(&rrci, canditer_peek(rci) - r->hseqbase) == *(oid*)v);
    2208           0 :                                 if (nr == 1)
    2209           0 :                                         canditer_next(rci);
    2210             :                         }
    2211             :                         /* rci points to first value > v or end of r,
    2212             :                          * and nr is the number of values in r that
    2213             :                          * are equal to v */
    2214             :                 } else {
    2215             :                         /* first find the location of the first value
    2216             :                          * in r that is > v, then find the location
    2217             :                          * of the first value in r that is >= v; the
    2218             :                          * difference is the number of values equal
    2219             :                          * v; we change rci */
    2220             : 
    2221             :                         /* look back from the end a little
    2222             :                          * (rscan) in r to see whether we're
    2223             :                          * better off doing a binary search */
    2224         116 :                         if (rvals) {
    2225         116 :                                 if (rci->next > rscan &&
    2226          67 :                                     rordering * cmp(v, VALUE(r, canditer_idx(rci, rci->next - rscan) - r->hseqbase)) < 0) {
    2227             :                                         /* value too far away
    2228             :                                          * in r: use binary
    2229             :                                          * search */
    2230          25 :                                         lv = binsearch(NULL, 0, r->ttype, rvals, rvars, ri.width, 0, rci->next - rscan, v, rordering, 1);
    2231          25 :                                         lv = canditer_search(rci, lv + r->hseqbase, true);
    2232          25 :                                         canditer_setidx(rci, lv);
    2233             :                                 } else {
    2234             :                                         /* scan r for v */
    2235         357 :                                         while (rci->next > 0 &&
    2236         351 :                                                rordering * cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) < 0)
    2237         266 :                                                 canditer_prev(rci);
    2238             :                                 }
    2239         226 :                                 if (rci->next > 0 &&
    2240         110 :                                     cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) == 0) {
    2241             :                                         /* if we found an equal value,
    2242             :                                          * look for the last equal
    2243             :                                          * value */
    2244          82 :                                         if (r->tkey) {
    2245             :                                                 /* r is key, there can only be a single equal value */
    2246             :                                                 nr = 1;
    2247          65 :                                                 canditer_prev(rci);
    2248          30 :                                         } else if (rci->next > rscan &&
    2249          13 :                                                    cmp(v, VALUE(r, canditer_idx(rci, rci->next - rscan) - r->hseqbase)) == 0) {
    2250             :                                                 /* use binary search to find the start */
    2251           0 :                                                 nr = binsearch(NULL, 0, r->ttype, rvals, rvars, ri.width, 0, rci->next - rscan, v, rordering, 0);
    2252           0 :                                                 nr = canditer_search(rci, nr + r->hseqbase, true);
    2253           0 :                                                 nr = rci->next - nr;
    2254           0 :                                                 canditer_setidx(rci, rci->next - nr);
    2255             :                                         } else {
    2256             :                                                 /* scan r for start of range */
    2257             :                                                 do {
    2258          17 :                                                         canditer_prev(rci);
    2259          17 :                                                         nr++;
    2260          16 :                                                 } while (rci->next > 0 &&
    2261          17 :                                                          cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) == 0);
    2262             :                                         }
    2263             :                                 }
    2264             :                         } else {
    2265           0 :                                 lv = canditer_search(&rrci, *(const oid *)v, true);
    2266           0 :                                 lv = canditer_search(rci, lv + r->hseqbase, true);
    2267           0 :                                 nr = (canditer_idx(rci, lv) == *(const oid*)v);
    2268           0 :                                 canditer_setidx(rci, lv);
    2269             :                         }
    2270             :                         /* rci points to first value > v
    2271             :                          * or end of r, and nr is the number of values
    2272             :                          * in r that are equal to v */
    2273             :                 }
    2274             : 
    2275       81341 :                 if (nr == 0) {
    2276             :                         /* no entries in r found */
    2277      132487 :                         if (!(nil_on_miss | only_misses)) {
    2278      107134 :                                 if (min_one) {
    2279           0 :                                         GDKerror("not enough matches");
    2280           0 :                                         goto bailout;
    2281             :                                 }
    2282      111638 :                                 if (lscan > 0 &&
    2283        4504 :                                     (equal_order ? rci->next == rci->ncand : rci->next == 0)) {
    2284             :                                         /* nothing more left to match
    2285             :                                          * in r */
    2286             :                                         break;
    2287             :                                 }
    2288      107115 :                                 canditer_setidx(lci, lci->next + nl);
    2289      106548 :                                 continue;
    2290             :                         }
    2291             :                         /* insert a nil to indicate a non-match */
    2292             :                         insert_nil = true;
    2293             :                         nr = 1;
    2294       25353 :                         if (r2) {
    2295           0 :                                 r2->tnil = true;
    2296           0 :                                 r2->tnonil = false;
    2297           0 :                                 r2->tsorted = false;
    2298           0 :                                 r2->trevsorted = false;
    2299           0 :                                 r2->tseqbase = oid_nil;
    2300           0 :                                 r2->tkey = false;
    2301             :                         }
    2302      214997 :                 } else if (nr > 1 && max_one) {
    2303          35 :                         GDKerror("more than one match");
    2304          35 :                         goto bailout;
    2305      214962 :                 } else if (only_misses) {
    2306             :                         /* we had a match, so we're not interested */
    2307       94021 :                         canditer_setidx(lci, lci->next + nl);
    2308       93999 :                         continue;
    2309             :                 } else {
    2310             :                         insert_nil = false;
    2311      120941 :                         if (semi) {
    2312             :                                 /* for semi-join, only insert single
    2313             :                                  * value */
    2314             :                                 nr = 1;
    2315             :                         }
    2316             :                 }
    2317             :                 /* make space: nl values in l match nr values in r, so
    2318             :                  * we need to add nl * nr values in the results */
    2319      146294 :                 if (maybeextend(r1, r2, nl * nr, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2320           0 :                         goto bailout;
    2321             : 
    2322             :                 /* maintain properties */
    2323      146530 :                 if (nl > 1) {
    2324       37628 :                         if (r2) {
    2325             :                                 /* value occurs multiple times in l,
    2326             :                                  * so entry in r will be repeated
    2327             :                                  * multiple times: hence r2 is not key
    2328             :                                  * and not dense */
    2329       11839 :                                 r2->tkey = false;
    2330       11839 :                                 r2->tseqbase = oid_nil;
    2331             :                         }
    2332             :                         /* multiple different values will be inserted
    2333             :                          * in r1 (always in order), so not reverse
    2334             :                          * ordered anymore */
    2335       37628 :                         r1->trevsorted = false;
    2336             :                 }
    2337      146530 :                 if (nr > 1) {
    2338             :                         /* value occurs multiple times in r, so entry
    2339             :                          * in l will be repeated multiple times: hence
    2340             :                          * r1 is not key and not dense */
    2341       43092 :                         r1->tkey = false;
    2342       43092 :                         if (r2) {
    2343             :                                 /* multiple different values will be
    2344             :                                  * inserted in r2 (in order), so not
    2345             :                                  * reverse ordered anymore */
    2346       42571 :                                 r2->trevsorted = false;
    2347       42571 :                                 if (nl > 1) {
    2348             :                                         /* multiple values in l match
    2349             :                                          * multiple values in r, so an
    2350             :                                          * ordered sequence will be
    2351             :                                          * inserted multiple times in
    2352             :                                          * r2, so r2 is not ordered
    2353             :                                          * anymore */
    2354        3920 :                                         r2->tsorted = false;
    2355             :                                 }
    2356             :                         }
    2357             :                 }
    2358      146530 :                 if (lscan == 0) {
    2359             :                         /* deduce relative positions of r matches for
    2360             :                          * this and previous value in v */
    2361       82986 :                         if (prev && r2) {
    2362             :                                 /* keyness or r2 can only be assured
    2363             :                                  * as long as matched values are
    2364             :                                  * ordered */
    2365       69390 :                                 int ord = rordering * cmp(prev, v ? v : nil);
    2366       69758 :                                 if (ord < 0) {
    2367             :                                         /* previous value in l was
    2368             :                                          * less than current */
    2369       29304 :                                         r2->trevsorted = false;
    2370       29304 :                                         r2->tkey &= r2->tsorted;
    2371       40454 :                                 } else if (ord > 0) {
    2372             :                                         /* previous value was
    2373             :                                          * greater */
    2374       27771 :                                         r2->tsorted = false;
    2375       27771 :                                         r2->tkey &= r2->trevsorted;
    2376             :                                 } else {
    2377             :                                         /* value can be equal if
    2378             :                                          * intervening values in l
    2379             :                                          * didn't match anything; if
    2380             :                                          * multiple values match in r,
    2381             :                                          * r2 won't be sorted */
    2382       12683 :                                         r2->tkey = false;
    2383       12683 :                                         if (nr > 1) {
    2384       12651 :                                                 r2->tsorted = false;
    2385       12651 :                                                 r2->trevsorted = false;
    2386             :                                         }
    2387             :                                 }
    2388             :                         }
    2389       83354 :                         prev = v ? v : nil;
    2390             :                 }
    2391      146898 :                 if (BATcount(r1) > 0) {
    2392             :                         /* a new, higher value will be inserted into
    2393             :                          * r1, so r1 is not reverse ordered anymore */
    2394      141837 :                         r1->trevsorted = false;
    2395      141837 :                         if (r2) {
    2396             :                                 /* depending on whether l and r are
    2397             :                                  * ordered the same or not, a new
    2398             :                                  * higher or lower value will be added
    2399             :                                  * to r2 */
    2400       70482 :                                 if (equal_order)
    2401       70434 :                                         r2->trevsorted = false;
    2402             :                                 else {
    2403          48 :                                         r2->tsorted = false;
    2404          48 :                                         r2->tseqbase = oid_nil;
    2405             :                                 }
    2406             :                         }
    2407             :                 }
    2408             : 
    2409             :                 /* insert values: first the left output */
    2410             :                 BUN nladded = 0;
    2411      380558 :                 for (i = 0; i < nl; i++) {
    2412      234512 :                         lv = canditer_next(lci);
    2413      233660 :                         if (mlci == NULL || canditer_contains(mlci, lv)) {
    2414      233660 :                                 nladded++;
    2415    16048441 :                                 for (j = 0; j < nr; j++)
    2416    15814781 :                                         APPEND(r1, lv);
    2417             :                         }
    2418             :                 }
    2419             :                 nl = nladded;
    2420             :                 /* then the right output, various different ways of
    2421             :                  * doing it */
    2422      146046 :                 if (r2 == NULL) {
    2423             :                         /* nothing to do */
    2424       71236 :                 } else if (insert_nil) {
    2425             :                         do {
    2426           0 :                                 for (i = 0; i < nr; i++) {
    2427           0 :                                         APPEND(r2, oid_nil);
    2428             :                                 }
    2429           0 :                         } while (--nl > 0);
    2430       71236 :                 } else if (equal_order) {
    2431       71154 :                         struct canditer ci = *rci; /* work on copy */
    2432       71154 :                         if (r2->batCount > 0 &&
    2433       72221 :                             BATtdense(r2) &&
    2434        1805 :                             ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != canditer_idx(&ci, ci.next - nr))
    2435         132 :                                 r2->tseqbase = oid_nil;
    2436             :                         do {
    2437       96249 :                                 canditer_setidx(&ci, ci.next - nr);
    2438    15761255 :                                 for (i = 0; i < nr; i++) {
    2439    15665202 :                                         APPEND(r2, canditer_next(&ci));
    2440             :                                 }
    2441       96053 :                         } while (--nl > 0);
    2442             :                 } else {
    2443          82 :                         if (r2->batCount > 0 &&
    2444          48 :                             BATtdense(r2) &&
    2445           0 :                             ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != canditer_peek(rci))
    2446           0 :                                 r2->tseqbase = oid_nil;
    2447             :                         do {
    2448         294 :                                 struct canditer ci = *rci; /* work on copy */
    2449         588 :                                 for (i = 0; i < nr; i++) {
    2450         294 :                                         APPEND(r2, canditer_next(&ci));
    2451             :                                 }
    2452         294 :                         } while (--nl > 0);
    2453             :                 }
    2454             :         }
    2455             :         /* also set other bits of heap to correct value to indicate size */
    2456        6547 :         BATsetcount(r1, BATcount(r1));
    2457        6548 :         r1->tseqbase = oid_nil;
    2458        6548 :         if (r1->tkey)
    2459        6502 :                 r1 = virtualize(r1);
    2460        6547 :         if (r2) {
    2461        1109 :                 BATsetcount(r2, BATcount(r2));
    2462        1109 :                 assert(BATcount(r1) == BATcount(r2));
    2463        1109 :                 r2->tseqbase = oid_nil;
    2464        1109 :                 if (BATcount(r2) <= 1) {
    2465         317 :                         r2->tkey = true;
    2466         317 :                         r2 = virtualize(r2);
    2467             :                 }
    2468             :         }
    2469        6547 :         bat_iterator_end(&li);
    2470        6546 :         bat_iterator_end(&ri);
    2471        6547 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    2472             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    2473             :                   "sr=" ALGOOPTBATFMT ","
    2474             :                   "nil_on_miss=%s,semi=%s,only_misses=%s,not_in=%s;%s %s "
    2475             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    2476             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    2477             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    2478             :                   nil_on_miss ? "true" : "false",
    2479             :                   semi ? "true" : "false",
    2480             :                   only_misses ? "true" : "false",
    2481             :                   not_in ? "true" : "false",
    2482             :                   swapped ? " swapped" : "", reason,
    2483             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    2484             :                   GDKusec() - t0);
    2485             : 
    2486             :         return GDK_SUCCEED;
    2487             : 
    2488          35 :   bailout:
    2489          35 :         bat_iterator_end(&li);
    2490          35 :         bat_iterator_end(&ri);
    2491          35 :         BBPreclaim(r1);
    2492          35 :         BBPreclaim(r2);
    2493          35 :         return GDK_FAIL;
    2494             : }
    2495             : 
    2496             : #define HASHLOOPBODY()                                                  \
    2497             :         do {                                                            \
    2498             :                 if (nr >= 1 && max_one) {                            \
    2499             :                         GDKerror("more than one match");              \
    2500             :                         goto bailout;                                   \
    2501             :                 }                                                       \
    2502             :                 if (maybeextend(r1, r2, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2503             :                         goto bailout;                                   \
    2504             :                 APPEND(r1, lo);                                         \
    2505             :                 if (r2)                                                 \
    2506             :                         APPEND(r2, ro);                                 \
    2507             :                 nr++;                                                   \
    2508             :         } while (false)
    2509             : 
    2510             : #define EQ_int(a, b)    ((a) == (b))
    2511             : #define EQ_lng(a, b)    ((a) == (b))
    2512             : #ifdef HAVE_HGE
    2513             : #define EQ_uuid(a, b)   ((a).h == (b).h)
    2514             : #else
    2515             : #define EQ_uuid(a, b)   (memcmp((a).u, (b).u, UUID_SIZE) == 0)
    2516             : #endif
    2517             : 
    2518             : #define HASHJOIN(TYPE)                                                  \
    2519             :         do {                                                            \
    2520             :                 TYPE *rvals = ri.base;                                  \
    2521             :                 TYPE *lvals = li.base;                                  \
    2522             :                 TYPE v;                                                 \
    2523             :                 while (lci->next < lci->ncand) {                       \
    2524             :                         GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
    2525             :                         lo = canditer_next(lci);                        \
    2526             :                         v = lvals[lo - l->hseqbase];                 \
    2527             :                         nr = 0;                                         \
    2528             :                         if ((!nil_matches || not_in) && is_##TYPE##_nil(v)) { \
    2529             :                                 /* no match */                          \
    2530             :                                 if (not_in) {                           \
    2531             :                                         lskipped = BATcount(r1) > 0; \
    2532             :                                         continue;                       \
    2533             :                                 }                                       \
    2534             :                         } else if (hash_cand) {                         \
    2535             :                                 /* private hash: no locks */            \
    2536             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2537             :                                      rb != BUN_NONE;                    \
    2538             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2539             :                                         ro = canditer_idx(rci, rb);     \
    2540             :                                         if (!EQ_##TYPE(v, rvals[ro - r->hseqbase])) \
    2541             :                                                 continue;               \
    2542             :                                         if (only_misses) {              \
    2543             :                                                 nr++;                   \
    2544             :                                                 break;                  \
    2545             :                                         }                               \
    2546             :                                         HASHLOOPBODY();                 \
    2547             :                                         if (semi && !max_one)           \
    2548             :                                                 break;                  \
    2549             :                                 }                                       \
    2550             :                         } else if (rci->tpe != cand_dense) {         \
    2551             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2552             :                                      rb != BUN_NONE;                    \
    2553             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2554             :                                         if (rb >= rl && rb < rh &&        \
    2555             :                                             EQ_##TYPE(v, rvals[rb]) &&  \
    2556             :                                             canditer_contains(rci, ro = (oid) (rb - roff + rseq))) { \
    2557             :                                                 if (only_misses) {      \
    2558             :                                                         nr++;           \
    2559             :                                                         break;          \
    2560             :                                                 }                       \
    2561             :                                                 HASHLOOPBODY();         \
    2562             :                                                 if (semi && !max_one)   \
    2563             :                                                         break;          \
    2564             :                                         }                               \
    2565             :                                 }                                       \
    2566             :                         } else {                                        \
    2567             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2568             :                                      rb != BUN_NONE;                    \
    2569             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2570             :                                         if (rb >= rl && rb < rh &&        \
    2571             :                                             EQ_##TYPE(v, rvals[rb])) {  \
    2572             :                                                 if (only_misses) {      \
    2573             :                                                         nr++;           \
    2574             :                                                         break;          \
    2575             :                                                 }                       \
    2576             :                                                 ro = (oid) (rb - roff + rseq); \
    2577             :                                                 HASHLOOPBODY();         \
    2578             :                                                 if (semi && !max_one)   \
    2579             :                                                         break;          \
    2580             :                                         }                               \
    2581             :                                 }                                       \
    2582             :                         }                                               \
    2583             :                         if (nr == 0) {                                  \
    2584             :                                 if (only_misses) {                      \
    2585             :                                         nr = 1;                         \
    2586             :                                         if (maybeextend(r1, r2, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2587             :                                                 goto bailout;           \
    2588             :                                         APPEND(r1, lo);                 \
    2589             :                                         if (lskipped)                   \
    2590             :                                                 r1->tseqbase = oid_nil;      \
    2591             :                                 } else if (nil_on_miss) {               \
    2592             :                                         nr = 1;                         \
    2593             :                                         r2->tnil = true;             \
    2594             :                                         r2->tnonil = false;          \
    2595             :                                         r2->tkey = false;            \
    2596             :                                         if (maybeextend(r1, r2, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2597             :                                                 goto bailout;           \
    2598             :                                         APPEND(r1, lo);                 \
    2599             :                                         APPEND(r2, oid_nil);            \
    2600             :                                 } else if (min_one) {                   \
    2601             :                                         GDKerror("not enough matches");       \
    2602             :                                         goto bailout;                   \
    2603             :                                 } else {                                \
    2604             :                                         lskipped = BATcount(r1) > 0; \
    2605             :                                 }                                       \
    2606             :                         } else if (only_misses) {                       \
    2607             :                                 lskipped = BATcount(r1) > 0;         \
    2608             :                         } else {                                        \
    2609             :                                 if (lskipped) {                         \
    2610             :                                         /* note, we only get here in an \
    2611             :                                          * iteration *after* lskipped was \
    2612             :                                          * first set to true, i.e. we did \
    2613             :                                          * indeed skip values in l */   \
    2614             :                                         r1->tseqbase = oid_nil;              \
    2615             :                                 }                                       \
    2616             :                                 if (nr > 1) {                                \
    2617             :                                         r1->tkey = false;            \
    2618             :                                         r1->tseqbase = oid_nil;              \
    2619             :                                 }                                       \
    2620             :                         }                                               \
    2621             :                         if (nr > 0 && BATcount(r1) > nr)          \
    2622             :                                 r1->trevsorted = false;                      \
    2623             :                 }                                                       \
    2624             :         } while (0)
    2625             : 
    2626             : /* Implementation of join using a hash lookup of values in the right
    2627             :  * column. */
    2628             : static gdk_return
    2629       13419 : hashjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    2630             :          struct canditer *restrict lci, struct canditer *restrict rci,
    2631             :          bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    2632             :          bool not_in, bool max_one, bool min_one,
    2633             :          BUN estimate, lng t0, bool swapped,
    2634             :          bool hash, bool phash, bool hash_cand,
    2635             :          const char *reason)
    2636             : {
    2637             :         oid lo, ro;
    2638             :         BATiter li, ri;
    2639             :         BUN rb, roff = 0;
    2640             :         /* rl, rh: lower and higher bounds for BUN values in hash table */
    2641             :         BUN rl, rh;
    2642             :         oid rseq;
    2643             :         BUN nr;
    2644             :         const char *lvals;
    2645             :         const char *lvars;
    2646       13419 :         const void *nil = ATOMnilptr(l->ttype);
    2647       13419 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    2648       13419 :         oid lval = oid_nil;     /* hold value if l is dense */
    2649             :         const char *v = (const char *) &lval;
    2650             :         bool lskipped = false;  /* whether we skipped values in l */
    2651             :         Hash *restrict hsh = NULL;
    2652             :         bool locked = false;
    2653             : 
    2654       40250 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    2655             : 
    2656             :         size_t counter = 0;
    2657             :         lng timeoffset = 0;
    2658       13419 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    2659       13418 :         if (qry_ctx != NULL) {
    2660       12610 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    2661             :         }
    2662             : 
    2663       13418 :         li = bat_iterator(l);
    2664       13419 :         ri = bat_iterator(r);
    2665             : 
    2666       13418 :         int t = ATOMbasetype(r->ttype);
    2667       13418 :         if (BATtvoid(r) || BATtvoid(l))
    2668             :                 t = TYPE_void;
    2669             : 
    2670       13418 :         lvals = (const char *) li.base;
    2671       13418 :         if (l->tvarsized && l->ttype) {
    2672        1241 :                 assert(r->tvarsized && r->ttype);
    2673        1241 :                 lvars = li.vh->base;
    2674             :         } else {
    2675       12177 :                 assert(!r->tvarsized || !r->ttype);
    2676             :                 lvars = NULL;
    2677             :         }
    2678             :         /* offset to convert BUN to OID for value in right column */
    2679       13418 :         rseq = r->hseqbase;
    2680             : 
    2681       13418 :         if (lci->ncand == 0 || rci->ncand== 0) {
    2682           0 :                 bat_iterator_end(&li);
    2683           0 :                 bat_iterator_end(&ri);
    2684           0 :                 return nomatch(r1p, r2p, l, r, lci,
    2685             :                                nil_on_miss, only_misses, __func__, t0);
    2686             :         }
    2687             : 
    2688       13418 :         BUN maxsize = joininitresults(r1p, r2p, lci->ncand, rci->ncand,
    2689       13418 :                                       l->tkey, r->tkey, semi | max_one,
    2690             :                                       nil_on_miss, only_misses, min_one,
    2691             :                                       estimate);
    2692       13419 :         if (maxsize == BUN_NONE) {
    2693           0 :                 bat_iterator_end(&li);
    2694           0 :                 bat_iterator_end(&ri);
    2695           0 :                 return GDK_FAIL;
    2696             :         }
    2697             : 
    2698       13419 :         BAT *r1 = *r1p;
    2699       13419 :         BAT *r2 = r2p ? *r2p : NULL;
    2700             : 
    2701       13419 :         rl = rci->seq - r->hseqbase;
    2702       13419 :         rh = canditer_last(rci) + 1 - r->hseqbase;
    2703       13418 :         if (hash_cand) {
    2704             :                 /* we need to create a hash on r specific for the
    2705             :                  * candidate list */
    2706             :                 char ext[32];
    2707          62 :                 assert(rci->s);
    2708         114 :                 MT_thread_setalgorithm(swapped ? "hashjoin using candidate hash (swapped)" : "hashjoin using candidate hash");
    2709          62 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": creating "
    2710             :                           "hash for candidate list " ALGOBATFMT "%s%s\n",
    2711             :                           ALGOBATPAR(r), ALGOBATPAR(rci->s),
    2712             :                           r->thash ? " ignoring existing hash" : "",
    2713             :                           swapped ? " (swapped)" : "");
    2714          62 :                 if (snprintf(ext, sizeof(ext), "thshjn%x",
    2715          62 :                              (unsigned) THRgettid()) >= (int) sizeof(ext))
    2716           0 :                         goto bailout;
    2717          62 :                 if ((hsh = BAThash_impl(r, rci, ext)) == NULL) {
    2718           0 :                         goto bailout;
    2719             :                 }
    2720       13356 :         } else if (phash) {
    2721             :                 /* there is a hash on the parent which we should use */
    2722        1768 :                 MT_thread_setalgorithm(swapped ? "hashjoin using parent hash (swapped)" : "hashjoin using parent hash");
    2723        1377 :                 BAT *b = BBP_cache(VIEWtparent(r));
    2724        1377 :                 TRC_DEBUG(ALGO, "%s(%s): using "
    2725             :                           "parent(" ALGOBATFMT ") for hash%s\n",
    2726             :                           __func__,
    2727             :                           BATgetId(r), ALGOBATPAR(b),
    2728             :                           swapped ? " (swapped)" : "");
    2729        1377 :                 roff = r->tbaseoff - b->tbaseoff;
    2730        1377 :                 rl += roff;
    2731        1377 :                 rh += roff;
    2732             :                 r = b;
    2733        1377 :                 bat_iterator_end(&ri);
    2734        1377 :                 ri = bat_iterator(r);
    2735        1377 :                 MT_rwlock_rdlock(&r->thashlock);
    2736        1377 :                 hsh = r->thash;
    2737             :                 locked = true;
    2738       11979 :         } else if (hash) {
    2739             :                 /* there is a hash on r which we should use */
    2740        9489 :                 MT_thread_setalgorithm(swapped ? "hashjoin using existing hash (swapped)" : "hashjoin using existing hash");
    2741        5666 :                 MT_rwlock_rdlock(&r->thashlock);
    2742        5667 :                 hsh = r->thash;
    2743             :                 locked = true;
    2744        5667 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": using "
    2745             :                           "existing hash%s\n",
    2746             :                           ALGOBATPAR(r),
    2747             :                           swapped ? " (swapped)" : "");
    2748        6312 :         } else if (BATtdense(r)) {
    2749             :                 /* no hash, just dense lookup */
    2750           1 :                 MT_thread_setalgorithm(swapped ? "hashjoin on dense (swapped)" : "hashjoin on dense");
    2751             :         } else {
    2752             :                 /* we need to create a hash on r */
    2753        9143 :                 MT_thread_setalgorithm(swapped ? "hashjoin using new hash (swapped)" : "hashjoin using new hash");
    2754        6312 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": creating hash%s\n",
    2755             :                           ALGOBATPAR(r),
    2756             :                           swapped ? " (swapped)" : "");
    2757        6312 :                 if (BAThash(r) != GDK_SUCCEED)
    2758           0 :                         goto bailout;
    2759        6312 :                 MT_rwlock_rdlock(&r->thashlock);
    2760        6312 :                 hsh = r->thash;
    2761             :                 locked = true;
    2762             :         }
    2763       13419 :         if (locked && hsh == NULL) {
    2764           0 :                 GDKerror("Hash disappeared for "ALGOBATFMT"\n", ALGOBATPAR(r));
    2765           0 :                 goto bailout;
    2766             :         }
    2767       13419 :         assert(hsh != NULL || BATtdense(r));
    2768       13419 :         if (hsh) {
    2769       13418 :                 TRC_DEBUG(ALGO, "hash for " ALGOBATFMT ": nbucket " BUNFMT ", nunique " BUNFMT ", nheads " BUNFMT "\n", ALGOBATPAR(r), hsh->nbucket, hsh->nunique, hsh->nheads);
    2770             :         }
    2771             : 
    2772       13419 :         if (not_in && !r->tnonil) {
    2773             :                 /* check whether there is a nil on the right, since if
    2774             :                  * so, we should return an empty result */
    2775         340 :                 if (hash_cand) {
    2776           0 :                         for (rb = HASHget(hsh, HASHprobe(hsh, nil));
    2777             :                              rb != BUN_NONE;
    2778           0 :                              rb = HASHgetlink(hsh, rb)) {
    2779           0 :                                 ro = canditer_idx(rci, rb);
    2780           0 :                                 if ((*cmp)(nil, BUNtail(ri, ro - r->hseqbase)) == 0) {
    2781           0 :                                         assert(!locked);
    2782           0 :                                         HEAPfree(&hsh->heaplink, true);
    2783           0 :                                         HEAPfree(&hsh->heapbckt, true);
    2784           0 :                                         GDKfree(hsh);
    2785           0 :                                         bat_iterator_end(&li);
    2786           0 :                                         bat_iterator_end(&ri);
    2787           0 :                                         return nomatch(r1p, r2p, l, r, lci,
    2788             :                                                        false, false, __func__, t0);
    2789             :                                 }
    2790             :                         }
    2791         340 :                 } else if (!BATtdense(r)) {
    2792         500 :                         for (rb = HASHget(hsh, HASHprobe(hsh, nil));
    2793             :                              rb != BUN_NONE;
    2794         160 :                              rb = HASHgetlink(hsh, rb)) {
    2795         161 :                                 if (rb >= rl && rb < rh &&
    2796         161 :                                     (cmp == NULL ||
    2797         161 :                                      (*cmp)(nil, BUNtail(ri, rb)) == 0)) {
    2798           1 :                                         if (locked)
    2799           1 :                                                 MT_rwlock_rdunlock(&r->thashlock);
    2800           1 :                                         bat_iterator_end(&li);
    2801           1 :                                         bat_iterator_end(&ri);
    2802           1 :                                         return nomatch(r1p, r2p, l, r, lci,
    2803             :                                                        false, false,
    2804             :                                                        __func__, t0);
    2805             :                                 }
    2806             :                         }
    2807             :                 }
    2808             :         }
    2809             : 
    2810             :         /* basic properties will be adjusted if necessary later on,
    2811             :          * they were initially set by joininitresults() */
    2812             : 
    2813       13418 :         if (r2) {
    2814       10964 :                 r2->tkey = l->tkey;
    2815             :                 /* r2 is not likely to be sorted (although it is
    2816             :                  * certainly possible) */
    2817       10964 :                 r2->tsorted = false;
    2818       10964 :                 r2->trevsorted = false;
    2819       10964 :                 r2->tseqbase = oid_nil;
    2820             :         }
    2821             : 
    2822       13418 :         if (lci->tpe != cand_dense)
    2823         274 :                 r1->tseqbase = oid_nil;
    2824             : 
    2825             : 
    2826       13418 :         switch (t) {
    2827        9441 :         case TYPE_int:
    2828   324415891 :                 HASHJOIN(int);
    2829             :                 break;
    2830        2197 :         case TYPE_lng:
    2831   125796246 :                 HASHJOIN(lng);
    2832             :                 break;
    2833           0 :         case TYPE_uuid:
    2834           0 :                 HASHJOIN(uuid);
    2835           0 :                 break;
    2836             :         default:
    2837     2510845 :                 while (lci->next < lci->ncand) {
    2838     2509070 :                         GDK_CHECK_TIMEOUT(timeoffset, counter,
    2839             :                                         GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    2840     2509070 :                         lo = canditer_next(lci);
    2841     2572731 :                         if (BATtdense(l))
    2842         132 :                                 lval = lo - l->hseqbase + l->tseqbase;
    2843     2572599 :                         else if (l->ttype != TYPE_void)
    2844     2556327 :                                 v = VALUE(l, lo - l->hseqbase);
    2845             :                         nr = 0;
    2846     2572731 :                         if ((!nil_matches || not_in) && cmp(v, nil) == 0) {
    2847             :                                 /* no match */
    2848        2826 :                                 if (not_in) {
    2849          10 :                                         lskipped = BATcount(r1) > 0;
    2850          10 :                                         continue;
    2851             :                                 }
    2852     2568920 :                         } else if (hash_cand) {
    2853           0 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    2854             :                                      rb != BUN_NONE;
    2855           0 :                                      rb = HASHgetlink(hsh, rb)) {
    2856           0 :                                         ro = canditer_idx(rci, rb);
    2857           0 :                                         if ((*cmp)(v, BUNtail(ri, ro - r->hseqbase)) != 0)
    2858           0 :                                                 continue;
    2859           0 :                                         if (only_misses) {
    2860           0 :                                                 nr++;
    2861           0 :                                                 break;
    2862             :                                         }
    2863           0 :                                         HASHLOOPBODY();
    2864           0 :                                         if (semi && !max_one)
    2865             :                                                 break;
    2866             :                                 }
    2867     2568920 :                         } else if (hsh == NULL) {
    2868           4 :                                 assert(BATtdense(r));
    2869           4 :                                 ro = *(const oid *) v;
    2870           4 :                                 if (ro >= r->tseqbase &&
    2871           4 :                                     ro < r->tseqbase + r->batCount) {
    2872           4 :                                         ro -= r->tseqbase;
    2873           4 :                                         ro += rseq;
    2874           4 :                                         if (canditer_contains(rci, ro)) {
    2875           4 :                                                 if (only_misses) {
    2876             :                                                         nr++;
    2877             :                                                         break;
    2878             :                                                 }
    2879           4 :                                                 HASHLOOPBODY();
    2880           4 :                                                 if (semi && !max_one)
    2881             :                                                         break;
    2882             :                                         }
    2883             :                                 }
    2884     2568916 :                         } else if (rci->tpe != cand_dense) {
    2885       24189 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    2886             :                                      rb != BUN_NONE;
    2887        8058 :                                      rb = HASHgetlink(hsh, rb)) {
    2888       16075 :                                         if (rb >= rl && rb < rh &&
    2889        8489 :                                             (*(cmp))(v, BUNtail(ri, rb)) == 0 &&
    2890         472 :                                             canditer_contains(rci, ro = (oid) (rb - roff + rseq))) {
    2891         394 :                                                 if (only_misses) {
    2892           0 :                                                         nr++;
    2893           0 :                                                         break;
    2894             :                                                 }
    2895         394 :                                                 HASHLOOPBODY();
    2896         394 :                                                 if (semi && !max_one)
    2897             :                                                         break;
    2898             :                                         }
    2899             :                                 }
    2900             :                         } else {
    2901     4907605 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    2902             :                                      rb != BUN_NONE;
    2903     2354820 :                                      rb = HASHgetlink(hsh, rb)) {
    2904     5067603 :                                         if (rb >= rl && rb < rh &&
    2905     2624741 :                                             (*(cmp))(v, BUNtail(ri, rb)) == 0) {
    2906     2194683 :                                                 if (only_misses) {
    2907       66522 :                                                         nr++;
    2908       66522 :                                                         break;
    2909             :                                                 }
    2910     2128161 :                                                 ro = (oid) (rb - roff + rseq);
    2911     2128161 :                                                 HASHLOOPBODY();
    2912     2107423 :                                                 if (semi && !max_one)
    2913             :                                                         break;
    2914             :                                         }
    2915             :                                 }
    2916             :                         }
    2917     2506235 :                         if (nr == 0) {
    2918      411154 :                                 if (only_misses) {
    2919             :                                         nr = 1;
    2920         244 :                                         if (maybeextend(r1, r2, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2921           0 :                                                 goto bailout;
    2922         244 :                                         APPEND(r1, lo);
    2923         244 :                                         if (lskipped)
    2924         210 :                                                 r1->tseqbase = oid_nil;
    2925      410910 :                                 } else if (nil_on_miss) {
    2926             :                                         nr = 1;
    2927           0 :                                         r2->tnil = true;
    2928           0 :                                         r2->tnonil = false;
    2929           0 :                                         r2->tkey = false;
    2930           0 :                                         if (maybeextend(r1, r2, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2931           0 :                                                 goto bailout;
    2932           0 :                                         APPEND(r1, lo);
    2933           0 :                                         APPEND(r2, oid_nil);
    2934      410910 :                                 } else if (min_one) {
    2935           0 :                                         GDKerror("not enough matches");
    2936           0 :                                         goto bailout;
    2937             :                                 } else {
    2938      410910 :                                         lskipped = BATcount(r1) > 0;
    2939             :                                 }
    2940     2097901 :                         } else if (only_misses) {
    2941       66375 :                                 lskipped = BATcount(r1) > 0;
    2942             :                         } else {
    2943     2031526 :                                 if (lskipped) {
    2944             :                                         /* note, we only get here in an
    2945             :                                          * iteration *after* lskipped was
    2946             :                                          * first set to true, i.e. we did
    2947             :                                          * indeed skip values in l */
    2948     1865346 :                                         r1->tseqbase = oid_nil;
    2949             :                                 }
    2950     2031526 :                                 if (nr > 1) {
    2951        6039 :                                         r1->tkey = false;
    2952        6039 :                                         r1->tseqbase = oid_nil;
    2953             :                                 }
    2954             :                         }
    2955     2509055 :                         if (nr > 0 && BATcount(r1) > nr)
    2956     2030499 :                                 r1->trevsorted = false;
    2957             :                 }
    2958             :                 break;
    2959             :         }
    2960       13405 :         if (locked) {
    2961             :                 locked = false;
    2962       13343 :                 MT_rwlock_rdunlock(&r->thashlock);
    2963             :         }
    2964       13411 :         bat_iterator_end(&li);
    2965       13413 :         bat_iterator_end(&ri);
    2966             : 
    2967       13412 :         if (hash_cand) {
    2968          62 :                 HEAPfree(&hsh->heaplink, true);
    2969          62 :                 HEAPfree(&hsh->heapbckt, true);
    2970          62 :                 GDKfree(hsh);
    2971             :         }
    2972             :         /* also set other bits of heap to correct value to indicate size */
    2973       13412 :         BATsetcount(r1, BATcount(r1));
    2974       13412 :         if (BATcount(r1) <= 1) {
    2975        4037 :                 r1->tsorted = true;
    2976        4037 :                 r1->trevsorted = true;
    2977        4037 :                 r1->tkey = true;
    2978        4037 :                 r1->tseqbase = 0;
    2979             :         }
    2980       13412 :         if (r2) {
    2981       10958 :                 BATsetcount(r2, BATcount(r2));
    2982       10958 :                 assert(BATcount(r1) == BATcount(r2));
    2983       10958 :                 if (BATcount(r2) <= 1) {
    2984        3503 :                         r2->tsorted = true;
    2985        3503 :                         r2->trevsorted = true;
    2986        3503 :                         r2->tkey = true;
    2987        3503 :                         r2->tseqbase = 0;
    2988             :                 }
    2989             :         }
    2990       13412 :         if (BATcount(r1) > 0) {
    2991       10517 :                 if (BATtdense(r1))
    2992        5803 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    2993       10517 :                 if (r2 && BATtdense(r2))
    2994        1119 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    2995             :         } else {
    2996        2895 :                 r1->tseqbase = 0;
    2997        2895 :                 if (r2) {
    2998        2384 :                         r2->tseqbase = 0;
    2999             :                 }
    3000             :         }
    3001       13412 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    3002             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    3003             :                   "nil_matches=%s,nil_on_miss=%s,semi=%s,only_misses=%s,"
    3004             :                   "not_in=%s,max_one=%s,min_one=%s;%s %s -> " ALGOBATFMT "," ALGOOPTBATFMT
    3005             :                   " (" LLFMT "usec)\n",
    3006             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3007             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    3008             :                   nil_matches ? "true" : "false",
    3009             :                   nil_on_miss ? "true" : "false",
    3010             :                   semi ? "true" : "false",
    3011             :                   only_misses ? "true" : "false",
    3012             :                   not_in ? "true" : "false",
    3013             :                   max_one ? "true" : "false",
    3014             :                   min_one ? "true" : "false",
    3015             :                   swapped ? " swapped" : "", reason,
    3016             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3017             :                   GDKusec() - t0);
    3018             : 
    3019             :         return GDK_SUCCEED;
    3020             : 
    3021           5 :   bailout:
    3022           5 :         bat_iterator_end(&li);
    3023           5 :         bat_iterator_end(&ri);
    3024           5 :         if (locked)
    3025           5 :                 MT_rwlock_rdunlock(&r->thashlock);
    3026           5 :         if (hash_cand && hsh) {
    3027           0 :                 HEAPfree(&hsh->heaplink, true);
    3028           0 :                 HEAPfree(&hsh->heapbckt, true);
    3029           0 :                 GDKfree(hsh);
    3030             :         }
    3031           5 :         BBPreclaim(r1);
    3032           5 :         BBPreclaim(r2);
    3033           5 :         return GDK_FAIL;
    3034             : }
    3035             : 
    3036             : /* Count the number of unique values for the first half and the complete
    3037             :  * set (the sample s of b) and return the two values in *cnt1 and
    3038             :  * *cnt2. In case of error, both values are 0. */
    3039             : static void
    3040       14096 : count_unique(BAT *b, BAT *s, BUN *cnt1, BUN *cnt2)
    3041             : {
    3042             :         struct canditer ci;
    3043             :         BUN half;
    3044             :         BUN cnt = 0;
    3045             :         const void *v;
    3046             :         const char *bvals;
    3047             :         const char *bvars;
    3048             :         oid bval;
    3049             :         oid i, o;
    3050             :         const char *nme;
    3051             :         BUN hb;
    3052             :         BATiter bi;
    3053             :         int (*cmp)(const void *, const void *);
    3054             :         const char *algomsg = "";
    3055             :         lng t0 = 0;
    3056             : 
    3057       14096 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    3058       14096 :         canditer_init(&ci, b, s);
    3059       14095 :         half = ci.ncand / 2;
    3060             : 
    3061       14095 :         if (b->tkey || ci.ncand <= 1 || BATtdense(b)) {
    3062             :                 /* trivial: already unique */
    3063          10 :                 *cnt1 = half;
    3064          10 :                 *cnt2 = ci.ncand;
    3065          10 :                 return;
    3066             :         }
    3067             : 
    3068       14085 :         if ((BATordered(b) && BATordered_rev(b)) ||
    3069       14085 :             (b->ttype == TYPE_void && is_oid_nil(b->tseqbase))) {
    3070             :                 /* trivial: all values are the same */
    3071           0 :                 *cnt1 = *cnt2 = 1;
    3072           0 :                 return;
    3073             :         }
    3074             : 
    3075       14085 :         assert(b->ttype != TYPE_void);
    3076             : 
    3077       14085 :         bi = bat_iterator(b);
    3078       14086 :         bvals = bi.base;
    3079       14086 :         if (b->tvarsized && b->ttype)
    3080        1180 :                 bvars = bi.vh->base;
    3081             :         else
    3082             :                 bvars = NULL;
    3083       14086 :         cmp = ATOMcompare(b->ttype);
    3084             : 
    3085       14086 :         *cnt1 = *cnt2 = 0;
    3086             : 
    3087       14086 :         if (BATordered(b) || BATordered_rev(b)) {
    3088             :                 const void *prev = NULL;
    3089             :                 algomsg = "sorted";
    3090       84511 :                 for (i = 0; i < ci.ncand; i++) {
    3091       84057 :                         if (i == half)
    3092         454 :                                 *cnt1 = cnt;
    3093       84057 :                         o = canditer_next(&ci);
    3094       84056 :                         v = VALUE(b, o - b->hseqbase);
    3095       84056 :                         if (prev == NULL || (*cmp)(v, prev) != 0) {
    3096       37641 :                                 cnt++;
    3097             :                         }
    3098             :                         prev = v;
    3099             :                 }
    3100         454 :                 *cnt2 = cnt;
    3101       13631 :         } else if (ATOMbasetype(b->ttype) == TYPE_bte) {
    3102             :                 unsigned char val;
    3103             :                 uint32_t seen[256 / 32];
    3104             : 
    3105             :                 algomsg = "byte-sized atoms";
    3106          23 :                 assert(bvars == NULL);
    3107          23 :                 memset(seen, 0, sizeof(seen));
    3108        1469 :                 for (i = 0; i < ci.ncand; i++) {
    3109        1446 :                         if (i == ci.ncand/ 2) {
    3110             :                                 cnt = 0;
    3111         207 :                                 for (int j = 0; j < 256 / 32; j++)
    3112         184 :                                         cnt += candmask_pop(seen[j]);
    3113          23 :                                 *cnt1 = cnt;
    3114             :                         }
    3115        1446 :                         o = canditer_next(&ci);
    3116        1446 :                         val = ((const unsigned char *) bvals)[o - b->hseqbase];
    3117        1446 :                         if (!(seen[val >> 5] & (1U << (val & 0x1F)))) {
    3118         108 :                                 seen[val >> 5] |= 1U << (val & 0x1F);
    3119             :                         }
    3120             :                 }
    3121             :                 cnt = 0;
    3122         207 :                 for (int j = 0; j < 256 / 32; j++)
    3123         184 :                         cnt += candmask_pop(seen[j]);
    3124          23 :                 *cnt2 = cnt;
    3125       13608 :         } else if (ATOMbasetype(b->ttype) == TYPE_sht) {
    3126             :                 unsigned short val;
    3127             :                 uint32_t *seen = NULL;
    3128             : 
    3129             :                 algomsg = "short-sized atoms";
    3130         446 :                 assert(bvars == NULL);
    3131         446 :                 seen = GDKzalloc((65536 / 32) * sizeof(seen[0]));
    3132         445 :                 if (seen == NULL) {
    3133           0 :                         bat_iterator_end(&bi);
    3134           0 :                         return;
    3135             :                 }
    3136       93586 :                 for (i = 0; i < ci.ncand; i++) {
    3137       93140 :                         if (i == half) {
    3138             :                                 cnt = 0;
    3139      844222 :                                 for (int j = 0; j < 65536 / 32; j++)
    3140      843776 :                                         cnt += candmask_pop(seen[j]);
    3141         446 :                                 *cnt1 = cnt;
    3142             :                         }
    3143       93140 :                         o = canditer_next(&ci);
    3144       93141 :                         val = ((const unsigned short *) bvals)[o - b->hseqbase];
    3145       93141 :                         if (!(seen[val >> 5] & (1U << (val & 0x1F)))) {
    3146        1223 :                                 seen[val >> 5] |= 1U << (val & 0x1F);
    3147             :                         }
    3148             :                 }
    3149             :                 cnt = 0;
    3150      850366 :                 for (int j = 0; j < 65536 / 32; j++)
    3151      849920 :                         cnt += candmask_pop(seen[j]);
    3152         446 :                 *cnt2 = cnt;
    3153         446 :                 GDKfree(seen);
    3154             :                 seen = NULL;
    3155             :         } else {
    3156             :                 BUN prb;
    3157             :                 BUN p;
    3158             :                 BUN mask;
    3159       13162 :                 Hash hs = {0};
    3160             : 
    3161       13162 :                 GDKclrerr();    /* not interested in BAThash errors */
    3162             :                 algomsg = "new partial hash";
    3163       13160 :                 nme = BBP_physical(b->batCacheid);
    3164       13160 :                 mask = HASHmask(ci.ncand);
    3165             :                 if (mask < ((BUN) 1 << 16))
    3166             :                         mask = (BUN) 1 << 16;
    3167       13160 :                 if ((hs.heaplink.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0 ||
    3168       13159 :                     (hs.heapbckt.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0 ||
    3169       13160 :                     snprintf(hs.heaplink.filename, sizeof(hs.heaplink.filename), "%s.thshunil%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs.heaplink.filename) ||
    3170       26325 :                     snprintf(hs.heapbckt.filename, sizeof(hs.heapbckt.filename), "%s.thshunib%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs.heapbckt.filename) ||
    3171       13162 :                     HASHnew(&hs, b->ttype, BUNlast(b), mask, BUN_NONE, false) != GDK_SUCCEED) {
    3172           0 :                         GDKerror("cannot allocate hash table\n");
    3173           0 :                         HEAPfree(&hs.heaplink, true);
    3174           0 :                         HEAPfree(&hs.heapbckt, true);
    3175           0 :                         bat_iterator_end(&bi);
    3176           0 :                         return;
    3177             :                 }
    3178     3829888 :                 for (i = 0; i < ci.ncand; i++) {
    3179     3816731 :                         if (i == half)
    3180       13156 :                                 *cnt1 = cnt;
    3181     3816731 :                         o = canditer_next(&ci);
    3182     3817121 :                         v = VALUE(b, o - b->hseqbase);
    3183     3817121 :                         prb = HASHprobe(&hs, v);
    3184     3866174 :                         for (hb = HASHget(&hs, prb);
    3185             :                              hb != BUN_NONE;
    3186       45485 :                              hb = HASHgetlink(&hs, hb)) {
    3187     1521224 :                                 if (cmp(v, BUNtail(bi, hb)) == 0)
    3188             :                                         break;
    3189             :                         }
    3190     3820225 :                         if (hb == BUN_NONE) {
    3191     2345037 :                                 p = o - b->hseqbase;
    3192     2345037 :                                 cnt++;
    3193             :                                 /* enter into hash table */
    3194     2345037 :                                 HASHputlink(&hs, p, HASHget(&hs, prb));
    3195     2341427 :                                 HASHput(&hs, prb, p);
    3196             :                         }
    3197             :                 }
    3198       13157 :                 *cnt2 = cnt;
    3199       13157 :                 HEAPfree(&hs.heaplink, true);
    3200       13163 :                 HEAPfree(&hs.heapbckt, true);
    3201             :         }
    3202       14086 :         bat_iterator_end(&bi);
    3203             : 
    3204       14086 :         TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT
    3205             :                   " -> " BUNFMT " " BUNFMT " (%s -- " LLFMT "usec)\n",
    3206             :                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    3207             :                   *cnt1, *cnt2, algomsg, GDKusec() - t0);
    3208             : 
    3209             :         return;
    3210             : }
    3211             : 
    3212             : static double
    3213       18181 : guess_uniques(BAT *b, struct canditer *ci)
    3214             : {
    3215             :         BUN cnt1, cnt2;
    3216             :         BAT *s1;
    3217             : 
    3218       18181 :         if (b->tkey)
    3219        4084 :                 return (double) ci->ncand;
    3220             : 
    3221       14097 :         if (ci->s == NULL ||
    3222       14095 :             (ci->tpe == cand_dense && ci->ncand == BATcount(b))) {
    3223       14094 :                 MT_lock_set(&b->theaplock);
    3224       14095 :                 double unique_est = b->tunique_est;
    3225       14095 :                 MT_lock_unset(&b->theaplock);
    3226       14093 :                 if (unique_est != 0) {
    3227           2 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT " use cached value\n",
    3228             :                                   ALGOBATPAR(b));
    3229           2 :                         return unique_est;
    3230             :                 }
    3231       14091 :                 s1 = BATsample_with_seed(b, 1000, (uint64_t) GDKusec() * (uint64_t) b->batCacheid);
    3232             :         } else {
    3233           3 :                 BAT *s2 = BATsample_with_seed(ci->s, 1000, (uint64_t) GDKusec() * (uint64_t) b->batCacheid);
    3234           3 :                 s1 = BATproject(s2, ci->s);
    3235           3 :                 BBPreclaim(s2);
    3236             :         }
    3237       14095 :         BUN n2 = BATcount(s1);
    3238       14095 :         BUN n1 = n2 / 2;
    3239       14095 :         count_unique(b, s1, &cnt1, &cnt2);
    3240       14096 :         BBPreclaim(s1);
    3241             : 
    3242       14096 :         double A = (double) (cnt2 - cnt1) / (n2 - n1);
    3243       14096 :         double B = cnt1 - n1 * A;
    3244             : 
    3245       14096 :         B += A * ci->ncand;
    3246       14096 :         if (ci->s == NULL ||
    3247           3 :             (ci->tpe == cand_dense && ci->ncand == BATcount(b))) {
    3248       14093 :                 MT_lock_set(&b->theaplock);
    3249       14093 :                 if (b->tunique_est == 0)
    3250       13543 :                         b->tunique_est = B;
    3251       14093 :                 MT_lock_unset(&b->theaplock);
    3252             :         }
    3253             :         return B;
    3254             : }
    3255             : 
    3256             : BUN
    3257           0 : BATguess_uniques(BAT *b, struct canditer *ci)
    3258             : {
    3259             :         struct canditer lci;
    3260           0 :         if (ci == NULL) {
    3261           0 :                 canditer_init(&lci, b, NULL);
    3262             :                 ci = &lci;
    3263             :         }
    3264           0 :         return (BUN) guess_uniques(b, ci);
    3265             : }
    3266             : 
    3267             : /* estimate the cost of doing a hashjoin with a hash on r; return value
    3268             :  * is the estimated cost, the last three arguments receive some extra
    3269             :  * information */
    3270             : static double
    3271       28173 : joincost(BAT *r, struct canditer *lci, struct canditer *rci,
    3272             :          bool *hash, bool *phash, bool *cand)
    3273             : {
    3274             :         bool rhash;
    3275             :         bool prhash = false;
    3276             :         bool rcand = false;
    3277             :         double rcost = 1;
    3278             :         bat parent;
    3279             :         BAT *b;
    3280             :         BUN nheads;
    3281             :         BUN cnt;
    3282             : 
    3283       28173 :         (void) BATcheckhash(r);
    3284       28174 :         MT_rwlock_rdlock(&r->thashlock);
    3285       28174 :         rhash = r->thash != NULL;
    3286       28174 :         nheads = r->thash ? r->thash->nheads : 0;
    3287       28174 :         cnt = BATcount(r);
    3288       28174 :         MT_rwlock_rdunlock(&r->thashlock);
    3289             : 
    3290       28176 :         if ((rci->tpe == cand_materialized || rci->tpe == cand_except) &&
    3291         594 :             rci->nvals > 0) {
    3292             :                 /* if we need to do binary search on candidate list,
    3293             :                  * take that into account; note checking the other
    3294             :                  * candidate types is essentially free */
    3295         594 :                 rcost += log2((double) rci->nvals);
    3296             :         }
    3297       28176 :         rcost *= lci->ncand;
    3298       28176 :         if (BATtdense(r)) {
    3299             :                 /* no need for a hash, and lookup is free */
    3300             :                 rhash = false;  /* don't use it, even if it's there */
    3301             :         } else {
    3302       28174 :                 if (rhash) {
    3303             :                         /* average chain length */
    3304        6995 :                         rcost *= (double) cnt / nheads;
    3305       21179 :                 } else if ((parent = VIEWtparent(r)) != 0 &&
    3306        8280 :                            (b = BBP_cache(parent)) != NULL &&
    3307        4140 :                            BATcheckhash(b)) {
    3308        2122 :                         MT_rwlock_rdlock(&b->thashlock);
    3309        2121 :                         rhash = prhash = b->thash != NULL;
    3310        2121 :                         if (rhash) {
    3311             :                                 /* average chain length */
    3312        2121 :                                 rcost *= (double) BATcount(b) / b->thash->nheads;
    3313             :                         }
    3314        2121 :                         MT_rwlock_rdunlock(&b->thashlock);
    3315             :                 }
    3316       28174 :                 if (!rhash) {
    3317       19057 :                         MT_lock_set(&r->theaplock);
    3318       19054 :                         double unique_est = r->tunique_est;
    3319       19054 :                         MT_lock_unset(&r->theaplock);
    3320       19057 :                         if (unique_est == 0)
    3321       18079 :                                 unique_est = guess_uniques(r, &(struct canditer){.tpe=cand_dense, .ncand=BATcount(r)});
    3322             :                         /* we have an estimate of the number of unique
    3323             :                          * values, assume some collisions */
    3324       19057 :                         rcost *= 1.1 * ((double) cnt / unique_est);
    3325             : #ifdef PERSISTENTHASH
    3326             :                         /* only count the cost of creating the hash for
    3327             :                          * non-persistent bats */
    3328       19057 :                         MT_lock_set(&r->theaplock);
    3329       19057 :                         if (!(BBP_status(r->batCacheid) & BBPEXISTING) /* || r->theap->dirty */ || GDKinmemory(r->theap->farmid))
    3330       18045 :                                 rcost += cnt * 2.0;
    3331       19057 :                         MT_lock_unset(&r->theaplock);
    3332             : #else
    3333             :                         rcost += cnt * 2.0;
    3334             : #endif
    3335             :                 }
    3336             :         }
    3337       28175 :         if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
    3338             :                 /* instead of using the hash on r (cost in rcost), we
    3339             :                  * can build a new hash on r taking the candidate list
    3340             :                  * into account; don't do this for masked candidate
    3341             :                  * since the searching of the candidate list
    3342             :                  * (canditer_idx) will kill us */
    3343             :                 double rccost;
    3344         754 :                 if (rhash && !prhash) {
    3345         481 :                         rccost = (double) cnt / nheads;
    3346             :                 } else {
    3347         273 :                         MT_lock_set(&r->theaplock);
    3348         273 :                         double unique_est = r->tunique_est;
    3349         273 :                         MT_lock_unset(&r->theaplock);
    3350         273 :                         if (unique_est == 0)
    3351         103 :                                 unique_est = guess_uniques(r, rci);
    3352             :                         /* we have an estimate of the number of unique
    3353             :                          * values, assume some chains */
    3354         273 :                         rccost = 1.1 * ((double) cnt / unique_est);
    3355             :                 }
    3356         754 :                 rccost *= lci->ncand;
    3357         754 :                 rccost += rci->ncand * 2.0; /* cost of building the hash */
    3358         754 :                 if (rccost < rcost) {
    3359             :                         rcost = rccost;
    3360             :                         rcand = true;
    3361             :                 }
    3362             :         }
    3363       28175 :         *hash = rhash;
    3364       28175 :         *phash = prhash;
    3365       28175 :         *cand = rcand;
    3366       28175 :         return rcost;
    3367             : }
    3368             : 
    3369             : #define MASK_EQ         1
    3370             : #define MASK_LT         2
    3371             : #define MASK_GT         4
    3372             : #define MASK_LE         (MASK_EQ | MASK_LT)
    3373             : #define MASK_GE         (MASK_EQ | MASK_GT)
    3374             : #define MASK_NE         (MASK_LT | MASK_GT)
    3375             : 
    3376             : static gdk_return
    3377       19839 : thetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int opcode, BUN estimate, const char *reason, lng t0)
    3378             : {
    3379             :         struct canditer lci, rci;
    3380             :         BUN lcnt, rcnt;
    3381             :         const char *lvals, *rvals;
    3382             :         const char *lvars, *rvars;
    3383       19839 :         const void *nil = ATOMnilptr(l->ttype);
    3384       19839 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    3385             :         const void *vl, *vr;
    3386             :         oid lastr = 0;          /* last value inserted into r2 */
    3387             :         BUN nr;
    3388             :         oid lo, ro;
    3389             :         int c;
    3390             :         bool lskipped = false;  /* whether we skipped values in l */
    3391             :         lng loff = 0, roff = 0;
    3392       19839 :         oid lval = oid_nil, rval = oid_nil;
    3393             : 
    3394             :         lng timeoffset = 0;
    3395       19839 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    3396       19839 :         if (qry_ctx != NULL) {
    3397       19839 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    3398             :         }
    3399             : 
    3400       59517 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    3401       19839 :         assert((opcode & (MASK_EQ | MASK_LT | MASK_GT)) != 0);
    3402             : 
    3403       19839 :         BATiter li = bat_iterator(l);
    3404       19838 :         BATiter ri = bat_iterator(r);
    3405             : 
    3406       19839 :         lcnt = canditer_init(&lci, l, sl);
    3407       19839 :         rcnt = canditer_init(&rci, r, sr);
    3408             : 
    3409       19839 :         lvals = BATtvoid(l) ? NULL : (const char *) li.base;
    3410       19839 :         rvals = BATtvoid(r) ? NULL : (const char *) ri.base;
    3411       19839 :         if (l->tvarsized && l->ttype) {
    3412           8 :                 assert(r->tvarsized && r->ttype);
    3413           8 :                 lvars = li.vh->base;
    3414           8 :                 rvars = ri.vh->base;
    3415             :         } else {
    3416       19831 :                 assert(!r->tvarsized || !r->ttype);
    3417             :                 lvars = rvars = NULL;
    3418             :         }
    3419             : 
    3420       19839 :         if (BATtvoid(l)) {
    3421           0 :                 if (!BATtdense(l)) {
    3422             :                         /* trivial: nils don't match anything */
    3423           0 :                         bat_iterator_end(&li);
    3424           0 :                         bat_iterator_end(&ri);
    3425           0 :                         return nomatch(r1p, r2p, l, r, &lci,
    3426             :                                        false, false, __func__, t0);
    3427             :                 }
    3428           0 :                 loff = (lng) l->tseqbase - (lng) l->hseqbase;
    3429             :         }
    3430       19839 :         if (BATtvoid(r)) {
    3431           0 :                 if (!BATtdense(r)) {
    3432             :                         /* trivial: nils don't match anything */
    3433           0 :                         bat_iterator_end(&li);
    3434           0 :                         bat_iterator_end(&ri);
    3435           0 :                         return nomatch(r1p, r2p, l, r, &lci,
    3436             :                                        false, false, __func__, t0);
    3437             :                 }
    3438           0 :                 roff = (lng) r->tseqbase - (lng) r->hseqbase;
    3439             :         }
    3440             : 
    3441       19839 :         BUN maxsize = joininitresults(r1p, r2p, lcnt, rcnt, false, false,
    3442             :                                       false, false, false, false, estimate);
    3443       19837 :         if (maxsize == BUN_NONE) {
    3444           0 :                 bat_iterator_end(&li);
    3445           0 :                 bat_iterator_end(&ri);
    3446           0 :                 return GDK_FAIL;
    3447             :         }
    3448       19837 :         BAT *r1 = *r1p;
    3449       19837 :         BAT *r2 = r2p ? *r2p : NULL;
    3450             : 
    3451       19837 :         r1->tkey = true;
    3452       19837 :         r1->tsorted = true;
    3453       19837 :         r1->trevsorted = true;
    3454       19837 :         if (r2) {
    3455        4300 :                 r2->tkey = true;
    3456        4300 :                 r2->tsorted = true;
    3457        4300 :                 r2->trevsorted = true;
    3458             :         }
    3459             : 
    3460             :         /* nested loop implementation for theta join */
    3461             :         vl = &lval;
    3462             :         vr = &rval;
    3463      407452 :         for (BUN lidx = 0; lidx < lci.ncand; lidx++) {
    3464      387631 :                 lo = canditer_next(&lci);
    3465      386590 :                 if (lvals)
    3466      386590 :                         vl = VALUE(l, lo - l->hseqbase);
    3467             :                 else
    3468           0 :                         lval = (oid) ((lng) lo + loff);
    3469             :                 nr = 0;
    3470      386590 :                 if (cmp(vl, nil) != 0) {
    3471      380116 :                         canditer_reset(&rci);
    3472     2260228 :                         TIMEOUT_LOOP(rci.ncand, timeoffset) {
    3473     1500781 :                                 ro = canditer_next(&rci);
    3474     1482674 :                                 if (rvals)
    3475     1482674 :                                         vr = VALUE(r, ro - r->hseqbase);
    3476             :                                 else
    3477           0 :                                         rval = (oid) ((lng) ro + roff);
    3478     1482674 :                                 if (cmp(vr, nil) == 0)
    3479       77446 :                                         continue;
    3480     1406893 :                                 c = cmp(vl, vr);
    3481     1412527 :                                 if (!((opcode & MASK_LT && c < 0) ||
    3482     1202093 :                                       (opcode & MASK_GT && c > 0) ||
    3483      805065 :                                       (opcode & MASK_EQ && c == 0)))
    3484      805042 :                                         continue;
    3485      607485 :                                 if (maybeextend(r1, r2, 1, lci.next, lci.ncand, maxsize) != GDK_SUCCEED)
    3486           0 :                                         goto bailout;
    3487      617836 :                                 if (BATcount(r1) > 0) {
    3488      604876 :                                         if (r2 && lastr + 1 != ro)
    3489       41359 :                                                 r2->tseqbase = oid_nil;
    3490      604876 :                                         if (nr == 0) {
    3491      126287 :                                                 r1->trevsorted = false;
    3492      126287 :                                                 if (r2 == NULL) {
    3493             :                                                         /* nothing */
    3494       31145 :                                                 } else if (lastr > ro) {
    3495       29140 :                                                         r2->tsorted = false;
    3496       29140 :                                                         r2->tkey = false;
    3497        2005 :                                                 } else if (lastr < ro) {
    3498           0 :                                                         r2->trevsorted = false;
    3499             :                                                 } else {
    3500        2005 :                                                         r2->tkey = false;
    3501             :                                                 }
    3502             :                                         }
    3503             :                                 }
    3504      617836 :                                 APPEND(r1, lo);
    3505      617836 :                                 if (r2) {
    3506      210612 :                                         APPEND(r2, ro);
    3507             :                                 }
    3508             :                                 lastr = ro;
    3509      617836 :                                 nr++;
    3510             :                         }
    3511      379773 :                         TIMEOUT_CHECK(timeoffset,
    3512             :                                       GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    3513             :                 }
    3514      387615 :                 if (nr > 1) {
    3515      100518 :                         r1->tkey = false;
    3516      100518 :                         r1->tseqbase = oid_nil;
    3517      100518 :                         if (r2) {
    3518       32707 :                                 r2->trevsorted = false;
    3519             :                         }
    3520      287097 :                 } else if (nr == 0) {
    3521      248151 :                         lskipped = BATcount(r1) > 0;
    3522       38946 :                 } else if (lskipped) {
    3523       31420 :                         r1->tseqbase = oid_nil;
    3524             :                 }
    3525             :         }
    3526             :         /* also set other bits of heap to correct value to indicate size */
    3527       19821 :         BATsetcount(r1, BATcount(r1));
    3528       19837 :         if (r2) {
    3529        4301 :                 BATsetcount(r2, BATcount(r2));
    3530        4301 :                 assert(BATcount(r1) == BATcount(r2));
    3531             :         }
    3532       19837 :         if (BATcount(r1) > 0) {
    3533       13428 :                 if (BATtdense(r1))
    3534         176 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    3535       13428 :                 if (r2 && BATtdense(r2))
    3536         377 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    3537             :         } else {
    3538        6409 :                 r1->tseqbase = 0;
    3539        6409 :                 if (r2) {
    3540         625 :                         r2->tseqbase = 0;
    3541             :                 }
    3542             :         }
    3543       19837 :         bat_iterator_end(&li);
    3544       19839 :         bat_iterator_end(&ri);
    3545       19837 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    3546             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    3547             :                   "opcode=%s%s%s; %s -> " ALGOBATFMT "," ALGOOPTBATFMT
    3548             :                   " (" LLFMT "usec)\n",
    3549             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3550             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    3551             :                   opcode & MASK_LT ? "<" : "",
    3552             :                   opcode & MASK_GT ? ">" : "",
    3553             :                   opcode & MASK_EQ ? "=" : "",
    3554             :                   reason,
    3555             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3556             :                   GDKusec() - t0);
    3557             :         return GDK_SUCCEED;
    3558             : 
    3559           0 :   bailout:
    3560           0 :         bat_iterator_end(&li);
    3561           0 :         bat_iterator_end(&ri);
    3562           0 :         BBPreclaim(r1);
    3563           0 :         BBPreclaim(r2);
    3564           0 :         return GDK_FAIL;
    3565             : }
    3566             : 
    3567             : /* small ordered right, dense left, oid's only, do fetches */
    3568             : static gdk_return
    3569           0 : fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    3570             :           struct canditer *restrict lci, struct canditer *restrict rci,
    3571             :           const char *reason, lng t0)
    3572             : {
    3573           0 :         oid lo = lci->seq - l->hseqbase + l->tseqbase, hi = lo + lci->ncand;
    3574             :         BUN b, e, p;
    3575             :         BAT *r1, *r2 = NULL;
    3576             : 
    3577           0 :         MT_thread_setalgorithm(__func__);
    3578           0 :         if (r->tsorted) {
    3579           0 :                 b = SORTfndfirst(r, &lo);
    3580           0 :                 e = SORTfndfirst(r, &hi);
    3581             :         } else {
    3582           0 :                 assert(r->trevsorted);
    3583           0 :                 b = SORTfndlast(r, &hi);
    3584           0 :                 e = SORTfndlast(r, &lo);
    3585             :         }
    3586           0 :         if (b < rci->seq - r->hseqbase)
    3587             :                 b = rci->seq - r->hseqbase;
    3588           0 :         if (e > rci->seq + rci->ncand - r->hseqbase)
    3589             :                 e = rci->seq + rci->ncand - r->hseqbase;
    3590           0 :         if (e == b) {
    3591           0 :                 return nomatch(r1p, r2p, l, r, lci,
    3592             :                                false, false, __func__, t0);
    3593             :         }
    3594           0 :         r1 = COLnew(0, TYPE_oid, e - b, TRANSIENT);
    3595           0 :         if (r1 == NULL)
    3596             :                 return GDK_FAIL;
    3597           0 :         if (r2p) {
    3598           0 :                 if ((r2 = BATdense(0, r->hseqbase + b, e - b)) == NULL) {
    3599           0 :                         BBPreclaim(r1);
    3600           0 :                         return GDK_FAIL;
    3601             :                 }
    3602           0 :                 *r2p = r2;
    3603             :         }
    3604           0 :         *r1p = r1;
    3605           0 :         oid *op = (oid *) Tloc(r1, 0);
    3606           0 :         BATiter ri = bat_iterator(r);
    3607           0 :         const oid *rp = (const oid *) ri.base;
    3608           0 :         for (p = b; p < e; p++) {
    3609           0 :                 *op++ = rp[p] + l->hseqbase - l->tseqbase;
    3610             :         }
    3611           0 :         bat_iterator_end(&ri);
    3612           0 :         BATsetcount(r1, e - b);
    3613           0 :         r1->tkey = r->tkey;
    3614           0 :         r1->tsorted = r->tsorted || e - b <= 1;
    3615           0 :         r1->trevsorted = r->trevsorted || e - b <= 1;
    3616           0 :         r1->tseqbase = e == b ? 0 : e - b == 1 ? *(const oid *)Tloc(r1, 0) : oid_nil;
    3617           0 :         TRC_DEBUG(ALGO, "%s(l=" ALGOBATFMT ","
    3618             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    3619             :                   "sr=" ALGOOPTBATFMT ") %s "
    3620             :                   "-> (" ALGOBATFMT "," ALGOOPTBATFMT ") " LLFMT "us\n",
    3621             :                   __func__,
    3622             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3623             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    3624             :                   reason,
    3625             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3626             :                   GDKusec() - t0);
    3627             : 
    3628             :         return GDK_SUCCEED;
    3629             : }
    3630             : 
    3631             : static BAT *
    3632        6745 : bitmaskjoin(BAT *l, BAT *r,
    3633             :             struct canditer *restrict lci, struct canditer *restrict rci,
    3634             :             bool only_misses,
    3635             :             const char *reason, lng t0)
    3636             : {
    3637             :         BAT *r1;
    3638        6745 :         size_t nmsk = (lci->ncand + 31) / 32;
    3639        6745 :         uint32_t *mask = GDKzalloc(nmsk * sizeof(uint32_t));
    3640             :         BUN cnt = 0;
    3641             : 
    3642        6745 :         MT_thread_setalgorithm(__func__);
    3643        6744 :         if (mask == NULL)
    3644             :                 return NULL;
    3645             : 
    3646    19747793 :         for (BUN n = 0; n < rci->ncand; n++) {
    3647    19741048 :                 oid o = canditer_next(rci) - r->hseqbase;
    3648    19498478 :                 o = BUNtoid(r, o);
    3649    19741049 :                 if (is_oid_nil(o))
    3650           0 :                         continue;
    3651    19741049 :                 o += l->hseqbase;
    3652    19741049 :                 if (o < lci->seq + l->tseqbase)
    3653           2 :                         continue;
    3654    19741047 :                 o -= lci->seq + l->tseqbase;
    3655    19741047 :                 if (o >= lci->ncand)
    3656           0 :                         continue;
    3657    19741047 :                 if ((mask[o >> 5] & (1U << (o & 0x1F))) == 0) {
    3658    18620794 :                         cnt++;
    3659    18620794 :                         mask[o >> 5] |= 1U << (o & 0x1F);
    3660             :                 }
    3661             :         }
    3662        6745 :         if (only_misses)
    3663        4140 :                 cnt = lci->ncand - cnt;
    3664        6745 :         if (cnt == 0 || cnt == lci->ncand) {
    3665        2102 :                 GDKfree(mask);
    3666        2102 :                 if (cnt == 0)
    3667         348 :                         return BATdense(0, 0, 0);
    3668        1754 :                 return BATdense(0, lci->seq, lci->ncand);
    3669             :         }
    3670        4643 :         r1 = COLnew(0, TYPE_oid, cnt, TRANSIENT);
    3671        4643 :         if (r1 != NULL) {
    3672        4643 :                 oid *r1p = Tloc(r1, 0);
    3673             : 
    3674        4643 :                 r1->tkey = true;
    3675        4643 :                 r1->tnil = false;
    3676        4643 :                 r1->tnonil = true;
    3677        4643 :                 r1->tsorted = true;
    3678        4643 :                 r1->trevsorted = cnt <= 1;
    3679        4643 :                 if (only_misses) {
    3680             :                         /* set the bits for unused values at the
    3681             :                          * end so that we don't need special
    3682             :                          * code in the loop */
    3683        3792 :                         if (lci->ncand & 0x1F)
    3684        3706 :                                 mask[nmsk - 1] |= ~0U << (lci->ncand & 0x1F);
    3685     1733590 :                         for (size_t i = 0; i < nmsk; i++)
    3686     1729798 :                                 if (mask[i] != ~0U)
    3687    54869430 :                                         for (uint32_t j = 0; j < 32; j++)
    3688    53206720 :                                                 if ((mask[i] & (1U << j)) == 0)
    3689    47571136 :                                                         *r1p++ = i * 32 + j + lci->seq;
    3690             :                 } else {
    3691      471353 :                         for (size_t i = 0; i < nmsk; i++)
    3692      470502 :                                 if (mask[i] != 0U)
    3693    12893496 :                                         for (uint32_t j = 0; j < 32; j++)
    3694    12502784 :                                                 if ((mask[i] & (1U << j)) != 0)
    3695    11508901 :                                                         *r1p++ = i * 32 + j + lci->seq;
    3696             :                 }
    3697        4643 :                 BATsetcount(r1, cnt);
    3698        4643 :                 assert((BUN) (r1p - (oid*) Tloc(r1, 0)) == BATcount(r1));
    3699             : 
    3700        4643 :                 TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    3701             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    3702             :                           "sr=" ALGOOPTBATFMT ",only_misses=%s; %s "
    3703             :                           "-> " ALGOBATFMT " (" LLFMT "usec)\n",
    3704             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    3705             :                           ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    3706             :                           only_misses ? "true" : "false",
    3707             :                           reason,
    3708             :                           ALGOBATPAR(r1),
    3709             :                           GDKusec() - t0);
    3710             :         }
    3711        4643 :         GDKfree(mask);
    3712        4643 :         return r1;
    3713             : }
    3714             : 
    3715             : /* Make the implementation choices for various left joins.
    3716             :  * nil_matches: nil is an ordinary value that can match;
    3717             :  * nil_on_miss: outer join: fill in a nil value in case of no match;
    3718             :  * semi: semi join: return one of potentially more than one matches;
    3719             :  * only_misses: difference: list rows without match on the right;
    3720             :  * not_in: for implementing NOT IN: if nil on right then there are no matches;
    3721             :  * max_one: error if there is more than one match. */
    3722             : static gdk_return
    3723       99075 : leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    3724             :          bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    3725             :          bool not_in, bool max_one, bool min_one, BUN estimate,
    3726             :          const char *func, lng t0)
    3727             : {
    3728             :         BUN lcnt, rcnt;
    3729             :         struct canditer lci, rci;
    3730             :         bool rhash, prhash, rcand;
    3731             :         bat parent;
    3732             :         double rcost = 0;
    3733             :         gdk_return rc;
    3734             : 
    3735       99075 :         MT_thread_setalgorithm(__func__);
    3736             :         /* only_misses implies left output only */
    3737       98982 :         assert(!only_misses || r2p == NULL);
    3738             :         /* if nil_on_miss is set, we really need a right output */
    3739       98982 :         assert(!nil_on_miss || r2p != NULL);
    3740             :         /* if not_in is set, then so is only_misses */
    3741       98982 :         assert(!not_in || only_misses);
    3742       98982 :         *r1p = NULL;
    3743       98982 :         if (r2p)
    3744         876 :                 *r2p = NULL;
    3745             : 
    3746       98982 :         lcnt = canditer_init(&lci, l, sl);
    3747       99060 :         rcnt = canditer_init(&rci, r, sr);
    3748             : 
    3749       98929 :         if ((parent = VIEWtparent(l)) != 0) {
    3750        2056 :                 BAT *b = BBP_cache(parent);
    3751        2056 :                 if (l->hseqbase == b->hseqbase &&
    3752        2085 :                     BATcount(l) == BATcount(b) &&
    3753         958 :                     ATOMtype(l->ttype) == ATOMtype(b->ttype)) {
    3754             :                         l = b;
    3755             :                 }
    3756             :         }
    3757       98929 :         if ((parent = VIEWtparent(r)) != 0) {
    3758        2965 :                 BAT *b = BBP_cache(parent);
    3759        2965 :                 if (r->hseqbase == b->hseqbase &&
    3760        4711 :                     BATcount(r) == BATcount(b) &&
    3761        3492 :                     ATOMtype(r->ttype) == ATOMtype(b->ttype)) {
    3762             :                         r = b;
    3763             :                 }
    3764             :         }
    3765             : 
    3766       98929 :         if (l->ttype == TYPE_msk || mask_cand(l)) {
    3767           3 :                 if ((l = BATunmask(l)) == NULL)
    3768             :                         return GDK_FAIL;
    3769             :         } else {
    3770       98926 :                 BBPfix(l->batCacheid);
    3771             :         }
    3772       99042 :         if (r->ttype == TYPE_msk || mask_cand(r)) {
    3773           1 :                 if ((r = BATunmask(r)) == NULL) {
    3774           0 :                         BBPunfix(l->batCacheid);
    3775           0 :                         return GDK_FAIL;
    3776             :                 }
    3777             :         } else {
    3778       99041 :                 BBPfix(r->batCacheid);
    3779             :         }
    3780             : 
    3781       99043 :         if (joinparamcheck(l, r, NULL, sl, sr, func) != GDK_SUCCEED) {
    3782             :                 rc = GDK_FAIL;
    3783           0 :                 goto doreturn;
    3784             :         }
    3785             : 
    3786       99034 :         if (lcnt == 0 || rcnt == 0) {
    3787       64682 :                 TRC_DEBUG(ALGO, "%s(l=" ALGOBATFMT ","
    3788             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    3789             :                           "sr=" ALGOOPTBATFMT ",nil_matches=%d,"
    3790             :                           "nil_on_miss=%d,semi=%d,only_misses=%d,"
    3791             :                           "not_in=%d,max_one=%d,min_one=%d)\n",
    3792             :                           func,
    3793             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    3794             :                           ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    3795             :                           nil_matches, nil_on_miss, semi, only_misses,
    3796             :                           not_in, max_one, min_one);
    3797       64682 :                 rc = nomatch(r1p, r2p, l, r, &lci,
    3798             :                              nil_on_miss, only_misses, func, t0);
    3799       64725 :                 goto doreturn;
    3800             :         }
    3801             : 
    3802       34352 :         if (!nil_on_miss && !semi && !max_one && !min_one && !only_misses && !not_in &&
    3803         199 :             (lcnt == 1 || (BATordered(l) && BATordered_rev(l)) ||
    3804         182 :              (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)))) {
    3805             :                 /* single value to join, use select */
    3806          56 :                 rc = selectjoin(r1p, r2p, l, r, &lci, &rci,
    3807             :                                 nil_matches, t0, false, func);
    3808          56 :                 goto doreturn;
    3809       34296 :         } else if (BATtdense(r) && rci.tpe == cand_dense) {
    3810             :                 /* use special implementation for dense right-hand side */
    3811       22001 :                 rc = mergejoin_void(r1p, r2p, l, r, &lci, &rci,
    3812             :                                     nil_on_miss, only_misses, t0, false,
    3813             :                                     func);
    3814       22003 :                 goto doreturn;
    3815       12295 :         } else if (BATtdense(l)
    3816        6851 :                    && lci.tpe == cand_dense
    3817        6822 :                    && rci.tpe == cand_dense
    3818             :                    && !semi
    3819        6822 :                    && !max_one
    3820             :                    && !min_one
    3821        4140 :                    && !nil_matches
    3822             :                    && !only_misses
    3823        4140 :                    && !not_in
    3824             :                    /* && (rcnt * 1024) < lcnt */
    3825           0 :                    && (BATordered(r) || BATordered_rev(r))) {
    3826           0 :                 assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */
    3827           0 :                 rc = fetchjoin(r1p, r2p, l, r, sl, sr, &lci, &rci, func, t0);
    3828           0 :                 goto doreturn;
    3829       12295 :         } else if (BATtdense(l)
    3830        6851 :                    && lci.tpe == cand_dense
    3831        6822 :                    && r2p == NULL
    3832        6750 :                    && (semi || only_misses)
    3833             :                    && !nil_on_miss
    3834        6750 :                    && !not_in
    3835             :                    && !max_one
    3836        6750 :                    && !min_one) {
    3837        6745 :                 *r1p = bitmaskjoin(l, r, &lci, &rci, only_misses, func, t0);
    3838        6743 :                 rc = *r1p == NULL ? GDK_FAIL : GDK_SUCCEED;
    3839        6743 :                 goto doreturn;
    3840             :         } else {
    3841             :                 /* looking at r->tvheap, so we need a lock */
    3842        5550 :                 MT_lock_set(&r->theaplock);
    3843        5550 :                 BUN hsz = r->tvheap ? r->tvheap->size : 0;
    3844        5550 :                 MT_lock_unset(&r->theaplock);
    3845        5550 :                 if ((BATordered(r) || BATordered_rev(r))
    3846        4948 :                     && (BATordered(l)
    3847         508 :                         || BATordered_rev(l)
    3848         505 :                         || BATtdense(r)
    3849         505 :                         || lcnt < 1024
    3850         272 :                         || BATcount(r) * (Tsize(r) + hsz + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 1))) {
    3851        4812 :                         rc = mergejoin(r1p, r2p, l, r, &lci, &rci,
    3852             :                                        nil_matches, nil_on_miss, semi, only_misses,
    3853             :                                        not_in, max_one, min_one, estimate, t0, false, func);
    3854        4812 :                         goto doreturn;
    3855             :                 }
    3856             :         }
    3857         738 :         rcost = joincost(r, &lci, &rci, &rhash, &prhash, &rcand);
    3858             : 
    3859         738 :         if (!nil_on_miss && !only_misses && !not_in && !max_one && !min_one) {
    3860             :                 /* maybe do a hash join on the swapped operands; if we
    3861             :                  * do, we need to sort the output, so we take that into
    3862             :                  * account as well */
    3863             :                 bool lhash, plhash, lcand;
    3864             :                 double lcost;
    3865             : 
    3866         210 :                 lcost = joincost(l, &rci, &lci, &lhash, &plhash, &lcand);
    3867         210 :                 if (semi)
    3868          98 :                         lcost += rci.ncand; /* cost of BATunique(r) */
    3869             :                 /* add cost of sorting; obviously we don't know the
    3870             :                  * size, so we guess that the size of the output is
    3871             :                  * the same as the right input */
    3872         210 :                 lcost += rci.ncand * log((double) rci.ncand); /* sort */
    3873         210 :                 if (lcost < rcost) {
    3874          53 :                         BAT *tmp = sr;
    3875             :                         BAT *r1, *r2;
    3876          53 :                         if (semi) {
    3877          48 :                                 sr = BATunique(r, sr);
    3878          48 :                                 if (sr == NULL) {
    3879             :                                         rc = GDK_FAIL;
    3880           0 :                                         goto doreturn;
    3881             :                                 }
    3882          48 :                                 canditer_init(&rci, r, sr);
    3883             :                         }
    3884          53 :                         rc = hashjoin(&r2, &r1, r, l, &rci, &lci, nil_matches,
    3885             :                                       false, false, false, false, false, false, estimate,
    3886             :                                       t0, true, lhash, plhash, lcand, func);
    3887          53 :                         if (semi)
    3888          48 :                                 BBPunfix(sr->batCacheid);
    3889          53 :                         if (rc != GDK_SUCCEED)
    3890           0 :                                 goto doreturn;
    3891          53 :                         if (r2p == NULL) {
    3892          48 :                                 BBPunfix(r2->batCacheid);
    3893          48 :                                 r2 = NULL;
    3894             :                         }
    3895          53 :                         if (semi)
    3896          48 :                                 r1->tkey = true;
    3897          53 :                         if (!VIEWtparent(r1) &&
    3898          53 :                             r1->ttype == TYPE_oid &&
    3899          53 :                             BBP_refs(r1->batCacheid) == 1 &&
    3900          53 :                             (r2 == NULL ||
    3901           5 :                              (!VIEWtparent(r2) &&
    3902           5 :                               BBP_refs(r2->batCacheid) == 1 &&
    3903           5 :                               r2->ttype == TYPE_oid))) {
    3904             :                                 /* in-place sort if we can */
    3905          53 :                                 if (r2) {
    3906           5 :                                         GDKqsort(r1->theap->base, r2->theap->base,
    3907           5 :                                                  NULL, r1->batCount, r1->twidth,
    3908           5 :                                                  r2->twidth, TYPE_oid, false,
    3909             :                                                  false);
    3910           5 :                                         r2->tsorted = false;
    3911           5 :                                         r2->trevsorted = false;
    3912           5 :                                         r2->tseqbase = oid_nil;
    3913           5 :                                         *r2p = r2;
    3914             :                                 } else {
    3915          48 :                                         GDKqsort(r1->theap->base, NULL, NULL,
    3916          48 :                                                  r1->batCount, r1->twidth, 0,
    3917             :                                                  TYPE_oid, false, false);
    3918             :                                 }
    3919          53 :                                 r1->tsorted = true;
    3920          53 :                                 r1->trevsorted = false;
    3921          53 :                                 *r1p = r1;
    3922             :                         } else {
    3923             :                                 BAT *ob;
    3924           0 :                                 rc = BATsort(&tmp, r2p ? &ob : NULL, NULL,
    3925             :                                              r1, NULL, NULL, false, false, false);
    3926           0 :                                 BBPunfix(r1->batCacheid);
    3927           0 :                                 if (rc != GDK_SUCCEED) {
    3928           0 :                                         if (r2)
    3929           0 :                                                 BBPunfix(r2->batCacheid);
    3930           0 :                                         goto doreturn;
    3931             :                                 }
    3932           0 :                                 *r1p = r1 = tmp;
    3933           0 :                                 if (r2p) {
    3934           0 :                                         tmp = BATproject(ob, r2);
    3935           0 :                                         BBPunfix(r2->batCacheid);
    3936           0 :                                         BBPunfix(ob->batCacheid);
    3937           0 :                                         if (tmp == NULL) {
    3938           0 :                                                 BBPunfix(r1->batCacheid);
    3939             :                                                 rc = GDK_FAIL;
    3940           0 :                                                 goto doreturn;
    3941             :                                         }
    3942           0 :                                         *r2p = tmp;
    3943             :                                 }
    3944             :                         }
    3945             :                         rc = GDK_SUCCEED;
    3946          53 :                         goto doreturn;
    3947             :                 }
    3948             :         }
    3949         685 :         rc = hashjoin(r1p, r2p, l, r, &lci, &rci,
    3950             :                       nil_matches, nil_on_miss, semi, only_misses,
    3951             :                       not_in, max_one, min_one, estimate, t0, false, rhash, prhash,
    3952             :                       rcand, func);
    3953       99077 :   doreturn:
    3954       99077 :         BBPunfix(l->batCacheid);
    3955       99077 :         BBPunfix(r->batCacheid);
    3956       99068 :         return rc;
    3957             : }
    3958             : 
    3959             : /* Perform an equi-join over l and r.  Returns two new, aligned, bats
    3960             :  * with the oids of matching tuples.  The result is in the same order
    3961             :  * as l (i.e. r1 is sorted). */
    3962             : gdk_return
    3963         655 : BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
    3964             : {
    3965         655 :         return leftjoin(r1p, r2p, l, r, sl, sr, nil_matches,
    3966             :                         false, false, false, false, false, false,
    3967             :                         estimate, __func__,
    3968         655 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0);
    3969             : }
    3970             : 
    3971             : /* Performs a left outer join over l and r.  Returns two new, aligned,
    3972             :  * bats with the oids of matching tuples, or the oid in the first
    3973             :  * output bat and nil in the second output bat if the value in l does
    3974             :  * not occur in r.  The result is in the same order as l (i.e. r1 is
    3975             :  * sorted). */
    3976             : gdk_return
    3977           0 : BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate)
    3978             : {
    3979           0 :         return leftjoin(r1p, r2p, l, r, sl, sr, nil_matches,
    3980             :                         true, false, false, false, match_one, match_one,
    3981             :                         estimate, __func__,
    3982           0 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0);
    3983             : }
    3984             : 
    3985             : /* Perform a semi-join over l and r.  Returns one or two new, bats
    3986             :  * with the oids of matching tuples.  The result is in the same order
    3987             :  * as l (i.e. r1 is sorted).  If a single bat is returned, it is a
    3988             :  * candidate list. */
    3989             : gdk_return
    3990         221 : BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    3991             :             bool nil_matches, bool max_one, BUN estimate)
    3992             : {
    3993         221 :         return leftjoin(r1p, r2p, l, r, sl, sr, nil_matches,
    3994             :                         false, true, false, false, max_one, false,
    3995             :                         estimate, __func__,
    3996         221 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0);
    3997             : }
    3998             : 
    3999             : /* Return a candidate list with the list of rows in l whose value also
    4000             :  * occurs in r.  This is just the left output of a semi-join. */
    4001             : BAT *
    4002        7069 : BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool max_one,
    4003             :              BUN estimate)
    4004             : {
    4005             :         BAT *bn;
    4006             : 
    4007        7069 :         if (leftjoin(&bn, NULL, l, r, sl, sr, nil_matches,
    4008             :                      false, true, false, false, max_one, false,
    4009             :                      estimate, __func__,
    4010        7069 :                      GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0) == GDK_SUCCEED)
    4011        7065 :                 return virtualize(bn);
    4012             :         return NULL;
    4013             : }
    4014             : 
    4015             : /* Return the difference of l and r.  The result is a BAT with the
    4016             :  * oids of those values in l that do not occur in r.  This is what you
    4017             :  * might call an anti-semi-join.  The result is a candidate list. */
    4018             : BAT *
    4019       91133 : BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in,
    4020             :         BUN estimate)
    4021             : {
    4022             :         BAT *bn;
    4023             : 
    4024       91133 :         if (leftjoin(&bn, NULL, l, r, sl, sr, nil_matches,
    4025             :                      false, false, true, not_in, false, false,
    4026             :                      estimate, __func__,
    4027       91133 :                      GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0) == GDK_SUCCEED)
    4028       91122 :                 return virtualize(bn);
    4029             :         return NULL;
    4030             : }
    4031             : 
    4032             : gdk_return
    4033       19839 : BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate)
    4034             : {
    4035             :         int opcode = 0;
    4036             :         lng t0 = 0;
    4037             : 
    4038             :         /* encode operator as a bit mask into opcode */
    4039       19839 :         switch (op) {
    4040           0 :         case JOIN_EQ:
    4041           0 :                 return BATjoin(r1p, r2p, l, r, sl, sr, nil_matches, estimate);
    4042             :         case JOIN_NE:
    4043             :                 opcode = MASK_NE;
    4044             :                 break;
    4045        4247 :         case JOIN_LT:
    4046             :                 opcode = MASK_LT;
    4047        4247 :                 break;
    4048          13 :         case JOIN_LE:
    4049             :                 opcode = MASK_LE;
    4050          13 :                 break;
    4051       15501 :         case JOIN_GT:
    4052             :                 opcode = MASK_GT;
    4053       15501 :                 break;
    4054          10 :         case JOIN_GE:
    4055             :                 opcode = MASK_GE;
    4056          10 :                 break;
    4057           0 :         default:
    4058           0 :                 GDKerror("unknown operator %d.\n", op);
    4059           0 :                 return GDK_FAIL;
    4060             :         }
    4061             : 
    4062       19839 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4063       19839 :         *r1p = NULL;
    4064       19839 :         if (r2p) {
    4065        4302 :                 *r2p = NULL;
    4066             :         }
    4067       19839 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED)
    4068             :                 return GDK_FAIL;
    4069             : 
    4070       19839 :         return thetajoin(r1p, r2p, l, r, sl, sr, opcode, estimate,
    4071             :                          __func__, t0);
    4072             : }
    4073             : 
    4074             : gdk_return
    4075      319964 : BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
    4076             : {
    4077             :         struct canditer lci, rci;
    4078      319964 :         bool lhash = false, rhash = false, lcand = false;
    4079      319964 :         bool plhash = false, prhash = false, rcand = false;
    4080             :         bool swap;
    4081             :         bat parent;
    4082             :         double rcost = 0;
    4083             :         double lcost = 0;
    4084             :         gdk_return rc;
    4085             :         lng t0 = 0;
    4086      319964 :         BAT *r2 = NULL;
    4087             : 
    4088      319964 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4089             : 
    4090      319964 :         canditer_init(&lci, l, sl);
    4091      319960 :         canditer_init(&rci, r, sr);
    4092             : 
    4093      319950 :         if ((parent = VIEWtparent(l)) != 0) {
    4094       78778 :                 BAT *b = BBP_cache(parent);
    4095       78778 :                 if (l->hseqbase == b->hseqbase &&
    4096       83241 :                     BATcount(l) == BATcount(b) &&
    4097       26722 :                     ATOMtype(l->ttype) == ATOMtype(b->ttype))
    4098             :                         l = b;
    4099             :         }
    4100      319950 :         if ((parent = VIEWtparent(r)) != 0) {
    4101      194924 :                 BAT *b = BBP_cache(parent);
    4102      194924 :                 if (r->hseqbase == b->hseqbase &&
    4103      326992 :                     BATcount(r) == BATcount(b) &&
    4104      285318 :                     ATOMtype(r->ttype) == ATOMtype(b->ttype))
    4105             :                         r = b;
    4106             :         }
    4107             : 
    4108      319950 :         if (l->ttype == TYPE_msk || mask_cand(l)) {
    4109           0 :                 if ((l = BATunmask(l)) == NULL)
    4110             :                         return GDK_FAIL;
    4111             :         } else {
    4112      319950 :                 BBPfix(l->batCacheid);
    4113             :         }
    4114      319927 :         if (r->ttype == TYPE_msk || mask_cand(r)) {
    4115           1 :                 if ((r = BATunmask(r)) == NULL) {
    4116           0 :                         BBPunfix(l->batCacheid);
    4117           0 :                         return GDK_FAIL;
    4118             :                 }
    4119             :         } else {
    4120      319926 :                 BBPfix(r->batCacheid);
    4121             :         }
    4122             : 
    4123      319901 :         *r1p = NULL;
    4124      319901 :         if (r2p)
    4125      281040 :                 *r2p = NULL;
    4126             : 
    4127      319901 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED) {
    4128             :                 rc = GDK_FAIL;
    4129           0 :                 goto doreturn;
    4130             :         }
    4131             : 
    4132      319915 :         if (lci.ncand == 0 || rci.ncand == 0) {
    4133      238427 :                 TRC_DEBUG(ALGO, "BATjoin(l=" ALGOBATFMT ","
    4134             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    4135             :                           "sr=" ALGOOPTBATFMT ",nil_matches=%d)\n",
    4136             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    4137             :                           ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    4138             :                           nil_matches);
    4139      238427 :                 rc = nomatch(r1p, r2p, l, r, &lci,
    4140             :                              false, false, __func__, t0);
    4141      238441 :                 goto doreturn;
    4142             :         }
    4143             : 
    4144             :         swap = false;
    4145             : 
    4146       81488 :         if (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) || (l->ttype == TYPE_void && is_oid_nil(l->tseqbase))) {
    4147             :                 /* single value to join, use select */
    4148       48849 :                 rc = selectjoin(r1p, r2p, l, r, &lci, &rci,
    4149             :                                 nil_matches, t0, false, __func__);
    4150       48849 :                 goto doreturn;
    4151       32638 :         } else if (rci.ncand == 1 || (BATordered(r) && BATordered_rev(r)) || (r->ttype == TYPE_void && is_oid_nil(r->tseqbase))) {
    4152             :                 /* single value to join, use select */
    4153       13587 :                 rc = selectjoin(r2p ? r2p : &r2, r1p, r, l, &rci, &lci,
    4154             :                                 nil_matches, t0, true, __func__);
    4155        9140 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4156        4694 :                         BBPunfix(r2->batCacheid);
    4157        9141 :                 goto doreturn;
    4158       23496 :         } else if (BATtdense(r) && rci.tpe == cand_dense) {
    4159             :                 /* use special implementation for dense right-hand side */
    4160        2847 :                 rc = mergejoin_void(r1p, r2p, l, r, &lci, &rci,
    4161             :                                     false, false, t0, false, __func__);
    4162        2848 :                 goto doreturn;
    4163       20649 :         } else if (BATtdense(l) && lci.tpe == cand_dense) {
    4164             :                 /* use special implementation for dense right-hand side */
    4165          44 :                 rc = mergejoin_void(r2p ? r2p : &r2, r1p, r, l, &rci, &lci,
    4166             :                                     false, false, t0, true, __func__);
    4167          32 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4168          20 :                         BBPunfix(r2->batCacheid);
    4169          32 :                 goto doreturn;
    4170       30326 :         } else if ((BATordered(l) || BATordered_rev(l)) &&
    4171       12520 :                    (BATordered(r) || BATordered_rev(r))) {
    4172             :                 /* both sorted */
    4173        7005 :                 rc = mergejoin(r1p, r2p, l, r, &lci, &rci,
    4174             :                                nil_matches, false, false, false, false, false, false,
    4175             :                                estimate, t0, false, __func__);
    4176        7005 :                 goto doreturn;
    4177             :         }
    4178             : 
    4179       13612 :         lcost = joincost(l, &rci, &lci, &lhash, &plhash, &lcand);
    4180       13614 :         rcost = joincost(r, &lci, &rci, &rhash, &prhash, &rcand);
    4181             : 
    4182             :         /* if the cost of doing searches on l is lower than the cost
    4183             :          * of doing searches on r, we swap */
    4184             :         swap = (lcost < rcost);
    4185             : 
    4186       27227 :         if ((r->ttype == TYPE_void && r->tvheap != NULL) ||
    4187       27355 :             ((BATordered(r) || BATordered_rev(r)) &&
    4188        5006 :              (lci.ncand * (log2((double) rci.ncand) + 1) < (swap ? lcost : rcost)))) {
    4189             :                 /* r is sorted and it is cheaper to do multiple binary
    4190             :                  * searches than it is to use a hash */
    4191         604 :                 rc = mergejoin(r1p, r2p, l, r, &lci, &rci,
    4192             :                                nil_matches, false, false, false, false, false, false,
    4193             :                                estimate, t0, false, __func__);
    4194       26014 :         } else if ((l->ttype == TYPE_void && l->tvheap != NULL) ||
    4195       26136 :             ((BATordered(l) || BATordered_rev(l)) &&
    4196        2704 :              (rci.ncand * (log2((double) lci.ncand) + 1) < (swap ? lcost : rcost)))) {
    4197             :                 /* l is sorted and it is cheaper to do multiple binary
    4198             :                  * searches than it is to use a hash */
    4199         642 :                 rc = mergejoin(r2p ? r2p : &r2, r1p, r, l, &rci, &lci,
    4200             :                                nil_matches, false, false, false, false, false, false,
    4201             :                                estimate, t0, true, __func__);
    4202         328 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4203          14 :                         BBPunfix(r2->batCacheid);
    4204       12680 :         } else if (swap) {
    4205       11888 :                 rc = hashjoin(r2p ? r2p : &r2, r1p, r, l, &rci, &lci,
    4206             :                               nil_matches, false, false, false, false, false, false,
    4207             :                               estimate, t0, true, lhash, plhash, lcand,
    4208             :                               __func__);
    4209        6268 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4210         648 :                         BBPunfix(r2->batCacheid);
    4211             :         } else {
    4212        6412 :                 rc = hashjoin(r1p, r2p, l, r, &lci, &rci,
    4213             :                               nil_matches, false, false, false, false, false, false,
    4214             :                               estimate, t0, false, rhash, prhash, rcand,
    4215             :                               __func__);
    4216             :         }
    4217      319930 :   doreturn:
    4218      319930 :         BBPunfix(l->batCacheid);
    4219      319933 :         BBPunfix(r->batCacheid);
    4220      319955 :         return rc;
    4221             : }
    4222             : 
    4223             : gdk_return
    4224           0 : BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4225             :             const void *c1, const void *c2, bool linc, bool hinc, BUN estimate)
    4226             : {
    4227             :         lng t0 = 0;
    4228             :         BUN lcnt, rcnt;
    4229             :         struct canditer lci, rci;
    4230             :         const char *lvals, *rvals;
    4231             :         int t;
    4232           0 :         const void *nil = ATOMnilptr(l->ttype);
    4233           0 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    4234             :         const char *vl, *vr;
    4235             :         oid lastr = 0;          /* last value inserted into r2 */
    4236             :         BUN nr;
    4237             :         oid lo, ro;
    4238             :         bool lskipped = false;  /* whether we skipped values in l */
    4239             :         BUN nils = 0;           /* needed for XXX_WITH_CHECK macros */
    4240             : 
    4241           0 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4242             : 
    4243             :         size_t counter = 0;
    4244             :         lng timeoffset = 0;
    4245           0 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    4246           0 :         if (qry_ctx != NULL) {
    4247           0 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    4248             :         }
    4249             : 
    4250             : 
    4251           0 :         MT_thread_setalgorithm(__func__);
    4252           0 :         *r1p = NULL;
    4253           0 :         if (r2p) {
    4254           0 :                 *r2p = NULL;
    4255             :         }
    4256           0 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED)
    4257             :                 return GDK_FAIL;
    4258             : 
    4259           0 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    4260             : 
    4261             :         t = ATOMtype(l->ttype);
    4262           0 :         t = ATOMbasetype(t);
    4263             : 
    4264           0 :         lcnt = canditer_init(&lci, l, sl);
    4265           0 :         rcnt = canditer_init(&rci, r, sr);
    4266             : 
    4267           0 :         if (lcnt == 0 || rcnt == 0)
    4268           0 :                 return nomatch(r1p, r2p, l, r, &lci,
    4269             :                                false, false, __func__, t0);
    4270             : 
    4271           0 :         switch (t) {
    4272           0 :         case TYPE_bte:
    4273           0 :                 if (is_bte_nil(*(const bte *)c1) ||
    4274           0 :                     is_bte_nil(*(const bte *)c2) ||
    4275           0 :                     -*(const bte *)c1 > *(const bte *)c2 ||
    4276           0 :                     ((!hinc || !linc) && -*(const bte *)c1 == *(const bte *)c2))
    4277           0 :                         return nomatch(r1p, r2p, l, r, &lci,
    4278             :                                        false, false, __func__, t0);
    4279             :                 break;
    4280           0 :         case TYPE_sht:
    4281           0 :                 if (is_sht_nil(*(const sht *)c1) ||
    4282           0 :                     is_sht_nil(*(const sht *)c2) ||
    4283           0 :                     -*(const sht *)c1 > *(const sht *)c2 ||
    4284           0 :                     ((!hinc || !linc) && -*(const sht *)c1 == *(const sht *)c2))
    4285           0 :                         return nomatch(r1p, r2p, l, r, &lci,
    4286             :                                        false, false, __func__, t0);
    4287             :                 break;
    4288           0 :         case TYPE_int:
    4289           0 :                 if (is_int_nil(*(const int *)c1) ||
    4290           0 :                     is_int_nil(*(const int *)c2) ||
    4291           0 :                     -*(const int *)c1 > *(const int *)c2 ||
    4292           0 :                     ((!hinc || !linc) && -*(const int *)c1 == *(const int *)c2))
    4293           0 :                         return nomatch(r1p, r2p, l, r, &lci,
    4294             :                                        false, false, __func__, t0);
    4295             :                 break;
    4296           0 :         case TYPE_lng:
    4297           0 :                 if (is_lng_nil(*(const lng *)c1) ||
    4298           0 :                     is_lng_nil(*(const lng *)c2) ||
    4299           0 :                     -*(const lng *)c1 > *(const lng *)c2 ||
    4300           0 :                     ((!hinc || !linc) && -*(const lng *)c1 == *(const lng *)c2))
    4301           0 :                         return nomatch(r1p, r2p, l, r, &lci,
    4302             :                                        false, false, __func__, t0);
    4303             :                 break;
    4304             : #ifdef HAVE_HGE
    4305           0 :         case TYPE_hge:
    4306           0 :                 if (is_hge_nil(*(const hge *)c1) ||
    4307           0 :                     is_hge_nil(*(const hge *)c2) ||
    4308           0 :                     -*(const hge *)c1 > *(const hge *)c2 ||
    4309           0 :                     ((!hinc || !linc) && -*(const hge *)c1 == *(const hge *)c2))
    4310           0 :                         return nomatch(r1p, r2p, l, r, &lci,
    4311             :                                        false, false, __func__, t0);
    4312             :                 break;
    4313             : #endif
    4314           0 :         case TYPE_flt:
    4315           0 :                 if (is_flt_nil(*(const flt *)c1) ||
    4316           0 :                     is_flt_nil(*(const flt *)c2) ||
    4317           0 :                     -*(const flt *)c1 > *(const flt *)c2 ||
    4318           0 :                     ((!hinc || !linc) && -*(const flt *)c1 == *(const flt *)c2))
    4319           0 :                         return nomatch(r1p, r2p, l, r, &lci,
    4320             :                                        false, false, __func__, t0);
    4321             :                 break;
    4322           0 :         case TYPE_dbl:
    4323           0 :                 if (is_dbl_nil(*(const dbl *)c1) ||
    4324           0 :                     is_dbl_nil(*(const dbl *)c2) ||
    4325           0 :                     -*(const dbl *)c1 > *(const dbl *)c2 ||
    4326           0 :                     ((!hinc || !linc) && -*(const dbl *)c1 == *(const dbl *)c2))
    4327           0 :                         return nomatch(r1p, r2p, l, r, &lci,
    4328             :                                        false, false, __func__, t0);
    4329             :                 break;
    4330           0 :         default:
    4331           0 :                 GDKerror("unsupported type\n");
    4332           0 :                 return GDK_FAIL;
    4333             :         }
    4334             : 
    4335           0 :         BUN maxsize = joininitresults(r1p, r2p, lcnt, rcnt, false, false,
    4336             :                                       false, false, false, false, estimate);
    4337           0 :         if (maxsize == BUN_NONE)
    4338             :                 return GDK_FAIL;
    4339           0 :         BAT *r1 = *r1p;
    4340           0 :         BAT *r2 = r2p ? *r2p : NULL;
    4341           0 :         BATiter li = bat_iterator(l);
    4342           0 :         BATiter ri = bat_iterator(r);
    4343             : 
    4344           0 :         lvals = (const char *) li.base;
    4345           0 :         rvals = (const char *) ri.base;
    4346           0 :         assert(!r->tvarsized);
    4347             : 
    4348           0 :         assert(lvals != NULL);
    4349           0 :         assert(rvals != NULL);
    4350             : 
    4351           0 :         r1->tkey = true;
    4352           0 :         r1->tsorted = true;
    4353           0 :         r1->trevsorted = true;
    4354           0 :         if (r2) {
    4355           0 :                 r2->tkey = true;
    4356           0 :                 r2->tsorted = true;
    4357           0 :                 r2->trevsorted = true;
    4358             :         }
    4359             : 
    4360             :         /* nested loop implementation for band join */
    4361           0 :         for (BUN lidx = 0; lidx < lcnt; lidx++) {
    4362           0 :                 GDK_CHECK_TIMEOUT(timeoffset, counter,
    4363             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    4364           0 :                 lo = canditer_next(&lci);
    4365           0 :                 vl = FVALUE(l, lo - l->hseqbase);
    4366           0 :                 if (cmp(vl, nil) == 0)
    4367           0 :                         continue;
    4368             :                 nr = 0;
    4369           0 :                 canditer_reset(&rci);
    4370           0 :                 for (BUN ridx = 0; ridx < rcnt; ridx++) {
    4371           0 :                         ro = canditer_next(&rci);
    4372           0 :                         vr = FVALUE(r, ro - r->hseqbase);
    4373           0 :                         switch (ATOMtype(l->ttype)) {
    4374           0 :                         case TYPE_bte: {
    4375           0 :                                 if (is_bte_nil(*(const bte *) vr))
    4376           0 :                                         continue;
    4377             :                                 sht v1 = (sht) *(const bte *) vr, v2;
    4378             :                                 v2 = v1;
    4379           0 :                                 v1 -= *(const bte *)c1;
    4380           0 :                                 if (*(const bte *)vl <= v1 &&
    4381           0 :                                     (!linc || *(const bte *)vl != v1))
    4382           0 :                                         continue;
    4383           0 :                                 v2 += *(const bte *)c2;
    4384           0 :                                 if (*(const bte *)vl >= v2 &&
    4385           0 :                                     (!hinc || *(const bte *)vl != v2))
    4386           0 :                                         continue;
    4387             :                                 break;
    4388             :                         }
    4389           0 :                         case TYPE_sht: {
    4390           0 :                                 if (is_sht_nil(*(const sht *) vr))
    4391           0 :                                         continue;
    4392           0 :                                 int v1 = (int) *(const sht *) vr, v2;
    4393             :                                 v2 = v1;
    4394           0 :                                 v1 -= *(const sht *)c1;
    4395           0 :                                 if (*(const sht *)vl <= v1 &&
    4396           0 :                                     (!linc || *(const sht *)vl != v1))
    4397           0 :                                         continue;
    4398           0 :                                 v2 += *(const sht *)c2;
    4399           0 :                                 if (*(const sht *)vl >= v2 &&
    4400           0 :                                     (!hinc || *(const sht *)vl != v2))
    4401           0 :                                         continue;
    4402             :                                 break;
    4403             :                         }
    4404           0 :                         case TYPE_int: {
    4405           0 :                                 if (is_int_nil(*(const int *) vr))
    4406           0 :                                         continue;
    4407           0 :                                 lng v1 = (lng) *(const int *) vr, v2;
    4408             :                                 v2 = v1;
    4409           0 :                                 v1 -= *(const int *)c1;
    4410           0 :                                 if (*(const int *)vl <= v1 &&
    4411           0 :                                     (!linc || *(const int *)vl != v1))
    4412           0 :                                         continue;
    4413           0 :                                 v2 += *(const int *)c2;
    4414           0 :                                 if (*(const int *)vl >= v2 &&
    4415           0 :                                     (!hinc || *(const int *)vl != v2))
    4416           0 :                                         continue;
    4417             :                                 break;
    4418             :                         }
    4419             : #ifdef HAVE_HGE
    4420           0 :                         case TYPE_lng: {
    4421           0 :                                 if (is_lng_nil(*(const lng *) vr))
    4422           0 :                                         continue;
    4423           0 :                                 hge v1 = (hge) *(const lng *) vr, v2;
    4424             :                                 v2 = v1;
    4425           0 :                                 v1 -= *(const lng *)c1;
    4426           0 :                                 if (*(const lng *)vl <= v1 &&
    4427           0 :                                     (!linc || *(const lng *)vl != v1))
    4428           0 :                                         continue;
    4429           0 :                                 v2 += *(const lng *)c2;
    4430           0 :                                 if (*(const lng *)vl >= v2 &&
    4431           0 :                                     (!hinc || *(const lng *)vl != v2))
    4432           0 :                                         continue;
    4433             :                                 break;
    4434             :                         }
    4435             : #else
    4436             : #ifdef HAVE___INT128
    4437             :                         case TYPE_lng: {
    4438             :                                 if (is_lng_nil(*(const lng *) vr))
    4439             :                                         continue;
    4440             :                                 __int128 v1 = (__int128) *(const lng *) vr, v2;
    4441             :                                 v2 = v1;
    4442             :                                 v1 -= *(const lng *)c1;
    4443             :                                 if (*(const lng *)vl <= v1 &&
    4444             :                                     (!linc || *(const lng *)vl != v1))
    4445             :                                         continue;
    4446             :                                 v2 += *(const lng *)c2;
    4447             :                                 if (*(const lng *)vl >= v2 &&
    4448             :                                     (!hinc || *(const lng *)vl != v2))
    4449             :                                         continue;
    4450             :                                 break;
    4451             :                         }
    4452             : #else
    4453             :                         case TYPE_lng: {
    4454             :                                 if (is_lng_nil(*(const lng *) vr))
    4455             :                                         continue;
    4456             :                                 lng v1, v2;
    4457             :                                 bool abort_on_error = true;
    4458             :                                 SUB_WITH_CHECK(*(const lng *)vr,
    4459             :                                                *(const lng *)c1,
    4460             :                                                lng, v1,
    4461             :                                                GDK_lng_max,
    4462             :                                                do{if(*(const lng*)c1<0)goto nolmatch;else goto lmatch1;}while(false));
    4463             :                                 if (*(const lng *)vl <= v1 &&
    4464             :                                     (!linc || *(const lng *)vl != v1))
    4465             :                                         continue;
    4466             :                                   lmatch1:
    4467             :                                 ADD_WITH_CHECK(*(const lng *)vr,
    4468             :                                                *(const lng *)c2,
    4469             :                                                lng, v2,
    4470             :                                                GDK_lng_max,
    4471             :                                                do{if(*(const lng*)c2>0)goto nolmatch;else goto lmatch2;}while(false));
    4472             :                                 if (*(const lng *)vl >= v2 &&
    4473             :                                     (!hinc || *(const lng *)vl != v2))
    4474             :                                         continue;
    4475             :                                   lmatch2:
    4476             :                                 break;
    4477             :                                   nolmatch:
    4478             :                                 continue;
    4479             :                         }
    4480             : #endif
    4481             : #endif
    4482             : #ifdef HAVE_HGE
    4483           0 :                         case TYPE_hge: {
    4484           0 :                                 if (is_hge_nil(*(const hge *) vr))
    4485           0 :                                         continue;
    4486             :                                 hge v1, v2;
    4487             :                                 bool abort_on_error = true;
    4488           0 :                                 SUB_WITH_CHECK(*(const hge *)vr,
    4489             :                                                *(const hge *)c1,
    4490             :                                                hge, v1,
    4491             :                                                GDK_hge_max,
    4492             :                                                do{if(*(const hge*)c1<0)goto nohmatch;else goto hmatch1;}while(false));
    4493           0 :                                 if (*(const hge *)vl <= v1 &&
    4494           0 :                                     (!linc || *(const hge *)vl != v1))
    4495           0 :                                         continue;
    4496           0 :                                   hmatch1:
    4497           0 :                                 ADD_WITH_CHECK(*(const hge *)vr,
    4498             :                                                *(const hge *)c2,
    4499             :                                                hge, v2,
    4500             :                                                GDK_hge_max,
    4501             :                                                do{if(*(const hge*)c2>0)goto nohmatch;else goto hmatch2;}while(false));
    4502           0 :                                 if (*(const hge *)vl >= v2 &&
    4503           0 :                                     (!hinc || *(const hge *)vl != v2))
    4504           0 :                                         continue;
    4505           0 :                                   hmatch2:
    4506             :                                 break;
    4507           0 :                                   nohmatch:
    4508           0 :                                 continue;
    4509             :                         }
    4510             : #endif
    4511           0 :                         case TYPE_flt: {
    4512           0 :                                 if (is_flt_nil(*(const flt *) vr))
    4513           0 :                                         continue;
    4514           0 :                                 dbl v1 = (dbl) *(const flt *) vr, v2;
    4515             :                                 v2 = v1;
    4516           0 :                                 v1 -= *(const flt *)c1;
    4517           0 :                                 if (*(const flt *)vl <= v1 &&
    4518           0 :                                     (!linc || *(const flt *)vl != v1))
    4519           0 :                                         continue;
    4520           0 :                                 v2 += *(const flt *)c2;
    4521           0 :                                 if (*(const flt *)vl >= v2 &&
    4522           0 :                                     (!hinc || *(const flt *)vl != v2))
    4523           0 :                                         continue;
    4524             :                                 break;
    4525             :                         }
    4526           0 :                         case TYPE_dbl: {
    4527           0 :                                 if (is_dbl_nil(*(const dbl *) vr))
    4528           0 :                                         continue;
    4529             :                                 dbl v1, v2;
    4530             :                                 bool abort_on_error = true;
    4531           0 :                                 SUB_WITH_CHECK(*(const dbl *)vr,
    4532             :                                                *(const dbl *)c1,
    4533             :                                                dbl, v1,
    4534             :                                                GDK_dbl_max,
    4535             :                                                do{if(*(const dbl*)c1<0)goto nodmatch;else goto dmatch1;}while(false));
    4536           0 :                                 if (*(const dbl *)vl <= v1 &&
    4537           0 :                                     (!linc || *(const dbl *)vl != v1))
    4538           0 :                                         continue;
    4539           0 :                                   dmatch1:
    4540           0 :                                 ADD_WITH_CHECK(*(const dbl *)vr,
    4541             :                                                *(const dbl *)c2,
    4542             :                                                dbl, v2,
    4543             :                                                GDK_dbl_max,
    4544             :                                                do{if(*(const dbl*)c2>0)goto nodmatch;else goto dmatch2;}while(false));
    4545           0 :                                 if (*(const dbl *)vl >= v2 &&
    4546           0 :                                     (!hinc || *(const dbl *)vl != v2))
    4547           0 :                                         continue;
    4548           0 :                                   dmatch2:
    4549             :                                 break;
    4550           0 :                                   nodmatch:
    4551           0 :                                 continue;
    4552             :                         }
    4553             :                         }
    4554           0 :                         if (maybeextend(r1, r2, 1, lci.next, lci.ncand, maxsize) != GDK_SUCCEED)
    4555           0 :                                 goto bailout;
    4556           0 :                         if (BATcount(r1) > 0) {
    4557           0 :                                 if (r2 && lastr + 1 != ro)
    4558           0 :                                         r2->tseqbase = oid_nil;
    4559           0 :                                 if (nr == 0) {
    4560           0 :                                         r1->trevsorted = false;
    4561           0 :                                         if (r2 == NULL) {
    4562             :                                                 /* nothing */
    4563           0 :                                         } else if (lastr > ro) {
    4564           0 :                                                 r2->tsorted = false;
    4565           0 :                                                 r2->tkey = false;
    4566           0 :                                         } else if (lastr < ro) {
    4567           0 :                                                 r2->trevsorted = false;
    4568             :                                         } else {
    4569           0 :                                                 r2->tkey = false;
    4570             :                                         }
    4571             :                                 }
    4572             :                         }
    4573           0 :                         APPEND(r1, lo);
    4574           0 :                         if (r2) {
    4575           0 :                                 APPEND(r2, ro);
    4576             :                         }
    4577             :                         lastr = ro;
    4578           0 :                         nr++;
    4579             :                 }
    4580           0 :                 if (nr > 1) {
    4581           0 :                         r1->tkey = false;
    4582           0 :                         r1->tseqbase = oid_nil;
    4583           0 :                         if (r2) {
    4584           0 :                                 r2->trevsorted = false;
    4585             :                         }
    4586           0 :                 } else if (nr == 0) {
    4587           0 :                         lskipped = BATcount(r1) > 0;
    4588           0 :                 } else if (lskipped) {
    4589           0 :                         r1->tseqbase = oid_nil;
    4590             :                 }
    4591             :         }
    4592             :         /* also set other bits of heap to correct value to indicate size */
    4593           0 :         BATsetcount(r1, BATcount(r1));
    4594           0 :         if (r2) {
    4595           0 :                 BATsetcount(r2, BATcount(r2));
    4596           0 :                 assert(BATcount(r1) == BATcount(r2));
    4597             :         }
    4598           0 :         if (BATcount(r1) > 0) {
    4599           0 :                 if (BATtdense(r1))
    4600           0 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    4601           0 :                 if (r2 && BATtdense(r2))
    4602           0 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    4603             :         } else {
    4604           0 :                 r1->tseqbase = 0;
    4605           0 :                 if (r2) {
    4606           0 :                         r2->tseqbase = 0;
    4607             :                 }
    4608             :         }
    4609           0 :         bat_iterator_end(&li);
    4610           0 :         bat_iterator_end(&ri);
    4611           0 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    4612             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    4613             :                   " -> " ALGOBATFMT "," ALGOOPTBATFMT
    4614             :                   " (" LLFMT "usec)\n",
    4615             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    4616             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    4617             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    4618             :                   GDKusec() - t0);
    4619             :         return GDK_SUCCEED;
    4620             : 
    4621           0 :   bailout:
    4622           0 :         bat_iterator_end(&li);
    4623           0 :         bat_iterator_end(&ri);
    4624           0 :         BBPreclaim(r1);
    4625           0 :         BBPreclaim(r2);
    4626           0 :         return GDK_FAIL;
    4627             : }
    4628             : 
    4629             : gdk_return
    4630         134 : BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh,
    4631             :              BAT *sl, BAT *sr, bool linc, bool hinc, bool anti, bool symmetric,
    4632             :              BUN estimate)
    4633             : {
    4634             :         struct canditer lci, rci;
    4635         134 :         BAT *r1 = NULL, *r2 = NULL;
    4636             :         BUN maxsize;
    4637             :         lng t0 = 0;
    4638             : 
    4639         134 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4640         134 :         *r1p = NULL;
    4641         134 :         if (r2p) {
    4642         111 :                 *r2p = NULL;
    4643             :         }
    4644         134 :         if (joinparamcheck(l, rl, rh, sl, sr, __func__) != GDK_SUCCEED)
    4645             :                 return GDK_FAIL;
    4646         257 :         if (canditer_init(&lci, l, sl) == 0 ||
    4647         123 :             canditer_init(&rci, rl, sr) == 0 ||
    4648         116 :             (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)) ||
    4649         116 :             ((rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) &&
    4650           0 :              (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)))) {
    4651             :                 /* trivial: empty input */
    4652          18 :                 return nomatch(r1p, r2p, l, rl, &lci, false, false,
    4653             :                                __func__, t0);
    4654             :         }
    4655         116 :         if (rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) {
    4656           0 :                 if (!anti)
    4657           0 :                         return nomatch(r1p, r2p, l, rl, &lci, false, false,
    4658             :                                        __func__, t0);
    4659           0 :                 return thetajoin(r1p, r2p, l, rh, sl, sr, MASK_GT, estimate,
    4660             :                                  __func__, t0);
    4661             :         }
    4662         116 :         if (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)) {
    4663           0 :                 if (!anti)
    4664           0 :                         return nomatch(r1p, r2p, l, rl, &lci, false, false,
    4665             :                                        __func__, t0);
    4666           0 :                 return thetajoin(r1p, r2p, l, rl, sl, sr, MASK_LT, estimate,
    4667             :                                  __func__, t0);
    4668             :         }
    4669             : 
    4670         133 :         if ((maxsize = joininitresults(&r1, r2p ? &r2 : NULL, sl ? BATcount(sl) : BATcount(l), sr ? BATcount(sr) : BATcount(rl), false, false, false, false, false, false, estimate)) == BUN_NONE)
    4671             :                 return GDK_FAIL;
    4672         116 :         *r1p = r1;
    4673         116 :         if (r2p) {
    4674          99 :                 *r2p = r2;
    4675             :         }
    4676         116 :         if (maxsize == 0)
    4677             :                 return GDK_SUCCEED;
    4678             : 
    4679             :         /* note, the rangejoin implementation is in gdk_select.c since
    4680             :          * it uses the imprints code there */
    4681         116 :         return rangejoin(r1, r2, l, rl, rh, &lci, &rci, linc, hinc, anti, symmetric, maxsize);
    4682             : }

Generated by: LCOV version 1.14