LCOV - code coverage report
Current view: top level - gdk - gdk_select.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 811 987 82.2 %
Date: 2021-10-13 02:24:04 Functions: 23 24 95.8 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : #include "monetdb_config.h"
      10             : #include "gdk.h"
      11             : #include "gdk_private.h"
      12             : 
      13             : /* auxiliary functions and structs for imprints */
      14             : #include "gdk_imprints.h"
      15             : 
      16             : static inline oid *
      17   166626913 : buninsfix(BAT *bn, oid *a, BUN i, oid v, BUN g, BUN m)
      18             : {
      19   166626913 :         if (i == BATcapacity(bn)) {
      20          62 :                 BATsetcount(bn, i);
      21          62 :                 if (BATextend(bn, MIN(BATcapacity(bn) + g, m)) != GDK_SUCCEED)
      22             :                         return NULL;
      23          62 :                 a = (oid *) Tloc(bn, 0);
      24             :         }
      25   166626913 :         a[i] = v;
      26   166626913 :         return a;
      27             : }
      28             : 
      29             : BAT *
      30     1360741 : virtualize(BAT *bn)
      31             : {
      32             :         /* input must be a valid candidate list or NULL */
      33     1360741 :         if (bn == NULL)
      34             :                 return NULL;
      35     1360740 :         if ((bn->ttype != TYPE_void && bn->ttype != TYPE_oid) || !bn->tkey || !bn->tsorted) {
      36           6 :                 fprintf(stderr, "#bn type %d nil %d key %d sorted %d\n",
      37           6 :                         bn->ttype, is_oid_nil(bn->tseqbase),
      38           6 :                         bn->tkey, bn->tsorted);
      39           0 :                 fflush(stderr);
      40             :         }
      41     1360698 :         assert(((bn->ttype == TYPE_void && !is_oid_nil(bn->tseqbase)) ||
      42             :                 bn->ttype == TYPE_oid) &&
      43             :                bn->tkey && bn->tsorted);
      44     1360698 :         assert(BBP_refs(bn->batCacheid) == 1);
      45     1360698 :         assert(BBP_lrefs(bn->batCacheid) == 0);
      46             :         /* since bn has unique and strictly ascending values, we can
      47             :          * easily check whether the column is dense */
      48     1360698 :         if (bn->ttype == TYPE_oid &&
      49     1078542 :             (BATcount(bn) <= 1 ||
      50      360633 :              * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 ==
      51      360633 :              * (const oid *) Tloc(bn, BUNlast(bn) - 1))) {
      52             :                 /* column is dense, replace by virtual oid */
      53      767712 :                 TRC_DEBUG(ALGO, ALGOBATFMT ",seq=" OIDFMT "\n",
      54             :                           ALGOBATPAR(bn),
      55             :                           BATcount(bn) > 0 ? * (const oid *) Tloc(bn, 0) : 0);
      56      767723 :                 if (BATcount(bn) == 0)
      57      366615 :                         bn->tseqbase = 0;
      58             :                 else
      59      401108 :                         bn->tseqbase = * (const oid *) Tloc(bn, 0);
      60      767985 :                 if (VIEWtparent(bn)) {
      61         262 :                         Heap *h = GDKmalloc(sizeof(Heap));
      62         262 :                         bat bid = VIEWtparent(bn);
      63         262 :                         if (h == NULL) {
      64           0 :                                 BBPunfix(bn->batCacheid);
      65           0 :                                 return NULL;
      66             :                         }
      67         262 :                         *h = *bn->theap;
      68         262 :                         settailname(h, BBP_physical(bn->batCacheid), TYPE_oid, 0);
      69         262 :                         h->parentid = bn->batCacheid;
      70         262 :                         h->base = NULL;
      71         262 :                         ATOMIC_INIT(&h->refs, 1);
      72         262 :                         HEAPdecref(bn->theap, false);
      73         262 :                         bn->theap = h;
      74         262 :                         BBPunshare(bid);
      75         262 :                         BBPunfix(bid);
      76             :                 } else {
      77      767461 :                         HEAPfree(bn->theap, true);
      78             :                 }
      79      767734 :                 bn->theap->storage = bn->theap->newstorage = STORE_MEM;
      80      767734 :                 bn->theap->size = 0;
      81      767734 :                 bn->ttype = TYPE_void;
      82      767734 :                 bn->tvarsized = true;
      83      767734 :                 bn->twidth = 0;
      84      767734 :                 bn->tshift = 0;
      85             :         }
      86             : 
      87             :         return bn;
      88             : }
      89             : 
      90             : #define HASHloop_bound(bi, h, hb, v, lo, hi)            \
      91             :         for (hb = HASHget(h, HASHprobe((h), v));        \
      92             :              hb != BUN_NONE;                            \
      93             :              hb = HASHgetlink(h,hb))                    \
      94             :                 if (hb >= (lo) && hb < (hi) &&            \
      95             :                     (cmp == NULL ||                     \
      96             :                      (*cmp)(v, BUNtail(bi, hb)) == 0))
      97             : 
      98             : static BAT *
      99      813579 : hashselect(BAT *b, BATiter *bi, struct canditer *restrict ci, BAT *bn,
     100             :            const void *tl, BUN maximum, bool havehash, bool phash,
     101             :            const char **algo)
     102             : {
     103             :         BUN i, cnt;
     104             :         oid o, *restrict dst;
     105             :         BUN l, h, d = 0;
     106             :         oid seq;
     107             :         int (*cmp)(const void *, const void *);
     108             : 
     109             :         size_t counter = 0;
     110             :         lng timeoffset = 0;
     111      813579 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     112      813582 :         if (qry_ctx != NULL) {
     113      750791 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
     114             :         }
     115             : 
     116      813582 :         assert(bn->ttype == TYPE_oid);
     117      813582 :         seq = b->hseqbase;
     118      813582 :         l = ci->seq - seq;
     119      813582 :         h = canditer_last(ci) + 1 - seq;
     120             : 
     121      813588 :         *algo = "hashselect";
     122      813588 :         if (phash) {
     123      758349 :                 BAT *b2 = BBP_cache(VIEWtparent(b));
     124      758349 :                 *algo = "hashselect on parent";
     125      758349 :                 TRC_DEBUG(ALGO, ALGOBATFMT
     126             :                           " using parent(" ALGOBATFMT ") "
     127             :                           "for hash\n",
     128             :                           ALGOBATPAR(b),
     129             :                           ALGOBATPAR(b2));
     130      758349 :                 d = b->tbaseoff - b2->tbaseoff;
     131      758349 :                 l += d;
     132      758349 :                 h += d;
     133             :                 b = b2;
     134      758349 :                 bat_iterator_end(bi);
     135      758350 :                 *bi = bat_iterator(b);
     136             :         }
     137             : 
     138      813590 :         if (!havehash) {
     139           2 :                 if (BAThash(b) != GDK_SUCCEED) {
     140           0 :                         BBPreclaim(bn);
     141           0 :                         return NULL;
     142             :                 }
     143           2 :                 MT_rwlock_rdlock(&b->thashlock);
     144           2 :                 if (b->thash == NULL) {
     145           0 :                         GDKerror("Hash destroyed before we could use it\n");
     146           0 :                         BBPreclaim(bn);
     147           0 :                         MT_rwlock_rdunlock(&b->thashlock);
     148           0 :                         return NULL;
     149             :                 }
     150             :         }
     151     1624013 :         switch (ATOMbasetype(b->ttype)) {
     152             :         case TYPE_bte:
     153             :         case TYPE_sht:
     154             :                 cmp = NULL;     /* no need to compare: "hash" is perfect */
     155             :                 break;
     156      806568 :         default:
     157      806568 :                 cmp = ATOMcompare(b->ttype);
     158      806568 :                 break;
     159             :         }
     160      813590 :         dst = (oid *) Tloc(bn, 0);
     161             :         cnt = 0;
     162      813590 :         if (ci->tpe != cand_dense) {
     163    10805131 :                 HASHloop_bound(*bi, b->thash, i, tl, l, h) {
     164     4621163 :                         GDK_CHECK_TIMEOUT(timeoffset, counter,
     165             :                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     166     4621163 :                         o = (oid) (i + seq - d);
     167     4621163 :                         if (canditer_contains(ci, o)) {
     168     2130277 :                                 dst = buninsfix(bn, dst, cnt, o,
     169     2130277 :                                                 maximum - BATcapacity(bn),
     170             :                                                 maximum);
     171     2132421 :                                 if (dst == NULL) {
     172           0 :                                         MT_rwlock_rdunlock(&b->thashlock);
     173           0 :                                         BBPreclaim(bn);
     174           0 :                                         return NULL;
     175             :                                 }
     176     2132421 :                                 cnt++;
     177             :                         }
     178             :                 }
     179             :         } else {
     180    10987204 :                 HASHloop_bound(*bi, b->thash, i, tl, l, h) {
     181     8647997 :                         GDK_CHECK_TIMEOUT(timeoffset, counter,
     182             :                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     183     8647997 :                         o = (oid) (i + seq - d);
     184     8647997 :                         dst = buninsfix(bn, dst, cnt, o,
     185     8647997 :                                         maximum - BATcapacity(bn),
     186             :                                         maximum);
     187     8648590 :                         if (dst == NULL) {
     188           0 :                                 MT_rwlock_rdunlock(&b->thashlock);
     189           0 :                                 BBPreclaim(bn);
     190           0 :                                 return NULL;
     191             :                         }
     192     8648590 :                         cnt++;
     193             :                 }
     194             :         }
     195      813563 :         MT_rwlock_rdunlock(&b->thashlock);
     196      813587 :         BATsetcount(bn, cnt);
     197      813578 :         bn->tkey = true;
     198      813578 :         if (cnt > 1) {
     199             :                 /* hash chains produce results in the order high to
     200             :                  * low, so we just need to reverse */
     201     5505378 :                 for (l = 0, h = BUNlast(bn) - 1; l < h; l++, h--) {
     202     5334938 :                         o = dst[l];
     203     5334938 :                         dst[l] = dst[h];
     204     5334938 :                         dst[h] = o;
     205             :                 }
     206             :         }
     207      813578 :         bn->tsorted = true;
     208      813578 :         bn->trevsorted = bn->batCount <= 1;
     209      813578 :         bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? *dst : oid_nil;
     210      813578 :         return bn;
     211             : 
     212           0 :   bailout:
     213           0 :         BBPreclaim(bn);
     214           0 :         return NULL;
     215             : }
     216             : 
     217             : /* Imprints select code */
     218             : 
     219             : /* inner check, non-dense canditer */
     220             : #define impscheck(TEST,ADD)                                             \
     221             :         do {                                                            \
     222             :                 const oid e = (oid) (i+limit-pr_off+hseq);              \
     223             :                 if (im[icnt] & mask) {                                      \
     224             :                         if ((im[icnt] & ~innermask) == 0) {         \
     225             :                                 while (p < ncand && o < e) {      \
     226             :                                         v = src[o-hseq];                \
     227             :                                         if ((ADD) == NULL) {            \
     228             :                                                 BBPreclaim(bn);         \
     229             :                                                 return BUN_NONE;        \
     230             :                                         }                               \
     231             :                                         cnt++;                          \
     232             :                                         p++;                            \
     233             :                                         o = canditer_next(ci);          \
     234             :                                 }                                       \
     235             :                         } else {                                        \
     236             :                                 while (p < ncand && o < e) {      \
     237             :                                         v = src[o-hseq];                \
     238             :                                         if ((ADD) == NULL) {            \
     239             :                                                 BBPreclaim(bn);         \
     240             :                                                 return BUN_NONE;        \
     241             :                                         }                               \
     242             :                                         cnt += (TEST) != 0;             \
     243             :                                         p++;                            \
     244             :                                         o = canditer_next(ci);          \
     245             :                                 }                                       \
     246             :                         }                                               \
     247             :                 } else {                                                \
     248             :                         while (p < ncand && o < e) {              \
     249             :                                 p++;                                    \
     250             :                                 o = canditer_next(ci);                  \
     251             :                         }                                               \
     252             :                 }                                                       \
     253             :         } while (false)
     254             : 
     255             : /* inner check, dense canditer */
     256             : #define impscheck_dense(TEST,ADD)                                       \
     257             :         do {                                                            \
     258             :                 const oid e = (oid) (i+limit-pr_off+hseq);              \
     259             :                 if (im[icnt] & mask) {                                      \
     260             :                         if ((im[icnt] & ~innermask) == 0) {         \
     261             :                                 while (p < ncand && o < e) {      \
     262             :                                         v = src[o-hseq];                \
     263             :                                         if ((ADD) == NULL) {            \
     264             :                                                 BBPreclaim(bn);         \
     265             :                                                 return BUN_NONE;        \
     266             :                                         }                               \
     267             :                                         cnt++;                          \
     268             :                                         p++;                            \
     269             :                                         o = canditer_next_dense(ci);    \
     270             :                                 }                                       \
     271             :                         } else {                                        \
     272             :                                 while (p < ncand && o < e) {      \
     273             :                                         v = src[o-hseq];                \
     274             :                                         if ((ADD) == NULL) {            \
     275             :                                                 BBPreclaim(bn);         \
     276             :                                                 return BUN_NONE;        \
     277             :                                         }                               \
     278             :                                         cnt += (TEST) != 0;             \
     279             :                                         p++;                            \
     280             :                                         o = canditer_next_dense(ci);    \
     281             :                                 }                                       \
     282             :                         }                                               \
     283             :                 } else {                                                \
     284             :                         BUN skip_sz = MIN(ncand - p, e - o);    \
     285             :                         p += skip_sz;                                   \
     286             :                         o += skip_sz;                                   \
     287             :                         ci->next += skip_sz;                         \
     288             :                 }                                                       \
     289             :         } while (false)
     290             : 
     291             : /* main loop for imprints */
     292             : /*
     293             :  * icnt is the iterator for imprints
     294             :  * dcnt is the iterator for dictionary entries
     295             :  * i    is the iterator for the values in imprints
     296             :  */
     297             : #define impsloop(ISDENSE,TEST,ADD)                                      \
     298             :         do {                                                            \
     299             :                 BUN dcnt, icnt, limit, i;                               \
     300             :                 const cchdc_t *restrict d = (cchdc_t *) imprints->dict;      \
     301             :                 const uint8_t rpp = ATOMelmshift(IMPS_PAGE >> bi->shift); \
     302             :                 o = canditer_next(ci);                                  \
     303             :                 for (i = 0, dcnt = 0, icnt = 0, p = 0;                  \
     304             :                      dcnt < imprints->dictcnt && i <= w - hseq + pr_off && p < ncand; \
     305             :                      dcnt++) {                                          \
     306             :                         GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
     307             :                         limit = ((BUN) d[dcnt].cnt) << rpp;               \
     308             :                         while (i + limit <= o - hseq + pr_off) {     \
     309             :                                 i += limit;                             \
     310             :                                 icnt += d[dcnt].repeat ? 1 : d[dcnt].cnt; \
     311             :                                 dcnt++;                                 \
     312             :                                 limit = ((BUN) d[dcnt].cnt) << rpp;       \
     313             :                         }                                               \
     314             :                         if (!d[dcnt].repeat) {                          \
     315             :                                 const BUN l = icnt + d[dcnt].cnt;       \
     316             :                                 limit = (BUN) 1 << rpp;                   \
     317             :                                 while (i + limit <= o - hseq + pr_off) { \
     318             :                                         icnt++;                         \
     319             :                                         i += limit;                     \
     320             :                                 }                                       \
     321             :                                 for (;                                  \
     322             :                                      icnt < l && i <= w - hseq + pr_off; \
     323             :                                      icnt++) {                          \
     324             :                                         impscheck##ISDENSE(TEST,ADD);   \
     325             :                                         i += limit;                     \
     326             :                                 }                                       \
     327             :                         }                                               \
     328             :                         else {                                          \
     329             :                                 impscheck##ISDENSE(TEST,ADD);           \
     330             :                                 i += limit;                             \
     331             :                                 icnt++;                                 \
     332             :                         }                                               \
     333             :                 }                                                       \
     334             :         } while (false)
     335             : 
     336             : static inline oid *
     337           0 : quickins(oid *dst, BUN cnt, oid o, BAT *bn)
     338             : {
     339             :         (void) bn;
     340           0 :         assert(cnt < BATcapacity(bn));
     341           0 :         dst[cnt] = o;
     342           0 :         return dst;
     343             : }
     344             : 
     345             : /* construct the mask */
     346             : #define impsmask(ISDENSE,TEST,B)                                        \
     347             :         do {                                                            \
     348             :                 const uint##B##_t *restrict im = (uint##B##_t *) imprints->imps; \
     349             :                 uint##B##_t mask = 0, innermask;                        \
     350             :                 const int tpe = ATOMbasetype(b->ttype);                      \
     351             :                 const int lbin = IMPSgetbin(tpe, imprints->bits, imprints->bins, tl); \
     352             :                 const int hbin = IMPSgetbin(tpe, imprints->bits, imprints->bins, th); \
     353             :                 /* note: (1<<n)-1 gives a sequence of n one bits */       \
     354             :                 /* to set bits hbin..lbin inclusive, we would do: */    \
     355             :                 /* mask = ((1 << (hbin + 1)) - 1) - ((1 << lbin) - 1); */ \
     356             :                 /* except ((1 << (hbin + 1)) - 1) is not defined if */    \
     357             :                 /* hbin == sizeof(uint##B##_t)*8 - 1 */                 \
     358             :                 /* the following does work, however */                  \
     359             :                 mask = (((((uint##B##_t) 1 << hbin) - 1) << 1) | 1) - (((uint##B##_t) 1 << lbin) - 1); \
     360             :                 innermask = mask;                                       \
     361             :                 if (vl != minval)                                       \
     362             :                         innermask = IMPSunsetBit(B, innermask, lbin);   \
     363             :                 if (vh != maxval)                                       \
     364             :                         innermask = IMPSunsetBit(B, innermask, hbin);   \
     365             :                 if (anti) {                                             \
     366             :                         uint##B##_t tmp = mask;                         \
     367             :                         mask = ~innermask;                              \
     368             :                         innermask = ~tmp;                               \
     369             :                 }                                                       \
     370             :                 /* if there are nils, we may need to check bin 0 */     \
     371             :                 if (!b->tnonil)                                              \
     372             :                         innermask = IMPSunsetBit(B, innermask, 0);      \
     373             :                                                                         \
     374             :                 if (BATcapacity(bn) < maximum) {                     \
     375             :                         impsloop(ISDENSE, TEST,                         \
     376             :                                  dst = buninsfix(bn, dst, cnt, o,       \
     377             :                                                  (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
     378             :                                                         * (dbl) (ncand-p) * 1.1 + 1024), \
     379             :                                                  maximum));             \
     380             :                 } else {                                                \
     381             :                         impsloop(ISDENSE, TEST, dst = quickins(dst, cnt, o, bn)); \
     382             :                 }                                                       \
     383             :         } while (false)
     384             : 
     385             : #define checkMINMAX(B, TYPE)                                            \
     386             :         do {                                                            \
     387             :                 const BUN *restrict imp_cnt = imprints->stats + 128; \
     388             :                 imp_min = imp_max = nil;                                \
     389             :                 for (BUN ii = 0; ii < B; ii++) {                     \
     390             :                         if (is_##TYPE##_nil(imp_min) && imp_cnt[ii]) {  \
     391             :                                 imp_min = basesrc[imprints->stats[ii]];      \
     392             :                         }                                               \
     393             :                         if (is_##TYPE##_nil(imp_max) && imp_cnt[B-1-ii]) { \
     394             :                                 imp_max = basesrc[imprints->stats[64+B-1-ii]]; \
     395             :                         }                                               \
     396             :                 }                                                       \
     397             :                 assert(!is_##TYPE##_nil(imp_min) &&                     \
     398             :                        !is_##TYPE##_nil(imp_max));                      \
     399             :                 if (anti ?                                              \
     400             :                     vl < imp_min && vh > imp_max :                        \
     401             :                     vl > imp_max || vh < imp_min) {                       \
     402             :                         return 0;                                       \
     403             :                 }                                                       \
     404             :         } while (false)
     405             : 
     406             : /* choose number of bits */
     407             : #define bitswitch(ISDENSE, TEST, TYPE)                                  \
     408             :         do {                                                            \
     409             :                 BUN ncand = ci->ncand;       \
     410             :                 assert(imprints);                                       \
     411             :                 *algo = parent ? "parent imprints select " #TEST " (canditer_next" #ISDENSE ")" : "imprints select " #TEST " (canditer_next" #ISDENSE ")"; \
     412             :                 switch (imprints->bits) {                            \
     413             :                 case 8:                                                 \
     414             :                         checkMINMAX(8, TYPE);                           \
     415             :                         impsmask(ISDENSE,TEST,8);                       \
     416             :                         break;                                          \
     417             :                 case 16:                                                \
     418             :                         checkMINMAX(16, TYPE);                          \
     419             :                         impsmask(ISDENSE,TEST,16);                      \
     420             :                         break;                                          \
     421             :                 case 32:                                                \
     422             :                         checkMINMAX(32, TYPE);                          \
     423             :                         impsmask(ISDENSE,TEST,32);                      \
     424             :                         break;                                          \
     425             :                 case 64:                                                \
     426             :                         checkMINMAX(64, TYPE);                          \
     427             :                         impsmask(ISDENSE,TEST,64);                      \
     428             :                         break;                                          \
     429             :                 default: assert(0); break;                              \
     430             :                 }                                                       \
     431             :         } while (false)
     432             : 
     433             : /* scan select without imprints */
     434             : 
     435             : /* core scan select loop with & without candidates */
     436             : #define scanloop(NAME,canditer_next,TEST)                               \
     437             :         do {                                                            \
     438             :                 BUN ncand = ci->ncand;       \
     439             :                 *algo = "select: " #NAME " " #TEST " (" #canditer_next ")"; \
     440             :                 if (BATcapacity(bn) < maximum) {                     \
     441             :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {        \
     442             :                                 o = canditer_next(ci);                  \
     443             :                                 v = src[o-hseq];                        \
     444             :                                 if (TEST) {                             \
     445             :                                         dst = buninsfix(bn, dst, cnt, o, \
     446             :                                                   (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
     447             :                                                          * (dbl) (ncand-p) * 1.1 + 1024), \
     448             :                                                         maximum);       \
     449             :                                         if (dst == NULL) {              \
     450             :                                                 BBPreclaim(bn);         \
     451             :                                                 return BUN_NONE;        \
     452             :                                         }                               \
     453             :                                         cnt++;                          \
     454             :                                 }                                       \
     455             :                         }                                               \
     456             :                 } else {                                                \
     457             :                         TIMEOUT_LOOP(ncand, timeoffset) {               \
     458             :                                 o = canditer_next(ci);                  \
     459             :                                 v = src[o-hseq];                        \
     460             :                                 assert(cnt < BATcapacity(bn));               \
     461             :                                 dst[cnt] = o;                           \
     462             :                                 cnt += (TEST) != 0;                     \
     463             :                         }                                               \
     464             :                 }                                                       \
     465             :                 TIMEOUT_CHECK(timeoffset, TIMEOUT_HANDLER(BUN_NONE));   \
     466             :         } while (false)
     467             : 
     468             : /* argument list for type-specific core scan select function call */
     469             : #define scanargs                                                        \
     470             :         b, bi, ci, bn, tl, th, li, hi, equi, anti, lval, hval, lnil,    \
     471             :         cnt, b->hseqbase, dst, maximum, imprints, algo
     472             : 
     473             : #define PREVVALUEbte(x) ((x) - 1)
     474             : #define PREVVALUEsht(x) ((x) - 1)
     475             : #define PREVVALUEint(x) ((x) - 1)
     476             : #define PREVVALUElng(x) ((x) - 1)
     477             : #ifdef HAVE_HGE
     478             : #define PREVVALUEhge(x) ((x) - 1)
     479             : #endif
     480             : #define PREVVALUEoid(x) ((x) - 1)
     481             : #define PREVVALUEflt(x) nextafterf((x), -GDK_flt_max)
     482             : #define PREVVALUEdbl(x) nextafter((x), -GDK_dbl_max)
     483             : 
     484             : #define NEXTVALUEbte(x) ((x) + 1)
     485             : #define NEXTVALUEsht(x) ((x) + 1)
     486             : #define NEXTVALUEint(x) ((x) + 1)
     487             : #define NEXTVALUElng(x) ((x) + 1)
     488             : #ifdef HAVE_HGE
     489             : #define NEXTVALUEhge(x) ((x) + 1)
     490             : #endif
     491             : #define NEXTVALUEoid(x) ((x) + 1)
     492             : #define NEXTVALUEflt(x) nextafterf((x), GDK_flt_max)
     493             : #define NEXTVALUEdbl(x) nextafter((x), GDK_dbl_max)
     494             : 
     495             : #define MINVALUEbte     GDK_bte_min
     496             : #define MINVALUEsht     GDK_sht_min
     497             : #define MINVALUEint     GDK_int_min
     498             : #define MINVALUElng     GDK_lng_min
     499             : #ifdef HAVE_HGE
     500             : #define MINVALUEhge     GDK_hge_min
     501             : #endif
     502             : #define MINVALUEoid     GDK_oid_min
     503             : #define MINVALUEflt     GDK_flt_min
     504             : #define MINVALUEdbl     GDK_dbl_min
     505             : 
     506             : #define MAXVALUEbte     GDK_bte_max
     507             : #define MAXVALUEsht     GDK_sht_max
     508             : #define MAXVALUEint     GDK_int_max
     509             : #define MAXVALUElng     GDK_lng_max
     510             : #ifdef HAVE_HGE
     511             : #define MAXVALUEhge     GDK_hge_max
     512             : #endif
     513             : #define MAXVALUEoid     GDK_oid_max
     514             : #define MAXVALUEflt     GDK_flt_max
     515             : #define MAXVALUEdbl     GDK_dbl_max
     516             : 
     517             : #define choose(NAME, ISDENSE, TEST, TYPE)                               \
     518             :         do {                                                            \
     519             :                 if (imprints) {                                         \
     520             :                         bitswitch(ISDENSE, TEST, TYPE);                 \
     521             :                 } else {                                                \
     522             :                         scanloop(NAME, canditer_next##ISDENSE, TEST);   \
     523             :                 }                                                       \
     524             :         } while (false)
     525             : 
     526             : /* definition of type-specific core scan select function */
     527             : #define scanfunc(NAME, TYPE, ISDENSE)                                   \
     528             : static BUN                                                              \
     529             : NAME##_##TYPE(BAT *b, BATiter *bi, struct canditer *restrict ci, BAT *bn, \
     530             :               const TYPE *tl, const TYPE *th, bool li, bool hi,         \
     531             :               bool equi, bool anti, bool lval, bool hval,               \
     532             :               bool lnil, BUN cnt, const oid hseq, oid *restrict dst,    \
     533             :               BUN maximum, Imprints *imprints, const char **algo)       \
     534             : {                                                                       \
     535             :         TYPE vl = *tl;                                                  \
     536             :         TYPE vh = *th;                                                  \
     537             :         TYPE imp_min;                                                   \
     538             :         TYPE imp_max;                                                   \
     539             :         TYPE v;                                                         \
     540             :         const TYPE nil = TYPE##_nil;                                    \
     541             :         const TYPE minval = MINVALUE##TYPE;                             \
     542             :         const TYPE maxval = MAXVALUE##TYPE;                             \
     543             :         const TYPE *src = (const TYPE *) bi->base;                   \
     544             :         const TYPE *basesrc;                                            \
     545             :         oid o, w;                                                       \
     546             :         BUN p;                                                          \
     547             :         BUN pr_off = 0;                                                 \
     548             :         bat parent = 0;                                                 \
     549             :         (void) li;                                                      \
     550             :         (void) hi;                                                      \
     551             :         (void) lval;                                                    \
     552             :         (void) hval;                                                    \
     553             :         assert(li == !anti);                                            \
     554             :         assert(hi == !anti);                                            \
     555             :         assert(lval);                                                   \
     556             :         assert(hval);                                                   \
     557             :         size_t counter = 0;                                             \
     558             :         lng timeoffset = 0;                                             \
     559             :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();                      \
     560             :         if (qry_ctx != NULL) {                                          \
     561             :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0; \
     562             :         }                                                               \
     563             :         if (imprints && imprints->imprints.parentid != b->batCacheid) {   \
     564             :                 parent = imprints->imprints.parentid;                        \
     565             :                 BAT *pbat = BBP_cache(parent);                          \
     566             :                 assert(pbat);                                           \
     567             :                 basesrc = (const TYPE *) Tloc(pbat, 0);                 \
     568             :                 pr_off = (BUN) (src - basesrc);                         \
     569             :         } else {                                                        \
     570             :                 basesrc = src;                                          \
     571             :         }                                                               \
     572             :         w = canditer_last(ci);                                          \
     573             :         if (equi) {                                                     \
     574             :                 assert(imprints == NULL);                               \
     575             :                 if (lnil)                                               \
     576             :                         scanloop(NAME, canditer_next##ISDENSE, is_##TYPE##_nil(v)); \
     577             :                 else                                                    \
     578             :                         scanloop(NAME, canditer_next##ISDENSE, v == vl); \
     579             :         } else if (anti) {                                              \
     580             :                 if (b->tnonil) {                                     \
     581             :                         choose(NAME, ISDENSE, (v <= vl || v >= vh), TYPE); \
     582             :                 } else {                                                \
     583             :                         choose(NAME, ISDENSE, !is_##TYPE##_nil(v) && (v <= vl || v >= vh), TYPE); \
     584             :                 }                                                       \
     585             :         } else if (b->tnonil && vl == minval) {                              \
     586             :                 choose(NAME, ISDENSE, v <= vh, TYPE);                        \
     587             :         } else if (vh == maxval) {                                      \
     588             :                 choose(NAME, ISDENSE, v >= vl, TYPE);                        \
     589             :         } else {                                                        \
     590             :                 choose(NAME, ISDENSE, v >= vl && v <= vh, TYPE);  \
     591             :         }                                                               \
     592             :         return cnt;                                                     \
     593             :   bailout:                                                              \
     594             :         BBPreclaim(bn);                                                 \
     595             :         return BUN_NONE;                                                \
     596             : }
     597             : 
     598             : static BUN
     599        3395 : fullscan_any(BAT *b, BATiter *bi, struct canditer *restrict ci, BAT *bn,
     600             :              const void *tl, const void *th,
     601             :              bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
     602             :              bool lnil, BUN cnt, const oid hseq, oid *restrict dst,
     603             :              BUN maximum, Imprints *imprints, const char **algo)
     604             : {
     605             :         const void *v;
     606        3395 :         const void *restrict nil = ATOMnilptr(b->ttype);
     607        3395 :         int (*cmp)(const void *, const void *) = ATOMcompare(b->ttype);
     608             :         oid o;
     609        3395 :         BUN p, ncand = ci->ncand;
     610             :         int c;
     611             : 
     612             :         (void) maximum;
     613             :         (void) imprints;
     614             :         (void) lnil;
     615             :         lng timeoffset = 0;
     616        3395 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     617        3395 :         if (qry_ctx != NULL) {
     618        3395 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
     619             :         }
     620             : 
     621        3395 :         if (equi) {
     622         137 :                 *algo = "select: fullscan equi";
     623         137 :                 if (ci->tpe == cand_dense) {
     624     2037253 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     625     2036917 :                                 o = canditer_next_dense(ci);
     626     2036917 :                                 v = BUNtail(*bi, o-hseq);
     627     2036917 :                                 if ((*cmp)(tl, v) == 0) {
     628       22536 :                                         dst = buninsfix(bn, dst, cnt, o,
     629       22536 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     630       22536 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     631             :                                                         maximum);
     632       22536 :                                         if (dst == NULL) {
     633           0 :                                                 BBPreclaim(bn);
     634           0 :                                                 return BUN_NONE;
     635             :                                         }
     636       22536 :                                         cnt++;
     637             :                                 }
     638             :                         }
     639             :                 } else {
     640       23065 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     641       22894 :                                 o = canditer_next(ci);
     642       22894 :                                 v = BUNtail(*bi, o-hseq);
     643       22894 :                                 if ((*cmp)(tl, v) == 0) {
     644       20328 :                                         dst = buninsfix(bn, dst, cnt, o,
     645       20328 :                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     646       20328 :                                                         * (dbl) (ncand-p) * 1.1 + 1024),
     647             :                                                 maximum);
     648       20328 :                                         if (dst == NULL) {
     649           0 :                                                 BBPreclaim(bn);
     650           0 :                                                 return BUN_NONE;
     651             :                                         }
     652       20328 :                                         cnt++;
     653             :                                 }
     654             :                         }
     655             :                 }
     656        3258 :         } else if (anti) {
     657         490 :                 *algo = "select: fullscan anti";
     658         490 :                 if (ci->tpe == cand_dense) {
     659        5907 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     660        4605 :                                 o = canditer_next_dense(ci);
     661        4605 :                                 v = BUNtail(*bi, o-hseq);
     662        4605 :                                 if ((nil == NULL || (*cmp)(v, nil) != 0) &&
     663        4605 :                                         ((lval &&
     664        4605 :                                         ((c = (*cmp)(tl, v)) > 0 ||
     665        1758 :                                         (!li && c == 0))) ||
     666        1757 :                                         (hval &&
     667        1758 :                                         ((c = (*cmp)(th, v)) < 0 ||
     668         463 :                                         (!hi && c == 0))))) {
     669        4142 :                                         dst = buninsfix(bn, dst, cnt, o,
     670        4142 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     671        4142 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     672             :                                                         maximum);
     673        4142 :                                         if (dst == NULL) {
     674           0 :                                                 BBPreclaim(bn);
     675           0 :                                                 return BUN_NONE;
     676             :                                         }
     677        4142 :                                         cnt++;
     678             :                                 }
     679             :                         }
     680             :                 } else {
     681       10581 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     682       10413 :                                 o = canditer_next(ci);
     683       10413 :                                 v = BUNtail(*bi, o-hseq);
     684       10413 :                                 if ((nil == NULL || (*cmp)(v, nil) != 0) &&
     685       10413 :                                         ((lval &&
     686       10413 :                                         ((c = (*cmp)(tl, v)) > 0 ||
     687        3825 :                                         (!li && c == 0))) ||
     688        3825 :                                         (hval &&
     689        3825 :                                         ((c = (*cmp)(th, v)) < 0 ||
     690          48 :                                         (!hi && c == 0))))) {
     691       10365 :                                         dst = buninsfix(bn, dst, cnt, o,
     692       10365 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     693       10365 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     694             :                                                         maximum);
     695       10365 :                                         if (dst == NULL) {
     696           0 :                                                 BBPreclaim(bn);
     697           0 :                                                 return BUN_NONE;
     698             :                                         }
     699       10365 :                                         cnt++;
     700             :                                 }
     701             :                         }
     702             :                 }
     703             :         } else {
     704        2768 :                 *algo = "select: fullscan range";
     705        2768 :                 if (ci->tpe == cand_dense) {
     706      381120 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     707      373263 :                                 o = canditer_next_dense(ci);
     708      373263 :                                 v = BUNtail(*bi, o-hseq);
     709      373263 :                                 if ((nil == NULL || (*cmp)(v, nil) != 0) &&
     710        4039 :                                         ((!lval ||
     711        4039 :                                         (c = cmp(tl, v)) < 0 ||
     712      364308 :                                         (li && c == 0)) &&
     713        3918 :                                         (!hval ||
     714        3918 :                                         (c = cmp(th, v)) > 0 ||
     715        3662 :                                         (hi && c == 0)))) {
     716      359673 :                                         dst = buninsfix(bn, dst, cnt, o,
     717      359673 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     718      359673 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     719             :                                                         maximum);
     720      359649 :                                         if (dst == NULL) {
     721           0 :                                                 BBPreclaim(bn);
     722           0 :                                                 return BUN_NONE;
     723             :                                         }
     724      359649 :                                         cnt++;
     725             :                                 }
     726             :                         }
     727             :                 } else {
     728     1443631 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     729     1443091 :                                 o = canditer_next(ci);
     730     1443331 :                                 v = BUNtail(*bi, o-hseq);
     731     1443331 :                                 if ((nil == NULL || (*cmp)(v, nil) != 0) &&
     732         226 :                                         ((!lval ||
     733         226 :                                         (c = cmp(tl, v)) < 0 ||
     734     1442844 :                                         (li && c == 0)) &&
     735           0 :                                         (!hval ||
     736           0 :                                         (c = cmp(th, v)) > 0 ||
     737           0 :                                         (hi && c == 0)))) {
     738     1442776 :                                         dst = buninsfix(bn, dst, cnt, o,
     739     1442776 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     740     1442776 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     741             :                                                         maximum);
     742     1442800 :                                         if (dst == NULL) {
     743           0 :                                                 BBPreclaim(bn);
     744           0 :                                                 return BUN_NONE;
     745             :                                         }
     746     1442800 :                                         cnt++;
     747             :                                 }
     748             :                         }
     749             :                 }
     750             :         }
     751        3394 :         TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     752             :         return cnt;
     753             :   bailout:
     754           0 :         BBPreclaim(bn);
     755           0 :         return BUN_NONE;
     756             : }
     757             : 
     758             : static BUN
     759       17964 : fullscan_str(BAT *b, BATiter *bi, struct canditer *restrict ci, BAT *bn,
     760             :              const char *tl, const char *th,
     761             :              bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
     762             :              bool lnil, BUN cnt, const oid hseq, oid *restrict dst,
     763             :              BUN maximum, Imprints *imprints, const char **algo)
     764             : {
     765             :         var_t pos;
     766       17964 :         BUN p, ncand = ci->ncand;
     767             :         oid o;
     768             :         lng timeoffset = 0;
     769       17964 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     770       17964 :         if (qry_ctx != NULL) {
     771       16561 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
     772             :         }
     773             : 
     774       17964 :         if (!equi || !GDK_ELIMDOUBLES(b->tvheap))
     775        3358 :                 return fullscan_any(b, bi, ci, bn, tl, th, li, hi, equi, anti,
     776             :                                     lval, hval, lnil, cnt, hseq, dst,
     777             :                                     maximum, imprints, algo);
     778       14606 :         if ((pos = strLocate(b->tvheap, tl)) == (var_t) -2) {
     779        2016 :                 *algo = "select: fullscan equi strelim (nomatch)";
     780        2016 :                 return 0;
     781             :         }
     782       12590 :         if (pos == (var_t) -1) {
     783           0 :                 BBPreclaim(bn);
     784           0 :                 return BUN_NONE;
     785             :         }
     786       12590 :         *algo = "select: fullscan equi strelim";
     787       12590 :         assert(pos >= GDK_VAROFFSET);
     788       12590 :         switch (bi->width) {
     789        1473 :         case 1: {
     790        1473 :                 const unsigned char *ptr = (const unsigned char *) bi->base;
     791        1473 :                 pos -= GDK_VAROFFSET;
     792        1473 :                 if (ci->tpe == cand_dense) {
     793    12481203 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     794    12477301 :                                 o = canditer_next_dense(ci);
     795    12477301 :                                 if (ptr[o - hseq] == pos) {
     796     4014333 :                                         dst = buninsfix(bn, dst, cnt, o,
     797     4014333 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     798     4014333 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     799             :                                                         maximum);
     800     4014333 :                                         if (dst == NULL) {
     801           0 :                                                 BBPreclaim(bn);
     802           0 :                                                 return BUN_NONE;
     803             :                                         }
     804     4014333 :                                         cnt++;
     805             :                                 }
     806             :                         }
     807             :                 } else {
     808     7083735 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     809     7081840 :                                 o = canditer_next(ci);
     810     7074313 :                                 if (ptr[o - hseq] == pos) {
     811     3009289 :                                         dst = buninsfix(bn, dst, cnt, o,
     812     3009289 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     813     3009289 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     814             :                                                         maximum);
     815     3016815 :                                         if (dst == NULL) {
     816           0 :                                                 BBPreclaim(bn);
     817           0 :                                                 return BUN_NONE;
     818             :                                         }
     819     3016815 :                                         cnt++;
     820             :                                 }
     821             :                         }
     822             :                 }
     823             :                 break;
     824             :         }
     825       11116 :         case 2: {
     826       11116 :                 const unsigned short *ptr = (const unsigned short *) bi->base;
     827       11116 :                 pos -= GDK_VAROFFSET;
     828       11116 :                 if (ci->tpe == cand_dense) {
     829      526350 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     830      508647 :                                 o = canditer_next_dense(ci);
     831      508647 :                                 if (ptr[o - hseq] == pos) {
     832       11764 :                                         dst = buninsfix(bn, dst, cnt, o,
     833       11764 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     834       11764 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     835             :                                                         maximum);
     836       11764 :                                         if (dst == NULL) {
     837           0 :                                                 BBPreclaim(bn);
     838           0 :                                                 return BUN_NONE;
     839             :                                         }
     840       11764 :                                         cnt++;
     841             :                                 }
     842             :                         }
     843             :                 } else {
     844      595520 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     845      579878 :                                 o = canditer_next(ci);
     846      579877 :                                 if (ptr[o - hseq] == pos) {
     847        6492 :                                         dst = buninsfix(bn, dst, cnt, o,
     848        6492 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     849        6492 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     850             :                                                         maximum);
     851        6490 :                                         if (dst == NULL) {
     852           0 :                                                 BBPreclaim(bn);
     853           0 :                                                 return BUN_NONE;
     854             :                                         }
     855        6490 :                                         cnt++;
     856             :                                 }
     857             :                         }
     858             :                 }
     859             :                 break;
     860             :         }
     861             : #if SIZEOF_VAR_T == 8
     862           0 :         case 4: {
     863           0 :                 const unsigned int *ptr = (const unsigned int *) bi->base;
     864           0 :                 if (ci->tpe == cand_dense) {
     865           0 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     866           0 :                                 o = canditer_next_dense(ci);
     867           0 :                                 if (ptr[o - hseq] == pos) {
     868           0 :                                         dst = buninsfix(bn, dst, cnt, o,
     869           0 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     870           0 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     871             :                                                         maximum);
     872           0 :                                         if (dst == NULL) {
     873           0 :                                                 BBPreclaim(bn);
     874           0 :                                                 return BUN_NONE;
     875             :                                         }
     876           0 :                                         cnt++;
     877             :                                 }
     878             :                         }
     879             :                 } else {
     880           0 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     881           0 :                                 o = canditer_next(ci);
     882           0 :                                 if (ptr[o - hseq] == pos) {
     883           0 :                                         dst = buninsfix(bn, dst, cnt, o,
     884           0 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     885           0 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     886             :                                                         maximum);
     887           0 :                                         if (dst == NULL) {
     888           0 :                                                 BBPreclaim(bn);
     889           0 :                                                 return BUN_NONE;
     890             :                                         }
     891           0 :                                         cnt++;
     892             :                                 }
     893             :                         }
     894             :                 }
     895             :                 break;
     896             :         }
     897             : #endif
     898           1 :         default: {
     899           1 :                 const var_t *ptr = (const var_t *) bi->base;
     900           1 :                 if (ci->tpe == cand_dense) {
     901          13 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     902          10 :                                 o = canditer_next_dense(ci);
     903          10 :                                 if (ptr[o - hseq] == pos) {
     904           1 :                                         dst = buninsfix(bn, dst, cnt, o,
     905           1 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     906           1 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     907             :                                                         maximum);
     908           1 :                                         if (dst == NULL) {
     909           0 :                                                 BBPreclaim(bn);
     910           0 :                                                 return BUN_NONE;
     911             :                                         }
     912           1 :                                         cnt++;
     913             :                                 }
     914             :                         }
     915             :                 } else {
     916           0 :                         TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
     917           0 :                                 o = canditer_next(ci);
     918           0 :                                 if (ptr[o - hseq] == pos) {
     919           0 :                                         dst = buninsfix(bn, dst, cnt, o,
     920           0 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     921           0 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     922             :                                                         maximum);
     923           0 :                                         if (dst == NULL) {
     924           0 :                                                 BBPreclaim(bn);
     925           0 :                                                 return BUN_NONE;
     926             :                                         }
     927           0 :                                         cnt++;
     928             :                                 }
     929             :                         }
     930             :                 }
     931             :                 break;
     932             :         }
     933             :         }
     934       12586 :         TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     935             :         return cnt;
     936             :   bailout:
     937           0 :         BBPreclaim(bn);
     938           0 :         return BUN_NONE;
     939             : }
     940             : 
     941             : /* scan select type switch */
     942             : #ifdef HAVE_HGE
     943             : #define scanfunc_hge(NAME, ISDENSE)             \
     944             :         scanfunc(NAME, hge, ISDENSE)
     945             : #else
     946             : #define scanfunc_hge(NAME, ISDENSE)
     947             : #endif
     948             : #define scan_sel(NAME, ISDENSE)                 \
     949             :         scanfunc(NAME, bte, ISDENSE)            \
     950             :         scanfunc(NAME, sht, ISDENSE)            \
     951             :         scanfunc(NAME, int, ISDENSE)            \
     952             :         scanfunc(NAME, flt, ISDENSE)            \
     953             :         scanfunc(NAME, dbl, ISDENSE)            \
     954             :         scanfunc(NAME, lng, ISDENSE)            \
     955             :         scanfunc_hge(NAME, ISDENSE)
     956             : 
     957             : /* scan/imprints select */
     958    13288324 : scan_sel(fullscan, )
     959   828197125 : scan_sel(densescan, _dense)
     960             : 
     961             : 
     962             : static BAT *
     963      140152 : scanselect(BAT *b, BATiter *bi, struct canditer *restrict ci, BAT *bn,
     964             :            const void *tl, const void *th,
     965             :            bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
     966             :            bool lnil, BUN maximum, Imprints *imprints, const char **algo)
     967             : {
     968             : #ifndef NDEBUG
     969             :         int (*cmp)(const void *, const void *);
     970             : #endif
     971             :         int t;
     972             :         BUN cnt = 0;
     973             :         oid *restrict dst;
     974             : 
     975      140152 :         assert(b != NULL);
     976      140152 :         assert(bn != NULL);
     977      140152 :         assert(bn->ttype == TYPE_oid);
     978      140152 :         assert(!lval || tl != NULL);
     979      140152 :         assert(!hval || th != NULL);
     980      140152 :         assert(!equi || (li && hi && !anti));
     981      140152 :         assert(!anti || lval || hval);
     982      140152 :         assert(b->ttype != TYPE_void || equi || b->tnonil);
     983             : 
     984             : #ifndef NDEBUG
     985      140152 :         cmp = ATOMcompare(b->ttype);
     986             : #endif
     987             : 
     988      140152 :         assert(!lval || !hval || (*cmp)(tl, th) <= 0);
     989             : 
     990      140143 :         dst = (oid *) Tloc(bn, 0);
     991             : 
     992      140143 :         t = ATOMbasetype(b->ttype);
     993             : 
     994             :         /* call type-specific core scan select function */
     995      140143 :         switch (t) {
     996       39924 :         case TYPE_bte:
     997       39924 :                 if (ci->tpe == cand_dense)
     998       37795 :                         cnt = densescan_bte(scanargs);
     999             :                 else
    1000        2129 :                         cnt = fullscan_bte(scanargs);
    1001             :                 break;
    1002        3617 :         case TYPE_sht:
    1003        3617 :                 if (ci->tpe == cand_dense)
    1004        3025 :                         cnt = densescan_sht(scanargs);
    1005             :                 else
    1006         592 :                         cnt = fullscan_sht(scanargs);
    1007             :                 break;
    1008       73166 :         case TYPE_int:
    1009       73166 :                 if (ci->tpe == cand_dense)
    1010       46256 :                         cnt = densescan_int(scanargs);
    1011             :                 else
    1012       26910 :                         cnt = fullscan_int(scanargs);
    1013             :                 break;
    1014          24 :         case TYPE_flt:
    1015          24 :                 if (ci->tpe == cand_dense)
    1016          23 :                         cnt = densescan_flt(scanargs);
    1017             :                 else
    1018           1 :                         cnt = fullscan_flt(scanargs);
    1019             :                 break;
    1020         115 :         case TYPE_dbl:
    1021         115 :                 if (ci->tpe == cand_dense)
    1022         114 :                         cnt = densescan_dbl(scanargs);
    1023             :                 else
    1024           1 :                         cnt = fullscan_dbl(scanargs);
    1025             :                 break;
    1026        5080 :         case TYPE_lng:
    1027        5080 :                 if (ci->tpe == cand_dense)
    1028        5040 :                         cnt = densescan_lng(scanargs);
    1029             :                 else
    1030          40 :                         cnt = fullscan_lng(scanargs);
    1031             :                 break;
    1032             : #ifdef HAVE_HGE
    1033         218 :         case TYPE_hge:
    1034         218 :                 if (ci->tpe == cand_dense)
    1035         209 :                         cnt = densescan_hge(scanargs);
    1036             :                 else
    1037           9 :                         cnt = fullscan_hge(scanargs);
    1038             :                 break;
    1039             : #endif
    1040       17963 :         case TYPE_str:
    1041       17963 :                 cnt = fullscan_str(scanargs);
    1042       17960 :                 break;
    1043          36 :         default:
    1044          36 :                 cnt = fullscan_any(scanargs);
    1045          36 :                 break;
    1046             :         }
    1047      140130 :         if (cnt == BUN_NONE) {
    1048             :                 return NULL;
    1049             :         }
    1050      140129 :         assert(bn->batCapacity >= cnt);
    1051             : 
    1052      140129 :         BATsetcount(bn, cnt);
    1053      140142 :         bn->tsorted = true;
    1054      140142 :         bn->trevsorted = bn->batCount <= 1;
    1055      140142 :         bn->tkey = true;
    1056      140142 :         bn->tseqbase = cnt == 0 ? 0 : cnt == 1 || cnt == b->batCount ? b->hseqbase : oid_nil;
    1057             : 
    1058      140142 :         return bn;
    1059             : }
    1060             : 
    1061             : /* Normalize the variables li, hi, lval, hval, possibly changing anti
    1062             :  * in the process.  This works for all (and only) numeric types.
    1063             :  *
    1064             :  * Note that the expression x < v is equivalent to x <= v' where v' is
    1065             :  * the next smaller value in the domain of v (similarly for x > v).
    1066             :  * Also note that for floating point numbers there actually is such a
    1067             :  * value.  In fact, there is a function in standard C that calculates
    1068             :  * that value.
    1069             :  *
    1070             :  * The result of this macro is:
    1071             :  * li == !anti, hi == !anti, lval == true, hval == true
    1072             :  * This means that all ranges that we check for are closed ranges.  If
    1073             :  * a range is one-sided, we fill in the minimum resp. maximum value in
    1074             :  * the domain so that we create a closed range. */
    1075             : #define NORMALIZE(TYPE)                                                 \
    1076             :         do {                                                            \
    1077             :                 if (anti && li) {                                       \
    1078             :                         /* -inf < x < vl === -inf < x <= vl-1 */    \
    1079             :                         if (*(TYPE*)tl == MINVALUE##TYPE) {             \
    1080             :                                 /* -inf < x < MIN || *th <[=] x < +inf */ \
    1081             :                                 /* degenerates into half range */       \
    1082             :                                 /* *th <[=] x < +inf */                   \
    1083             :                                 anti = false;                           \
    1084             :                                 tl = th;                                \
    1085             :                                 li = !hi;                               \
    1086             :                                 hval = false;                           \
    1087             :                                 /* further dealt with below */          \
    1088             :                         } else {                                        \
    1089             :                                 vl.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)tl); \
    1090             :                                 tl = &vl.v_##TYPE;                  \
    1091             :                                 li = false;                             \
    1092             :                         }                                               \
    1093             :                 }                                                       \
    1094             :                 if (anti && hi) {                                       \
    1095             :                         /* vl < x < +inf === vl+1 <= x < +inf */    \
    1096             :                         if (*(TYPE*)th == MAXVALUE##TYPE) {             \
    1097             :                                 /* -inf < x <[=] *tl || MAX > x > +inf */ \
    1098             :                                 /* degenerates into half range */       \
    1099             :                                 /* -inf < x <[=] *tl */                   \
    1100             :                                 anti = false;                           \
    1101             :                                 if (tl == &vl.v_##TYPE) {           \
    1102             :                                         vh.v_##TYPE = vl.v_##TYPE;      \
    1103             :                                         th = &vh.v_##TYPE;          \
    1104             :                                 } else {                                \
    1105             :                                         th = tl;                        \
    1106             :                                 }                                       \
    1107             :                                 hi = !li;                               \
    1108             :                                 lval = false;                           \
    1109             :                                 /* further dealt with below */          \
    1110             :                         } else {                                        \
    1111             :                                 vh.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)th); \
    1112             :                                 th = &vh.v_##TYPE;                  \
    1113             :                                 hi = false;                             \
    1114             :                         }                                               \
    1115             :                 }                                                       \
    1116             :                 if (!anti) {                                            \
    1117             :                         if (lval) {                                     \
    1118             :                                 /* range bounded on left */             \
    1119             :                                 if (!li) {                              \
    1120             :                                         /* open range on left */        \
    1121             :                                         if (*(TYPE*)tl == MAXVALUE##TYPE) \
    1122             :                                                 return BATdense(0, 0, 0); \
    1123             :                                         /* vl < x === vl+1 <= x */        \
    1124             :                                         vl.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)tl); \
    1125             :                                         li = true;                      \
    1126             :                                         tl = &vl.v_##TYPE;          \
    1127             :                                 }                                       \
    1128             :                         } else {                                        \
    1129             :                                 /* -inf, i.e. smallest value */         \
    1130             :                                 vl.v_##TYPE = MINVALUE##TYPE;           \
    1131             :                                 li = true;                              \
    1132             :                                 tl = &vl.v_##TYPE;                  \
    1133             :                                 lval = true;                            \
    1134             :                         }                                               \
    1135             :                         if (hval) {                                     \
    1136             :                                 /* range bounded on right */            \
    1137             :                                 if (!hi) {                              \
    1138             :                                         /* open range on right */       \
    1139             :                                         if (*(TYPE*)th == MINVALUE##TYPE) \
    1140             :                                                 return BATdense(0, 0, 0); \
    1141             :                                         /* x < vh === x <= vh-1 */        \
    1142             :                                         vh.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)th); \
    1143             :                                         hi = true;                      \
    1144             :                                         th = &vh.v_##TYPE;          \
    1145             :                                 }                                       \
    1146             :                         } else {                                        \
    1147             :                                 /* +inf, i.e. largest value */          \
    1148             :                                 vh.v_##TYPE = MAXVALUE##TYPE;           \
    1149             :                                 hi = true;                              \
    1150             :                                 th = &vh.v_##TYPE;                  \
    1151             :                                 hval = true;                            \
    1152             :                         }                                               \
    1153             :                         if (*(TYPE*)tl > *(TYPE*)th)                 \
    1154             :                                 return BATdense(0, 0, 0);               \
    1155             :                 }                                                       \
    1156             :                 assert(lval);                                           \
    1157             :                 assert(hval);                                           \
    1158             :                 assert(li != anti);                                     \
    1159             :                 assert(hi != anti);                                     \
    1160             :                 /* if anti is set, we can now check */                  \
    1161             :                 /* (x <= *tl || x >= *th) && x != nil */          \
    1162             :                 /* if equi==true, the check is x != *tl && x != nil */  \
    1163             :                 /* if anti is not set, we can check just */             \
    1164             :                 /* *tl <= x && x <= *th */                                \
    1165             :                 /* if equi==true, the check is x == *tl */              \
    1166             :                 /* note that this includes the check for != nil */      \
    1167             :                 /* in the case where equi==true, the check is x == *tl */ \
    1168             :         } while (false)
    1169             : 
    1170             : /* generic range select
    1171             :  *
    1172             :  * Return a BAT with the OID values of b for qualifying tuples.  The
    1173             :  * return BAT is sorted (i.e. in the same order as the input BAT).
    1174             :  *
    1175             :  * If s is non-NULL, it is a list of candidates.  s must be sorted.
    1176             :  *
    1177             :  * tl may not be NULL, li, hi, and anti must be either 0 or 1.
    1178             :  *
    1179             :  * If th is NULL, hi is ignored.
    1180             :  *
    1181             :  * If anti is 0, qualifying tuples are those whose value is between tl
    1182             :  * and th (as in x >[=] tl && x <[=] th, where equality depends on li
    1183             :  * and hi--so if tl > th, nothing will be returned).  If li or hi is
    1184             :  * 1, the respective boundary is inclusive, otherwise exclusive.  If
    1185             :  * th is NULL it is taken to be equal to tl, turning this into an
    1186             :  * equi- or point-select.  Note that for a point select to return
    1187             :  * anything, li (and hi if th was not NULL) must be 1.  There is a
    1188             :  * special case if tl is nil and th is NULL.  This is the only way to
    1189             :  * select for nil values.
    1190             :  *
    1191             :  * If anti is 1, the result is the complement of what the result would
    1192             :  * be if anti were 0, except that nils are filtered out.
    1193             :  *
    1194             :  * In brief:
    1195             :  * - if tl==nil and th==NULL and anti==0, return all nils (only way to
    1196             :  *   get nils);
    1197             :  * - it tl==nil and th==nil, return all but nils;
    1198             :  * - if tl==nil and th!=NULL, no lower bound;
    1199             :  * - if th==NULL or tl==th, point (equi) select;
    1200             :  * - if th==nil, no upper bound
    1201             :  *
    1202             :  * A complete breakdown of the various arguments follows.  Here, v, v1
    1203             :  * and v2 are values from the appropriate domain, and
    1204             :  * v != nil, v1 != nil, v2 != nil, v1 < v2.
    1205             :  *      tl      th      li      hi      anti    result list of OIDs for values
    1206             :  *      -----------------------------------------------------------------
    1207             :  *      nil     NULL    true    ignored false   x == nil (only way to get nil)
    1208             :  *      nil     NULL    false   ignored false   NOTHING
    1209             :  *      nil     NULL    ignored ignored true    x != nil
    1210             :  *      nil     nil     ignored ignored false   x != nil
    1211             :  *      nil     nil     ignored ignored true    NOTHING
    1212             :  *      nil     v       ignored false   false   x < v
    1213             :  *      nil     v       ignored true    false   x <= v
    1214             :  *      nil     v       ignored false   true    x >= v
    1215             :  *      nil     v       ignored true    true    x > v
    1216             :  *      v       nil     false   ignored false   x > v
    1217             :  *      v       nil     true    ignored false   x >= v
    1218             :  *      v       nil     false   ignored true    x <= v
    1219             :  *      v       nil     true    ignored true    x < v
    1220             :  *      v       NULL    false   ignored false   NOTHING
    1221             :  *      v       NULL    true    ignored false   x == v
    1222             :  *      v       NULL    false   ignored true    x != nil
    1223             :  *      v       NULL    true    ignored true    x != v
    1224             :  *      v       v       false   false   false   NOTHING
    1225             :  *      v       v       true    false   false   NOTHING
    1226             :  *      v       v       false   true    false   NOTHING
    1227             :  *      v       v       true    true    false   x == v
    1228             :  *      v       v       false   false   true    x != nil
    1229             :  *      v       v       true    false   true    x != nil
    1230             :  *      v       v       false   true    true    x != nil
    1231             :  *      v       v       true    true    true    x != v
    1232             :  *      v1      v2      false   false   false   v1 < x < v2
    1233             :  *      v1      v2      true    false   false   v1 <= x < v2
    1234             :  *      v1      v2      false   true    false   v1 < x <= v2
    1235             :  *      v1      v2      true    true    false   v1 <= x <= v2
    1236             :  *      v1      v2      false   false   true    x <= v1 or x >= v2
    1237             :  *      v1      v2      true    false   true    x < v1 or x >= v2
    1238             :  *      v1      v2      false   true    true    x <= v1 or x > v2
    1239             :  *      v1      v2      true    true    true    x < v1 or x > v2
    1240             :  *      v2      v1      ignored ignored false   NOTHING
    1241             :  *      v2      v1      ignored ignored true    x != nil
    1242             :  */
    1243             : BAT *
    1244     2357335 : BATselect(BAT *b, BAT *s, const void *tl, const void *th,
    1245             :              bool li, bool hi, bool anti)
    1246             : {
    1247             :         bool lval;              /* low value used for comparison */
    1248             :         bool lnil;              /* low value is nil */
    1249             :         bool hval;              /* high value used for comparison */
    1250             :         bool equi;              /* select for single value (not range) */
    1251             :         bool wanthash = false;  /* use hash (equi must be true) */
    1252             :         bool havehash = false;  /* we have a hash (and the hashlock) */
    1253             :         bool phash = false;     /* use hash on parent BAT (if view) */
    1254             :         int t;                  /* data type */
    1255             :         bat parent;             /* b's parent bat (if b is a view) */
    1256             :         const void *nil;
    1257             :         BAT *bn, *tmp;
    1258             :         struct canditer ci;
    1259             :         BUN estimate = BUN_NONE, maximum = BUN_NONE;
    1260             :         oid vwl = 0, vwh = 0;
    1261             :         lng vwo = 0;
    1262             :         Heap *oidxh = NULL;
    1263             :         const char *algo;
    1264             :         union {
    1265             :                 bte v_bte;
    1266             :                 sht v_sht;
    1267             :                 int v_int;
    1268             :                 lng v_lng;
    1269             : #ifdef HAVE_HGE
    1270             :                 hge v_hge;
    1271             : #endif
    1272             :                 flt v_flt;
    1273             :                 dbl v_dbl;
    1274             :                 oid v_oid;
    1275             :         } vl, vh;
    1276     2357335 :         lng t0 = GDKusec();
    1277             : 
    1278     2357317 :         BATcheck(b, NULL);
    1279     2357317 :         if (tl == NULL) {
    1280           0 :                 GDKerror("tl value required");
    1281           0 :                 return NULL;
    1282             :         }
    1283             : 
    1284     2357317 :         if (s && s->ttype != TYPE_msk && !BATtordered(s)) {
    1285           0 :                 GDKerror("invalid argument: s must be sorted.\n");
    1286           0 :                 return NULL;
    1287             :         }
    1288             : 
    1289     2357317 :         if (canditer_init(&ci, b, s) == 0) {
    1290             :                 /* trivially empty result */
    1291     1128872 :                 MT_thread_setalgorithm("select: trivially empty");
    1292     1128834 :                 bn = BATdense(0, 0, 0);
    1293     1128818 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1294             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1295             :                           " (" LLFMT " usec): "
    1296             :                           "trivially empty\n",
    1297             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1298             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1299     1128818 :                 return bn;
    1300             :         }
    1301             : 
    1302     1228439 :         t = b->ttype;
    1303     1228439 :         nil = ATOMnilptr(t);
    1304             :         /* can we use the base type? */
    1305     1228439 :         t = ATOMbasetype(t);
    1306     1228439 :         lnil = ATOMcmp(t, tl, nil) == 0; /* low value = nil? */
    1307             : 
    1308     1228448 :         if (!lnil && th != NULL && (!li || !hi) && !anti && ATOMcmp(t, tl, th) == 0) {
    1309             :                 /* upper and lower bound of range are equal and we
    1310             :                  * want an interval that's open on at least one
    1311             :                  * side */
    1312          32 :                 MT_thread_setalgorithm("select: empty interval");
    1313          32 :                 bn = BATdense(0, 0, 0);
    1314          32 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1315             :                           ",s=" ALGOOPTBATFMT ",li=%d,hi=%d,anti=%d -> "
    1316             :                           ALGOOPTBATFMT " (" LLFMT " usec): "
    1317             :                           "empty interval\n",
    1318             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1319             :                           li, hi, anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
    1320          32 :                 return bn;
    1321             :         }
    1322             : 
    1323     1228412 :         lval = !lnil || th == NULL;      /* low value used for comparison */
    1324     1228412 :         equi = th == NULL || (lval && ATOMcmp(t, tl, th) == 0); /* point select? */
    1325     1228409 :         if (equi) {
    1326     1182247 :                 assert(lval);
    1327     1182247 :                 if (th == NULL)
    1328             :                         hi = li;
    1329             :                 th = tl;
    1330             :                 hval = true;
    1331             :         } else {
    1332       46162 :                 hval = ATOMcmp(t, th, nil) != 0;
    1333             :         }
    1334     1228406 :         if (anti) {
    1335       63198 :                 if (lval != hval) {
    1336             :                         /* one of the end points is nil and the other
    1337             :                          * isn't: swap sub-ranges */
    1338             :                         const void *tv;
    1339             :                         bool ti;
    1340          41 :                         assert(!equi);
    1341             :                         ti = li;
    1342          41 :                         li = !hi;
    1343          41 :                         hi = !ti;
    1344             :                         tv = tl;
    1345             :                         tl = th;
    1346             :                         th = tv;
    1347             :                         ti = lval;
    1348             :                         lval = hval;
    1349             :                         hval = ti;
    1350          41 :                         lnil = ATOMcmp(t, tl, nil) == 0;
    1351             :                         anti = false;
    1352          41 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1353             :                                   ",s=" ALGOOPTBATFMT ",anti=%d "
    1354             :                                   "anti: switch ranges...\n",
    1355             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1356             :                                   anti);
    1357       63157 :                 } else if (!lval && !hval) {
    1358             :                         /* antiselect for nil-nil range: all non-nil
    1359             :                          * values are in range; we must return all
    1360             :                          * other non-nil values, i.e. nothing */
    1361          18 :                         MT_thread_setalgorithm("select: anti: nil-nil range, nonil");
    1362          18 :                         bn = BATdense(0, 0, 0);
    1363          18 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1364             :                                   ",s=" ALGOOPTBATFMT ",anti=%d -> "
    1365             :                                   ALGOOPTBATFMT " (" LLFMT " usec): "
    1366             :                                   "anti: nil-nil range, nonil\n",
    1367             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1368             :                                   anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
    1369          18 :                         return bn;
    1370       63139 :                 } else if (equi && lnil) {
    1371             :                         /* antiselect for nil value: turn into range
    1372             :                          * select for nil-nil range (i.e. everything
    1373             :                          * but nil) */
    1374             :                         equi = false;
    1375             :                         anti = false;
    1376             :                         lval = false;
    1377             :                         hval = false;
    1378        5500 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1379             :                                   ",s=" ALGOOPTBATFMT ",anti=0 "
    1380             :                                   "anti-nil...\n",
    1381             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s));
    1382       57639 :                 } else if (equi) {
    1383             :                         equi = false;
    1384       33536 :                         if (!(li && hi)) {
    1385             :                                 /* antiselect for nothing: turn into
    1386             :                                  * range select for nil-nil range
    1387             :                                  * (i.e. everything but nil) */
    1388             :                                 anti = false;
    1389             :                                 lval = false;
    1390             :                                 hval = false;
    1391          12 :                                 TRC_DEBUG(ALGO, "b="
    1392             :                                           ALGOBATFMT ",s="
    1393             :                                           ALGOOPTBATFMT ",anti=0 "
    1394             :                                           "anti-nothing...\n",
    1395             :                                           ALGOBATPAR(b),
    1396             :                                           ALGOOPTBATPAR(s));
    1397             :                         }
    1398       24103 :                 } else if (ATOMcmp(t, tl, th) > 0) {
    1399             :                         /* empty range: turn into range select for
    1400             :                          * nil-nil range (i.e. everything but nil) */
    1401             :                         equi = false;
    1402             :                         anti = false;
    1403             :                         lval = false;
    1404             :                         hval = false;
    1405          24 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1406             :                                   ",s=" ALGOOPTBATFMT ",anti=0 "
    1407             :                                   "anti-nil...\n",
    1408             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s));
    1409             :                 }
    1410             :         }
    1411             : 
    1412             :         /* if equi set, then so are both lval and hval */
    1413     1228388 :         assert(!equi || (lval && hval));
    1414             : 
    1415     1228388 :         if (hval && (equi ? !li || !hi : ATOMcmp(t, tl, th) > 0)) {
    1416             :                 /* empty range */
    1417          41 :                 MT_thread_setalgorithm("select: empty range");
    1418          41 :                 bn = BATdense(0, 0, 0);
    1419          41 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1420             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1421             :                           " (" LLFMT " usec) "
    1422             :                           "empty range\n",
    1423             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1424             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1425          41 :                 return bn;
    1426             :         }
    1427     1228333 :         if (equi && lnil && b->tnonil) {
    1428             :                 /* return all nils, but there aren't any */
    1429        6447 :                 MT_thread_setalgorithm("select: equi-nil, nonil");
    1430        6447 :                 bn = BATdense(0, 0, 0);
    1431        6447 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1432             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1433             :                           " (" LLFMT " usec): "
    1434             :                           "equi-nil, nonil\n",
    1435             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1436             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1437        6447 :                 return bn;
    1438             :         }
    1439             : 
    1440     1221886 :         if (!equi && !lval && !hval && lnil && b->tnonil) {
    1441             :                 /* return all non-nils from a BAT that doesn't have
    1442             :                  * any: i.e. return everything */
    1443         370 :                 MT_thread_setalgorithm("select: everything, nonil");
    1444         370 :                 bn = canditer_slice(&ci, 0, ci.ncand);
    1445         370 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1446             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1447             :                           " (" LLFMT " usec): "
    1448             :                           "everything, nonil\n",
    1449             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1450             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1451         370 :                 return bn;
    1452             :         }
    1453             : 
    1454     1221516 :         BATiter bi = bat_iterator(b);
    1455             : 
    1456     1221543 :         if (anti) {
    1457             :                 int c;
    1458             : 
    1459       57605 :                 MT_lock_set(&b->theaplock);
    1460       57601 :                 if (b->tminpos != BUN_NONE) {
    1461       28012 :                         c = ATOMcmp(t, tl, BUNtail(bi, b->tminpos));
    1462       28011 :                         if (c < 0 || (li && c == 0)) {
    1463        3324 :                                 if (b->tmaxpos != BUN_NONE) {
    1464        3273 :                                         c = ATOMcmp(t, th, BUNtail(bi, b->tmaxpos));
    1465        3273 :                                         if (c > 0 || (hi && c == 0)) {
    1466          77 :                                                 MT_lock_unset(&b->theaplock);
    1467             :                                                 /* tl..th range fully
    1468             :                                                  * inside MIN..MAX
    1469             :                                                  * range of values in
    1470             :                                                  * BAT, so nothing
    1471             :                                                  * left over for
    1472             :                                                  * anti */
    1473          77 :                                                 MT_thread_setalgorithm("select: nothing, out of range");
    1474          77 :                                                 bn = BATdense(0, 0, 0);
    1475          77 :                                                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1476             :                                                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1477             :                                                           " (" LLFMT " usec): "
    1478             :                                                           "nothing, out of range\n",
    1479             :                                                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
    1480          77 :                                                 bat_iterator_end(&bi);
    1481          77 :                                                 return bn;
    1482             :                                         }
    1483             :                                 }
    1484             :                         }
    1485             :                 }
    1486       57523 :                 MT_lock_unset(&b->theaplock);
    1487     1163938 :         } else if (!equi || !lnil) {
    1488             :                 int c;
    1489             : 
    1490     1156247 :                 if (hval) {
    1491     1138710 :                         MT_lock_set(&b->theaplock);
    1492     1138704 :                         if (b->tminpos != BUN_NONE) {
    1493      892156 :                                 c = ATOMcmp(t, th, BUNtail(bi, b->tminpos));
    1494      892153 :                                 if (c < 0 || (!hi && c == 0)) {
    1495        1493 :                                         MT_lock_unset(&b->theaplock);
    1496             :                                         /* smallest value in BAT larger than
    1497             :                                          * what we're looking for */
    1498        1493 :                                         MT_thread_setalgorithm("select: nothing, out of range");
    1499        1493 :                                         bn = BATdense(0, 0, 0);
    1500        1493 :                                         TRC_DEBUG(ALGO, "b="
    1501             :                                                   ALGOBATFMT ",s="
    1502             :                                                   ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1503             :                                                   " (" LLFMT " usec): "
    1504             :                                                   "nothing, out of range\n",
    1505             :                                                   ALGOBATPAR(b),
    1506             :                                                   ALGOOPTBATPAR(s), anti,
    1507             :                                                   ALGOOPTBATPAR(bn), GDKusec() - t0);
    1508        1493 :                                         bat_iterator_end(&bi);
    1509        1493 :                                         return bn;
    1510             :                                 }
    1511             :                         }
    1512     1137208 :                         MT_lock_unset(&b->theaplock);
    1513             :                 }
    1514     1154757 :                 if (lval) {
    1515     1145600 :                         MT_lock_set(&b->theaplock);
    1516     1145602 :                         if (b->tmaxpos != BUN_NONE) {
    1517      829184 :                                 c = ATOMcmp(t, tl, BUNtail(bi, b->tmaxpos));
    1518      829185 :                                 if (c > 0 || (!li && c == 0)) {
    1519      153516 :                                         MT_lock_unset(&b->theaplock);
    1520             :                                         /* largest value in BAT smaller than
    1521             :                                          * what we're looking for */
    1522      153516 :                                         MT_thread_setalgorithm("select: nothing, out of range");
    1523      153516 :                                         bn = BATdense(0, 0, 0);
    1524      153516 :                                         TRC_DEBUG(ALGO, "b="
    1525             :                                                   ALGOBATFMT ",s="
    1526             :                                                   ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1527             :                                                   " (" LLFMT " usec): "
    1528             :                                                   "nothing, out of range\n",
    1529             :                                                   ALGOBATPAR(b),
    1530             :                                                   ALGOOPTBATPAR(s), anti,
    1531             :                                                   ALGOOPTBATPAR(bn), GDKusec() - t0);
    1532      153516 :                                         bat_iterator_end(&bi);
    1533      153516 :                                         return bn;
    1534             :                                 }
    1535             :                         }
    1536      992087 :                         MT_lock_unset(&b->theaplock);
    1537             :                 }
    1538             :         }
    1539             : 
    1540     1066450 :         if (ATOMtype(b->ttype) == TYPE_oid) {
    1541       28904 :                 NORMALIZE(oid);
    1542             :         } else {
    1543     1037546 :                 switch (t) {
    1544       86261 :                 case TYPE_bte:
    1545       86261 :                         NORMALIZE(bte);
    1546             :                         break;
    1547        8891 :                 case TYPE_sht:
    1548        8891 :                         NORMALIZE(sht);
    1549             :                         break;
    1550      786007 :                 case TYPE_int:
    1551      786007 :                         NORMALIZE(int);
    1552             :                         break;
    1553        3590 :                 case TYPE_lng:
    1554        3590 :                         NORMALIZE(lng);
    1555             :                         break;
    1556             : #ifdef HAVE_HGE
    1557         368 :                 case TYPE_hge:
    1558         368 :                         NORMALIZE(hge);
    1559             :                         break;
    1560             : #endif
    1561         100 :                 case TYPE_flt:
    1562         100 :                         NORMALIZE(flt);
    1563             :                         break;
    1564         415 :                 case TYPE_dbl:
    1565         415 :                         NORMALIZE(dbl);
    1566             :                         break;
    1567             :                 }
    1568     1066446 :         }
    1569             : 
    1570     1066446 :         parent = VIEWtparent(b);
    1571      902691 :         assert(parent >= 0);
    1572             :         /* use hash only for equi-join, and then only if b or its
    1573             :          * parent already has a hash, or if b or its parent is
    1574             :          * persistent and the total size wouldn't be too large; check
    1575             :          * for existence of hash last since that may involve I/O */
    1576     1066446 :         if (equi) {
    1577      981908 :                 havehash = BATcheckhash(b);
    1578      981914 :                 if (havehash) {
    1579       55235 :                         MT_rwlock_rdlock(&b->thashlock);
    1580       55235 :                         if (b->thash == NULL) {
    1581           0 :                                 MT_rwlock_rdunlock(&b->thashlock);
    1582             :                                 havehash = false;
    1583             :                         }
    1584             :                 }
    1585      981914 :                 wanthash = havehash ||
    1586      926679 :                         (!b->batTransient &&
    1587       26707 :                          ATOMsize(b->ttype) >= sizeof(BUN) / 4 &&
    1588       26705 :                          BATcount(b) * (ATOMsize(b->ttype) + 2 * sizeof(BUN)) < GDK_mem_maxsize / 2);
    1589      981914 :                 if (wanthash && !havehash) {
    1590       26705 :                         MT_lock_set(&b->theaplock);
    1591       26705 :                         if (b->tunique_est != 0 &&
    1592          86 :                             b->tunique_est < BATcount(b) / NO_HASH_SELECT_FRACTION) {
    1593             :                                 /* too many duplicates: not worth it */
    1594             :                                 wanthash = false;
    1595             :                         }
    1596       26705 :                         MT_lock_unset(&b->theaplock);
    1597             :                 }
    1598             :         }
    1599             : 
    1600     1066452 :         if (equi && !havehash && parent != 0) {
    1601             :                 /* use parent hash if it already exists and if either
    1602             :                  * a quick check shows the value we're looking for
    1603             :                  * does not occur, or if it is cheaper to check the
    1604             :                  * candidate list for each value in the hash chain
    1605             :                  * than to scan (cost for probe is average length of
    1606             :                  * hash chain (count divided by #slots) times the cost
    1607             :                  * to do a binary search on the candidate list (or 1
    1608             :                  * if no need for search)) */
    1609      809567 :                 tmp = BBP_cache(parent);
    1610      809567 :                 if (tmp && BATcheckhash(tmp)) {
    1611      772900 :                         MT_rwlock_rdlock(&tmp->thashlock);
    1612      772901 :                         phash = tmp->thash != NULL &&
    1613      902673 :                                 (BATcount(tmp) == BATcount(b) ||
    1614      183679 :                                  BATcount(tmp) / tmp->thash->nheads * (ci.tpe != cand_dense ? ilog2(BATcount(s)) : 1) < (s ? BATcount(s) : BATcount(b)) ||
    1615       20401 :                                  HASHget(tmp->thash, HASHprobe(tmp->thash, tl)) == BUN_NONE);
    1616      772904 :                         if (phash)
    1617             :                                 havehash = wanthash = true;
    1618             :                         else
    1619       18883 :                                 MT_rwlock_rdunlock(&tmp->thashlock);
    1620             :                 }
    1621             :                 /* create a hash on the parent bat (and use it) if it is
    1622             :                  * the same size as the view and it is persistent */
    1623      809548 :                 if (!phash &&
    1624       55527 :                     !tmp->batTransient &&
    1625       52678 :                     BATcount(tmp) == BATcount(b) &&
    1626        4320 :                     BAThash(tmp) == GDK_SUCCEED) {
    1627        4320 :                         MT_rwlock_rdlock(&tmp->thashlock);
    1628        4320 :                         if (tmp->thash)
    1629             :                                 havehash = wanthash = phash = true;
    1630             :                         else
    1631           0 :                                 MT_rwlock_rdunlock(&tmp->thashlock);
    1632             :                 }
    1633             :         }
    1634             :         /* at this point, if havehash is set, we have the hash lock
    1635             :          * the lock is on the parent if phash is set, on b itself if not
    1636             :          * also, if havehash is set, then so is wanthash (but not v.v.) */
    1637             : 
    1638     1066433 :         if (!havehash) {
    1639             :                 /* make sure tsorted and trevsorted flags are set, but
    1640             :                  * we only need to know if we're not yet sure that we're
    1641             :                  * going for the hash (i.e. it already exists) */
    1642      252849 :                 (void) BATordered(b);
    1643      252853 :                 (void) BATordered_rev(b);
    1644             :         }
    1645             : 
    1646             :         /* If there is an order index or it is a view and the parent
    1647             :          * has an ordered index, and the bat is not tsorted or
    1648             :          * trevstorted then use the order index.  And there is no cand
    1649             :          * list or if there is one, it is dense.
    1650             :          * TODO: we do not support anti-select with order index */
    1651             :         bool poidx = false;
    1652     1066426 :         if (!anti &&
    1653      209897 :             !havehash &&
    1654      209897 :             !b->tsorted &&
    1655      118464 :             !b->trevsorted &&
    1656      114411 :             ci.tpe == cand_dense) {
    1657             :                 BAT *view = NULL;
    1658       95315 :                 (void) BATcheckorderidx(b);
    1659       95315 :                 MT_lock_set(&b->batIdxLock);
    1660       95317 :                 if ((oidxh = b->torderidx) != NULL)
    1661          42 :                         HEAPincref(oidxh);
    1662       95317 :                 MT_lock_unset(&b->batIdxLock);
    1663       95307 :                 if (oidxh == NULL && parent) {
    1664       18031 :                         BAT *pb = BBP_cache(parent);
    1665       18031 :                         (void) BATcheckorderidx(pb);
    1666       18033 :                         MT_lock_set(&pb->batIdxLock);
    1667       18035 :                         if ((oidxh = pb->torderidx) != NULL) {
    1668           7 :                                 HEAPincref(oidxh);
    1669             :                                 view = b;
    1670             :                                 b = pb;
    1671             :                         }
    1672       18035 :                         MT_lock_unset(&pb->batIdxLock);
    1673             :                 }
    1674       95311 :                 if (oidxh) {
    1675             :                         /* Is query selective enough to use the ordered index ? */
    1676             :                         /* TODO: Test if this heuristic works in practice */
    1677             :                         /*if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < ((BUN)1000 < b->batCount/1000 ? (BUN)1000: b->batCount/1000))*/
    1678          49 :                         if ((ORDERfnd(b, oidxh, th) - ORDERfnd(b, oidxh, tl)) < b->batCount/3) {
    1679          26 :                                 if (view) {
    1680             :                                         poidx = true; /* using parent oidx */
    1681           7 :                                         vwo = (lng) (view->tbaseoff - b->tbaseoff);
    1682           7 :                                         vwl = b->hseqbase + (oid) vwo + ci.seq - view->hseqbase;
    1683           7 :                                         vwh = vwl + canditer_last(&ci) - ci.seq;
    1684           7 :                                         vwo = (lng) view->hseqbase - (lng) b->hseqbase - vwo;
    1685           7 :                                         TRC_DEBUG(ALGO, "Switch from " ALGOBATFMT " to " ALGOBATFMT " " OIDFMT "-" OIDFMT " hseq " LLFMT "\n", ALGOBATPAR(view), ALGOBATPAR(b), vwl, vwh, vwo);
    1686             :                                 } else {
    1687          19 :                                         vwl = ci.seq;
    1688          19 :                                         vwh = canditer_last(&ci);
    1689             :                                 }
    1690             :                         } else {
    1691          23 :                                 if (view) {
    1692             :                                         b = view;
    1693             :                                         view = NULL;
    1694             :                                 }
    1695          23 :                                 HEAPdecref(oidxh, false);
    1696             :                                 oidxh = NULL;
    1697             :                         }
    1698             :                 }
    1699             :         }
    1700             : 
    1701     1066422 :         if (!havehash && (b->tsorted || b->trevsorted || oidxh != NULL)) {
    1702             :                 BUN low = 0;
    1703      112684 :                 BUN high = b->batCount;
    1704             : 
    1705      112684 :                 if (BATtdense(b)) {
    1706             :                         /* positional */
    1707             :                         /* we expect nonil to be set, in which case we
    1708             :                          * already know that we're not dealing with a
    1709             :                          * nil equiselect (dealt with above) */
    1710             :                         oid h, l;
    1711       24133 :                         assert(b->tnonil);
    1712       24133 :                         assert(b->tsorted);
    1713       24133 :                         assert(oidxh == NULL);
    1714       24133 :                         algo = "select: dense";
    1715       24133 :                         h = * (oid *) th + hi;
    1716       24133 :                         if (h > b->tseqbase)
    1717       24120 :                                 h -= b->tseqbase;
    1718             :                         else
    1719             :                                 h = 0;
    1720             :                         if ((BUN) h < high)
    1721             :                                 high = (BUN) h;
    1722             : 
    1723       24133 :                         l = *(oid *) tl + !li;
    1724       24133 :                         if (l > b->tseqbase)
    1725       19527 :                                 l -= b->tseqbase;
    1726             :                         else
    1727             :                                 l = 0;
    1728             :                         if ((BUN) l > low)
    1729             :                                 low = (BUN) l;
    1730             :                         if (low > high)
    1731             :                                 low = high;
    1732       88551 :                 } else if (b->tsorted) {
    1733       77029 :                         assert(oidxh == NULL);
    1734       77029 :                         algo = "select: sorted";
    1735       77029 :                         if (lval) {
    1736       76439 :                                 if (li)
    1737       72711 :                                         low = SORTfndfirst(b, tl);
    1738             :                                 else
    1739        3728 :                                         low = SORTfndlast(b, tl);
    1740             :                         } else {
    1741             :                                 /* skip over nils at start of column */
    1742         590 :                                 low = SORTfndlast(b, nil);
    1743             :                         }
    1744       77034 :                         if (hval) {
    1745       76439 :                                 if (hi)
    1746       72705 :                                         high = SORTfndlast(b, th);
    1747             :                                 else
    1748        3734 :                                         high = SORTfndfirst(b, th);
    1749             :                         }
    1750       11522 :                 } else if (b->trevsorted) {
    1751       11496 :                         assert(oidxh == NULL);
    1752       11496 :                         algo = "select: reverse sorted";
    1753       11496 :                         if (lval) {
    1754       11455 :                                 if (li)
    1755       11327 :                                         high = SORTfndlast(b, tl);
    1756             :                                 else
    1757         128 :                                         high = SORTfndfirst(b, tl);
    1758             :                         } else {
    1759             :                                 /* skip over nils at end of column */
    1760          41 :                                 high = SORTfndfirst(b, nil);
    1761             :                         }
    1762       11496 :                         if (hval) {
    1763       11455 :                                 if (hi)
    1764       11327 :                                         low = SORTfndfirst(b, th);
    1765             :                                 else
    1766         128 :                                         low = SORTfndlast(b, th);
    1767             :                         }
    1768             :                 } else {
    1769          26 :                         assert(oidxh != NULL);
    1770          26 :                         algo = poidx ? "select: parent orderidx" : "select: orderidx";
    1771          26 :                         if (lval) {
    1772          26 :                                 if (li)
    1773          26 :                                         low = ORDERfndfirst(b, oidxh, tl);
    1774             :                                 else
    1775           0 :                                         low = ORDERfndlast(b, oidxh, tl);
    1776             :                         } else {
    1777             :                                 /* skip over nils at start of column */
    1778           0 :                                 low = ORDERfndlast(b, oidxh, nil);
    1779             :                         }
    1780          26 :                         if (hval) {
    1781          26 :                                 if (hi)
    1782          26 :                                         high = ORDERfndlast(b, oidxh, th);
    1783             :                                 else
    1784           0 :                                         high = ORDERfndfirst(b, oidxh, th);
    1785             :                         }
    1786             :                 }
    1787      112681 :                 if (anti) {
    1788       17197 :                         assert(oidxh == NULL);
    1789       17197 :                         if (b->tsorted) {
    1790        9753 :                                 BUN first = SORTfndlast(b, nil);
    1791             :                                 /* match: [first..low) + [high..last) */
    1792        9753 :                                 bn = canditer_slice2val(&ci,
    1793             :                                                         first + b->hseqbase,
    1794             :                                                         low + b->hseqbase,
    1795        9753 :                                                         high + b->hseqbase,
    1796             :                                                         oid_nil);
    1797             :                         } else {
    1798        7444 :                                 BUN last = SORTfndfirst(b, nil);
    1799             :                                 /* match: [first..low) + [high..last) */
    1800        7444 :                                 bn = canditer_slice2val(&ci,
    1801             :                                                         oid_nil,
    1802             :                                                         low + b->hseqbase,
    1803             :                                                         high + b->hseqbase,
    1804        7444 :                                                         last + b->hseqbase);
    1805             :                         }
    1806             :                 } else {
    1807       95484 :                         if (b->tsorted || b->trevsorted) {
    1808       95458 :                                 assert(oidxh == NULL);
    1809             :                                 /* match: [low..high) */
    1810       95458 :                                 bn = canditer_sliceval(&ci,
    1811             :                                                        low + b->hseqbase,
    1812       95458 :                                                        high + b->hseqbase);
    1813             :                         } else {
    1814             :                                 BUN i;
    1815             :                                 BUN cnt = 0;
    1816             :                                 const oid *rs;
    1817             :                                 oid *rbn;
    1818             : 
    1819          26 :                                 rs = (const oid *) oidxh->base + ORDERIDXOFF;
    1820          26 :                                 rs += low;
    1821          26 :                                 bn = COLnew(0, TYPE_oid, high-low, TRANSIENT);
    1822          26 :                                 if (bn == NULL) {
    1823           0 :                                         HEAPdecref(oidxh, false);
    1824           0 :                                         bat_iterator_end(&bi);
    1825           0 :                                         return NULL;
    1826             :                                 }
    1827             : 
    1828          26 :                                 rbn = (oid *) Tloc((bn), 0);
    1829             : 
    1830        2932 :                                 for (i = low; i < high; i++) {
    1831        2906 :                                         if (vwl <= *rs && *rs <= vwh) {
    1832        1138 :                                                 *rbn++ = (oid) ((lng) *rs + vwo);
    1833        1138 :                                                 cnt++;
    1834             :                                         }
    1835        2906 :                                         rs++;
    1836             :                                 }
    1837          26 :                                 HEAPdecref(oidxh, false);
    1838          26 :                                 BATsetcount(bn, cnt);
    1839             : 
    1840             :                                 /* output must be sorted */
    1841          26 :                                 GDKqsort(Tloc(bn, 0), NULL, NULL, (size_t) bn->batCount, sizeof(oid), 0, TYPE_oid, false, false);
    1842          26 :                                 bn->tsorted = true;
    1843          26 :                                 bn->trevsorted = bn->batCount <= 1;
    1844          26 :                                 bn->tkey = true;
    1845          26 :                                 bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil;
    1846          26 :                                 bn->tnil = false;
    1847          26 :                                 bn->tnonil = true;
    1848          26 :                                 if (s) {
    1849           8 :                                         s = BATintersectcand(bn, s);
    1850           8 :                                         BBPunfix(bn->batCacheid);
    1851             :                                         bn = s;
    1852             :                                 }
    1853             :                         }
    1854             :                 }
    1855             : 
    1856      112717 :                 bn = virtualize(bn);
    1857      112707 :                 MT_thread_setalgorithm(algo);
    1858      112705 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",anti=%s -> "
    1859             :                           ALGOOPTBATFMT " %s (" LLFMT " usec)\n",
    1860             :                           ALGOBATPAR(b), anti ? "true" : "false",
    1861             :                           ALGOOPTBATPAR(bn), algo,
    1862             :                           GDKusec() - t0);
    1863             : 
    1864      112705 :                 bat_iterator_end(&bi);
    1865      112718 :                 return bn;
    1866             :         }
    1867             : 
    1868      953738 :         assert(oidxh == NULL);
    1869             :         /* upper limit for result size */
    1870      953738 :         maximum = ci.ncand;
    1871      953738 :         if (b->tkey) {
    1872             :                 /* exact result size in special cases */
    1873      132424 :                 if (equi) {
    1874             :                         estimate = 1;
    1875         256 :                 } else if (!anti && lval && hval) {
    1876          11 :                         switch (t) {
    1877           0 :                         case TYPE_bte:
    1878           0 :                                 estimate = (BUN) (*(bte *) th - *(bte *) tl);
    1879           0 :                                 break;
    1880           0 :                         case TYPE_sht:
    1881           0 :                                 estimate = (BUN) (*(sht *) th - *(sht *) tl);
    1882           0 :                                 break;
    1883          11 :                         case TYPE_int:
    1884          11 :                                 estimate = (BUN) (*(int *) th - *(int *) tl);
    1885          11 :                                 break;
    1886           0 :                         case TYPE_lng:
    1887           0 :                                 estimate = (BUN) (*(lng *) th - *(lng *) tl);
    1888           0 :                                 break;
    1889             : #ifdef HAVE_HGE
    1890           0 :                         case TYPE_hge:
    1891           0 :                                 if (*(hge *) th - *(hge *) tl < (hge) BUN_MAX)
    1892           0 :                                         estimate = (BUN) (*(hge *) th - *(hge *) tl);
    1893             :                                 break;
    1894             : #endif
    1895             :                         }
    1896          11 :                         if (estimate != BUN_NONE)
    1897          11 :                                 estimate += li + hi - 1;
    1898             :                 }
    1899             :         }
    1900             :         /* refine upper limit by exact size (if known) */
    1901      953738 :         maximum = MIN(maximum, estimate);
    1902      953738 :         if (wanthash && !havehash && estimate == BUN_NONE) {
    1903             :                 /* no exact result size, but we need estimate to
    1904             :                  * choose between hash- & scan-select (if we already
    1905             :                  * have a hash, it's a no-brainer: we use it) */
    1906       25347 :                 if (ci.ncand <= 10000) {
    1907             :                         /* "small" input: don't bother about more accurate
    1908             :                          * estimate */
    1909             :                         estimate = maximum;
    1910             :                 } else {
    1911             :                         /* layman's quick "pseudo-sample" of 1000 tuples,
    1912             :                          * i.e., 333 from begin, middle & end of BAT */
    1913             :                         BUN smpl_cnt = 0, slct_cnt = 0, pos, skip, delta;
    1914             :                         BAT *smpl, *slct;
    1915             : 
    1916             :                         delta = 1000 / 3 / 2;
    1917           2 :                         skip = (BATcount(b) - (2 * delta)) / 2;
    1918           8 :                         for (pos = delta; pos < BATcount(b); pos += skip) {
    1919           6 :                                 smpl = BATslice(b, pos - delta, pos + delta);
    1920           6 :                                 if (smpl) {
    1921           6 :                                         slct = BATselect(smpl, NULL, tl,
    1922             :                                                             th, li, hi, anti);
    1923           6 :                                         if (slct) {
    1924           6 :                                                 smpl_cnt += BATcount(smpl);
    1925           6 :                                                 slct_cnt += BATcount(slct);
    1926           6 :                                                 BBPreclaim(slct);
    1927             :                                         }
    1928           6 :                                         BBPreclaim(smpl);
    1929             :                                 }
    1930             :                         }
    1931           2 :                         if (smpl_cnt > 0 && slct_cnt > 0) {
    1932             :                                 /* linear extrapolation plus 10% margin */
    1933           0 :                                 estimate = (BUN) ((dbl) slct_cnt / (dbl) smpl_cnt
    1934           0 :                                                   * (dbl) BATcount(b) * 1.1);
    1935           2 :                         } else if (smpl_cnt > 0 && slct_cnt == 0) {
    1936             :                                 /* estimate low enough to trigger hash select */
    1937           2 :                                 estimate = (ci.ncand / 100) - 1;
    1938             :                         }
    1939             :                 }
    1940       25347 :                 wanthash = estimate < ci.ncand / 100;
    1941             :         }
    1942      953738 :         if (estimate == BUN_NONE) {
    1943             :                 /* no better estimate possible/required:
    1944             :                  * (pre-)allocate 1M tuples, i.e., avoid/delay extend
    1945             :                  * without too much overallocation */
    1946             :                 estimate = 1000000;
    1947             :         }
    1948             :         /* limit estimation by upper limit */
    1949      953738 :         estimate = MIN(estimate, maximum);
    1950             : 
    1951      953738 :         bn = COLnew(0, TYPE_oid, estimate, TRANSIENT);
    1952      953728 :         if (bn == NULL) {
    1953           0 :                 if (havehash)
    1954           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    1955           0 :                 bat_iterator_end(&bi);
    1956           0 :                 return NULL;
    1957             :         }
    1958             : 
    1959      953728 :         if (wanthash) {
    1960             :                 /* hashselect unlocks the hash lock */
    1961      813582 :                 bn = hashselect(b, &bi, &ci, bn, tl, maximum, havehash, phash, &algo);
    1962             :         } else {
    1963      140146 :                 assert(!havehash);
    1964             :                 /* use imprints if
    1965             :                  *   i) bat is persistent, or parent is persistent
    1966             :                  *  ii) it is not an equi-select, and
    1967             :                  * iii) imprints are supported.
    1968             :                  */
    1969             :                 tmp = NULL;
    1970             :                 Imprints *imprints = NULL;
    1971             :                 if (!equi &&
    1972             :                     /* DISABLES CODE */ (0) && imprintable(b->ttype) &&
    1973             :                     (!b->batTransient ||
    1974             :                      (parent != 0 &&
    1975             :                       (tmp = BBP_cache(parent)) != NULL &&
    1976             :                       !tmp->batTransient)) &&
    1977             :                     BATimprints(b) == GDK_SUCCEED) {
    1978             :                         if (tmp != NULL) {
    1979             :                                 MT_lock_set(&tmp->batIdxLock);
    1980             :                                 imprints = tmp->timprints;
    1981             :                                 if (imprints != NULL)
    1982             :                                         IMPSincref(imprints);
    1983             :                                 else
    1984             :                                         imprints = NULL;
    1985             :                                 MT_lock_unset(&tmp->batIdxLock);
    1986             :                         } else {
    1987             :                                 MT_lock_set(&b->batIdxLock);
    1988             :                                 imprints = b->timprints;
    1989             :                                 if (imprints != NULL)
    1990             :                                         IMPSincref(imprints);
    1991             :                                 else
    1992             :                                         imprints = NULL;
    1993             :                                 MT_lock_unset(&b->batIdxLock);
    1994             :                         }
    1995             :                 }
    1996      140146 :                 GDKclrerr();
    1997      140137 :                 bn = scanselect(b, &bi, &ci, bn, tl, th, li, hi, equi, anti,
    1998             :                                 lval, hval, lnil, maximum, imprints, &algo);
    1999             :                 if (imprints)
    2000             :                         IMPSdecref(imprints, false);
    2001             :         }
    2002      953723 :         bat_iterator_end(&bi);
    2003             : 
    2004      953734 :         bn = virtualize(bn);
    2005      953736 :         MT_thread_setalgorithm(algo);
    2006      953723 :         TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT",anti=%s -> " ALGOOPTBATFMT
    2007             :                   " %s (" LLFMT " usec)\n",
    2008             :                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    2009             :                   anti ? "true" : "false",
    2010             :                   ALGOOPTBATPAR(bn), algo,
    2011             :                   GDKusec() - t0);
    2012             : 
    2013             :         return bn;
    2014             : }
    2015             : 
    2016             : /* theta select
    2017             :  *
    2018             :  * Returns a BAT with the OID values of b for qualifying tuples.  The
    2019             :  * return BAT is sorted (i.e. in the same order as the input BAT).
    2020             :  *
    2021             :  * If s is not NULL, it is a list of candidates.  s must be sorted.
    2022             :  *
    2023             :  * Theta select returns all values from b which are less/greater than
    2024             :  * or (not) equal to the provided value depending on the value of op.
    2025             :  * Op is a string with one of the values: "=", "==", "<", "<=", ">",
    2026             :  * ">=", "<>", "!=" (the first two are equivalent and the last two are
    2027             :  * equivalent).  Theta select never returns nils.
    2028             :  *
    2029             :  * If value is nil, the result is empty.
    2030             :  */
    2031             : BAT *
    2032      358863 : BATthetaselect(BAT *b, BAT *s, const void *val, const char *op)
    2033             : {
    2034             :         const void *nil;
    2035             : 
    2036      358863 :         BATcheck(b, NULL);
    2037      358863 :         BATcheck(val, NULL);
    2038      358863 :         BATcheck(op, NULL);
    2039             : 
    2040      358863 :         nil = ATOMnilptr(b->ttype);
    2041      358863 :         if (ATOMcmp(b->ttype, val, nil) == 0)
    2042          93 :                 return BATdense(0, 0, 0);
    2043      358759 :         if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0)) {
    2044             :                 /* "=" or "==" */
    2045      288880 :                 return BATselect(b, s, val, NULL, true, true, false);
    2046             :         }
    2047       69879 :         if (op[0] == '!' && op[1] == '=' && op[2] == 0) {
    2048             :                 /* "!=" (equivalent to "<>") */
    2049       45635 :                 return BATselect(b, s, val, NULL, true, true, true);
    2050             :         }
    2051       24244 :         if (op[0] == '<') {
    2052         760 :                 if (op[1] == 0) {
    2053             :                         /* "<" */
    2054         616 :                         return BATselect(b, s, nil, val, false, false, false);
    2055             :                 }
    2056         144 :                 if (op[1] == '=' && op[2] == 0) {
    2057             :                         /* "<=" */
    2058         144 :                         return BATselect(b, s, nil, val, false, true, false);
    2059             :                 }
    2060           0 :                 if (op[1] == '>' && op[2] == 0) {
    2061             :                         /* "<>" (equivalent to "!=") */
    2062           0 :                         return BATselect(b, s, val, NULL, true, true, true);
    2063             :                 }
    2064             :         }
    2065       23484 :         if (op[0] == '>') {
    2066       23484 :                 if (op[1] == 0) {
    2067             :                         /* ">" */
    2068       23039 :                         return BATselect(b, s, val, nil, false, false, false);
    2069             :                 }
    2070         445 :                 if (op[1] == '=' && op[2] == 0) {
    2071             :                         /* ">=" */
    2072         445 :                         return BATselect(b, s, val, nil, true, false, false);
    2073             :                 }
    2074             :         }
    2075           0 :         GDKerror("unknown operator.\n");
    2076           0 :         return NULL;
    2077             : }
    2078             : 
    2079             : #define VALUE(s, x)     (s##vars ?                                      \
    2080             :                          s##vars + VarHeapVal(s##vals, (x), s##width) : \
    2081             :                          s##vals + ((x) * s##width))
    2082             : #define FVALUE(s, x)    (s##vals + ((x) * s##width))
    2083             : 
    2084             : #define LTany(a,b)      ((*cmp)(a, b) < 0)
    2085             : #define EQany(a,b)      ((*cmp)(a, b) == 0)
    2086             : #define is_any_nil(v)   ((v) == NULL || (*cmp)((v), nil) == 0)
    2087             : 
    2088             : #define less3(a,b,i,t)  (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(a, b) || (i && EQ##t(a, b)))
    2089             : #define grtr3(a,b,i,t)  (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(b, a) || (i && EQ##t(a, b)))
    2090             : #define or3(a,b)        ((a) == 1 || (b) == 1 ? 1 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 0)
    2091             : #define and3(a,b)       ((a) == 0 || (b) == 0 ? 0 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 1)
    2092             : #define not3(a)         (is_bit_nil(a) ? bit_nil : !(a))
    2093             : 
    2094             : #define between3(v, lo, linc, hi, hinc, TYPE)                           \
    2095             :         and3(grtr3(v, lo, linc, TYPE), less3(v, hi, hinc, TYPE))
    2096             : 
    2097             : #define BETWEEN(v, lo, linc, hi, hinc, TYPE)                            \
    2098             :         (is_##TYPE##_nil(v)                                             \
    2099             :          ? bit_nil                                                      \
    2100             :          : (bit) (anti                                                  \
    2101             :                   ? (symmetric                                          \
    2102             :                      ? not3(or3(between3(v, lo, linc, hi, hinc, TYPE),  \
    2103             :                                 between3(v, hi, hinc, lo, linc, TYPE))) \
    2104             :                      : not3(between3(v, lo, linc, hi, hinc, TYPE)))     \
    2105             :                   : (symmetric                                          \
    2106             :                      ? or3(between3(v, lo, linc, hi, hinc, TYPE),       \
    2107             :                            between3(v, hi, hinc, lo, linc, TYPE))       \
    2108             :                      : between3(v, lo, linc, hi, hinc, TYPE))))
    2109             : 
    2110             : gdk_return
    2111         116 : rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh,
    2112             :           struct canditer *lci, struct canditer *rci,
    2113             :           bool linc, bool hinc, bool anti, bool symmetric, BUN maxsize)
    2114             : {
    2115             :         const char *rlvals, *rhvals;
    2116             :         const char *lvars, *rlvars, *rhvars;
    2117             :         int rlwidth, rhwidth;
    2118             :         int lwidth;
    2119         116 :         const void *nil = ATOMnilptr(l->ttype);
    2120         116 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    2121             :         int t;
    2122         116 :         BUN cnt, ncnt, lncand = lci->ncand, rncand = rci->ncand;
    2123             :         oid *restrict dst1, *restrict dst2;
    2124             :         const void *vrl, *vrh;
    2125             :         oid ro;
    2126         116 :         oid rlval = oid_nil, rhval = oid_nil;
    2127             :         int sorted = 0;         /* which output column is sorted */
    2128             :         BAT *tmp = NULL;
    2129             :         const char *algo = NULL;
    2130         116 :         BATiter li = bat_iterator(l);
    2131         116 :         BATiter rli = bat_iterator(rl);
    2132         116 :         BATiter rhi = bat_iterator(rh);
    2133             :         Heap *oidxh = NULL;
    2134             : 
    2135             :         lng timeoffset = 0;
    2136         116 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    2137         116 :         if (qry_ctx != NULL) {
    2138         116 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    2139             :         }
    2140             : 
    2141         348 :         assert(ATOMtype(l->ttype) == ATOMtype(rl->ttype));
    2142         232 :         assert(ATOMtype(l->ttype) == ATOMtype(rh->ttype));
    2143         116 :         assert(BATcount(rl) == BATcount(rh));
    2144         116 :         assert(rl->hseqbase == rh->hseqbase);
    2145         116 :         assert(r1->ttype == TYPE_oid);
    2146         116 :         assert(r2 == NULL || r2->ttype == TYPE_oid);
    2147         116 :         assert(r2 == NULL || BATcount(r1) == BATcount(r2));
    2148         116 :         assert(l->ttype != TYPE_void || !is_oid_nil(l->tseqbase));
    2149         116 :         assert(rl->ttype != TYPE_void || !is_oid_nil(rl->tseqbase));
    2150         116 :         assert(rh->ttype != TYPE_void || !is_oid_nil(rh->tseqbase));
    2151             : 
    2152         116 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    2153             :                   "rl=" ALGOBATFMT ",rh=" ALGOBATFMT ","
    2154             :                   "sl=" ALGOOPTBATFMT ",sr=" ALGOOPTBATFMT ","
    2155             :                   "anti=%s,symmetric=%s\n",
    2156             :                   ALGOBATPAR(l),
    2157             :                   ALGOBATPAR(rl),
    2158             :                   ALGOBATPAR(rh),
    2159             :                   ALGOOPTBATPAR(lci->s),
    2160             :                   ALGOOPTBATPAR(rci->s),
    2161             :                   anti ? "true" : "false",
    2162             :                   symmetric ? "true" : "false");
    2163             : 
    2164         116 :         rlvals = rl->ttype == TYPE_void ? NULL : (const char *) rli.base;
    2165         116 :         rhvals = rh->ttype == TYPE_void ? NULL : (const char *) rhi.base;
    2166         116 :         lwidth = li.width;
    2167         116 :         rlwidth = rli.width;
    2168         116 :         rhwidth = rhi.width;
    2169         116 :         dst1 = (oid *) Tloc(r1, 0);
    2170         116 :         dst2 = r2 ? (oid *) Tloc(r2, 0) : NULL;
    2171             : 
    2172         116 :         t = ATOMtype(l->ttype);
    2173             :         t = ATOMbasetype(t);
    2174             : 
    2175         116 :         if (l->tvarsized && l->ttype) {
    2176          15 :                 assert(rl->tvarsized && rl->ttype);
    2177          15 :                 assert(rh->tvarsized && rh->ttype);
    2178          15 :                 lvars = li.vh->base;
    2179          15 :                 rlvars = rli.vh->base;
    2180          15 :                 rhvars = rhi.vh->base;
    2181             :         } else {
    2182         101 :                 assert(!rl->tvarsized || !rl->ttype);
    2183         101 :                 assert(!rh->tvarsized || !rh->ttype);
    2184             :                 lvars = rlvars = rhvars = NULL;
    2185             :         }
    2186             : 
    2187         116 :         if (!anti && !symmetric && !BATordered(l) && !BATordered_rev(l)) {
    2188          11 :                 (void) BATcheckorderidx(l);
    2189          11 :                 MT_lock_set(&l->batIdxLock);
    2190          11 :                 if ((oidxh = l->torderidx) != NULL)
    2191           0 :                         HEAPincref(oidxh);
    2192          11 :                 MT_lock_unset(&l->batIdxLock);
    2193             : #if 0 /* needs checking */
    2194             :                 if (oidxh == NULL && VIEWtparent(l)) {
    2195             :                         BAT *pb = BBP_cache(VIEWtparent(l));
    2196             :                         (void) BATcheckorderidx(pb);
    2197             :                         MT_lock_set(&pb->batIdxLock);
    2198             :                         if ((oidxh = pb->torderidx) != NULL) {
    2199             :                                 HEAPincref(oidxh);
    2200             :                                 l = pb;
    2201             :                         }
    2202             :                         MT_lock_unset(&pb->batIdxLock);
    2203             :                 }
    2204             : #endif
    2205             :         }
    2206             : 
    2207             :         vrl = &rlval;
    2208             :         vrh = &rhval;
    2209         116 :         if (!anti && !symmetric && (BATordered(l) || BATordered_rev(l) || oidxh)) {
    2210             :                 /* left column is sorted, use binary search */
    2211             :                 sorted = 2;
    2212         424 :                 TIMEOUT_LOOP(rncand, timeoffset) {
    2213             :                         BUN low, high;
    2214             : 
    2215         240 :                         ro = canditer_next(rci);
    2216         239 :                         if (rlvals) {
    2217         239 :                                 vrl = VALUE(rl, ro - rl->hseqbase);
    2218             :                         } else {
    2219             :                                 /* TYPE_void */
    2220           0 :                                 rlval = ro - rl->hseqbase + rl->tseqbase;
    2221             :                         }
    2222         239 :                         if (rhvals) {
    2223         239 :                                 vrh = VALUE(rh, ro - rh->hseqbase);
    2224             :                         } else {
    2225             :                                 /* TYPE_void */
    2226           0 :                                 rhval = ro - rh->hseqbase + rh->tseqbase;
    2227             :                         }
    2228         239 :                         if (cmp(vrl, nil) == 0 || cmp(vrh, nil) == 0)
    2229           0 :                                 continue;
    2230         238 :                         if (l->tsorted) {
    2231         219 :                                 if (linc)
    2232         158 :                                         low = SORTfndfirst(l, vrl);
    2233             :                                 else
    2234          61 :                                         low = SORTfndlast(l, vrl);
    2235         220 :                                 if (hinc)
    2236         159 :                                         high = SORTfndlast(l, vrh);
    2237             :                                 else
    2238          61 :                                         high = SORTfndfirst(l, vrh);
    2239          19 :                         } else  if (l->trevsorted) {
    2240          19 :                                 if (hinc)
    2241          19 :                                         low = SORTfndfirst(l, vrh);
    2242             :                                 else
    2243           0 :                                         low = SORTfndlast(l, vrh);
    2244          19 :                                 if (linc)
    2245           3 :                                         high = SORTfndlast(l, vrl);
    2246             :                                 else
    2247          16 :                                         high = SORTfndfirst(l, vrl);
    2248             :                         } else {
    2249           0 :                                 assert(oidxh);
    2250           0 :                                 if (linc)
    2251           0 :                                         low = ORDERfndfirst(l, oidxh, vrl);
    2252             :                                 else
    2253           0 :                                         low = ORDERfndlast(l, oidxh, vrl);
    2254           0 :                                 if (hinc)
    2255           0 :                                         high = ORDERfndlast(l, oidxh, vrh);
    2256             :                                 else
    2257           0 :                                         high = ORDERfndfirst(l, oidxh, vrh);
    2258             :                         }
    2259         238 :                         if (high <= low)
    2260         124 :                                 continue;
    2261         114 :                         if (l->tsorted || l->trevsorted) {
    2262         114 :                                 low = canditer_search(lci, low + l->hseqbase, true);
    2263         114 :                                 high = canditer_search(lci, high + l->hseqbase, true);
    2264         114 :                                 assert(high >= low);
    2265             : 
    2266         114 :                                 if (BATcapacity(r1) < BUNlast(r1) + high - low) {
    2267           0 :                                         cnt = BUNlast(r1) + high - low + 1024;
    2268             :                                         if (cnt > maxsize)
    2269             :                                                 cnt = maxsize;
    2270           0 :                                         BATsetcount(r1, BATcount(r1));
    2271           0 :                                         if (BATextend(r1, cnt) != GDK_SUCCEED)
    2272           0 :                                                 goto bailout;
    2273           0 :                                         dst1 = (oid *) Tloc(r1, 0);
    2274           0 :                                         if (r2) {
    2275           0 :                                                 BATsetcount(r2, BATcount(r2));
    2276           0 :                                                 if (BATextend(r2, cnt) != GDK_SUCCEED)
    2277           0 :                                                         goto bailout;
    2278           0 :                                                 assert(BATcapacity(r1) == BATcapacity(r2));
    2279           0 :                                                 dst2 = (oid *) Tloc(r2, 0);
    2280             :                                         }
    2281             :                                 }
    2282         114 :                                 canditer_setidx(lci, low);
    2283         603 :                                 while (low < high) {
    2284         489 :                                         dst1[r1->batCount++] = canditer_next(lci);
    2285         489 :                                         if (r2) {
    2286         477 :                                                 dst2[r2->batCount++] = ro;
    2287             :                                         }
    2288         489 :                                         low++;
    2289             :                                 }
    2290             :                         } else {
    2291             :                                 const oid *ord;
    2292             : 
    2293           0 :                                 assert(oidxh);
    2294           0 :                                 ord = (const oid *) oidxh->base + ORDERIDXOFF;
    2295             : 
    2296           0 :                                 if (BATcapacity(r1) < BUNlast(r1) + high - low) {
    2297           0 :                                         cnt = BUNlast(r1) + high - low + 1024;
    2298             :                                         if (cnt > maxsize)
    2299             :                                                 cnt = maxsize;
    2300           0 :                                         BATsetcount(r1, BATcount(r1));
    2301           0 :                                         if (BATextend(r1, cnt) != GDK_SUCCEED)
    2302           0 :                                                 goto bailout;
    2303           0 :                                         dst1 = (oid *) Tloc(r1, 0);
    2304           0 :                                         if (r2) {
    2305           0 :                                                 BATsetcount(r2, BATcount(r2));
    2306           0 :                                                 if (BATextend(r2, cnt) != GDK_SUCCEED)
    2307           0 :                                                         goto bailout;
    2308           0 :                                                 assert(BATcapacity(r1) == BATcapacity(r2));
    2309           0 :                                                 dst2 = (oid *) Tloc(r2, 0);
    2310             :                                         }
    2311             :                                 }
    2312             : 
    2313           0 :                                 while (low < high) {
    2314           0 :                                         if (canditer_contains(lci, ord[low])) {
    2315           0 :                                                 dst1[r1->batCount++] = ord[low];
    2316           0 :                                                 if (r2) {
    2317           0 :                                                         dst2[r2->batCount++] = ro;
    2318             :                                                 }
    2319             :                                         }
    2320           0 :                                         low++;
    2321             :                                 }
    2322             :                         }
    2323             :                 }
    2324          92 :                 if (oidxh)
    2325           0 :                         HEAPdecref(oidxh, false);
    2326          93 :                 TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    2327          93 :                 cnt = BATcount(r1);
    2328          93 :                 assert(r2 == NULL || BATcount(r1) == BATcount(r2));
    2329             :         } else if (!anti && !symmetric &&
    2330             :                    /* DISABLES CODE */ (0) && imprintable(l->ttype) &&
    2331             :                    (BATcount(rl) > 2 ||
    2332             :                     !l->batTransient ||
    2333             :                     (VIEWtparent(l) != 0 &&
    2334             :                      (tmp = BBP_cache(VIEWtparent(l))) != NULL &&
    2335             :                      !tmp->batTransient) ||
    2336             :                     BATcheckimprints(l)) &&
    2337             :                    BATimprints(l) == GDK_SUCCEED) {
    2338             :                 /* implementation using imprints on left column
    2339             :                  *
    2340             :                  * we use imprints if we can (the type is right for
    2341             :                  * imprints) and either the left bat is persistent or
    2342             :                  * already has imprints, or the right bats are long
    2343             :                  * enough (for creating imprints being worth it) */
    2344             :                 Imprints *imprints = NULL;
    2345             : 
    2346             :                 if (tmp) {
    2347             :                         MT_lock_set(&tmp->batIdxLock);
    2348             :                         imprints = tmp->timprints;
    2349             :                         if (imprints != NULL)
    2350             :                                 IMPSincref(imprints);
    2351             :                         else
    2352             :                                 imprints = NULL;
    2353             :                         MT_lock_unset(&tmp->batIdxLock);
    2354             :                 } else {
    2355             :                         MT_lock_set(&l->batIdxLock);
    2356             :                         imprints = l->timprints;
    2357             :                         if (imprints != NULL)
    2358             :                                 IMPSincref(imprints);
    2359             :                         else
    2360             :                                 imprints = NULL;
    2361             :                         MT_lock_unset(&l->batIdxLock);
    2362             :                 }
    2363             :                 /* in the unlikely case that the imprints were removed
    2364             :                  * before we could get at them, take the slow, nested
    2365             :                  * loop implementation */
    2366             :                 if (imprints == NULL)
    2367             :                         goto nestedloop;
    2368             : 
    2369             :                 sorted = 2;
    2370             :                 cnt = 0;
    2371             :                 TIMEOUT_LOOP_IDX_DECL(i, rncand, timeoffset) {
    2372             :                         maxsize = cnt + (rncand - i) * lncand;
    2373             :                         ro = canditer_next(rci);
    2374             :                         if (rlvals) {
    2375             :                                 vrl = FVALUE(rl, ro - rl->hseqbase);
    2376             :                         } else {
    2377             :                                 /* TYPE_void */
    2378             :                                 rlval = ro - rl->hseqbase + rl->tseqbase;
    2379             :                         }
    2380             :                         if (rhvals) {
    2381             :                                 vrh = FVALUE(rh, ro - rh->hseqbase);
    2382             :                         } else {
    2383             :                                 /* TYPE_void */
    2384             :                                 rhval = ro - rl->hseqbase + rl->tseqbase;
    2385             :                         }
    2386             :                         dst1 = (oid *) Tloc(r1, 0);
    2387             :                         canditer_reset(lci);
    2388             :                         switch (t) {
    2389             :                         case TYPE_bte: {
    2390             :                                 bte vl, vh;
    2391             :                                 if (is_bte_nil((vl = *(bte *) vrl)))
    2392             :                                         continue;
    2393             :                                 if (is_bte_nil((vh = *(bte *) vrh)))
    2394             :                                         continue;
    2395             :                                 if (!linc) {
    2396             :                                         if (vl == MAXVALUEbte)
    2397             :                                                 continue;
    2398             :                                         vl = NEXTVALUEbte(vl);
    2399             :                                 }
    2400             :                                 if (!hinc) {
    2401             :                                         if (vh == MINVALUEbte)
    2402             :                                                 continue;
    2403             :                                         vh = PREVVALUEbte(vh);
    2404             :                                 }
    2405             :                                 if (vl > vh)
    2406             :                                         continue;
    2407             :                                 ncnt = fullscan_bte(l, &li, lci, r1, &vl, &vh,
    2408             :                                                     true, true, false,
    2409             :                                                     false, true, true,
    2410             :                                                     false, cnt,
    2411             :                                                     l->hseqbase, dst1,
    2412             :                                                     maxsize,
    2413             :                                                     imprints, &algo);
    2414             :                                 break;
    2415             :                         }
    2416             :                         case TYPE_sht: {
    2417             :                                 sht vl, vh;
    2418             :                                 if (is_sht_nil((vl = *(sht *) vrl)))
    2419             :                                         continue;
    2420             :                                 if (is_sht_nil((vh = *(sht *) vrh)))
    2421             :                                         continue;
    2422             :                                 if (!linc) {
    2423             :                                         if (vl == MAXVALUEsht)
    2424             :                                                 continue;
    2425             :                                         vl = NEXTVALUEsht(vl);
    2426             :                                 }
    2427             :                                 if (!hinc) {
    2428             :                                         if (vh == MINVALUEsht)
    2429             :                                                 continue;
    2430             :                                         vh = PREVVALUEsht(vh);
    2431             :                                 }
    2432             :                                 if (vl > vh)
    2433             :                                         continue;
    2434             :                                 ncnt = fullscan_sht(l, &li, lci, r1, &vl, &vh,
    2435             :                                                     true, true, false,
    2436             :                                                     false, true, true,
    2437             :                                                     false, cnt,
    2438             :                                                     l->hseqbase, dst1,
    2439             :                                                     maxsize,
    2440             :                                                     imprints, &algo);
    2441             :                                 break;
    2442             :                         }
    2443             :                         case TYPE_int:
    2444             : #if SIZEOF_OID == SIZEOF_INT
    2445             :                         case TYPE_oid:
    2446             : #endif
    2447             :                         {
    2448             :                                 int vl, vh;
    2449             :                                 if (is_int_nil((vl = *(int *) vrl)))
    2450             :                                         continue;
    2451             :                                 if (is_int_nil((vh = *(int *) vrh)))
    2452             :                                         continue;
    2453             :                                 if (!linc) {
    2454             :                                         if (vl == MAXVALUEint)
    2455             :                                                 continue;
    2456             :                                         vl = NEXTVALUEint(vl);
    2457             :                                 }
    2458             :                                 if (!hinc) {
    2459             : #if SIZEOF_OID == SIZEOF_INT
    2460             :                                         if (t == TYPE_oid) {
    2461             :                                                 if (vh == MINVALUEoid)
    2462             :                                                         continue;
    2463             :                                                 vh = PREVVALUEoid(vh);
    2464             :                                         } else
    2465             : #endif
    2466             :                                         {
    2467             :                                                 if (vh == MINVALUEint)
    2468             :                                                         continue;
    2469             :                                                 vh = PREVVALUEint(vh);
    2470             :                                         }
    2471             :                                 }
    2472             :                                 if (vl > vh)
    2473             :                                         continue;
    2474             :                                 ncnt = fullscan_int(l, &li, lci, r1, &vl, &vh,
    2475             :                                                     true, true, false,
    2476             :                                                     false, true, true,
    2477             :                                                     false, cnt,
    2478             :                                                     l->hseqbase, dst1,
    2479             :                                                     maxsize,
    2480             :                                                     imprints, &algo);
    2481             :                                 break;
    2482             :                         }
    2483             :                         case TYPE_lng:
    2484             : #if SIZEOF_OID == SIZEOF_LNG
    2485             :                         case TYPE_oid:
    2486             : #endif
    2487             :                         {
    2488             :                                 lng vl, vh;
    2489             :                                 if (is_lng_nil((vl = *(lng *) vrl)))
    2490             :                                         continue;
    2491             :                                 if (is_lng_nil((vh = *(lng *) vrh)))
    2492             :                                         continue;
    2493             :                                 if (!linc) {
    2494             :                                         if (vl == MAXVALUElng)
    2495             :                                                 continue;
    2496             :                                         vl = NEXTVALUElng(vl);
    2497             :                                 }
    2498             :                                 if (!hinc) {
    2499             : #if SIZEOF_OID == SIZEOF_LNG
    2500             :                                         if (t == TYPE_oid) {
    2501             :                                                 if (vh == MINVALUEoid)
    2502             :                                                         continue;
    2503             :                                                 vh = PREVVALUEoid(vh);
    2504             :                                         } else
    2505             : #endif
    2506             :                                         {
    2507             :                                                 if (vh == MINVALUElng)
    2508             :                                                         continue;
    2509             :                                                 vh = PREVVALUElng(vh);
    2510             :                                         }
    2511             :                                 }
    2512             :                                 if (vl > vh)
    2513             :                                         continue;
    2514             :                                 ncnt = fullscan_lng(l, &li, lci, r1, &vl, &vh,
    2515             :                                                     true, true, false,
    2516             :                                                     false, true, true,
    2517             :                                                     false, cnt,
    2518             :                                                     l->hseqbase, dst1,
    2519             :                                                     maxsize,
    2520             :                                                     imprints, &algo);
    2521             :                                 break;
    2522             :                         }
    2523             : #ifdef HAVE_HGE
    2524             :                         case TYPE_hge: {
    2525             :                                 hge vl, vh;
    2526             :                                 if (is_hge_nil((vl = *(hge *) vrl)))
    2527             :                                         continue;
    2528             :                                 if (is_hge_nil((vh = *(hge *) vrh)))
    2529             :                                         continue;
    2530             :                                 if (!linc) {
    2531             :                                         if (vl == MAXVALUEhge)
    2532             :                                                 continue;
    2533             :                                         vl = NEXTVALUEhge(vl);
    2534             :                                 }
    2535             :                                 if (!hinc) {
    2536             :                                         if (vh == MINVALUEhge)
    2537             :                                                 continue;
    2538             :                                         vh = PREVVALUEhge(vh);
    2539             :                                 }
    2540             :                                 if (vl > vh)
    2541             :                                         continue;
    2542             :                                 ncnt = fullscan_hge(l, &li, lci, r1, &vl, &vh,
    2543             :                                                     true, true, false,
    2544             :                                                     false, true, true,
    2545             :                                                     false, cnt,
    2546             :                                                     l->hseqbase, dst1,
    2547             :                                                     maxsize,
    2548             :                                                     imprints, &algo);
    2549             :                                 break;
    2550             :                         }
    2551             : #endif
    2552             :                         case TYPE_flt: {
    2553             :                                 flt vl, vh;
    2554             :                                 vl = *(flt *) vrl;
    2555             :                                 if (is_flt_nil(vl))
    2556             :                                         continue;
    2557             :                                 vh = *(flt *) vrh;
    2558             :                                 if (is_flt_nil(vh))
    2559             :                                         continue;
    2560             :                                 if (!linc) {
    2561             :                                         if (vl == MAXVALUEflt)
    2562             :                                                 continue;
    2563             :                                         vl = NEXTVALUEflt(vl);
    2564             :                                 }
    2565             :                                 if (!hinc) {
    2566             :                                         if (vh == MINVALUEflt)
    2567             :                                                 continue;
    2568             :                                         vh = PREVVALUEflt(vh);
    2569             :                                 }
    2570             :                                 if (vl > vh)
    2571             :                                         continue;
    2572             :                                 ncnt = fullscan_flt(l, &li, lci, r1, &vl, &vh,
    2573             :                                                     true, true, false,
    2574             :                                                     false, true, true,
    2575             :                                                     false, cnt,
    2576             :                                                     l->hseqbase, dst1,
    2577             :                                                     maxsize,
    2578             :                                                     imprints, &algo);
    2579             :                                 break;
    2580             :                         }
    2581             :                         case TYPE_dbl: {
    2582             :                                 dbl vl, vh;
    2583             :                                 vl = *(dbl *) vrl;
    2584             :                                 if (is_dbl_nil(vl))
    2585             :                                         continue;
    2586             :                                 vh = *(dbl *) vrh;
    2587             :                                 if (is_dbl_nil(vh))
    2588             :                                         continue;
    2589             :                                 if (!linc) {
    2590             :                                         if (vl == MAXVALUEdbl)
    2591             :                                                 continue;
    2592             :                                         vl = NEXTVALUEdbl(vl);
    2593             :                                 }
    2594             :                                 if (!hinc) {
    2595             :                                         if (vh == MINVALUEdbl)
    2596             :                                                 continue;
    2597             :                                         vh = PREVVALUEdbl(vh);
    2598             :                                 }
    2599             :                                 if (vl > vh)
    2600             :                                         continue;
    2601             :                                 ncnt = fullscan_dbl(l, &li, lci, r1, &vl, &vh,
    2602             :                                                     true, true, false,
    2603             :                                                     false, true, true,
    2604             :                                                     false, cnt,
    2605             :                                                     l->hseqbase, dst1,
    2606             :                                                     maxsize,
    2607             :                                                     imprints, &algo);
    2608             :                                 break;
    2609             :                         }
    2610             :                         default:
    2611             :                                 ncnt = BUN_NONE;
    2612             :                                 GDKerror("unsupported type\n");
    2613             :                                 assert(0);
    2614             :                         }
    2615             :                         if (ncnt == BUN_NONE) {
    2616             :                                 IMPSdecref(imprints, false);
    2617             :                                 goto bailout;
    2618             :                         }
    2619             :                         assert(ncnt >= cnt || ncnt == 0);
    2620             :                         if (ncnt == cnt || ncnt == 0)
    2621             :                                 continue;
    2622             :                         if (r2) {
    2623             :                                 if (BATcapacity(r2) < ncnt) {
    2624             :                                         BATsetcount(r2, cnt);
    2625             :                                         if (BATextend(r2, BATcapacity(r1)) != GDK_SUCCEED) {
    2626             :                                                 IMPSdecref(imprints, false);
    2627             :                                                 goto bailout;
    2628             :                                         }
    2629             :                                         dst2 = (oid *) Tloc(r2, 0);
    2630             :                                 }
    2631             :                                 while (cnt < ncnt)
    2632             :                                         dst2[cnt++] = ro;
    2633             :                         } else {
    2634             :                                 cnt = ncnt;
    2635             :                         }
    2636             :                 }
    2637             :                 IMPSdecref(imprints, false);
    2638             :                 TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    2639             :         } else {
    2640          22 :           nestedloop:;
    2641             :                 /* nested loop implementation */
    2642             :                 const void *vl;
    2643             :                 const char *lvals;
    2644             :                 oid lval;
    2645             : 
    2646          22 :                 GDKclrerr();    /* not interested in BATimprints errors */
    2647             :                 sorted = 1;
    2648          22 :                 lvals = l->ttype == TYPE_void ? NULL : (const char *) li.base;
    2649             :                 vl = &lval;
    2650         294 :                 TIMEOUT_LOOP(lncand, timeoffset) {
    2651             :                         oid lo;
    2652             : 
    2653         250 :                         lo = canditer_next(lci);
    2654         250 :                         if (lvals) {
    2655         250 :                                 vl = VALUE(l, lo - l->hseqbase);
    2656         250 :                                 if (cmp(vl, nil) == 0)
    2657           8 :                                         continue;
    2658             :                         } else {
    2659           0 :                                 lval = lo - l->hseqbase + l->tseqbase;
    2660             :                         }
    2661         242 :                         canditer_reset(rci);
    2662       37473 :                         for (BUN j = 0; j < rncand; j++) {
    2663       37231 :                                 ro = canditer_next(rci);
    2664       33854 :                                 if (rlvals) {
    2665       33854 :                                         vrl = VALUE(rl, ro - rl->hseqbase);
    2666             :                                 } else {
    2667             :                                         /* TYPE_void */
    2668           0 :                                         rlval = ro - rl->hseqbase + rl->tseqbase;
    2669             :                                 }
    2670       33854 :                                 if (rhvals) {
    2671       33854 :                                         vrh = VALUE(rh, ro - rh->hseqbase);
    2672             :                                 } else {
    2673             :                                         /* TYPE_void */
    2674           0 :                                         rhval = ro - rh->hseqbase + rh->tseqbase;
    2675             :                                 }
    2676       33854 :                                 if (BETWEEN(vl, vrl, linc, vrh, hinc, any) != 1)
    2677       23999 :                                         continue;
    2678       13232 :                                 if (BUNlast(r1) == BATcapacity(r1)) {
    2679           2 :                                         BUN newcap = BATgrows(r1);
    2680             :                                         if (newcap > maxsize)
    2681             :                                                 newcap = maxsize;
    2682           2 :                                         BATsetcount(r1, BATcount(r1));
    2683           2 :                                         if (BATextend(r1, newcap) != GDK_SUCCEED)
    2684           0 :                                                 goto bailout;
    2685           2 :                                         dst1 = (oid *) Tloc(r1, 0);
    2686           2 :                                         if (r2) {
    2687           2 :                                                 BATsetcount(r2, BATcount(r2));
    2688           2 :                                                 if (BATextend(r2, newcap) != GDK_SUCCEED)
    2689           0 :                                                         goto bailout;
    2690           2 :                                                 assert(BATcapacity(r1) == BATcapacity(r2));
    2691           2 :                                                 dst2 = (oid *) Tloc(r2, 0);
    2692             :                                         }
    2693             :                                 }
    2694       13232 :                                 dst1[r1->batCount++] = lo;
    2695       13232 :                                 if (r2) {
    2696       13223 :                                         dst2[r2->batCount++] = ro;
    2697             :                                 }
    2698             :                         }
    2699             :                 }
    2700          22 :                 TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    2701          22 :                 cnt = BATcount(r1);
    2702          22 :                 assert(r2 == NULL || BATcount(r1) == BATcount(r2));
    2703             :         }
    2704             : 
    2705             :         /* also set other bits of heap to correct value to indicate size */
    2706         115 :         BATsetcount(r1, cnt);
    2707             : 
    2708             :         /* set properties using an extra scan (usually not complete) */
    2709         115 :         dst1 = (oid *) Tloc(r1, 0);
    2710         115 :         r1->tkey = true;
    2711         115 :         r1->tsorted = true;
    2712         115 :         r1->trevsorted = true;
    2713         115 :         r1->tseqbase = 0;
    2714         115 :         r1->tnil = false;
    2715         115 :         r1->tnonil = true;
    2716         423 :         for (ncnt = 1; ncnt < cnt; ncnt++) {
    2717         314 :                 if (dst1[ncnt - 1] == dst1[ncnt]) {
    2718         250 :                         r1->tseqbase = oid_nil;
    2719         250 :                         r1->tkey = false;
    2720          64 :                 } else if (dst1[ncnt - 1] < dst1[ncnt]) {
    2721          61 :                         r1->trevsorted = false;
    2722          61 :                         if (dst1[ncnt - 1] + 1 != dst1[ncnt])
    2723           5 :                                 r1->tseqbase = oid_nil;
    2724             :                 } else {
    2725           3 :                         assert(sorted != 1);
    2726           3 :                         r1->tsorted = false;
    2727           3 :                         r1->tseqbase = oid_nil;
    2728           3 :                         r1->tkey = false;
    2729             :                 }
    2730         576 :                 if (!(r1->trevsorted | BATtdense(r1) | r1->tkey | ((sorted != 1) & r1->tsorted)))
    2731             :                         break;
    2732             :         }
    2733         115 :         if (BATtdense(r1))
    2734          91 :                 r1->tseqbase = cnt > 0 ? dst1[0] : 0;
    2735         115 :         if (r2) {
    2736          99 :                 BATsetcount(r2, cnt);
    2737          98 :                 dst2 = (oid *) Tloc(r2, 0);
    2738          98 :                 r2->tkey = true;
    2739          98 :                 r2->tsorted = true;
    2740          98 :                 r2->trevsorted = true;
    2741          98 :                 r2->tseqbase = 0;
    2742          98 :                 r2->tnil = false;
    2743          98 :                 r2->tnonil = true;
    2744         406 :                 for (ncnt = 1; ncnt < cnt; ncnt++) {
    2745         316 :                         if (dst2[ncnt - 1] == dst2[ncnt]) {
    2746          43 :                                 r2->tseqbase = oid_nil;
    2747          43 :                                 r2->tkey = false;
    2748         273 :                         } else if (dst2[ncnt - 1] < dst2[ncnt]) {
    2749         266 :                                 r2->trevsorted = false;
    2750         266 :                                 if (dst2[ncnt - 1] + 1 != dst2[ncnt])
    2751          82 :                                         r2->tseqbase = oid_nil;
    2752             :                         } else {
    2753           7 :                                 assert(sorted != 2);
    2754           7 :                                 r2->tsorted = false;
    2755           7 :                                 r2->tseqbase = oid_nil;
    2756           7 :                                 r2->tkey = false;
    2757             :                         }
    2758         536 :                         if (!(r2->trevsorted | BATtdense(r2) | r2->tkey | ((sorted != 2) & r2->tsorted)))
    2759             :                                 break;
    2760             :                 }
    2761          98 :                 if (BATtdense(r2))
    2762          66 :                         r2->tseqbase = cnt > 0 ? dst2[0] : 0;
    2763             :         }
    2764         114 :         TRC_DEBUG(ALGO, "l=%s,rl=%s,rh=%s -> "
    2765             :                   "(" ALGOBATFMT "," ALGOOPTBATFMT ")\n",
    2766             :                   BATgetId(l), BATgetId(rl), BATgetId(rh),
    2767             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2));
    2768         114 :         bat_iterator_end(&li);
    2769         114 :         bat_iterator_end(&rli);
    2770         116 :         bat_iterator_end(&rhi);
    2771         116 :         return GDK_SUCCEED;
    2772             : 
    2773           0 :   bailout:
    2774           0 :         bat_iterator_end(&li);
    2775           0 :         bat_iterator_end(&rli);
    2776           0 :         bat_iterator_end(&rhi);
    2777           0 :         BBPreclaim(r1);
    2778           0 :         BBPreclaim(r2);
    2779           0 :         return GDK_FAIL;
    2780             : }

Generated by: LCOV version 1.14