LCOV - code coverage report
Current view: top level - gdk - gdk_select.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 779 945 82.4 %
Date: 2021-09-14 19:48:19 Functions: 23 24 95.8 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : #include "monetdb_config.h"
      10             : #include "gdk.h"
      11             : #include "gdk_private.h"
      12             : 
      13             : /* auxiliary functions and structs for imprints */
      14             : #include "gdk_imprints.h"
      15             : 
      16             : static inline oid *
      17   169227245 : buninsfix(BAT *bn, oid *a, BUN i, oid v, BUN g, BUN m)
      18             : {
      19   169227245 :         if (i == BATcapacity(bn)) {
      20          62 :                 BATsetcount(bn, i);
      21          62 :                 if (BATextend(bn, MIN(BATcapacity(bn) + g, m)) != GDK_SUCCEED)
      22             :                         return NULL;
      23          62 :                 a = (oid *) Tloc(bn, 0);
      24             :         }
      25   169227245 :         a[i] = v;
      26   169227245 :         return a;
      27             : }
      28             : 
      29             : BAT *
      30     1624236 : virtualize(BAT *bn)
      31             : {
      32             :         /* input must be a valid candidate list or NULL */
      33     1624236 :         if (bn == NULL)
      34             :                 return NULL;
      35     1624236 :         if ((bn->ttype != TYPE_void && bn->ttype != TYPE_oid) || !bn->tkey || !bn->tsorted) {
      36           0 :                 fprintf(stderr, "#bn type %d nil %d key %d sorted %d\n",
      37           0 :                         bn->ttype, is_oid_nil(bn->tseqbase),
      38           0 :                         bn->tkey, bn->tsorted);
      39           0 :                 fflush(stderr);
      40             :         }
      41     1625718 :         assert(((bn->ttype == TYPE_void && !is_oid_nil(bn->tseqbase)) ||
      42             :                 bn->ttype == TYPE_oid) &&
      43             :                bn->tkey && bn->tsorted);
      44     1625718 :         assert(BBP_refs(bn->batCacheid) == 1);
      45     1625718 :         assert(BBP_lrefs(bn->batCacheid) == 0);
      46             :         /* since bn has unique and strictly ascending values, we can
      47             :          * easily check whether the column is dense */
      48     1625718 :         if (bn->ttype == TYPE_oid &&
      49     1215519 :             (BATcount(bn) <= 1 ||
      50      321584 :              * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 ==
      51      321584 :              * (const oid *) Tloc(bn, BUNlast(bn) - 1))) {
      52             :                 /* column is dense, replace by virtual oid */
      53      946255 :                 TRC_DEBUG(ALGO, ALGOBATFMT ",seq=" OIDFMT "\n",
      54             :                           ALGOBATPAR(bn),
      55             :                           BATcount(bn) > 0 ? * (const oid *) Tloc(bn, 0) : 0);
      56      946270 :                 if (BATcount(bn) == 0)
      57      546055 :                         bn->tseqbase = 0;
      58             :                 else
      59      400215 :                         bn->tseqbase = * (const oid *) Tloc(bn, 0);
      60      946465 :                 if (VIEWtparent(bn)) {
      61         195 :                         Heap *h = GDKmalloc(sizeof(Heap));
      62         195 :                         bat bid = VIEWtparent(bn);
      63         195 :                         if (h == NULL) {
      64           0 :                                 BBPunfix(bn->batCacheid);
      65           0 :                                 return NULL;
      66             :                         }
      67         195 :                         *h = *bn->theap;
      68         195 :                         settailname(h, BBP_physical(bn->batCacheid), TYPE_oid, 0);
      69         195 :                         h->parentid = bn->batCacheid;
      70         195 :                         h->base = NULL;
      71         195 :                         ATOMIC_INIT(&h->refs, 1);
      72         195 :                         HEAPdecref(bn->theap, false);
      73         195 :                         bn->theap = h;
      74         195 :                         BBPunshare(bid);
      75         195 :                         BBPunfix(bid);
      76             :                 } else {
      77      946075 :                         HEAPfree(bn->theap, true);
      78             :                 }
      79      946721 :                 bn->theap->storage = bn->theap->newstorage = STORE_MEM;
      80      946721 :                 bn->theap->size = 0;
      81      946721 :                 bn->ttype = TYPE_void;
      82      946721 :                 bn->tvarsized = true;
      83      946721 :                 bn->twidth = 0;
      84      946721 :                 bn->tshift = 0;
      85             :         }
      86             : 
      87             :         return bn;
      88             : }
      89             : 
      90             : #define HASHloop_bound(bi, h, hb, v, lo, hi)            \
      91             :         for (hb = HASHget(h, HASHprobe((h), v));        \
      92             :              hb != BUN_NONE;                            \
      93             :              hb = HASHgetlink(h,hb))                    \
      94             :                 if (hb >= (lo) && hb < (hi) &&            \
      95             :                     (cmp == NULL ||                     \
      96             :                      (*cmp)(v, BUNtail(bi, hb)) == 0))
      97             : 
      98             : static BAT *
      99      949546 : hashselect(BAT *b, BATiter *bi, struct canditer *restrict ci, BAT *bn,
     100             :            const void *tl, BUN maximum, bool havehash, bool phash,
     101             :            const char **algo)
     102             : {
     103             :         BUN i, cnt;
     104             :         oid o, *restrict dst;
     105             :         BUN l, h, d = 0;
     106             :         oid seq;
     107             :         int (*cmp)(const void *, const void *);
     108             : 
     109      949546 :         assert(bn->ttype == TYPE_oid);
     110      949546 :         seq = b->hseqbase;
     111      949546 :         l = ci->seq - seq;
     112      949546 :         h = canditer_last(ci) + 1 - seq;
     113             : 
     114      949308 :         *algo = "hashselect";
     115      949308 :         if (phash) {
     116      864012 :                 BAT *b2 = BBP_cache(VIEWtparent(b));
     117      864012 :                 *algo = "hashselect on parent";
     118      864012 :                 TRC_DEBUG(ALGO, ALGOBATFMT
     119             :                           " using parent(" ALGOBATFMT ") "
     120             :                           "for hash\n",
     121             :                           ALGOBATPAR(b),
     122             :                           ALGOBATPAR(b2));
     123      864012 :                 d = b->tbaseoff - b2->tbaseoff;
     124      864012 :                 l += d;
     125      864012 :                 h += d;
     126             :                 b = b2;
     127      864012 :                 bat_iterator_end(bi);
     128      864510 :                 *bi = bat_iterator(b);
     129             :         }
     130             : 
     131      950844 :         if (!havehash) {
     132           1 :                 if (BAThash(b) != GDK_SUCCEED) {
     133           0 :                         BBPreclaim(bn);
     134           0 :                         return NULL;
     135             :                 }
     136           1 :                 MT_rwlock_rdlock(&b->thashlock);
     137           1 :                 if (b->thash == NULL) {
     138           0 :                         GDKerror("Hash destroyed before we could use it\n");
     139           0 :                         BBPreclaim(bn);
     140           0 :                         MT_rwlock_rdunlock(&b->thashlock);
     141           0 :                         return NULL;
     142             :                 }
     143             :         }
     144     1898201 :         switch (ATOMbasetype(b->ttype)) {
     145             :         case TYPE_bte:
     146             :         case TYPE_sht:
     147             :                 cmp = NULL;     /* no need to compare: "hash" is perfect */
     148             :                 break;
     149      943319 :         default:
     150      943319 :                 cmp = ATOMcompare(b->ttype);
     151      943319 :                 break;
     152             :         }
     153      950844 :         dst = (oid *) Tloc(bn, 0);
     154             :         cnt = 0;
     155      950844 :         if (ci->tpe != cand_dense) {
     156     7017275 :                 HASHloop_bound(*bi, b->thash, i, tl, l, h) {
     157     3660215 :                         o = (oid) (i + seq - d);
     158     3660215 :                         if (canditer_contains(ci, o)) {
     159     1611946 :                                 dst = buninsfix(bn, dst, cnt, o,
     160     1611946 :                                                 maximum - BATcapacity(bn),
     161             :                                                 maximum);
     162     1614856 :                                 if (dst == NULL) {
     163           0 :                                         MT_rwlock_rdunlock(&b->thashlock);
     164           0 :                                         BBPreclaim(bn);
     165           0 :                                         return NULL;
     166             :                                 }
     167     1614856 :                                 cnt++;
     168             :                         }
     169             :                 }
     170             :         } else {
     171    10871153 :                 HASHloop_bound(*bi, b->thash, i, tl, l, h) {
     172     8280516 :                         o = (oid) (i + seq - d);
     173     8280516 :                         dst = buninsfix(bn, dst, cnt, o,
     174     8280516 :                                         maximum - BATcapacity(bn),
     175             :                                         maximum);
     176     8288394 :                         if (dst == NULL) {
     177           0 :                                 MT_rwlock_rdunlock(&b->thashlock);
     178           0 :                                 BBPreclaim(bn);
     179           0 :                                 return NULL;
     180             :                         }
     181     8288394 :                         cnt++;
     182             :                 }
     183             :         }
     184      950531 :         MT_rwlock_rdunlock(&b->thashlock);
     185      950746 :         BATsetcount(bn, cnt);
     186      950779 :         bn->tkey = true;
     187      950779 :         if (cnt > 1) {
     188             :                 /* hash chains produce results in the order high to
     189             :                  * low, so we just need to reverse */
     190     5328952 :                 for (l = 0, h = BUNlast(bn) - 1; l < h; l++, h--) {
     191     5183931 :                         o = dst[l];
     192     5183931 :                         dst[l] = dst[h];
     193     5183931 :                         dst[h] = o;
     194             :                 }
     195             :         }
     196      950779 :         bn->tsorted = true;
     197      950779 :         bn->trevsorted = bn->batCount <= 1;
     198      950779 :         bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? *dst : oid_nil;
     199      950779 :         return bn;
     200             : }
     201             : 
     202             : /* Imprints select code */
     203             : 
     204             : /* inner check, non-dense canditer */
     205             : #define impscheck(TEST,ADD)                                             \
     206             :         do {                                                            \
     207             :                 const oid e = (oid) (i+limit-pr_off+hseq);              \
     208             :                 if (im[icnt] & mask) {                                      \
     209             :                         if ((im[icnt] & ~innermask) == 0) {         \
     210             :                                 while (p < ci->ncand && o < e) {       \
     211             :                                         v = src[o-hseq];                \
     212             :                                         if ((ADD) == NULL) {            \
     213             :                                                 BBPreclaim(bn);         \
     214             :                                                 return BUN_NONE;        \
     215             :                                         }                               \
     216             :                                         cnt++;                          \
     217             :                                         p++;                            \
     218             :                                         o = canditer_next(ci);          \
     219             :                                 }                                       \
     220             :                         } else {                                        \
     221             :                                 while (p < ci->ncand && o < e) {       \
     222             :                                         v = src[o-hseq];                \
     223             :                                         if ((ADD) == NULL) {            \
     224             :                                                 BBPreclaim(bn);         \
     225             :                                                 return BUN_NONE;        \
     226             :                                         }                               \
     227             :                                         cnt += (TEST) != 0;             \
     228             :                                         p++;                            \
     229             :                                         o = canditer_next(ci);          \
     230             :                                 }                                       \
     231             :                         }                                               \
     232             :                 } else {                                                \
     233             :                         while (p < ci->ncand && o < e) {               \
     234             :                                 p++;                                    \
     235             :                                 o = canditer_next(ci);                  \
     236             :                         }                                               \
     237             :                 }                                                       \
     238             :         } while (false)
     239             : 
     240             : /* inner check, dense canditer */
     241             : #define impscheck_dense(TEST,ADD)                                       \
     242             :         do {                                                            \
     243             :                 const oid e = (oid) (i+limit-pr_off+hseq);              \
     244             :                 if (im[icnt] & mask) {                                      \
     245             :                         if ((im[icnt] & ~innermask) == 0) {         \
     246             :                                 while (p < ci->ncand && o < e) {       \
     247             :                                         v = src[o-hseq];                \
     248             :                                         if ((ADD) == NULL) {            \
     249             :                                                 BBPreclaim(bn);         \
     250             :                                                 return BUN_NONE;        \
     251             :                                         }                               \
     252             :                                         cnt++;                          \
     253             :                                         p++;                            \
     254             :                                         o = canditer_next_dense(ci);    \
     255             :                                 }                                       \
     256             :                         } else {                                        \
     257             :                                 while (p < ci->ncand && o < e) {       \
     258             :                                         v = src[o-hseq];                \
     259             :                                         if ((ADD) == NULL) {            \
     260             :                                                 BBPreclaim(bn);         \
     261             :                                                 return BUN_NONE;        \
     262             :                                         }                               \
     263             :                                         cnt += (TEST) != 0;             \
     264             :                                         p++;                            \
     265             :                                         o = canditer_next_dense(ci);    \
     266             :                                 }                                       \
     267             :                         }                                               \
     268             :                 } else {                                                \
     269             :                         BUN skip_sz = MIN(ci->ncand - p, e - o);     \
     270             :                         p += skip_sz;                                   \
     271             :                         o += skip_sz;                                   \
     272             :                         ci->next += skip_sz;                         \
     273             :                 }                                                       \
     274             :         } while (false)
     275             : 
     276             : /* main loop for imprints */
     277             : /*
     278             :  * icnt is the iterator for imprints
     279             :  * dcnt is the iterator for dictionary entries
     280             :  * i    is the iterator for the values in imprints
     281             :  */
     282             : #define impsloop(ISDENSE,TEST,ADD)                                      \
     283             :         do {                                                            \
     284             :                 BUN dcnt, icnt, limit, i;                               \
     285             :                 const cchdc_t *restrict d = (cchdc_t *) imprints->dict;      \
     286             :                 const uint8_t rpp = ATOMelmshift(IMPS_PAGE >> bi->shift); \
     287             :                 o = canditer_next(ci);                                  \
     288             :                 for (i = 0, dcnt = 0, icnt = 0, p = 0;                  \
     289             :                      dcnt < imprints->dictcnt && i <= w - hseq + pr_off && p < ci->ncand; \
     290             :                      dcnt++) {                                          \
     291             :                         limit = ((BUN) d[dcnt].cnt) << rpp;               \
     292             :                         while (i + limit <= o - hseq + pr_off) {     \
     293             :                                 i += limit;                             \
     294             :                                 icnt += d[dcnt].repeat ? 1 : d[dcnt].cnt; \
     295             :                                 dcnt++;                                 \
     296             :                                 limit = ((BUN) d[dcnt].cnt) << rpp;       \
     297             :                         }                                               \
     298             :                         if (!d[dcnt].repeat) {                          \
     299             :                                 const BUN l = icnt + d[dcnt].cnt;       \
     300             :                                 limit = (BUN) 1 << rpp;                   \
     301             :                                 while (i + limit <= o - hseq + pr_off) { \
     302             :                                         icnt++;                         \
     303             :                                         i += limit;                     \
     304             :                                 }                                       \
     305             :                                 for (;                                  \
     306             :                                      icnt < l && i <= w - hseq + pr_off; \
     307             :                                      icnt++) {                          \
     308             :                                         impscheck##ISDENSE(TEST,ADD);   \
     309             :                                         i += limit;                     \
     310             :                                 }                                       \
     311             :                         }                                               \
     312             :                         else {                                          \
     313             :                                 impscheck##ISDENSE(TEST,ADD);           \
     314             :                                 i += limit;                             \
     315             :                                 icnt++;                                 \
     316             :                         }                                               \
     317             :                 }                                                       \
     318             :         } while (false)
     319             : 
     320             : static inline oid *
     321           0 : quickins(oid *dst, BUN cnt, oid o, BAT *bn)
     322             : {
     323             :         (void) bn;
     324           0 :         assert(cnt < BATcapacity(bn));
     325           0 :         dst[cnt] = o;
     326           0 :         return dst;
     327             : }
     328             : 
     329             : /* construct the mask */
     330             : #define impsmask(ISDENSE,TEST,B)                                        \
     331             :         do {                                                            \
     332             :                 const uint##B##_t *restrict im = (uint##B##_t *) imprints->imps; \
     333             :                 uint##B##_t mask = 0, innermask;                        \
     334             :                 const int tpe = ATOMbasetype(b->ttype);                      \
     335             :                 const int lbin = IMPSgetbin(tpe, imprints->bits, imprints->bins, tl); \
     336             :                 const int hbin = IMPSgetbin(tpe, imprints->bits, imprints->bins, th); \
     337             :                 /* note: (1<<n)-1 gives a sequence of n one bits */       \
     338             :                 /* to set bits hbin..lbin inclusive, we would do: */    \
     339             :                 /* mask = ((1 << (hbin + 1)) - 1) - ((1 << lbin) - 1); */ \
     340             :                 /* except ((1 << (hbin + 1)) - 1) is not defined if */    \
     341             :                 /* hbin == sizeof(uint##B##_t)*8 - 1 */                 \
     342             :                 /* the following does work, however */                  \
     343             :                 mask = (((((uint##B##_t) 1 << hbin) - 1) << 1) | 1) - (((uint##B##_t) 1 << lbin) - 1); \
     344             :                 innermask = mask;                                       \
     345             :                 if (vl != minval)                                       \
     346             :                         innermask = IMPSunsetBit(B, innermask, lbin);   \
     347             :                 if (vh != maxval)                                       \
     348             :                         innermask = IMPSunsetBit(B, innermask, hbin);   \
     349             :                 if (anti) {                                             \
     350             :                         uint##B##_t tmp = mask;                         \
     351             :                         mask = ~innermask;                              \
     352             :                         innermask = ~tmp;                               \
     353             :                 }                                                       \
     354             :                 /* if there are nils, we may need to check bin 0 */     \
     355             :                 if (!b->tnonil)                                              \
     356             :                         innermask = IMPSunsetBit(B, innermask, 0);      \
     357             :                                                                         \
     358             :                 if (BATcapacity(bn) < maximum) {                     \
     359             :                         impsloop(ISDENSE, TEST,                         \
     360             :                                  dst = buninsfix(bn, dst, cnt, o,       \
     361             :                                                  (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
     362             :                                                         * (dbl) (ci->ncand-p) * 1.1 + 1024), \
     363             :                                                  maximum));             \
     364             :                 } else {                                                \
     365             :                         impsloop(ISDENSE, TEST, dst = quickins(dst, cnt, o, bn)); \
     366             :                 }                                                       \
     367             :         } while (false)
     368             : 
     369             : #define checkMINMAX(B, TYPE)                                            \
     370             :         do {                                                            \
     371             :                 const BUN *restrict imp_cnt = imprints->stats + 128; \
     372             :                 imp_min = imp_max = nil;                                \
     373             :                 for (BUN ii = 0; ii < B; ii++) {                     \
     374             :                         if (is_##TYPE##_nil(imp_min) && imp_cnt[ii]) {  \
     375             :                                 imp_min = basesrc[imprints->stats[ii]];      \
     376             :                         }                                               \
     377             :                         if (is_##TYPE##_nil(imp_max) && imp_cnt[B-1-ii]) { \
     378             :                                 imp_max = basesrc[imprints->stats[64+B-1-ii]]; \
     379             :                         }                                               \
     380             :                 }                                                       \
     381             :                 assert(!is_##TYPE##_nil(imp_min) &&                     \
     382             :                        !is_##TYPE##_nil(imp_max));                      \
     383             :                 if (anti ?                                              \
     384             :                     vl < imp_min && vh > imp_max :                        \
     385             :                     vl > imp_max || vh < imp_min) {                       \
     386             :                         return 0;                                       \
     387             :                 }                                                       \
     388             :         } while (false)
     389             : 
     390             : /* choose number of bits */
     391             : #define bitswitch(ISDENSE, TEST, TYPE)                                  \
     392             :         do {                                                            \
     393             :                 assert(imprints);                                       \
     394             :                 *algo = parent ? "parent imprints select " #TEST " (canditer_next" #ISDENSE ")" : "imprints select " #TEST " (canditer_next" #ISDENSE ")"; \
     395             :                 switch (imprints->bits) {                            \
     396             :                 case 8:                                                 \
     397             :                         checkMINMAX(8, TYPE);                           \
     398             :                         impsmask(ISDENSE,TEST,8);                       \
     399             :                         break;                                          \
     400             :                 case 16:                                                \
     401             :                         checkMINMAX(16, TYPE);                          \
     402             :                         impsmask(ISDENSE,TEST,16);                      \
     403             :                         break;                                          \
     404             :                 case 32:                                                \
     405             :                         checkMINMAX(32, TYPE);                          \
     406             :                         impsmask(ISDENSE,TEST,32);                      \
     407             :                         break;                                          \
     408             :                 case 64:                                                \
     409             :                         checkMINMAX(64, TYPE);                          \
     410             :                         impsmask(ISDENSE,TEST,64);                      \
     411             :                         break;                                          \
     412             :                 default: assert(0); break;                              \
     413             :                 }                                                       \
     414             :         } while (false)
     415             : 
     416             : /* scan select without imprints */
     417             : 
     418             : /* core scan select loop with & without candidates */
     419             : #define scanloop(NAME,canditer_next,TEST)                               \
     420             :         do {                                                            \
     421             :                 *algo = "select: " #NAME " " #TEST " (" #canditer_next ")"; \
     422             :                 if (BATcapacity(bn) < maximum) {                     \
     423             :                         for (p = 0; p < ci->ncand; p++) {         \
     424             :                                 o = canditer_next(ci);                  \
     425             :                                 v = src[o-hseq];                        \
     426             :                                 if (TEST) {                             \
     427             :                                         dst = buninsfix(bn, dst, cnt, o, \
     428             :                                                   (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
     429             :                                                          * (dbl) (ci->ncand-p) * 1.1 + 1024), \
     430             :                                                         maximum);       \
     431             :                                         if (dst == NULL) {              \
     432             :                                                 BBPreclaim(bn);         \
     433             :                                                 return BUN_NONE;        \
     434             :                                         }                               \
     435             :                                         cnt++;                          \
     436             :                                 }                                       \
     437             :                         }                                               \
     438             :                 } else {                                                \
     439             :                         for (p = 0; p < ci->ncand; p++) {         \
     440             :                                 o = canditer_next(ci);                  \
     441             :                                 v = src[o-hseq];                        \
     442             :                                 assert(cnt < BATcapacity(bn));               \
     443             :                                 dst[cnt] = o;                           \
     444             :                                 cnt += (TEST) != 0;                     \
     445             :                         }                                               \
     446             :                 }                                                       \
     447             :         } while (false)
     448             : 
     449             : /* argument list for type-specific core scan select function call */
     450             : #define scanargs                                                        \
     451             :         b, bi, ci, bn, tl, th, li, hi, equi, anti, lval, hval, lnil,    \
     452             :         cnt, b->hseqbase, dst, maximum, imprints, algo
     453             : 
     454             : #define PREVVALUEbte(x) ((x) - 1)
     455             : #define PREVVALUEsht(x) ((x) - 1)
     456             : #define PREVVALUEint(x) ((x) - 1)
     457             : #define PREVVALUElng(x) ((x) - 1)
     458             : #ifdef HAVE_HGE
     459             : #define PREVVALUEhge(x) ((x) - 1)
     460             : #endif
     461             : #define PREVVALUEoid(x) ((x) - 1)
     462             : #define PREVVALUEflt(x) nextafterf((x), -GDK_flt_max)
     463             : #define PREVVALUEdbl(x) nextafter((x), -GDK_dbl_max)
     464             : 
     465             : #define NEXTVALUEbte(x) ((x) + 1)
     466             : #define NEXTVALUEsht(x) ((x) + 1)
     467             : #define NEXTVALUEint(x) ((x) + 1)
     468             : #define NEXTVALUElng(x) ((x) + 1)
     469             : #ifdef HAVE_HGE
     470             : #define NEXTVALUEhge(x) ((x) + 1)
     471             : #endif
     472             : #define NEXTVALUEoid(x) ((x) + 1)
     473             : #define NEXTVALUEflt(x) nextafterf((x), GDK_flt_max)
     474             : #define NEXTVALUEdbl(x) nextafter((x), GDK_dbl_max)
     475             : 
     476             : #define MINVALUEbte     GDK_bte_min
     477             : #define MINVALUEsht     GDK_sht_min
     478             : #define MINVALUEint     GDK_int_min
     479             : #define MINVALUElng     GDK_lng_min
     480             : #ifdef HAVE_HGE
     481             : #define MINVALUEhge     GDK_hge_min
     482             : #endif
     483             : #define MINVALUEoid     GDK_oid_min
     484             : #define MINVALUEflt     GDK_flt_min
     485             : #define MINVALUEdbl     GDK_dbl_min
     486             : 
     487             : #define MAXVALUEbte     GDK_bte_max
     488             : #define MAXVALUEsht     GDK_sht_max
     489             : #define MAXVALUEint     GDK_int_max
     490             : #define MAXVALUElng     GDK_lng_max
     491             : #ifdef HAVE_HGE
     492             : #define MAXVALUEhge     GDK_hge_max
     493             : #endif
     494             : #define MAXVALUEoid     GDK_oid_max
     495             : #define MAXVALUEflt     GDK_flt_max
     496             : #define MAXVALUEdbl     GDK_dbl_max
     497             : 
     498             : #define choose(NAME, ISDENSE, TEST, TYPE)                               \
     499             :         do {                                                            \
     500             :                 if (imprints) {                                         \
     501             :                         bitswitch(ISDENSE, TEST, TYPE);                 \
     502             :                 } else {                                                \
     503             :                         scanloop(NAME, canditer_next##ISDENSE, TEST);   \
     504             :                 }                                                       \
     505             :         } while (false)
     506             : 
     507             : /* definition of type-specific core scan select function */
     508             : #define scanfunc(NAME, TYPE, ISDENSE)                                   \
     509             : static BUN                                                              \
     510             : NAME##_##TYPE(BAT *b, BATiter *bi, struct canditer *restrict ci, BAT *bn, \
     511             :               const TYPE *tl, const TYPE *th, bool li, bool hi,         \
     512             :               bool equi, bool anti, bool lval, bool hval,               \
     513             :               bool lnil, BUN cnt, const oid hseq, oid *restrict dst,    \
     514             :               BUN maximum, Imprints *imprints, const char **algo)       \
     515             : {                                                                       \
     516             :         TYPE vl = *tl;                                                  \
     517             :         TYPE vh = *th;                                                  \
     518             :         TYPE imp_min;                                                   \
     519             :         TYPE imp_max;                                                   \
     520             :         TYPE v;                                                         \
     521             :         const TYPE nil = TYPE##_nil;                                    \
     522             :         const TYPE minval = MINVALUE##TYPE;                             \
     523             :         const TYPE maxval = MAXVALUE##TYPE;                             \
     524             :         const TYPE *src = (const TYPE *) bi->base;                   \
     525             :         const TYPE *basesrc;                                            \
     526             :         oid o, w;                                                       \
     527             :         BUN p;                                                          \
     528             :         BUN pr_off = 0;                                                 \
     529             :         bat parent = 0;                                                 \
     530             :         (void) li;                                                      \
     531             :         (void) hi;                                                      \
     532             :         (void) lval;                                                    \
     533             :         (void) hval;                                                    \
     534             :         assert(li == !anti);                                            \
     535             :         assert(hi == !anti);                                            \
     536             :         assert(lval);                                                   \
     537             :         assert(hval);                                                   \
     538             :         if (imprints && imprints->imprints.parentid != b->batCacheid) {   \
     539             :                 parent = imprints->imprints.parentid;                        \
     540             :                 BAT *pbat = BBP_cache(parent);                          \
     541             :                 assert(pbat);                                           \
     542             :                 basesrc = (const TYPE *) Tloc(pbat, 0);                 \
     543             :                 pr_off = (BUN) (src - basesrc);                         \
     544             :         } else {                                                        \
     545             :                 basesrc = src;                                          \
     546             :         }                                                               \
     547             :         w = canditer_last(ci);                                          \
     548             :         if (equi) {                                                     \
     549             :                 assert(imprints == NULL);                               \
     550             :                 if (lnil)                                               \
     551             :                         scanloop(NAME, canditer_next##ISDENSE, is_##TYPE##_nil(v)); \
     552             :                 else                                                    \
     553             :                         scanloop(NAME, canditer_next##ISDENSE, v == vl); \
     554             :         } else if (anti) {                                              \
     555             :                 if (b->tnonil) {                                     \
     556             :                         choose(NAME, ISDENSE, (v <= vl || v >= vh), TYPE); \
     557             :                 } else {                                                \
     558             :                         choose(NAME, ISDENSE, !is_##TYPE##_nil(v) && (v <= vl || v >= vh), TYPE); \
     559             :                 }                                                       \
     560             :         } else if (b->tnonil && vl == minval) {                              \
     561             :                 choose(NAME, ISDENSE, v <= vh, TYPE);                        \
     562             :         } else if (vh == maxval) {                                      \
     563             :                 choose(NAME, ISDENSE, v >= vl, TYPE);                        \
     564             :         } else {                                                        \
     565             :                 choose(NAME, ISDENSE, v >= vl && v <= vh, TYPE);  \
     566             :         }                                                               \
     567             :         return cnt;                                                     \
     568             : }
     569             : 
     570             : static BUN
     571        2740 : fullscan_any(BAT *b, BATiter *bi, struct canditer *restrict ci, BAT *bn,
     572             :              const void *tl, const void *th,
     573             :              bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
     574             :              bool lnil, BUN cnt, const oid hseq, oid *restrict dst,
     575             :              BUN maximum, Imprints *imprints, const char **algo)
     576             : {
     577             :         const void *v;
     578        2740 :         const void *restrict nil = ATOMnilptr(b->ttype);
     579        2740 :         int (*cmp)(const void *, const void *) = ATOMcompare(b->ttype);
     580             :         oid o;
     581             :         BUN p;
     582             :         int c;
     583             : 
     584             :         (void) maximum;
     585             :         (void) imprints;
     586             :         (void) lnil;
     587             : 
     588        2740 :         if (equi) {
     589          93 :                 *algo = "select: fullscan equi";
     590          93 :                 if (ci->tpe == cand_dense) {
     591     1329905 :                         for (p = 0; p < ci->ncand; p++) {
     592     1329841 :                                 o = canditer_next_dense(ci);
     593     1329841 :                                 v = BUNtail(*bi, o-hseq);
     594     1329841 :                                 if ((*cmp)(tl, v) == 0) {
     595       21744 :                                         dst = buninsfix(bn, dst, cnt, o,
     596       21744 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     597       21744 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     598             :                                                         maximum);
     599       21847 :                                         if (dst == NULL) {
     600           0 :                                                 BBPreclaim(bn);
     601           0 :                                                 return BUN_NONE;
     602             :                                         }
     603       21847 :                                         cnt++;
     604             :                                 }
     605             :                         }
     606             :                 } else {
     607       22533 :                         for (p = 0; p < ci->ncand; p++) {
     608       22504 :                                 o = canditer_next(ci);
     609       22503 :                                 v = BUNtail(*bi, o-hseq);
     610       22503 :                                 if ((*cmp)(tl, v) == 0) {
     611       20152 :                                         dst = buninsfix(bn, dst, cnt, o,
     612       20152 :                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     613       20152 :                                                         * (dbl) (ci->ncand-p) * 1.1 + 1024),
     614             :                                                 maximum);
     615       20174 :                                         if (dst == NULL) {
     616           0 :                                                 BBPreclaim(bn);
     617           0 :                                                 return BUN_NONE;
     618             :                                         }
     619       20174 :                                         cnt++;
     620             :                                 }
     621             :                         }
     622             :                 }
     623        2647 :         } else if (anti) {
     624         352 :                 *algo = "select: fullscan anti";
     625         352 :                 if (ci->tpe == cand_dense) {
     626        4059 :                         for (p = 0; p < ci->ncand; p++) {
     627        3744 :                                 o = canditer_next_dense(ci);
     628        3744 :                                 v = BUNtail(*bi, o-hseq);
     629        3744 :                                 if ((nil == NULL || (*cmp)(v, nil) != 0) &&
     630        3744 :                                         ((lval &&
     631        3743 :                                         ((c = (*cmp)(tl, v)) > 0 ||
     632        1409 :                                         (!li && c == 0))) ||
     633        1409 :                                         (hval &&
     634        1409 :                                         ((c = (*cmp)(th, v)) < 0 ||
     635         397 :                                         (!hi && c == 0))))) {
     636        3347 :                                         dst = buninsfix(bn, dst, cnt, o,
     637        3347 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     638        3347 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     639             :                                                         maximum);
     640        3347 :                                         if (dst == NULL) {
     641           0 :                                                 BBPreclaim(bn);
     642           0 :                                                 return BUN_NONE;
     643             :                                         }
     644        3347 :                                         cnt++;
     645             :                                 }
     646             :                         }
     647             :                 } else {
     648       10344 :                         for (p = 0; p < ci->ncand; p++) {
     649       10306 :                                 o = canditer_next(ci);
     650       10307 :                                 v = BUNtail(*bi, o-hseq);
     651       10307 :                                 if ((nil == NULL || (*cmp)(v, nil) != 0) &&
     652       10307 :                                         ((lval &&
     653       10307 :                                         ((c = (*cmp)(tl, v)) > 0 ||
     654        3775 :                                         (!li && c == 0))) ||
     655        3775 :                                         (hval &&
     656        3775 :                                         ((c = (*cmp)(th, v)) < 0 ||
     657          46 :                                         (!hi && c == 0))))) {
     658       10261 :                                         dst = buninsfix(bn, dst, cnt, o,
     659       10261 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     660       10261 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     661             :                                                         maximum);
     662       10261 :                                         if (dst == NULL) {
     663           0 :                                                 BBPreclaim(bn);
     664           0 :                                                 return BUN_NONE;
     665             :                                         }
     666       10261 :                                         cnt++;
     667             :                                 }
     668             :                         }
     669             :                 }
     670             :         } else {
     671        2295 :                 *algo = "select: fullscan range";
     672        2295 :                 if (ci->tpe == cand_dense) {
     673      236749 :                         for (p = 0; p < ci->ncand; p++) {
     674      234493 :                                 o = canditer_next_dense(ci);
     675      234493 :                                 v = BUNtail(*bi, o-hseq);
     676      234493 :                                 if ((nil == NULL || (*cmp)(v, nil) != 0) &&
     677        4027 :                                         ((!lval ||
     678        4027 :                                         (c = cmp(tl, v)) < 0 ||
     679      201041 :                                         (li && c == 0)) &&
     680        3906 :                                         (!hval ||
     681        3906 :                                         (c = cmp(th, v)) > 0 ||
     682        3662 :                                         (hi && c == 0)))) {
     683      196418 :                                         dst = buninsfix(bn, dst, cnt, o,
     684      196418 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     685      196418 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     686             :                                                         maximum);
     687      196336 :                                         if (dst == NULL) {
     688           0 :                                                 BBPreclaim(bn);
     689           0 :                                                 return BUN_NONE;
     690             :                                         }
     691      196336 :                                         cnt++;
     692             :                                 }
     693             :                         }
     694             :                 } else {
     695     1045122 :                         for (p = 0; p < ci->ncand; p++) {
     696     1045078 :                                 o = canditer_next(ci);
     697     1053689 :                                 v = BUNtail(*bi, o-hseq);
     698     1053689 :                                 if ((nil == NULL || (*cmp)(v, nil) != 0) &&
     699         226 :                                         ((!lval ||
     700         226 :                                         (c = cmp(tl, v)) < 0 ||
     701     1050066 :                                         (li && c == 0)) &&
     702           0 :                                         (!hval ||
     703           0 :                                         (c = cmp(th, v)) > 0 ||
     704           0 :                                         (hi && c == 0)))) {
     705     1049998 :                                         dst = buninsfix(bn, dst, cnt, o,
     706     1049998 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     707     1049998 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     708             :                                                         maximum);
     709     1038449 :                                         if (dst == NULL) {
     710           0 :                                                 BBPreclaim(bn);
     711           0 :                                                 return BUN_NONE;
     712             :                                         }
     713     1038449 :                                         cnt++;
     714             :                                 }
     715             :                         }
     716             :                 }
     717             :         }
     718             :         return cnt;
     719             : }
     720             : 
     721             : static BUN
     722       20185 : fullscan_str(BAT *b, BATiter *bi, struct canditer *restrict ci, BAT *bn,
     723             :              const char *tl, const char *th,
     724             :              bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
     725             :              bool lnil, BUN cnt, const oid hseq, oid *restrict dst,
     726             :              BUN maximum, Imprints *imprints, const char **algo)
     727             : {
     728             :         var_t pos;
     729             :         BUN p;
     730             :         oid o;
     731             : 
     732       20185 :         if (!equi || !GDK_ELIMDOUBLES(b->tvheap))
     733        2709 :                 return fullscan_any(b, bi, ci, bn, tl, th, li, hi, equi, anti,
     734             :                                     lval, hval, lnil, cnt, hseq, dst,
     735             :                                     maximum, imprints, algo);
     736       17476 :         if ((pos = strLocate(b->tvheap, tl)) == 0) {
     737        1967 :                 *algo = "select: fullscan equi strelim (nomatch)";
     738        1967 :                 return 0;
     739             :         }
     740       15521 :         *algo = "select: fullscan equi strelim";
     741       15521 :         assert(pos >= GDK_VAROFFSET);
     742       15521 :         switch (bi->width) {
     743        1469 :         case 1: {
     744        1469 :                 const unsigned char *ptr = (const unsigned char *) bi->base;
     745        1469 :                 pos -= GDK_VAROFFSET;
     746        1469 :                 if (ci->tpe == cand_dense) {
     747     7240994 :                         for (p = 0; p < ci->ncand; p++) {
     748     7239890 :                                 o = canditer_next_dense(ci);
     749     7239890 :                                 if (ptr[o - hseq] == pos) {
     750     2812408 :                                         dst = buninsfix(bn, dst, cnt, o,
     751     2812408 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     752     2812408 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     753             :                                                         maximum);
     754     2812409 :                                         if (dst == NULL) {
     755           0 :                                                 BBPreclaim(bn);
     756           0 :                                                 return BUN_NONE;
     757             :                                         }
     758     2812409 :                                         cnt++;
     759             :                                 }
     760             :                         }
     761             :                 } else {
     762     5242229 :                         for (p = 0; p < ci->ncand; p++) {
     763     5241863 :                                 o = canditer_next(ci);
     764     5216882 :                                 if (ptr[o - hseq] == pos) {
     765     2525531 :                                         dst = buninsfix(bn, dst, cnt, o,
     766     2525531 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     767     2525531 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     768             :                                                         maximum);
     769     2550512 :                                         if (dst == NULL) {
     770           0 :                                                 BBPreclaim(bn);
     771           0 :                                                 return BUN_NONE;
     772             :                                         }
     773     2550512 :                                         cnt++;
     774             :                                 }
     775             :                         }
     776             :                 }
     777             :                 break;
     778             :         }
     779       14050 :         case 2: {
     780       14050 :                 const unsigned short *ptr = (const unsigned short *) bi->base;
     781       14050 :                 pos -= GDK_VAROFFSET;
     782       14050 :                 if (ci->tpe == cand_dense) {
     783      584869 :                         for (p = 0; p < ci->ncand; p++) {
     784      574664 :                                 o = canditer_next_dense(ci);
     785      574664 :                                 if (ptr[o - hseq] == pos) {
     786       12460 :                                         dst = buninsfix(bn, dst, cnt, o,
     787       12460 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     788       12460 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     789             :                                                         maximum);
     790       12480 :                                         if (dst == NULL) {
     791           0 :                                                 BBPreclaim(bn);
     792           0 :                                                 return BUN_NONE;
     793             :                                         }
     794       12480 :                                         cnt++;
     795             :                                 }
     796             :                         }
     797             :                 } else {
     798      228752 :                         for (p = 0; p < ci->ncand; p++) {
     799      224881 :                                 o = canditer_next(ci);
     800      224879 :                                 if (ptr[o - hseq] == pos) {
     801        4400 :                                         dst = buninsfix(bn, dst, cnt, o,
     802        4400 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     803        4400 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     804             :                                                         maximum);
     805        4408 :                                         if (dst == NULL) {
     806           0 :                                                 BBPreclaim(bn);
     807           0 :                                                 return BUN_NONE;
     808             :                                         }
     809        4408 :                                         cnt++;
     810             :                                 }
     811             :                         }
     812             :                 }
     813             :                 break;
     814             :         }
     815             : #if SIZEOF_VAR_T == 8
     816           0 :         case 4: {
     817           0 :                 const unsigned int *ptr = (const unsigned int *) bi->base;
     818           0 :                 if (ci->tpe == cand_dense) {
     819           0 :                         for (p = 0; p < ci->ncand; p++) {
     820           0 :                                 o = canditer_next_dense(ci);
     821           0 :                                 if (ptr[o - hseq] == pos) {
     822           0 :                                         dst = buninsfix(bn, dst, cnt, o,
     823           0 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     824           0 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     825             :                                                         maximum);
     826           0 :                                         if (dst == NULL) {
     827           0 :                                                 BBPreclaim(bn);
     828           0 :                                                 return BUN_NONE;
     829             :                                         }
     830           0 :                                         cnt++;
     831             :                                 }
     832             :                         }
     833             :                 } else {
     834           0 :                         for (p = 0; p < ci->ncand; p++) {
     835           0 :                                 o = canditer_next(ci);
     836           0 :                                 if (ptr[o - hseq] == pos) {
     837           0 :                                         dst = buninsfix(bn, dst, cnt, o,
     838           0 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     839           0 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     840             :                                                         maximum);
     841           0 :                                         if (dst == NULL) {
     842           0 :                                                 BBPreclaim(bn);
     843           0 :                                                 return BUN_NONE;
     844             :                                         }
     845           0 :                                         cnt++;
     846             :                                 }
     847             :                         }
     848             :                 }
     849             :                 break;
     850             :         }
     851             : #endif
     852           2 :         default: {
     853           2 :                 const var_t *ptr = (const var_t *) bi->base;
     854           2 :                 if (ci->tpe == cand_dense) {
     855          22 :                         for (p = 0; p < ci->ncand; p++) {
     856          20 :                                 o = canditer_next_dense(ci);
     857          20 :                                 if (ptr[o - hseq] == pos) {
     858           2 :                                         dst = buninsfix(bn, dst, cnt, o,
     859           2 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     860           2 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     861             :                                                         maximum);
     862           2 :                                         if (dst == NULL) {
     863           0 :                                                 BBPreclaim(bn);
     864           0 :                                                 return BUN_NONE;
     865             :                                         }
     866           2 :                                         cnt++;
     867             :                                 }
     868             :                         }
     869             :                 } else {
     870           0 :                         for (p = 0; p < ci->ncand; p++) {
     871           0 :                                 o = canditer_next(ci);
     872           0 :                                 if (ptr[o - hseq] == pos) {
     873           0 :                                         dst = buninsfix(bn, dst, cnt, o,
     874           0 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     875           0 :                                                                * (dbl) (ci->ncand-p) * 1.1 + 1024),
     876             :                                                         maximum);
     877           0 :                                         if (dst == NULL) {
     878           0 :                                                 BBPreclaim(bn);
     879           0 :                                                 return BUN_NONE;
     880             :                                         }
     881           0 :                                         cnt++;
     882             :                                 }
     883             :                         }
     884             :                 }
     885             :                 break;
     886             :         }
     887             :         }
     888             :         return cnt;
     889             : }
     890             : 
     891             : /* scan select type switch */
     892             : #ifdef HAVE_HGE
     893             : #define scanfunc_hge(NAME, ISDENSE)             \
     894             :         scanfunc(NAME, hge, ISDENSE)
     895             : #else
     896             : #define scanfunc_hge(NAME, ISDENSE)
     897             : #endif
     898             : #define scan_sel(NAME, ISDENSE)                 \
     899             :         scanfunc(NAME, bte, ISDENSE)            \
     900             :         scanfunc(NAME, sht, ISDENSE)            \
     901             :         scanfunc(NAME, int, ISDENSE)            \
     902             :         scanfunc(NAME, flt, ISDENSE)            \
     903             :         scanfunc(NAME, dbl, ISDENSE)            \
     904             :         scanfunc(NAME, lng, ISDENSE)            \
     905             :         scanfunc_hge(NAME, ISDENSE)
     906             : 
     907             : /* scan/imprints select */
     908    10200248 : scan_sel(fullscan, )
     909   747519406 : scan_sel(densescan, _dense)
     910             : 
     911             : 
     912             : static BAT *
     913      136786 : scanselect(BAT *b, BATiter *bi, struct canditer *restrict ci, BAT *bn,
     914             :            const void *tl, const void *th,
     915             :            bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
     916             :            bool lnil, BUN maximum, Imprints *imprints, const char **algo)
     917             : {
     918             : #ifndef NDEBUG
     919             :         int (*cmp)(const void *, const void *);
     920             : #endif
     921             :         int t;
     922             :         BUN cnt = 0;
     923             :         oid *restrict dst;
     924             : 
     925      136786 :         assert(b != NULL);
     926      136786 :         assert(bn != NULL);
     927      136786 :         assert(bn->ttype == TYPE_oid);
     928      136786 :         assert(!lval || tl != NULL);
     929      136786 :         assert(!hval || th != NULL);
     930      136786 :         assert(!equi || (li && hi && !anti));
     931      136786 :         assert(!anti || lval || hval);
     932      136786 :         assert(b->ttype != TYPE_void || equi || b->tnonil);
     933             : 
     934             : #ifndef NDEBUG
     935      136786 :         cmp = ATOMcompare(b->ttype);
     936             : #endif
     937             : 
     938      136786 :         assert(!lval || !hval || (*cmp)(tl, th) <= 0);
     939             : 
     940      136781 :         dst = (oid *) Tloc(bn, 0);
     941             : 
     942      136781 :         t = ATOMbasetype(b->ttype);
     943             : 
     944             :         /* call type-specific core scan select function */
     945      136781 :         switch (t) {
     946       31859 :         case TYPE_bte:
     947       31859 :                 if (ci->tpe == cand_dense)
     948       30613 :                         cnt = densescan_bte(scanargs);
     949             :                 else
     950        1246 :                         cnt = fullscan_bte(scanargs);
     951             :                 break;
     952        4458 :         case TYPE_sht:
     953        4458 :                 if (ci->tpe == cand_dense)
     954        4138 :                         cnt = densescan_sht(scanargs);
     955             :                 else
     956         320 :                         cnt = fullscan_sht(scanargs);
     957             :                 break;
     958       75605 :         case TYPE_int:
     959       75605 :                 if (ci->tpe == cand_dense)
     960       47601 :                         cnt = densescan_int(scanargs);
     961             :                 else
     962       28004 :                         cnt = fullscan_int(scanargs);
     963             :                 break;
     964          22 :         case TYPE_flt:
     965          22 :                 if (ci->tpe == cand_dense)
     966          21 :                         cnt = densescan_flt(scanargs);
     967             :                 else
     968           1 :                         cnt = fullscan_flt(scanargs);
     969             :                 break;
     970         127 :         case TYPE_dbl:
     971         127 :                 if (ci->tpe == cand_dense)
     972         126 :                         cnt = densescan_dbl(scanargs);
     973             :                 else
     974           1 :                         cnt = fullscan_dbl(scanargs);
     975             :                 break;
     976        4304 :         case TYPE_lng:
     977        4304 :                 if (ci->tpe == cand_dense)
     978        4270 :                         cnt = densescan_lng(scanargs);
     979             :                 else
     980          34 :                         cnt = fullscan_lng(scanargs);
     981             :                 break;
     982             : #ifdef HAVE_HGE
     983         193 :         case TYPE_hge:
     984         193 :                 if (ci->tpe == cand_dense)
     985         184 :                         cnt = densescan_hge(scanargs);
     986             :                 else
     987           9 :                         cnt = fullscan_hge(scanargs);
     988             :                 break;
     989             : #endif
     990       20193 :         case TYPE_str:
     991       20193 :                 cnt = fullscan_str(scanargs);
     992       20229 :                 break;
     993          20 :         default:
     994          20 :                 cnt = fullscan_any(scanargs);
     995          20 :                 break;
     996             :         }
     997      137118 :         if (cnt == BUN_NONE) {
     998             :                 return NULL;
     999             :         }
    1000      137118 :         assert(bn->batCapacity >= cnt);
    1001             : 
    1002      137118 :         BATsetcount(bn, cnt);
    1003      137350 :         bn->tsorted = true;
    1004      137350 :         bn->trevsorted = bn->batCount <= 1;
    1005      137350 :         bn->tkey = true;
    1006      137350 :         bn->tseqbase = cnt == 0 ? 0 : cnt == 1 || cnt == b->batCount ? b->hseqbase : oid_nil;
    1007             : 
    1008      137350 :         return bn;
    1009             : }
    1010             : 
    1011             : /* Normalize the variables li, hi, lval, hval, possibly changing anti
    1012             :  * in the process.  This works for all (and only) numeric types.
    1013             :  *
    1014             :  * Note that the expression x < v is equivalent to x <= v' where v' is
    1015             :  * the next smaller value in the domain of v (similarly for x > v).
    1016             :  * Also note that for floating point numbers there actually is such a
    1017             :  * value.  In fact, there is a function in standard C that calculates
    1018             :  * that value.
    1019             :  *
    1020             :  * The result of this macro is:
    1021             :  * li == !anti, hi == !anti, lval == true, hval == true
    1022             :  * This means that all ranges that we check for are closed ranges.  If
    1023             :  * a range is one-sided, we fill in the minimum resp. maximum value in
    1024             :  * the domain so that we create a closed range. */
    1025             : #define NORMALIZE(TYPE)                                                 \
    1026             :         do {                                                            \
    1027             :                 if (anti && li) {                                       \
    1028             :                         /* -inf < x < vl === -inf < x <= vl-1 */    \
    1029             :                         if (*(TYPE*)tl == MINVALUE##TYPE) {             \
    1030             :                                 /* -inf < x < MIN || *th <[=] x < +inf */ \
    1031             :                                 /* degenerates into half range */       \
    1032             :                                 /* *th <[=] x < +inf */                   \
    1033             :                                 anti = false;                           \
    1034             :                                 tl = th;                                \
    1035             :                                 li = !hi;                               \
    1036             :                                 hval = false;                           \
    1037             :                                 /* further dealt with below */          \
    1038             :                         } else {                                        \
    1039             :                                 vl.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)tl); \
    1040             :                                 tl = &vl.v_##TYPE;                  \
    1041             :                                 li = false;                             \
    1042             :                         }                                               \
    1043             :                 }                                                       \
    1044             :                 if (anti && hi) {                                       \
    1045             :                         /* vl < x < +inf === vl+1 <= x < +inf */    \
    1046             :                         if (*(TYPE*)th == MAXVALUE##TYPE) {             \
    1047             :                                 /* -inf < x <[=] *tl || MAX > x > +inf */ \
    1048             :                                 /* degenerates into half range */       \
    1049             :                                 /* -inf < x <[=] *tl */                   \
    1050             :                                 anti = false;                           \
    1051             :                                 if (tl == &vl.v_##TYPE) {           \
    1052             :                                         vh.v_##TYPE = vl.v_##TYPE;      \
    1053             :                                         th = &vh.v_##TYPE;          \
    1054             :                                 } else {                                \
    1055             :                                         th = tl;                        \
    1056             :                                 }                                       \
    1057             :                                 hi = !li;                               \
    1058             :                                 lval = false;                           \
    1059             :                                 /* further dealt with below */          \
    1060             :                         } else {                                        \
    1061             :                                 vh.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)th); \
    1062             :                                 th = &vh.v_##TYPE;                  \
    1063             :                                 hi = false;                             \
    1064             :                         }                                               \
    1065             :                 }                                                       \
    1066             :                 if (!anti) {                                            \
    1067             :                         if (lval) {                                     \
    1068             :                                 /* range bounded on left */             \
    1069             :                                 if (!li) {                              \
    1070             :                                         /* open range on left */        \
    1071             :                                         if (*(TYPE*)tl == MAXVALUE##TYPE) \
    1072             :                                                 return BATdense(0, 0, 0); \
    1073             :                                         /* vl < x === vl+1 <= x */        \
    1074             :                                         vl.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)tl); \
    1075             :                                         li = true;                      \
    1076             :                                         tl = &vl.v_##TYPE;          \
    1077             :                                 }                                       \
    1078             :                         } else {                                        \
    1079             :                                 /* -inf, i.e. smallest value */         \
    1080             :                                 vl.v_##TYPE = MINVALUE##TYPE;           \
    1081             :                                 li = true;                              \
    1082             :                                 tl = &vl.v_##TYPE;                  \
    1083             :                                 lval = true;                            \
    1084             :                         }                                               \
    1085             :                         if (hval) {                                     \
    1086             :                                 /* range bounded on right */            \
    1087             :                                 if (!hi) {                              \
    1088             :                                         /* open range on right */       \
    1089             :                                         if (*(TYPE*)th == MINVALUE##TYPE) \
    1090             :                                                 return BATdense(0, 0, 0); \
    1091             :                                         /* x < vh === x <= vh-1 */        \
    1092             :                                         vh.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)th); \
    1093             :                                         hi = true;                      \
    1094             :                                         th = &vh.v_##TYPE;          \
    1095             :                                 }                                       \
    1096             :                         } else {                                        \
    1097             :                                 /* +inf, i.e. largest value */          \
    1098             :                                 vh.v_##TYPE = MAXVALUE##TYPE;           \
    1099             :                                 hi = true;                              \
    1100             :                                 th = &vh.v_##TYPE;                  \
    1101             :                                 hval = true;                            \
    1102             :                         }                                               \
    1103             :                         if (*(TYPE*)tl > *(TYPE*)th)                 \
    1104             :                                 return BATdense(0, 0, 0);               \
    1105             :                 }                                                       \
    1106             :                 assert(lval);                                           \
    1107             :                 assert(hval);                                           \
    1108             :                 assert(li != anti);                                     \
    1109             :                 assert(hi != anti);                                     \
    1110             :                 /* if anti is set, we can now check */                  \
    1111             :                 /* (x <= *tl || x >= *th) && x != nil */          \
    1112             :                 /* if equi==true, the check is x != *tl && x != nil */  \
    1113             :                 /* if anti is not set, we can check just */             \
    1114             :                 /* *tl <= x && x <= *th */                                \
    1115             :                 /* if equi==true, the check is x == *tl */              \
    1116             :                 /* note that this includes the check for != nil */      \
    1117             :                 /* in the case where equi==true, the check is x == *tl */ \
    1118             :         } while (false)
    1119             : 
    1120             : /* generic range select
    1121             :  *
    1122             :  * Return a BAT with the OID values of b for qualifying tuples.  The
    1123             :  * return BAT is sorted (i.e. in the same order as the input BAT).
    1124             :  *
    1125             :  * If s is non-NULL, it is a list of candidates.  s must be sorted.
    1126             :  *
    1127             :  * tl may not be NULL, li, hi, and anti must be either 0 or 1.
    1128             :  *
    1129             :  * If th is NULL, hi is ignored.
    1130             :  *
    1131             :  * If anti is 0, qualifying tuples are those whose value is between tl
    1132             :  * and th (as in x >[=] tl && x <[=] th, where equality depends on li
    1133             :  * and hi--so if tl > th, nothing will be returned).  If li or hi is
    1134             :  * 1, the respective boundary is inclusive, otherwise exclusive.  If
    1135             :  * th is NULL it is taken to be equal to tl, turning this into an
    1136             :  * equi- or point-select.  Note that for a point select to return
    1137             :  * anything, li (and hi if th was not NULL) must be 1.  There is a
    1138             :  * special case if tl is nil and th is NULL.  This is the only way to
    1139             :  * select for nil values.
    1140             :  *
    1141             :  * If anti is 1, the result is the complement of what the result would
    1142             :  * be if anti were 0, except that nils are filtered out.
    1143             :  *
    1144             :  * In brief:
    1145             :  * - if tl==nil and th==NULL and anti==0, return all nils (only way to
    1146             :  *   get nils);
    1147             :  * - it tl==nil and th==nil, return all but nils;
    1148             :  * - if tl==nil and th!=NULL, no lower bound;
    1149             :  * - if th==NULL or tl==th, point (equi) select;
    1150             :  * - if th==nil, no upper bound
    1151             :  *
    1152             :  * A complete breakdown of the various arguments follows.  Here, v, v1
    1153             :  * and v2 are values from the appropriate domain, and
    1154             :  * v != nil, v1 != nil, v2 != nil, v1 < v2.
    1155             :  *      tl      th      li      hi      anti    result list of OIDs for values
    1156             :  *      -----------------------------------------------------------------
    1157             :  *      nil     NULL    true    ignored false   x == nil (only way to get nil)
    1158             :  *      nil     NULL    false   ignored false   NOTHING
    1159             :  *      nil     NULL    ignored ignored true    x != nil
    1160             :  *      nil     nil     ignored ignored false   x != nil
    1161             :  *      nil     nil     ignored ignored true    NOTHING
    1162             :  *      nil     v       ignored false   false   x < v
    1163             :  *      nil     v       ignored true    false   x <= v
    1164             :  *      nil     v       ignored false   true    x >= v
    1165             :  *      nil     v       ignored true    true    x > v
    1166             :  *      v       nil     false   ignored false   x > v
    1167             :  *      v       nil     true    ignored false   x >= v
    1168             :  *      v       nil     false   ignored true    x <= v
    1169             :  *      v       nil     true    ignored true    x < v
    1170             :  *      v       NULL    false   ignored false   NOTHING
    1171             :  *      v       NULL    true    ignored false   x == v
    1172             :  *      v       NULL    false   ignored true    x != nil
    1173             :  *      v       NULL    true    ignored true    x != v
    1174             :  *      v       v       false   false   false   NOTHING
    1175             :  *      v       v       true    false   false   NOTHING
    1176             :  *      v       v       false   true    false   NOTHING
    1177             :  *      v       v       true    true    false   x == v
    1178             :  *      v       v       false   false   true    x != nil
    1179             :  *      v       v       true    false   true    x != nil
    1180             :  *      v       v       false   true    true    x != nil
    1181             :  *      v       v       true    true    true    x != v
    1182             :  *      v1      v2      false   false   false   v1 < x < v2
    1183             :  *      v1      v2      true    false   false   v1 <= x < v2
    1184             :  *      v1      v2      false   true    false   v1 < x <= v2
    1185             :  *      v1      v2      true    true    false   v1 <= x <= v2
    1186             :  *      v1      v2      false   false   true    x <= v1 or x >= v2
    1187             :  *      v1      v2      true    false   true    x < v1 or x >= v2
    1188             :  *      v1      v2      false   true    true    x <= v1 or x > v2
    1189             :  *      v1      v2      true    true    true    x < v1 or x > v2
    1190             :  *      v2      v1      ignored ignored false   NOTHING
    1191             :  *      v2      v1      ignored ignored true    x != nil
    1192             :  */
    1193             : BAT *
    1194     2881844 : BATselect(BAT *b, BAT *s, const void *tl, const void *th,
    1195             :              bool li, bool hi, bool anti)
    1196             : {
    1197             :         bool lval;              /* low value used for comparison */
    1198             :         bool lnil;              /* low value is nil */
    1199             :         bool hval;              /* high value used for comparison */
    1200             :         bool equi;              /* select for single value (not range) */
    1201             :         bool wanthash = false;  /* use hash (equi must be true) */
    1202             :         bool havehash = false;  /* we have a hash (and the hashlock) */
    1203             :         bool phash = false;     /* use hash on parent BAT (if view) */
    1204             :         int t;                  /* data type */
    1205             :         bat parent;             /* b's parent bat (if b is a view) */
    1206             :         const void *nil;
    1207             :         BAT *bn, *tmp;
    1208             :         struct canditer ci;
    1209             :         BUN estimate = BUN_NONE, maximum = BUN_NONE;
    1210             :         oid vwl = 0, vwh = 0;
    1211             :         lng vwo = 0;
    1212             :         Heap *oidxh = NULL;
    1213             :         const char *algo;
    1214             :         union {
    1215             :                 bte v_bte;
    1216             :                 sht v_sht;
    1217             :                 int v_int;
    1218             :                 lng v_lng;
    1219             : #ifdef HAVE_HGE
    1220             :                 hge v_hge;
    1221             : #endif
    1222             :                 flt v_flt;
    1223             :                 dbl v_dbl;
    1224             :                 oid v_oid;
    1225             :         } vl, vh;
    1226     2881844 :         lng t0 = GDKusec();
    1227             : 
    1228     2881875 :         BATcheck(b, NULL);
    1229     2881875 :         if (tl == NULL) {
    1230           0 :                 GDKerror("tl value required");
    1231           0 :                 return NULL;
    1232             :         }
    1233             : 
    1234     2881875 :         if (s && s->ttype != TYPE_msk && !BATtordered(s)) {
    1235           0 :                 GDKerror("invalid argument: s must be sorted.\n");
    1236           0 :                 return NULL;
    1237             :         }
    1238             : 
    1239     2881875 :         if (canditer_init(&ci, b, s) == 0) {
    1240             :                 /* trivially empty result */
    1241     1583822 :                 MT_thread_setalgorithm("select: trivially empty");
    1242     1586868 :                 bn = BATdense(0, 0, 0);
    1243     1584105 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1244             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1245             :                           " (" LLFMT " usec): "
    1246             :                           "trivially empty\n",
    1247             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1248             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1249     1584105 :                 return bn;
    1250             :         }
    1251             : 
    1252     1296513 :         t = b->ttype;
    1253     1296513 :         nil = ATOMnilptr(t);
    1254             :         /* can we use the base type? */
    1255     1296513 :         t = ATOMbasetype(t);
    1256     1296513 :         lnil = ATOMcmp(t, tl, nil) == 0; /* low value = nil? */
    1257             : 
    1258     1296589 :         if (!lnil && th != NULL && (!li || !hi) && !anti && ATOMcmp(t, tl, th) == 0) {
    1259             :                 /* upper and lower bound of range are equal and we
    1260             :                  * want an interval that's open on at least one
    1261             :                  * side */
    1262          45 :                 MT_thread_setalgorithm("select: empty interval");
    1263          45 :                 bn = BATdense(0, 0, 0);
    1264          45 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1265             :                           ",s=" ALGOOPTBATFMT ",li=%d,hi=%d,anti=%d -> "
    1266             :                           ALGOOPTBATFMT " (" LLFMT " usec): "
    1267             :                           "empty interval\n",
    1268             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1269             :                           li, hi, anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
    1270          45 :                 return bn;
    1271             :         }
    1272             : 
    1273     1296560 :         lval = !lnil || th == NULL;      /* low value used for comparison */
    1274     1296560 :         equi = th == NULL || (lval && ATOMcmp(t, tl, th) == 0); /* point select? */
    1275     1296596 :         if (equi) {
    1276     1238447 :                 assert(lval);
    1277     1238447 :                 if (th == NULL)
    1278             :                         hi = li;
    1279             :                 th = tl;
    1280             :                 hval = true;
    1281             :         } else {
    1282       58149 :                 hval = ATOMcmp(t, th, nil) != 0;
    1283             :         }
    1284     1294870 :         if (anti) {
    1285       70662 :                 if (lval != hval) {
    1286             :                         /* one of the end points is nil and the other
    1287             :                          * isn't: swap sub-ranges */
    1288             :                         const void *tv;
    1289             :                         bool ti;
    1290          46 :                         assert(!equi);
    1291             :                         ti = li;
    1292          46 :                         li = !hi;
    1293          46 :                         hi = !ti;
    1294             :                         tv = tl;
    1295             :                         tl = th;
    1296             :                         th = tv;
    1297             :                         ti = lval;
    1298             :                         lval = hval;
    1299             :                         hval = ti;
    1300          46 :                         lnil = ATOMcmp(t, tl, nil) == 0;
    1301             :                         anti = false;
    1302          46 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1303             :                                   ",s=" ALGOOPTBATFMT ",anti=%d "
    1304             :                                   "anti: switch ranges...\n",
    1305             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1306             :                                   anti);
    1307       70616 :                 } else if (!lval && !hval) {
    1308             :                         /* antiselect for nil-nil range: all non-nil
    1309             :                          * values are in range; we must return all
    1310             :                          * other non-nil values, i.e. nothing */
    1311          18 :                         MT_thread_setalgorithm("select: anti: nil-nil range, nonil");
    1312          18 :                         bn = BATdense(0, 0, 0);
    1313          18 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1314             :                                   ",s=" ALGOOPTBATFMT ",anti=%d -> "
    1315             :                                   ALGOOPTBATFMT " (" LLFMT " usec): "
    1316             :                                   "anti: nil-nil range, nonil\n",
    1317             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1318             :                                   anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
    1319          18 :                         return bn;
    1320       70598 :                 } else if (equi && lnil) {
    1321             :                         /* antiselect for nil value: turn into range
    1322             :                          * select for nil-nil range (i.e. everything
    1323             :                          * but nil) */
    1324             :                         equi = false;
    1325             :                         anti = false;
    1326             :                         lval = false;
    1327             :                         hval = false;
    1328        5189 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1329             :                                   ",s=" ALGOOPTBATFMT ",anti=0 "
    1330             :                                   "anti-nil...\n",
    1331             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s));
    1332       65409 :                 } else if (equi) {
    1333             :                         equi = false;
    1334       35464 :                         if (!(li && hi)) {
    1335             :                                 /* antiselect for nothing: turn into
    1336             :                                  * range select for nil-nil range
    1337             :                                  * (i.e. everything but nil) */
    1338             :                                 anti = false;
    1339             :                                 lval = false;
    1340             :                                 hval = false;
    1341          12 :                                 TRC_DEBUG(ALGO, "b="
    1342             :                                           ALGOBATFMT ",s="
    1343             :                                           ALGOOPTBATFMT ",anti=0 "
    1344             :                                           "anti-nothing...\n",
    1345             :                                           ALGOBATPAR(b),
    1346             :                                           ALGOOPTBATPAR(s));
    1347             :                         }
    1348       29945 :                 } else if (ATOMcmp(t, tl, th) > 0) {
    1349             :                         /* empty range: turn into range select for
    1350             :                          * nil-nil range (i.e. everything but nil) */
    1351             :                         equi = false;
    1352             :                         anti = false;
    1353             :                         lval = false;
    1354             :                         hval = false;
    1355          27 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1356             :                                   ",s=" ALGOOPTBATFMT ",anti=0 "
    1357             :                                   "anti-nil...\n",
    1358             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s));
    1359             :                 }
    1360             :         }
    1361             : 
    1362             :         /* if equi set, then so are both lval and hval */
    1363     1294850 :         assert(!equi || (lval && hval));
    1364             : 
    1365     1294850 :         if (hval && (equi ? !li || !hi : ATOMcmp(t, tl, th) > 0)) {
    1366             :                 /* empty range */
    1367          48 :                 MT_thread_setalgorithm("select: empty range");
    1368          48 :                 bn = BATdense(0, 0, 0);
    1369          48 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1370             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1371             :                           " (" LLFMT " usec) "
    1372             :                           "empty range\n",
    1373             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1374             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1375          48 :                 return bn;
    1376             :         }
    1377     1294861 :         if (equi && lnil && b->tnonil) {
    1378             :                 /* return all nils, but there aren't any */
    1379        6491 :                 MT_thread_setalgorithm("select: equi-nil, nonil");
    1380        6491 :                 bn = BATdense(0, 0, 0);
    1381        6491 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1382             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1383             :                           " (" LLFMT " usec): "
    1384             :                           "equi-nil, nonil\n",
    1385             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1386             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1387        6491 :                 return bn;
    1388             :         }
    1389             : 
    1390     1288370 :         if (!equi && !lval && !hval && lnil && b->tnonil) {
    1391             :                 /* return all non-nils from a BAT that doesn't have
    1392             :                  * any: i.e. return everything */
    1393         308 :                 MT_thread_setalgorithm("select: everything, nonil");
    1394         312 :                 bn = canditer_slice(&ci, 0, ci.ncand);
    1395         310 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1396             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1397             :                           " (" LLFMT " usec): "
    1398             :                           "everything, nonil\n",
    1399             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1400             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1401         310 :                 return bn;
    1402             :         }
    1403             : 
    1404     1288062 :         if (anti) {
    1405             :                 const ValRecord *prop;
    1406             :                 int c;
    1407             : 
    1408       65352 :                 MT_lock_set(&b->theaplock);
    1409       65538 :                 if ((prop = BATgetprop_nolock(b, GDK_MIN_VALUE)) != NULL) {
    1410         299 :                         c = ATOMcmp(t, tl, VALptr(prop));
    1411         299 :                         if (c < 0 || (li && c == 0)) {
    1412         183 :                                 if ((prop = BATgetprop_nolock(b, GDK_MAX_VALUE)) != NULL) {
    1413         183 :                                         c = ATOMcmp(t, th, VALptr(prop));
    1414         183 :                                         if (c > 0 || (hi && c == 0)) {
    1415          48 :                                                 MT_lock_unset(&b->theaplock);
    1416             :                                                 /* tl..th range fully
    1417             :                                                  * inside MIN..MAX
    1418             :                                                  * range of values in
    1419             :                                                  * BAT, so nothing
    1420             :                                                  * left over for
    1421             :                                                  * anti */
    1422          48 :                                                 MT_thread_setalgorithm("select: nothing, out of range");
    1423          48 :                                                 bn = BATdense(0, 0, 0);
    1424          48 :                                                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1425             :                                                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1426             :                                                           " (" LLFMT " usec): "
    1427             :                                                           "nothing, out of range\n",
    1428             :                                                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
    1429          48 :                                                 return bn;
    1430             :                                         }
    1431             :                                 }
    1432             :                         }
    1433             :                 }
    1434       65402 :                 MT_lock_unset(&b->theaplock);
    1435     1222710 :         } else if (!equi || !lnil) {
    1436             :                 const ValRecord *prop;
    1437             :                 int c;
    1438             : 
    1439     1215538 :                 if (hval) {
    1440     1191034 :                         MT_lock_set(&b->theaplock);
    1441     1193717 :                         if ((prop = BATgetprop_nolock(b, GDK_MIN_VALUE)) != NULL) {
    1442       27909 :                                 c = ATOMcmp(t, th, VALptr(prop));
    1443       27909 :                                 if (c < 0 || (!hi && c == 0)) {
    1444         505 :                                         MT_lock_unset(&b->theaplock);
    1445             :                                         /* smallest value in BAT larger than
    1446             :                                          * what we're looking for */
    1447         505 :                                         MT_thread_setalgorithm("select: nothing, out of range");
    1448         505 :                                         bn = BATdense(0, 0, 0);
    1449         501 :                                         TRC_DEBUG(ALGO, "b="
    1450             :                                                   ALGOBATFMT ",s="
    1451             :                                                   ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1452             :                                                   " (" LLFMT " usec): "
    1453             :                                                   "nothing, out of range\n",
    1454             :                                                   ALGOBATPAR(b),
    1455             :                                                   ALGOOPTBATPAR(s), anti,
    1456             :                                                   ALGOOPTBATPAR(bn), GDKusec() - t0);
    1457         501 :                                         return bn;
    1458             :                                 }
    1459             :                         }
    1460     1192593 :                         MT_lock_unset(&b->theaplock);
    1461             :                 }
    1462     1217448 :                 if (lval) {
    1463     1207744 :                         MT_lock_set(&b->theaplock);
    1464     1208003 :                         if ((prop = BATgetprop_nolock(b, GDK_MAX_VALUE)) != NULL) {
    1465       26756 :                                 c = ATOMcmp(t, tl, VALptr(prop));
    1466       26756 :                                 if (c > 0 || (!li && c == 0)) {
    1467         443 :                                         MT_lock_unset(&b->theaplock);
    1468             :                                         /* largest value in BAT smaller than
    1469             :                                          * what we're looking for */
    1470         443 :                                         MT_thread_setalgorithm("select: nothing, out of range");
    1471         443 :                                         bn = BATdense(0, 0, 0);
    1472         440 :                                         TRC_DEBUG(ALGO, "b="
    1473             :                                                   ALGOBATFMT ",s="
    1474             :                                                   ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1475             :                                                   " (" LLFMT " usec): "
    1476             :                                                   "nothing, out of range\n",
    1477             :                                                   ALGOBATPAR(b),
    1478             :                                                   ALGOOPTBATPAR(s), anti,
    1479             :                                                   ALGOOPTBATPAR(bn), GDKusec() - t0);
    1480         440 :                                         return bn;
    1481             :                                 }
    1482             :                         }
    1483     1207144 :                         MT_lock_unset(&b->theaplock);
    1484             :                 }
    1485             :         }
    1486             : 
    1487     1288562 :         if (ATOMtype(b->ttype) == TYPE_oid) {
    1488       31226 :                 NORMALIZE(oid);
    1489             :         } else {
    1490     1257336 :                 switch (t) {
    1491      138839 :                 case TYPE_bte:
    1492      138839 :                         NORMALIZE(bte);
    1493             :                         break;
    1494       11276 :                 case TYPE_sht:
    1495       11276 :                         NORMALIZE(sht);
    1496             :                         break;
    1497      929126 :                 case TYPE_int:
    1498      929126 :                         NORMALIZE(int);
    1499             :                         break;
    1500        4154 :                 case TYPE_lng:
    1501        4154 :                         NORMALIZE(lng);
    1502             :                         break;
    1503             : #ifdef HAVE_HGE
    1504         488 :                 case TYPE_hge:
    1505         488 :                         NORMALIZE(hge);
    1506             :                         break;
    1507             : #endif
    1508         139 :                 case TYPE_flt:
    1509         139 :                         NORMALIZE(flt);
    1510             :                         break;
    1511         501 :                 case TYPE_dbl:
    1512         501 :                         NORMALIZE(dbl);
    1513             :                         break;
    1514             :                 }
    1515     1288554 :         }
    1516             : 
    1517     1288554 :         parent = VIEWtparent(b);
    1518     1082719 :         assert(parent >= 0);
    1519             :         /* use hash only for equi-join, and then only if b or its
    1520             :          * parent already has a hash, or if b or its parent is
    1521             :          * persistent and the total size wouldn't be too large; check
    1522             :          * for existence of hash last since that may involve I/O */
    1523     1288554 :         if (equi) {
    1524     1189981 :                 havehash = BATcheckhash(b);
    1525     1189572 :                 if (havehash) {
    1526       85263 :                         MT_rwlock_rdlock(&b->thashlock);
    1527       85265 :                         if (b->thash == NULL) {
    1528           0 :                                 MT_rwlock_rdunlock(&b->thashlock);
    1529             :                                 havehash = false;
    1530             :                         }
    1531             :                 }
    1532     1189574 :                 wanthash = havehash ||
    1533     1104309 :                         (!b->batTransient &&
    1534       26852 :                          ATOMsize(b->ttype) >= sizeof(BUN) / 4 &&
    1535       26840 :                          BATcount(b) * (ATOMsize(b->ttype) + 2 * sizeof(BUN)) < GDK_mem_maxsize / 2);
    1536     1189574 :                 if (wanthash && !havehash) {
    1537             :                         const ValRecord *prop;
    1538       26840 :                         if ((prop = BATgetprop(b, GDK_UNIQUE_ESTIMATE)) != NULL &&
    1539          89 :                             prop->val.dval < BATcount(b) / NO_HASH_SELECT_FRACTION) {
    1540             :                                 /* too many duplicates: not worth it */
    1541             :                                 wanthash = false;
    1542             :                         }
    1543             :                 }
    1544             :         }
    1545             : 
    1546     1288147 :         if (equi && !havehash && parent != 0) {
    1547             :                 /* use parent hash if it already exists and if either
    1548             :                  * a quick check shows the value we're looking for
    1549             :                  * does not occur, or if it is cheaper to check the
    1550             :                  * candidate list for each value in the hash chain
    1551             :                  * than to scan (cost for probe is average length of
    1552             :                  * hash chain (count divided by #slots) times the cost
    1553             :                  * to do a binary search on the candidate list (or 1
    1554             :                  * if no need for search)) */
    1555      949883 :                 tmp = BBP_cache(parent);
    1556      949883 :                 if (tmp && BATcheckhash(tmp)) {
    1557      894345 :                         MT_rwlock_rdlock(&tmp->thashlock);
    1558      894682 :                         phash = tmp->thash != NULL &&
    1559     1091590 :                                 (BATcount(tmp) == BATcount(b) ||
    1560      248772 :                                  BATcount(tmp) / tmp->thash->nheads * (ci.tpe != cand_dense ? ilog2(BATcount(s)) : 1) < (s ? BATcount(s) : BATcount(b)) ||
    1561       37047 :                                  HASHget(tmp->thash, HASHprobe(tmp->thash, tl)) == BUN_NONE);
    1562      894689 :                         if (phash)
    1563             :                                 havehash = wanthash = true;
    1564             :                         else
    1565       34339 :                                 MT_rwlock_rdunlock(&tmp->thashlock);
    1566             :                 }
    1567             :                 /* create a hash on the parent bat (and use it) if it is
    1568             :                  * the same size as the view and it is persistent */
    1569      950534 :                 if (!phash &&
    1570       90027 :                     !tmp->batTransient &&
    1571       84945 :                     BATcount(tmp) == BATcount(b) &&
    1572        4564 :                     BAThash(tmp) == GDK_SUCCEED) {
    1573        4571 :                         MT_rwlock_rdlock(&tmp->thashlock);
    1574        4571 :                         if (tmp->thash)
    1575             :                                 havehash = wanthash = phash = true;
    1576             :                         else
    1577           0 :                                 MT_rwlock_rdunlock(&tmp->thashlock);
    1578             :                 }
    1579             :         }
    1580             :         /* at this point, if havehash is set, we have the hash lock
    1581             :          * the lock is on the parent if phash is set, on b itself if not
    1582             :          * also, if havehash is set, then so is wanthash (but not v.v.) */
    1583             : 
    1584     1288805 :         if (!havehash) {
    1585             :                 /* make sure tsorted and trevsorted flags are set, but
    1586             :                  * we only need to know if we're not yet sure that we're
    1587             :                  * going for the hash (i.e. it already exists) */
    1588      338434 :                 (void) BATordered(b);
    1589      339659 :                 (void) BATordered_rev(b);
    1590             :         }
    1591             : 
    1592             :         /* If there is an order index or it is a view and the parent
    1593             :          * has an ordered index, and the bat is not tsorted or
    1594             :          * trevstorted then use the order index.  And there is no cand
    1595             :          * list or if there is one, it is dense.
    1596             :          * TODO: we do not support anti-select with order index */
    1597             :         bool poidx = false;
    1598     1287940 :         if (!anti &&
    1599      290454 :             !havehash &&
    1600      290454 :             !b->tsorted &&
    1601      123767 :             !b->trevsorted &&
    1602      112339 :             ci.tpe == cand_dense) {
    1603             :                 BAT *view = NULL;
    1604       94317 :                 (void) BATcheckorderidx(b);
    1605       94264 :                 MT_lock_set(&b->batIdxLock);
    1606       94299 :                 if ((oidxh = b->torderidx) != NULL)
    1607          42 :                         HEAPincref(oidxh);
    1608       94299 :                 MT_lock_unset(&b->batIdxLock);
    1609       94275 :                 if (oidxh == NULL && parent) {
    1610       24590 :                         BAT *pb = BBP_cache(parent);
    1611       24590 :                         (void) BATcheckorderidx(pb);
    1612       24588 :                         MT_lock_set(&pb->batIdxLock);
    1613       24600 :                         if ((oidxh = pb->torderidx) != NULL) {
    1614           7 :                                 HEAPincref(oidxh);
    1615             :                                 view = b;
    1616             :                                 b = pb;
    1617             :                         }
    1618       24600 :                         MT_lock_unset(&pb->batIdxLock);
    1619             :                 }
    1620       94285 :                 if (oidxh) {
    1621             :                         /* Is query selective enough to use the ordered index ? */
    1622             :                         /* TODO: Test if this heuristic works in practice */
    1623             :                         /*if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < ((BUN)1000 < b->batCount/1000 ? (BUN)1000: b->batCount/1000))*/
    1624          49 :                         if ((ORDERfnd(b, oidxh, th) - ORDERfnd(b, oidxh, tl)) < b->batCount/3) {
    1625          26 :                                 if (view) {
    1626             :                                         poidx = true; /* using parent oidx */
    1627           7 :                                         vwo = (lng) (view->tbaseoff - b->tbaseoff);
    1628           7 :                                         vwl = b->hseqbase + (oid) vwo + ci.seq - view->hseqbase;
    1629           7 :                                         vwh = vwl + canditer_last(&ci) - ci.seq;
    1630           7 :                                         vwo = (lng) view->hseqbase - (lng) b->hseqbase - vwo;
    1631           7 :                                         TRC_DEBUG(ALGO, "Switch from " ALGOBATFMT " to " ALGOBATFMT " " OIDFMT "-" OIDFMT " hseq " LLFMT "\n", ALGOBATPAR(view), ALGOBATPAR(b), vwl, vwh, vwo);
    1632             :                                 } else {
    1633          19 :                                         vwl = ci.seq;
    1634          19 :                                         vwh = canditer_last(&ci);
    1635             :                                 }
    1636             :                         } else {
    1637          23 :                                 if (view) {
    1638             :                                         b = view;
    1639             :                                         view = NULL;
    1640             :                                 }
    1641          23 :                                 HEAPdecref(oidxh, false);
    1642             :                                 oidxh = NULL;
    1643             :                         }
    1644             :                 }
    1645             :         }
    1646             : 
    1647     1287908 :         if (!havehash && (b->tsorted || b->trevsorted || oidxh != NULL)) {
    1648             :                 BUN low = 0;
    1649      201154 :                 BUN high = b->batCount;
    1650             : 
    1651      201154 :                 if (BATtdense(b)) {
    1652             :                         /* positional */
    1653             :                         /* we expect nonil to be set, in which case we
    1654             :                          * already know that we're not dealing with a
    1655             :                          * nil equiselect (dealt with above) */
    1656             :                         oid h, l;
    1657       27270 :                         assert(b->tnonil);
    1658       27270 :                         assert(b->tsorted);
    1659       27270 :                         assert(oidxh == NULL);
    1660       27270 :                         algo = "select: dense";
    1661       27270 :                         h = * (oid *) th + hi;
    1662       27270 :                         if (h > b->tseqbase)
    1663       27814 :                                 h -= b->tseqbase;
    1664             :                         else
    1665             :                                 h = 0;
    1666             :                         if ((BUN) h < high)
    1667             :                                 high = (BUN) h;
    1668             : 
    1669       27270 :                         l = *(oid *) tl + !li;
    1670       27270 :                         if (l > b->tseqbase)
    1671       22349 :                                 l -= b->tseqbase;
    1672             :                         else
    1673             :                                 l = 0;
    1674             :                         if ((BUN) l > low)
    1675             :                                 low = (BUN) l;
    1676             :                         if (low > high)
    1677             :                                 low = high;
    1678      173884 :                 } else if (b->tsorted) {
    1679      154807 :                         assert(oidxh == NULL);
    1680      154807 :                         algo = "select: sorted";
    1681      154807 :                         if (lval) {
    1682      154045 :                                 if (li)
    1683      145268 :                                         low = SORTfndfirst(b, tl);
    1684             :                                 else
    1685        8777 :                                         low = SORTfndlast(b, tl);
    1686             :                         } else {
    1687             :                                 /* skip over nils at start of column */
    1688         762 :                                 low = SORTfndlast(b, nil);
    1689             :                         }
    1690      155491 :                         if (hval) {
    1691      154701 :                                 if (hi)
    1692      145882 :                                         high = SORTfndlast(b, th);
    1693             :                                 else
    1694        8819 :                                         high = SORTfndfirst(b, th);
    1695             :                         }
    1696       19077 :                 } else if (b->trevsorted) {
    1697       19051 :                         assert(oidxh == NULL);
    1698       19051 :                         algo = "select: reverse sorted";
    1699       19051 :                         if (lval) {
    1700       18974 :                                 if (li)
    1701       18702 :                                         high = SORTfndlast(b, tl);
    1702             :                                 else
    1703         272 :                                         high = SORTfndfirst(b, tl);
    1704             :                         } else {
    1705             :                                 /* skip over nils at end of column */
    1706          77 :                                 high = SORTfndfirst(b, nil);
    1707             :                         }
    1708       19069 :                         if (hval) {
    1709       18991 :                                 if (hi)
    1710       18719 :                                         low = SORTfndfirst(b, th);
    1711             :                                 else
    1712         272 :                                         low = SORTfndlast(b, th);
    1713             :                         }
    1714             :                 } else {
    1715          26 :                         assert(oidxh != NULL);
    1716          26 :                         algo = poidx ? "select: parent orderidx" : "select: orderidx";
    1717          26 :                         if (lval) {
    1718          26 :                                 if (li)
    1719          26 :                                         low = ORDERfndfirst(b, oidxh, tl);
    1720             :                                 else
    1721           0 :                                         low = ORDERfndlast(b, oidxh, tl);
    1722             :                         } else {
    1723             :                                 /* skip over nils at start of column */
    1724           0 :                                 low = ORDERfndlast(b, oidxh, nil);
    1725             :                         }
    1726          26 :                         if (hval) {
    1727          26 :                                 if (hi)
    1728          26 :                                         high = ORDERfndlast(b, oidxh, th);
    1729             :                                 else
    1730           0 :                                         high = ORDERfndfirst(b, oidxh, th);
    1731             :                         }
    1732             :                 }
    1733      201905 :                 if (anti) {
    1734       23865 :                         assert(oidxh == NULL);
    1735       23865 :                         if (b->tsorted) {
    1736       16211 :                                 BUN first = SORTfndlast(b, nil);
    1737             :                                 /* match: [first..low) + [high..last) */
    1738       16218 :                                 bn = canditer_slice2val(&ci,
    1739             :                                                         first + b->hseqbase,
    1740             :                                                         low + b->hseqbase,
    1741       16218 :                                                         high + b->hseqbase,
    1742             :                                                         oid_nil);
    1743             :                         } else {
    1744        7654 :                                 BUN last = SORTfndfirst(b, nil);
    1745             :                                 /* match: [first..low) + [high..last) */
    1746        7654 :                                 bn = canditer_slice2val(&ci,
    1747             :                                                         oid_nil,
    1748             :                                                         low + b->hseqbase,
    1749             :                                                         high + b->hseqbase,
    1750        7654 :                                                         last + b->hseqbase);
    1751             :                         }
    1752             :                 } else {
    1753      178040 :                         if (b->tsorted || b->trevsorted) {
    1754      178014 :                                 assert(oidxh == NULL);
    1755             :                                 /* match: [low..high) */
    1756      178014 :                                 bn = canditer_sliceval(&ci,
    1757             :                                                        low + b->hseqbase,
    1758      178014 :                                                        high + b->hseqbase);
    1759             :                         } else {
    1760             :                                 BUN i;
    1761             :                                 BUN cnt = 0;
    1762             :                                 const oid *rs;
    1763             :                                 oid *rbn;
    1764             : 
    1765          26 :                                 rs = (const oid *) oidxh->base + ORDERIDXOFF;
    1766          26 :                                 rs += low;
    1767          26 :                                 bn = COLnew(0, TYPE_oid, high-low, TRANSIENT);
    1768          26 :                                 if (bn == NULL) {
    1769           0 :                                         HEAPdecref(oidxh, false);
    1770           0 :                                         return NULL;
    1771             :                                 }
    1772             : 
    1773          26 :                                 rbn = (oid *) Tloc((bn), 0);
    1774             : 
    1775        2932 :                                 for (i = low; i < high; i++) {
    1776        2906 :                                         if (vwl <= *rs && *rs <= vwh) {
    1777        1138 :                                                 *rbn++ = (oid) ((lng) *rs + vwo);
    1778        1138 :                                                 cnt++;
    1779             :                                         }
    1780        2906 :                                         rs++;
    1781             :                                 }
    1782          26 :                                 HEAPdecref(oidxh, false);
    1783          26 :                                 BATsetcount(bn, cnt);
    1784             : 
    1785             :                                 /* output must be sorted */
    1786          26 :                                 GDKqsort(Tloc(bn, 0), NULL, NULL, (size_t) bn->batCount, sizeof(oid), 0, TYPE_oid, false, false);
    1787          26 :                                 bn->tsorted = true;
    1788          26 :                                 bn->trevsorted = bn->batCount <= 1;
    1789          26 :                                 bn->tkey = true;
    1790          26 :                                 bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil;
    1791          26 :                                 bn->tnil = false;
    1792          26 :                                 bn->tnonil = true;
    1793          26 :                                 if (s) {
    1794           8 :                                         s = BATintersectcand(bn, s);
    1795           8 :                                         BBPunfix(bn->batCacheid);
    1796             :                                         bn = s;
    1797             :                                 }
    1798             :                         }
    1799             :                 }
    1800             : 
    1801      202200 :                 bn = virtualize(bn);
    1802      202016 :                 MT_thread_setalgorithm(algo);
    1803      201964 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",anti=%s -> "
    1804             :                           ALGOOPTBATFMT " %s (" LLFMT " usec)\n",
    1805             :                           ALGOBATPAR(b), anti ? "true" : "false",
    1806             :                           ALGOOPTBATPAR(bn), algo,
    1807             :                           GDKusec() - t0);
    1808             : 
    1809      201964 :                 return bn;
    1810             :         }
    1811             : 
    1812     1086754 :         assert(oidxh == NULL);
    1813             :         /* upper limit for result size */
    1814     1086754 :         maximum = ci.ncand;
    1815     1086754 :         if (b->tkey) {
    1816             :                 /* exact result size in special cases */
    1817      162174 :                 if (equi) {
    1818             :                         estimate = 1;
    1819         125 :                 } else if (!anti && lval && hval) {
    1820          19 :                         switch (t) {
    1821           0 :                         case TYPE_bte:
    1822           0 :                                 estimate = (BUN) (*(bte *) th - *(bte *) tl);
    1823           0 :                                 break;
    1824           0 :                         case TYPE_sht:
    1825           0 :                                 estimate = (BUN) (*(sht *) th - *(sht *) tl);
    1826           0 :                                 break;
    1827          19 :                         case TYPE_int:
    1828          19 :                                 estimate = (BUN) (*(int *) th - *(int *) tl);
    1829          19 :                                 break;
    1830           0 :                         case TYPE_lng:
    1831           0 :                                 estimate = (BUN) (*(lng *) th - *(lng *) tl);
    1832           0 :                                 break;
    1833             : #ifdef HAVE_HGE
    1834           0 :                         case TYPE_hge:
    1835           0 :                                 if (*(hge *) th - *(hge *) tl < (hge) BUN_MAX)
    1836           0 :                                         estimate = (BUN) (*(hge *) th - *(hge *) tl);
    1837             :                                 break;
    1838             : #endif
    1839             :                         }
    1840          19 :                         if (estimate != BUN_NONE)
    1841          19 :                                 estimate += li + hi - 1;
    1842             :                 }
    1843             :         }
    1844             :         /* refine upper limit by exact size (if known) */
    1845     1086754 :         maximum = MIN(maximum, estimate);
    1846     1086754 :         if (wanthash && !havehash && estimate == BUN_NONE) {
    1847             :                 /* no exact result size, but we need estimate to
    1848             :                  * choose between hash- & scan-select (if we already
    1849             :                  * have a hash, it's a no-brainer: we use it) */
    1850       25370 :                 if (ci.ncand <= 10000) {
    1851             :                         /* "small" input: don't bother about more accurate
    1852             :                          * estimate */
    1853             :                         estimate = maximum;
    1854             :                 } else {
    1855             :                         /* layman's quick "pseudo-sample" of 1000 tuples,
    1856             :                          * i.e., 333 from begin, middle & end of BAT */
    1857             :                         BUN smpl_cnt = 0, slct_cnt = 0, pos, skip, delta;
    1858             :                         BAT *smpl, *slct;
    1859             : 
    1860             :                         delta = 1000 / 3 / 2;
    1861           1 :                         skip = (BATcount(b) - (2 * delta)) / 2;
    1862           4 :                         for (pos = delta; pos < BATcount(b); pos += skip) {
    1863           3 :                                 smpl = BATslice(b, pos - delta, pos + delta);
    1864           3 :                                 if (smpl) {
    1865           3 :                                         slct = BATselect(smpl, NULL, tl,
    1866             :                                                             th, li, hi, anti);
    1867           3 :                                         if (slct) {
    1868           3 :                                                 smpl_cnt += BATcount(smpl);
    1869           3 :                                                 slct_cnt += BATcount(slct);
    1870           3 :                                                 BBPreclaim(slct);
    1871             :                                         }
    1872           3 :                                         BBPreclaim(smpl);
    1873             :                                 }
    1874             :                         }
    1875           1 :                         if (smpl_cnt > 0 && slct_cnt > 0) {
    1876             :                                 /* linear extrapolation plus 10% margin */
    1877           0 :                                 estimate = (BUN) ((dbl) slct_cnt / (dbl) smpl_cnt
    1878           0 :                                                   * (dbl) BATcount(b) * 1.1);
    1879           1 :                         } else if (smpl_cnt > 0 && slct_cnt == 0) {
    1880             :                                 /* estimate low enough to trigger hash select */
    1881           1 :                                 estimate = (ci.ncand / 100) - 1;
    1882             :                         }
    1883             :                 }
    1884       25370 :                 wanthash = estimate < ci.ncand / 100;
    1885             :         }
    1886     1086754 :         if (estimate == BUN_NONE) {
    1887             :                 /* no better estimate possible/required:
    1888             :                  * (pre-)allocate 1M tuples, i.e., avoid/delay extend
    1889             :                  * without too much overallocation */
    1890             :                 estimate = 1000000;
    1891             :         }
    1892             :         /* limit estimation by upper limit */
    1893     1086754 :         estimate = MIN(estimate, maximum);
    1894             : 
    1895     1086754 :         bn = COLnew(0, TYPE_oid, estimate, TRANSIENT);
    1896     1085564 :         if (bn == NULL)
    1897             :                 return NULL;
    1898             : 
    1899     1085564 :         BATiter bi = bat_iterator(b);
    1900     1087036 :         if (wanthash) {
    1901             :                 /* hashselect unlocks the hash lock */
    1902      949912 :                 bn = hashselect(b, &bi, &ci, bn, tl, maximum, havehash, phash, &algo);
    1903             :         } else {
    1904      137124 :                 assert(!havehash);
    1905             :                 /* use imprints if
    1906             :                  *   i) bat is persistent, or parent is persistent
    1907             :                  *  ii) it is not an equi-select, and
    1908             :                  * iii) imprints are supported.
    1909             :                  */
    1910             :                 tmp = NULL;
    1911             :                 Imprints *imprints = NULL;
    1912             :                 if (!equi &&
    1913             :                     /* DISABLES CODE */ (0) && imprintable(b->ttype) &&
    1914             :                     (!b->batTransient ||
    1915             :                      (parent != 0 &&
    1916             :                       (tmp = BBP_cache(parent)) != NULL &&
    1917             :                       !tmp->batTransient)) &&
    1918             :                     BATimprints(b) == GDK_SUCCEED) {
    1919             :                         if (tmp != NULL) {
    1920             :                                 MT_lock_set(&tmp->batIdxLock);
    1921             :                                 imprints = tmp->timprints;
    1922             :                                 if (imprints != NULL)
    1923             :                                         IMPSincref(imprints);
    1924             :                                 else
    1925             :                                         imprints = NULL;
    1926             :                                 MT_lock_unset(&tmp->batIdxLock);
    1927             :                         } else {
    1928             :                                 MT_lock_set(&b->batIdxLock);
    1929             :                                 imprints = b->timprints;
    1930             :                                 if (imprints != NULL)
    1931             :                                         IMPSincref(imprints);
    1932             :                                 else
    1933             :                                         imprints = NULL;
    1934             :                                 MT_lock_unset(&b->batIdxLock);
    1935             :                         }
    1936             :                 }
    1937      137124 :                 GDKclrerr();
    1938      136951 :                 bn = scanselect(b, &bi, &ci, bn, tl, th, li, hi, equi, anti,
    1939             :                                 lval, hval, lnil, maximum, imprints, &algo);
    1940             :                 if (imprints)
    1941             :                         IMPSdecref(imprints, false);
    1942             :         }
    1943     1087855 :         bat_iterator_end(&bi);
    1944             : 
    1945     1087745 :         bn = virtualize(bn);
    1946     1086974 :         MT_thread_setalgorithm(algo);
    1947     1087530 :         TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT",anti=%s -> " ALGOOPTBATFMT
    1948             :                   " %s (" LLFMT " usec)\n",
    1949             :                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1950             :                   anti ? "true" : "false",
    1951             :                   ALGOOPTBATPAR(bn), algo,
    1952             :                   GDKusec() - t0);
    1953             : 
    1954             :         return bn;
    1955             : }
    1956             : 
    1957             : /* theta select
    1958             :  *
    1959             :  * Returns a BAT with the OID values of b for qualifying tuples.  The
    1960             :  * return BAT is sorted (i.e. in the same order as the input BAT).
    1961             :  *
    1962             :  * If s is not NULL, it is a list of candidates.  s must be sorted.
    1963             :  *
    1964             :  * Theta select returns all values from b which are less/greater than
    1965             :  * or (not) equal to the provided value depending on the value of op.
    1966             :  * Op is a string with one of the values: "=", "==", "<", "<=", ">",
    1967             :  * ">=", "<>", "!=" (the first two are equivalent and the last two are
    1968             :  * equivalent).  Theta select never returns nils.
    1969             :  *
    1970             :  * If value is nil, the result is empty.
    1971             :  */
    1972             : BAT *
    1973      550895 : BATthetaselect(BAT *b, BAT *s, const void *val, const char *op)
    1974             : {
    1975             :         const void *nil;
    1976             : 
    1977      550895 :         BATcheck(b, NULL);
    1978      550895 :         BATcheck(val, NULL);
    1979      550895 :         BATcheck(op, NULL);
    1980             : 
    1981      550895 :         nil = ATOMnilptr(b->ttype);
    1982      550895 :         if (ATOMcmp(b->ttype, val, nil) == 0)
    1983         103 :                 return BATdense(0, 0, 0);
    1984      551375 :         if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0)) {
    1985             :                 /* "=" or "==" */
    1986      450800 :                 return BATselect(b, s, val, NULL, true, true, false);
    1987             :         }
    1988      100575 :         if (op[0] == '!' && op[1] == '=' && op[2] == 0) {
    1989             :                 /* "!=" (equivalent to "<>") */
    1990       58138 :                 return BATselect(b, s, val, NULL, true, true, true);
    1991             :         }
    1992       42437 :         if (op[0] == '<') {
    1993         963 :                 if (op[1] == 0) {
    1994             :                         /* "<" */
    1995         788 :                         return BATselect(b, s, nil, val, false, false, false);
    1996             :                 }
    1997         175 :                 if (op[1] == '=' && op[2] == 0) {
    1998             :                         /* "<=" */
    1999         175 :                         return BATselect(b, s, nil, val, false, true, false);
    2000             :                 }
    2001           0 :                 if (op[1] == '>' && op[2] == 0) {
    2002             :                         /* "<>" (equivalent to "!=") */
    2003           0 :                         return BATselect(b, s, val, NULL, true, true, true);
    2004             :                 }
    2005             :         }
    2006       41474 :         if (op[0] == '>') {
    2007       41474 :                 if (op[1] == 0) {
    2008             :                         /* ">" */
    2009       40805 :                         return BATselect(b, s, val, nil, false, false, false);
    2010             :                 }
    2011         669 :                 if (op[1] == '=' && op[2] == 0) {
    2012             :                         /* ">=" */
    2013         670 :                         return BATselect(b, s, val, nil, true, false, false);
    2014             :                 }
    2015             :         }
    2016           0 :         GDKerror("unknown operator.\n");
    2017           0 :         return NULL;
    2018             : }
    2019             : 
    2020             : #define VALUE(s, x)     (s##vars ?                                      \
    2021             :                          s##vars + VarHeapVal(s##vals, (x), s##width) : \
    2022             :                          s##vals + ((x) * s##width))
    2023             : #define FVALUE(s, x)    (s##vals + ((x) * s##width))
    2024             : 
    2025             : #define LTany(a,b)      ((*cmp)(a, b) < 0)
    2026             : #define EQany(a,b)      ((*cmp)(a, b) == 0)
    2027             : #define is_any_nil(v)   ((v) == NULL || (*cmp)((v), nil) == 0)
    2028             : 
    2029             : #define less3(a,b,i,t)  (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(a, b) || (i && EQ##t(a, b)))
    2030             : #define grtr3(a,b,i,t)  (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(b, a) || (i && EQ##t(a, b)))
    2031             : #define or3(a,b)        ((a) == 1 || (b) == 1 ? 1 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 0)
    2032             : #define and3(a,b)       ((a) == 0 || (b) == 0 ? 0 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 1)
    2033             : #define not3(a)         (is_bit_nil(a) ? bit_nil : !(a))
    2034             : 
    2035             : #define between3(v, lo, linc, hi, hinc, TYPE)                           \
    2036             :         and3(grtr3(v, lo, linc, TYPE), less3(v, hi, hinc, TYPE))
    2037             : 
    2038             : #define BETWEEN(v, lo, linc, hi, hinc, TYPE)                            \
    2039             :         (is_##TYPE##_nil(v)                                             \
    2040             :          ? bit_nil                                                      \
    2041             :          : (bit) (anti                                                  \
    2042             :                   ? (symmetric                                          \
    2043             :                      ? not3(or3(between3(v, lo, linc, hi, hinc, TYPE),  \
    2044             :                                 between3(v, hi, hinc, lo, linc, TYPE))) \
    2045             :                      : not3(between3(v, lo, linc, hi, hinc, TYPE)))     \
    2046             :                   : (symmetric                                          \
    2047             :                      ? or3(between3(v, lo, linc, hi, hinc, TYPE),       \
    2048             :                            between3(v, hi, hinc, lo, linc, TYPE))       \
    2049             :                      : between3(v, lo, linc, hi, hinc, TYPE))))
    2050             : 
    2051             : gdk_return
    2052         129 : rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh,
    2053             :           struct canditer *lci, struct canditer *rci,
    2054             :           bool linc, bool hinc, bool anti, bool symmetric, BUN maxsize)
    2055             : {
    2056             :         const char *rlvals, *rhvals;
    2057             :         const char *lvars, *rlvars, *rhvars;
    2058             :         int rlwidth, rhwidth;
    2059             :         int lwidth;
    2060         129 :         const void *nil = ATOMnilptr(l->ttype);
    2061         129 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    2062             :         int t;
    2063             :         BUN cnt, ncnt;
    2064             :         oid *restrict dst1, *restrict dst2;
    2065             :         const void *vrl, *vrh;
    2066             :         oid ro;
    2067         129 :         oid rlval = oid_nil, rhval = oid_nil;
    2068             :         int sorted = 0;         /* which output column is sorted */
    2069             :         BAT *tmp = NULL;
    2070             :         const char *algo = NULL;
    2071         129 :         BATiter li = bat_iterator(l);
    2072         129 :         BATiter rli = bat_iterator(rl);
    2073         130 :         BATiter rhi = bat_iterator(rh);
    2074             :         Heap *oidxh = NULL;
    2075             : 
    2076         390 :         assert(ATOMtype(l->ttype) == ATOMtype(rl->ttype));
    2077         260 :         assert(ATOMtype(l->ttype) == ATOMtype(rh->ttype));
    2078         130 :         assert(BATcount(rl) == BATcount(rh));
    2079         130 :         assert(rl->hseqbase == rh->hseqbase);
    2080         130 :         assert(r1->ttype == TYPE_oid);
    2081         130 :         assert(r2 == NULL || r2->ttype == TYPE_oid);
    2082         130 :         assert(r2 == NULL || BATcount(r1) == BATcount(r2));
    2083         130 :         assert(l->ttype != TYPE_void || !is_oid_nil(l->tseqbase));
    2084         130 :         assert(rl->ttype != TYPE_void || !is_oid_nil(rl->tseqbase));
    2085         130 :         assert(rh->ttype != TYPE_void || !is_oid_nil(rh->tseqbase));
    2086             : 
    2087         130 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    2088             :                   "rl=" ALGOBATFMT ",rh=" ALGOBATFMT ","
    2089             :                   "sl=" ALGOOPTBATFMT ",sr=" ALGOOPTBATFMT ","
    2090             :                   "anti=%s,symmetric=%s\n",
    2091             :                   ALGOBATPAR(l),
    2092             :                   ALGOBATPAR(rl),
    2093             :                   ALGOBATPAR(rh),
    2094             :                   ALGOOPTBATPAR(lci->s),
    2095             :                   ALGOOPTBATPAR(rci->s),
    2096             :                   anti ? "true" : "false",
    2097             :                   symmetric ? "true" : "false");
    2098             : 
    2099         130 :         rlvals = rl->ttype == TYPE_void ? NULL : (const char *) rli.base;
    2100         130 :         rhvals = rh->ttype == TYPE_void ? NULL : (const char *) rhi.base;
    2101         130 :         lwidth = li.width;
    2102         130 :         rlwidth = rli.width;
    2103         130 :         rhwidth = rhi.width;
    2104         130 :         dst1 = (oid *) Tloc(r1, 0);
    2105         130 :         dst2 = r2 ? (oid *) Tloc(r2, 0) : NULL;
    2106             : 
    2107         130 :         t = ATOMtype(l->ttype);
    2108             :         t = ATOMbasetype(t);
    2109             : 
    2110         130 :         if (l->tvarsized && l->ttype) {
    2111          17 :                 assert(rl->tvarsized && rl->ttype);
    2112          17 :                 assert(rh->tvarsized && rh->ttype);
    2113          17 :                 lvars = li.vh->base;
    2114          17 :                 rlvars = rli.vh->base;
    2115          17 :                 rhvars = rhi.vh->base;
    2116             :         } else {
    2117         113 :                 assert(!rl->tvarsized || !rl->ttype);
    2118         113 :                 assert(!rh->tvarsized || !rh->ttype);
    2119             :                 lvars = rlvars = rhvars = NULL;
    2120             :         }
    2121             : 
    2122         130 :         if (!anti && !symmetric && !BATordered(l) && !BATordered_rev(l)) {
    2123          13 :                 (void) BATcheckorderidx(l);
    2124          13 :                 MT_lock_set(&l->batIdxLock);
    2125          13 :                 if ((oidxh = l->torderidx) != NULL)
    2126           0 :                         HEAPincref(oidxh);
    2127          13 :                 MT_lock_unset(&l->batIdxLock);
    2128             : #if 0 /* needs checking */
    2129             :                 if (oidxh == NULL && VIEWtparent(l)) {
    2130             :                         BAT *pb = BBP_cache(VIEWtparent(l));
    2131             :                         (void) BATcheckorderidx(pb);
    2132             :                         MT_lock_set(&pb->batIdxLock);
    2133             :                         if ((oidxh = pb->torderidx) != NULL) {
    2134             :                                 HEAPincref(oidxh);
    2135             :                                 l = pb;
    2136             :                         }
    2137             :                         MT_lock_unset(&pb->batIdxLock);
    2138             :                 }
    2139             : #endif
    2140             :         }
    2141             : 
    2142             :         vrl = &rlval;
    2143             :         vrh = &rhval;
    2144         129 :         if (!anti && !symmetric && (BATordered(l) || BATordered_rev(l) || oidxh)) {
    2145             :                 /* left column is sorted, use binary search */
    2146             :                 sorted = 2;
    2147         530 :                 for (BUN i = 0; i < rci->ncand; i++) {
    2148             :                         BUN low, high;
    2149             : 
    2150         424 :                         ro = canditer_next(rci);
    2151         418 :                         if (rlvals) {
    2152         418 :                                 vrl = VALUE(rl, ro - rl->hseqbase);
    2153             :                         } else {
    2154             :                                 /* TYPE_void */
    2155           0 :                                 rlval = ro - rl->hseqbase + rl->tseqbase;
    2156             :                         }
    2157         418 :                         if (rhvals) {
    2158         418 :                                 vrh = VALUE(rh, ro - rh->hseqbase);
    2159             :                         } else {
    2160             :                                 /* TYPE_void */
    2161           0 :                                 rhval = ro - rh->hseqbase + rh->tseqbase;
    2162             :                         }
    2163         418 :                         if (cmp(vrl, nil) == 0 || cmp(vrh, nil) == 0)
    2164           0 :                                 continue;
    2165         404 :                         if (l->tsorted) {
    2166         401 :                                 if (linc)
    2167         294 :                                         low = SORTfndfirst(l, vrl);
    2168             :                                 else
    2169         107 :                                         low = SORTfndlast(l, vrl);
    2170         420 :                                 if (hinc)
    2171         359 :                                         high = SORTfndlast(l, vrh);
    2172             :                                 else
    2173          61 :                                         high = SORTfndfirst(l, vrh);
    2174           3 :                         } else  if (l->trevsorted) {
    2175           3 :                                 if (hinc)
    2176           3 :                                         low = SORTfndfirst(l, vrh);
    2177             :                                 else
    2178           0 :                                         low = SORTfndlast(l, vrh);
    2179           3 :                                 if (linc)
    2180           3 :                                         high = SORTfndlast(l, vrl);
    2181             :                                 else
    2182           0 :                                         high = SORTfndfirst(l, vrl);
    2183             :                         } else {
    2184           0 :                                 assert(oidxh);
    2185           0 :                                 if (linc)
    2186           0 :                                         low = ORDERfndfirst(l, oidxh, vrl);
    2187             :                                 else
    2188           0 :                                         low = ORDERfndlast(l, oidxh, vrl);
    2189           0 :                                 if (hinc)
    2190           0 :                                         high = ORDERfndlast(l, oidxh, vrh);
    2191             :                                 else
    2192           0 :                                         high = ORDERfndfirst(l, oidxh, vrh);
    2193             :                         }
    2194         424 :                         if (high <= low)
    2195         229 :                                 continue;
    2196         195 :                         if (l->tsorted || l->trevsorted) {
    2197         195 :                                 low = canditer_search(lci, low + l->hseqbase, true);
    2198         195 :                                 high = canditer_search(lci, high + l->hseqbase, true);
    2199         194 :                                 assert(high >= low);
    2200             : 
    2201         194 :                                 if (BATcapacity(r1) < BUNlast(r1) + high - low) {
    2202           0 :                                         cnt = BUNlast(r1) + high - low + 1024;
    2203             :                                         if (cnt > maxsize)
    2204             :                                                 cnt = maxsize;
    2205           0 :                                         BATsetcount(r1, BATcount(r1));
    2206           0 :                                         if (BATextend(r1, cnt) != GDK_SUCCEED)
    2207           0 :                                                 goto bailout;
    2208           0 :                                         dst1 = (oid *) Tloc(r1, 0);
    2209           0 :                                         if (r2) {
    2210           0 :                                                 BATsetcount(r2, BATcount(r2));
    2211           0 :                                                 if (BATextend(r2, cnt) != GDK_SUCCEED)
    2212           0 :                                                         goto bailout;
    2213           0 :                                                 assert(BATcapacity(r1) == BATcapacity(r2));
    2214           0 :                                                 dst2 = (oid *) Tloc(r2, 0);
    2215             :                                         }
    2216             :                                 }
    2217         194 :                                 canditer_setidx(lci, low);
    2218        1046 :                                 while (low < high) {
    2219         851 :                                         dst1[r1->batCount++] = canditer_next(lci);
    2220         852 :                                         if (r2) {
    2221         841 :                                                 dst2[r2->batCount++] = ro;
    2222             :                                         }
    2223         852 :                                         low++;
    2224             :                                 }
    2225             :                         } else {
    2226             :                                 const oid *ord;
    2227             : 
    2228           0 :                                 assert(oidxh);
    2229           0 :                                 ord = (const oid *) oidxh->base + ORDERIDXOFF;
    2230             : 
    2231           0 :                                 if (BATcapacity(r1) < BUNlast(r1) + high - low) {
    2232           0 :                                         cnt = BUNlast(r1) + high - low + 1024;
    2233             :                                         if (cnt > maxsize)
    2234             :                                                 cnt = maxsize;
    2235           0 :                                         BATsetcount(r1, BATcount(r1));
    2236           0 :                                         if (BATextend(r1, cnt) != GDK_SUCCEED)
    2237           0 :                                                 goto bailout;
    2238           0 :                                         dst1 = (oid *) Tloc(r1, 0);
    2239           0 :                                         if (r2) {
    2240           0 :                                                 BATsetcount(r2, BATcount(r2));
    2241           0 :                                                 if (BATextend(r2, cnt) != GDK_SUCCEED)
    2242           0 :                                                         goto bailout;
    2243           0 :                                                 assert(BATcapacity(r1) == BATcapacity(r2));
    2244           0 :                                                 dst2 = (oid *) Tloc(r2, 0);
    2245             :                                         }
    2246             :                                 }
    2247             : 
    2248           0 :                                 while (low < high) {
    2249           0 :                                         if (canditer_contains(lci, ord[low])) {
    2250           0 :                                                 dst1[r1->batCount++] = ord[low];
    2251           0 :                                                 if (r2) {
    2252           0 :                                                         dst2[r2->batCount++] = ro;
    2253             :                                                 }
    2254             :                                         }
    2255           0 :                                         low++;
    2256             :                                 }
    2257             :                         }
    2258             :                 }
    2259         106 :                 if (oidxh)
    2260           0 :                         HEAPdecref(oidxh, false);
    2261         106 :                 cnt = BATcount(r1);
    2262         106 :                 assert(r2 == NULL || BATcount(r1) == BATcount(r2));
    2263             :         } else if (!anti && !symmetric &&
    2264             :                    /* DISABLES CODE */ (0) && imprintable(l->ttype) &&
    2265             :                    (BATcount(rl) > 2 ||
    2266             :                     !l->batTransient ||
    2267             :                     (VIEWtparent(l) != 0 &&
    2268             :                      (tmp = BBP_cache(VIEWtparent(l))) != NULL &&
    2269             :                      !tmp->batTransient) ||
    2270             :                     BATcheckimprints(l)) &&
    2271             :                    BATimprints(l) == GDK_SUCCEED) {
    2272             :                 /* implementation using imprints on left column
    2273             :                  *
    2274             :                  * we use imprints if we can (the type is right for
    2275             :                  * imprints) and either the left bat is persistent or
    2276             :                  * already has imprints, or the right bats are long
    2277             :                  * enough (for creating imprints being worth it) */
    2278             :                 Imprints *imprints = NULL;
    2279             : 
    2280             :                 if (tmp) {
    2281             :                         MT_lock_set(&tmp->batIdxLock);
    2282             :                         imprints = tmp->timprints;
    2283             :                         if (imprints != NULL)
    2284             :                                 IMPSincref(imprints);
    2285             :                         else
    2286             :                                 imprints = NULL;
    2287             :                         MT_lock_unset(&tmp->batIdxLock);
    2288             :                 } else {
    2289             :                         MT_lock_set(&l->batIdxLock);
    2290             :                         imprints = l->timprints;
    2291             :                         if (imprints != NULL)
    2292             :                                 IMPSincref(imprints);
    2293             :                         else
    2294             :                                 imprints = NULL;
    2295             :                         MT_lock_unset(&l->batIdxLock);
    2296             :                 }
    2297             :                 /* in the unlikely case that the imprints were removed
    2298             :                  * before we could get at them, take the slow, nested
    2299             :                  * loop implementation */
    2300             :                 if (imprints == NULL)
    2301             :                         goto nestedloop;
    2302             : 
    2303             :                 sorted = 2;
    2304             :                 cnt = 0;
    2305             :                 for (BUN i = 0; i < rci->ncand; i++) {
    2306             :                         maxsize = cnt + (rci->ncand - i) * lci->ncand;
    2307             :                         ro = canditer_next(rci);
    2308             :                         if (rlvals) {
    2309             :                                 vrl = FVALUE(rl, ro - rl->hseqbase);
    2310             :                         } else {
    2311             :                                 /* TYPE_void */
    2312             :                                 rlval = ro - rl->hseqbase + rl->tseqbase;
    2313             :                         }
    2314             :                         if (rhvals) {
    2315             :                                 vrh = FVALUE(rh, ro - rh->hseqbase);
    2316             :                         } else {
    2317             :                                 /* TYPE_void */
    2318             :                                 rhval = ro - rl->hseqbase + rl->tseqbase;
    2319             :                         }
    2320             :                         dst1 = (oid *) Tloc(r1, 0);
    2321             :                         canditer_reset(lci);
    2322             :                         switch (t) {
    2323             :                         case TYPE_bte: {
    2324             :                                 bte vl, vh;
    2325             :                                 if (is_bte_nil((vl = *(bte *) vrl)))
    2326             :                                         continue;
    2327             :                                 if (is_bte_nil((vh = *(bte *) vrh)))
    2328             :                                         continue;
    2329             :                                 if (!linc) {
    2330             :                                         if (vl == MAXVALUEbte)
    2331             :                                                 continue;
    2332             :                                         vl = NEXTVALUEbte(vl);
    2333             :                                 }
    2334             :                                 if (!hinc) {
    2335             :                                         if (vh == MINVALUEbte)
    2336             :                                                 continue;
    2337             :                                         vh = PREVVALUEbte(vh);
    2338             :                                 }
    2339             :                                 if (vl > vh)
    2340             :                                         continue;
    2341             :                                 ncnt = fullscan_bte(l, &li, lci, r1, &vl, &vh,
    2342             :                                                     true, true, false,
    2343             :                                                     false, true, true,
    2344             :                                                     false, cnt,
    2345             :                                                     l->hseqbase, dst1,
    2346             :                                                     maxsize,
    2347             :                                                     imprints, &algo);
    2348             :                                 break;
    2349             :                         }
    2350             :                         case TYPE_sht: {
    2351             :                                 sht vl, vh;
    2352             :                                 if (is_sht_nil((vl = *(sht *) vrl)))
    2353             :                                         continue;
    2354             :                                 if (is_sht_nil((vh = *(sht *) vrh)))
    2355             :                                         continue;
    2356             :                                 if (!linc) {
    2357             :                                         if (vl == MAXVALUEsht)
    2358             :                                                 continue;
    2359             :                                         vl = NEXTVALUEsht(vl);
    2360             :                                 }
    2361             :                                 if (!hinc) {
    2362             :                                         if (vh == MINVALUEsht)
    2363             :                                                 continue;
    2364             :                                         vh = PREVVALUEsht(vh);
    2365             :                                 }
    2366             :                                 if (vl > vh)
    2367             :                                         continue;
    2368             :                                 ncnt = fullscan_sht(l, &li, lci, r1, &vl, &vh,
    2369             :                                                     true, true, false,
    2370             :                                                     false, true, true,
    2371             :                                                     false, cnt,
    2372             :                                                     l->hseqbase, dst1,
    2373             :                                                     maxsize,
    2374             :                                                     imprints, &algo);
    2375             :                                 break;
    2376             :                         }
    2377             :                         case TYPE_int:
    2378             : #if SIZEOF_OID == SIZEOF_INT
    2379             :                         case TYPE_oid:
    2380             : #endif
    2381             :                         {
    2382             :                                 int vl, vh;
    2383             :                                 if (is_int_nil((vl = *(int *) vrl)))
    2384             :                                         continue;
    2385             :                                 if (is_int_nil((vh = *(int *) vrh)))
    2386             :                                         continue;
    2387             :                                 if (!linc) {
    2388             :                                         if (vl == MAXVALUEint)
    2389             :                                                 continue;
    2390             :                                         vl = NEXTVALUEint(vl);
    2391             :                                 }
    2392             :                                 if (!hinc) {
    2393             : #if SIZEOF_OID == SIZEOF_INT
    2394             :                                         if (t == TYPE_oid) {
    2395             :                                                 if (vh == MINVALUEoid)
    2396             :                                                         continue;
    2397             :                                                 vh = PREVVALUEoid(vh);
    2398             :                                         } else
    2399             : #endif
    2400             :                                         {
    2401             :                                                 if (vh == MINVALUEint)
    2402             :                                                         continue;
    2403             :                                                 vh = PREVVALUEint(vh);
    2404             :                                         }
    2405             :                                 }
    2406             :                                 if (vl > vh)
    2407             :                                         continue;
    2408             :                                 ncnt = fullscan_int(l, &li, lci, r1, &vl, &vh,
    2409             :                                                     true, true, false,
    2410             :                                                     false, true, true,
    2411             :                                                     false, cnt,
    2412             :                                                     l->hseqbase, dst1,
    2413             :                                                     maxsize,
    2414             :                                                     imprints, &algo);
    2415             :                                 break;
    2416             :                         }
    2417             :                         case TYPE_lng:
    2418             : #if SIZEOF_OID == SIZEOF_LNG
    2419             :                         case TYPE_oid:
    2420             : #endif
    2421             :                         {
    2422             :                                 lng vl, vh;
    2423             :                                 if (is_lng_nil((vl = *(lng *) vrl)))
    2424             :                                         continue;
    2425             :                                 if (is_lng_nil((vh = *(lng *) vrh)))
    2426             :                                         continue;
    2427             :                                 if (!linc) {
    2428             :                                         if (vl == MAXVALUElng)
    2429             :                                                 continue;
    2430             :                                         vl = NEXTVALUElng(vl);
    2431             :                                 }
    2432             :                                 if (!hinc) {
    2433             : #if SIZEOF_OID == SIZEOF_LNG
    2434             :                                         if (t == TYPE_oid) {
    2435             :                                                 if (vh == MINVALUEoid)
    2436             :                                                         continue;
    2437             :                                                 vh = PREVVALUEoid(vh);
    2438             :                                         } else
    2439             : #endif
    2440             :                                         {
    2441             :                                                 if (vh == MINVALUElng)
    2442             :                                                         continue;
    2443             :                                                 vh = PREVVALUElng(vh);
    2444             :                                         }
    2445             :                                 }
    2446             :                                 if (vl > vh)
    2447             :                                         continue;
    2448             :                                 ncnt = fullscan_lng(l, &li, lci, r1, &vl, &vh,
    2449             :                                                     true, true, false,
    2450             :                                                     false, true, true,
    2451             :                                                     false, cnt,
    2452             :                                                     l->hseqbase, dst1,
    2453             :                                                     maxsize,
    2454             :                                                     imprints, &algo);
    2455             :                                 break;
    2456             :                         }
    2457             : #ifdef HAVE_HGE
    2458             :                         case TYPE_hge: {
    2459             :                                 hge vl, vh;
    2460             :                                 if (is_hge_nil((vl = *(hge *) vrl)))
    2461             :                                         continue;
    2462             :                                 if (is_hge_nil((vh = *(hge *) vrh)))
    2463             :                                         continue;
    2464             :                                 if (!linc) {
    2465             :                                         if (vl == MAXVALUEhge)
    2466             :                                                 continue;
    2467             :                                         vl = NEXTVALUEhge(vl);
    2468             :                                 }
    2469             :                                 if (!hinc) {
    2470             :                                         if (vh == MINVALUEhge)
    2471             :                                                 continue;
    2472             :                                         vh = PREVVALUEhge(vh);
    2473             :                                 }
    2474             :                                 if (vl > vh)
    2475             :                                         continue;
    2476             :                                 ncnt = fullscan_hge(l, &li, lci, r1, &vl, &vh,
    2477             :                                                     true, true, false,
    2478             :                                                     false, true, true,
    2479             :                                                     false, cnt,
    2480             :                                                     l->hseqbase, dst1,
    2481             :                                                     maxsize,
    2482             :                                                     imprints, &algo);
    2483             :                                 break;
    2484             :                         }
    2485             : #endif
    2486             :                         case TYPE_flt: {
    2487             :                                 flt vl, vh;
    2488             :                                 vl = *(flt *) vrl;
    2489             :                                 if (is_flt_nil(vl))
    2490             :                                         continue;
    2491             :                                 vh = *(flt *) vrh;
    2492             :                                 if (is_flt_nil(vh))
    2493             :                                         continue;
    2494             :                                 if (!linc) {
    2495             :                                         if (vl == MAXVALUEflt)
    2496             :                                                 continue;
    2497             :                                         vl = NEXTVALUEflt(vl);
    2498             :                                 }
    2499             :                                 if (!hinc) {
    2500             :                                         if (vh == MINVALUEflt)
    2501             :                                                 continue;
    2502             :                                         vh = PREVVALUEflt(vh);
    2503             :                                 }
    2504             :                                 if (vl > vh)
    2505             :                                         continue;
    2506             :                                 ncnt = fullscan_flt(l, &li, lci, r1, &vl, &vh,
    2507             :                                                     true, true, false,
    2508             :                                                     false, true, true,
    2509             :                                                     false, cnt,
    2510             :                                                     l->hseqbase, dst1,
    2511             :                                                     maxsize,
    2512             :                                                     imprints, &algo);
    2513             :                                 break;
    2514             :                         }
    2515             :                         case TYPE_dbl: {
    2516             :                                 dbl vl, vh;
    2517             :                                 vl = *(dbl *) vrl;
    2518             :                                 if (is_dbl_nil(vl))
    2519             :                                         continue;
    2520             :                                 vh = *(dbl *) vrh;
    2521             :                                 if (is_dbl_nil(vh))
    2522             :                                         continue;
    2523             :                                 if (!linc) {
    2524             :                                         if (vl == MAXVALUEdbl)
    2525             :                                                 continue;
    2526             :                                         vl = NEXTVALUEdbl(vl);
    2527             :                                 }
    2528             :                                 if (!hinc) {
    2529             :                                         if (vh == MINVALUEdbl)
    2530             :                                                 continue;
    2531             :                                         vh = PREVVALUEdbl(vh);
    2532             :                                 }
    2533             :                                 if (vl > vh)
    2534             :                                         continue;
    2535             :                                 ncnt = fullscan_dbl(l, &li, lci, r1, &vl, &vh,
    2536             :                                                     true, true, false,
    2537             :                                                     false, true, true,
    2538             :                                                     false, cnt,
    2539             :                                                     l->hseqbase, dst1,
    2540             :                                                     maxsize,
    2541             :                                                     imprints, &algo);
    2542             :                                 break;
    2543             :                         }
    2544             :                         default:
    2545             :                                 ncnt = BUN_NONE;
    2546             :                                 GDKerror("unsupported type\n");
    2547             :                                 assert(0);
    2548             :                         }
    2549             :                         if (ncnt == BUN_NONE) {
    2550             :                                 IMPSdecref(imprints, false);
    2551             :                                 goto bailout;
    2552             :                         }
    2553             :                         assert(ncnt >= cnt || ncnt == 0);
    2554             :                         if (ncnt == cnt || ncnt == 0)
    2555             :                                 continue;
    2556             :                         if (r2) {
    2557             :                                 if (BATcapacity(r2) < ncnt) {
    2558             :                                         BATsetcount(r2, cnt);
    2559             :                                         if (BATextend(r2, BATcapacity(r1)) != GDK_SUCCEED) {
    2560             :                                                 IMPSdecref(imprints, false);
    2561             :                                                 goto bailout;
    2562             :                                         }
    2563             :                                         dst2 = (oid *) Tloc(r2, 0);
    2564             :                                 }
    2565             :                                 while (cnt < ncnt)
    2566             :                                         dst2[cnt++] = ro;
    2567             :                         } else {
    2568             :                                 cnt = ncnt;
    2569             :                         }
    2570             :                 }
    2571             :                 IMPSdecref(imprints, false);
    2572             :         } else {
    2573          24 :           nestedloop:;
    2574             :                 /* nested loop implementation */
    2575             :                 const void *vl;
    2576             :                 const char *lvals;
    2577             :                 oid lval;
    2578             : 
    2579          24 :                 GDKclrerr();    /* not interested in BATimprints errors */
    2580             :                 sorted = 1;
    2581          24 :                 lvals = l->ttype == TYPE_void ? NULL : (const char *) li.base;
    2582             :                 vl = &lval;
    2583         261 :                 for (BUN i = 0; i < lci->ncand; i++) {
    2584             :                         oid lo;
    2585             : 
    2586         237 :                         lo = canditer_next(lci);
    2587         236 :                         if (lvals) {
    2588         236 :                                 vl = VALUE(l, lo - l->hseqbase);
    2589         236 :                                 if (cmp(vl, nil) == 0)
    2590           8 :                                         continue;
    2591             :                         } else {
    2592           0 :                                 lval = lo - l->hseqbase + l->tseqbase;
    2593             :                         }
    2594         229 :                         canditer_reset(rci);
    2595       25860 :                         for (BUN j = 0; j < rci->ncand; j++) {
    2596       25631 :                                 ro = canditer_next(rci);
    2597       18991 :                                 if (rlvals) {
    2598       18991 :                                         vrl = VALUE(rl, ro - rl->hseqbase);
    2599             :                                 } else {
    2600             :                                         /* TYPE_void */
    2601           0 :                                         rlval = ro - rl->hseqbase + rl->tseqbase;
    2602             :                                 }
    2603       18991 :                                 if (rhvals) {
    2604       18991 :                                         vrh = VALUE(rh, ro - rh->hseqbase);
    2605             :                                 } else {
    2606             :                                         /* TYPE_void */
    2607           0 :                                         rhval = ro - rh->hseqbase + rh->tseqbase;
    2608             :                                 }
    2609       18991 :                                 if (BETWEEN(vl, vrl, linc, vrh, hinc, any) != 1)
    2610       15755 :                                         continue;
    2611        9876 :                                 if (BUNlast(r1) == BATcapacity(r1)) {
    2612           2 :                                         BUN newcap = BATgrows(r1);
    2613             :                                         if (newcap > maxsize)
    2614             :                                                 newcap = maxsize;
    2615           2 :                                         BATsetcount(r1, BATcount(r1));
    2616           2 :                                         if (BATextend(r1, newcap) != GDK_SUCCEED)
    2617           0 :                                                 goto bailout;
    2618           2 :                                         dst1 = (oid *) Tloc(r1, 0);
    2619           2 :                                         if (r2) {
    2620           2 :                                                 BATsetcount(r2, BATcount(r2));
    2621           2 :                                                 if (BATextend(r2, newcap) != GDK_SUCCEED)
    2622           0 :                                                         goto bailout;
    2623           2 :                                                 assert(BATcapacity(r1) == BATcapacity(r2));
    2624           2 :                                                 dst2 = (oid *) Tloc(r2, 0);
    2625             :                                         }
    2626             :                                 }
    2627        9876 :                                 dst1[r1->batCount++] = lo;
    2628        9876 :                                 if (r2) {
    2629        9867 :                                         dst2[r2->batCount++] = ro;
    2630             :                                 }
    2631             :                         }
    2632             :                 }
    2633          24 :                 cnt = BATcount(r1);
    2634          24 :                 assert(r2 == NULL || BATcount(r1) == BATcount(r2));
    2635             :         }
    2636             : 
    2637             :         /* also set other bits of heap to correct value to indicate size */
    2638         130 :         BATsetcount(r1, cnt);
    2639             : 
    2640             :         /* set properties using an extra scan (usually not complete) */
    2641         129 :         dst1 = (oid *) Tloc(r1, 0);
    2642         129 :         r1->tkey = true;
    2643         129 :         r1->tsorted = true;
    2644         129 :         r1->trevsorted = true;
    2645         129 :         r1->tseqbase = 0;
    2646         129 :         r1->tnil = false;
    2647         129 :         r1->tnonil = true;
    2648         612 :         for (ncnt = 1; ncnt < cnt; ncnt++) {
    2649         494 :                 if (dst1[ncnt - 1] == dst1[ncnt]) {
    2650         429 :                         r1->tseqbase = oid_nil;
    2651         429 :                         r1->tkey = false;
    2652          65 :                 } else if (dst1[ncnt - 1] < dst1[ncnt]) {
    2653          62 :                         r1->trevsorted = false;
    2654          62 :                         if (dst1[ncnt - 1] + 1 != dst1[ncnt])
    2655           5 :                                 r1->tseqbase = oid_nil;
    2656             :                 } else {
    2657           3 :                         assert(sorted != 1);
    2658           3 :                         r1->tsorted = false;
    2659           3 :                         r1->tseqbase = oid_nil;
    2660           3 :                         r1->tkey = false;
    2661             :                 }
    2662         939 :                 if (!(r1->trevsorted | BATtdense(r1) | r1->tkey | ((sorted != 1) & r1->tsorted)))
    2663             :                         break;
    2664             :         }
    2665         129 :         if (BATtdense(r1))
    2666         102 :                 r1->tseqbase = cnt > 0 ? dst1[0] : 0;
    2667         129 :         if (r2) {
    2668         109 :                 BATsetcount(r2, cnt);
    2669         109 :                 dst2 = (oid *) Tloc(r2, 0);
    2670         109 :                 r2->tkey = true;
    2671         109 :                 r2->tsorted = true;
    2672         109 :                 r2->trevsorted = true;
    2673         109 :                 r2->tseqbase = 0;
    2674         109 :                 r2->tnil = false;
    2675         109 :                 r2->tnonil = true;
    2676         585 :                 for (ncnt = 1; ncnt < cnt; ncnt++) {
    2677         489 :                         if (dst2[ncnt - 1] == dst2[ncnt]) {
    2678          44 :                                 r2->tseqbase = oid_nil;
    2679          44 :                                 r2->tkey = false;
    2680         445 :                         } else if (dst2[ncnt - 1] < dst2[ncnt]) {
    2681         435 :                                 r2->trevsorted = false;
    2682         435 :                                 if (dst2[ncnt - 1] + 1 != dst2[ncnt])
    2683         129 :                                         r2->tseqbase = oid_nil;
    2684             :                         } else {
    2685          10 :                                 assert(sorted != 2);
    2686          10 :                                 r2->tsorted = false;
    2687          10 :                                 r2->tseqbase = oid_nil;
    2688          10 :                                 r2->tkey = false;
    2689             :                         }
    2690         804 :                         if (!(r2->trevsorted | BATtdense(r2) | r2->tkey | ((sorted != 2) & r2->tsorted)))
    2691             :                                 break;
    2692             :                 }
    2693         109 :                 if (BATtdense(r2))
    2694          79 :                         r2->tseqbase = cnt > 0 ? dst2[0] : 0;
    2695             :         }
    2696         129 :         TRC_DEBUG(ALGO, "l=%s,rl=%s,rh=%s -> "
    2697             :                   "(" ALGOBATFMT "," ALGOOPTBATFMT ")\n",
    2698             :                   BATgetId(l), BATgetId(rl), BATgetId(rh),
    2699             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2));
    2700         129 :         bat_iterator_end(&li);
    2701         129 :         bat_iterator_end(&rli);
    2702         129 :         bat_iterator_end(&rhi);
    2703         128 :         return GDK_SUCCEED;
    2704             : 
    2705           0 :   bailout:
    2706           0 :         bat_iterator_end(&li);
    2707           0 :         bat_iterator_end(&rli);
    2708           0 :         bat_iterator_end(&rhi);
    2709           0 :         BBPreclaim(r1);
    2710           0 :         BBPreclaim(r2);
    2711           0 :         return GDK_FAIL;
    2712             : }

Generated by: LCOV version 1.14