LCOV - code coverage report
Current view: top level - gdk - gdk_batop.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1165 1577 73.9 %
Date: 2021-10-13 02:24:04 Functions: 19 27 70.4 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  * (c) M. L. Kersten, P. Boncz, S. Manegold, N. Nes, K.S. Mullender
      11             :  * Common BAT Operations
      12             :  * We factor out all possible overhead by inlining code.  This
      13             :  * includes the macros BUNhead and BUNtail, which do a test to see
      14             :  * whether the atom resides in the buns or in a variable storage
      15             :  * heap.
      16             :  */
      17             : #include "monetdb_config.h"
      18             : #include "gdk.h"
      19             : #include "gdk_private.h"
      20             : 
      21             : gdk_return
      22    54111355 : unshare_varsized_heap(BAT *b)
      23             : {
      24    54111355 :         if (ATOMvarsized(b->ttype) &&
      25    20919269 :             b->tvheap->parentid != b->batCacheid) {
      26         281 :                 Heap *h = GDKmalloc(sizeof(Heap));
      27         281 :                 if (h == NULL)
      28             :                         return GDK_FAIL;
      29         281 :                 MT_thread_setalgorithm("unshare vheap");
      30         281 :                 *h = (Heap) {
      31         281 :                         .parentid = b->batCacheid,
      32         281 :                         .farmid = BBPselectfarm(b->batRole, TYPE_str, varheap),
      33             :                 };
      34         281 :                 strconcat_len(h->filename, sizeof(h->filename),
      35         281 :                               BBP_physical(b->batCacheid), ".theap", NULL);
      36         280 :                 if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
      37           0 :                         HEAPfree(h, true);
      38           0 :                         GDKfree(h);
      39           0 :                         return GDK_FAIL;
      40             :                 }
      41         280 :                 ATOMIC_INIT(&h->refs, 1);
      42         280 :                 MT_lock_set(&b->theaplock);
      43         281 :                 int parent = b->tvheap->parentid;
      44         281 :                 HEAPdecref(b->tvheap, false);
      45         281 :                 b->tvheap = h;
      46         281 :                 MT_lock_unset(&b->theaplock);
      47         281 :                 BBPunshare(parent);
      48         281 :                 BBPunfix(parent);
      49             :         }
      50             :         return GDK_SUCCEED;
      51             : }
      52             : 
      53             : /* We try to be clever when appending one string bat to another.
      54             :  * First of all, we try to actually share the string heap so that we
      55             :  * don't need an extra copy, and if that can't be done, we see whether
      56             :  * it makes sense to just quickly copy the whole string heap instead
      57             :  * of inserting individual strings.  See the comments in the code for
      58             :  * more information. */
      59             : static gdk_return
      60      146654 : insert_string_bat(BAT *b, BAT *n, struct canditer *ci, bool force, bool mayshare)
      61             : {
      62             :         BATiter ni;             /* iterator */
      63             :         size_t toff = ~(size_t) 0;      /* tail offset */
      64             :         BUN p, r;               /* loop variables */
      65             :         const void *tp = NULL;  /* tail value pointer */
      66             :         unsigned char tbv;      /* tail value-as-bte */
      67             :         unsigned short tsv;     /* tail value-as-sht */
      68             : #if SIZEOF_VAR_T == 8
      69             :         unsigned int tiv;       /* tail value-as-int */
      70             : #endif
      71             :         var_t v;                /* value */
      72             :         size_t off;             /* offset within n's string heap */
      73      146654 :         BUN cnt = ci->ncand;
      74      146654 :         BUN oldcnt = BATcount(b);
      75             : 
      76      146654 :         assert(b->ttype == TYPE_str);
      77      146654 :         assert(b->tbaseoff == 0);
      78      146654 :         assert(b->theap->parentid == b->batCacheid);
      79             :         /* only transient bats can use some other bat's string heap */
      80      146654 :         assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
      81      146654 :         if (cnt == 0)
      82             :                 return GDK_SUCCEED;
      83      146655 :         ni = bat_iterator(n);
      84             : 
      85      146664 :         if (b->tvheap == ni.vh) {
      86             :                 /* vheaps are already shared, continue doing so: we just
      87             :                  * need to append the offsets */
      88             :                 toff = 0;
      89       10518 :                 MT_thread_setalgorithm("shared vheap");
      90      136146 :         } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
      91             :                 /* we can share the vheaps, so we then only need to
      92             :                  * append the offsets */
      93      124998 :                 MT_lock_set(&b->theaplock);
      94      124987 :                 if (b->tvheap->parentid != b->batCacheid)
      95           0 :                         BBPunshare(b->tvheap->parentid);
      96      124987 :                 HEAPdecref(b->tvheap, b->tvheap->parentid == b->batCacheid);
      97      125003 :                 HEAPincref(ni.vh);
      98      125005 :                 b->tvheap = ni.vh;
      99      125005 :                 BBPshare(ni.vh->parentid);
     100      124999 :                 b->batDirtydesc = true;
     101      124999 :                 MT_lock_unset(&b->theaplock);
     102             :                 toff = 0;
     103      124998 :                 MT_thread_setalgorithm("share vheap");
     104             :         } else {
     105             :                 /* no heap sharing, so also make sure the heap isn't
     106             :                  * shared currently (we're not allowed to write in
     107             :                  * another bat's heap) */
     108       11429 :                 if (b->tvheap->parentid != b->batCacheid &&
     109         281 :                     unshare_varsized_heap(b) != GDK_SUCCEED) {
     110           0 :                         bat_iterator_end(&ni);
     111           0 :                         return GDK_FAIL;
     112             :                 }
     113       11148 :                 if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
     114         129 :                                     !GDK_ELIMDOUBLES(ni.vh))) {
     115             :                         /* we'll consider copying the string heap completely
     116             :                          *
     117             :                          * we first estimate how much space the string heap
     118             :                          * should occupy, given the number of rows we need to
     119             :                          * insert, then, if that is way smaller than the actual
     120             :                          * space occupied, we will skip the copy and just insert
     121             :                          * one by one */
     122             :                         size_t len = 0;
     123     4393187 :                         for (int i = 0; i < 1024; i++) {
     124     4388891 :                                 p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
     125     4392703 :                                 p = canditer_idx(ci, p) - n->hseqbase;
     126     4388892 :                                 len += strlen(BUNtvar(ni, p)) + 1;
     127             :                         }
     128        4296 :                         len = (len + 512) / 1024; /* rounded average length */
     129        4296 :                         r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
     130             :                         /* r is estimate of number of strings in
     131             :                          * double-eliminated area */
     132        4296 :                         if (r < ci->ncand)
     133         778 :                                 len = GDK_ELIMLIMIT + (ci->ncand - r) * len;
     134             :                         else
     135        3518 :                                 len = GDK_STRHASHSIZE + ci->ncand * (len + 12);
     136             :                         /* len is total estimated expected size of vheap */
     137             : 
     138        4296 :                         if (len > ni.vhfree / 2) {
     139             :                                 /* we copy the string heap, perhaps appending */
     140        4256 :                                 if (oldcnt == 0) {
     141             :                                         toff = 0;
     142        4197 :                                         MT_thread_setalgorithm("copy vheap");
     143             :                                 } else {
     144          59 :                                         toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
     145          59 :                                         MT_thread_setalgorithm("append vheap");
     146             :                                 }
     147             : 
     148        4255 :                                 if (HEAPgrow(&b->theaplock, &b->tvheap, toff + ni.vh->size, force) != GDK_SUCCEED) {
     149           0 :                                         bat_iterator_end(&ni);
     150           0 :                                         return GDK_FAIL;
     151             :                                 }
     152        4256 :                                 memcpy(b->tvheap->base + toff, ni.vh->base, ni.vhfree);
     153        4256 :                                 b->tvheap->free = toff + ni.vhfree;
     154             :                         }
     155             :                 }
     156             :         }
     157             :         /* if toff has the initial value of ~0, we insert strings
     158             :          * individually, otherwise we only copy (insert) offsets */
     159             :         if (toff == ~(size_t) 0)
     160             :                 v = GDK_VAROFFSET;
     161             :         else
     162      139772 :                 v = b->tvheap->free - 1;
     163             : 
     164             :         /* make sure there is (vertical) space in the offset heap, we
     165             :          * may also widen thanks to v, set above */
     166      146665 :         if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
     167           0 :                 bat_iterator_end(&ni);
     168           0 :                 return GDK_FAIL;
     169             :         }
     170             : 
     171      146666 :         if (toff == 0 && ni.width == b->twidth && ci->tpe == cand_dense) {
     172             :                 /* we don't need to do any translation of offset
     173             :                  * values, so we can use fast memcpy */
     174      139132 :                 MT_thread_setalgorithm("memcpy offsets");
     175      139114 :                 memcpy(Tloc(b, BUNlast(b)), (const char *) ni.base + ((ci->seq - n->hseqbase) << ni.shift), cnt << ni.shift);
     176        7534 :         } else if (toff != ~(size_t) 0) {
     177             :                 /* we don't need to insert any actual strings since we
     178             :                  * have already made sure that they are all in b's
     179             :                  * string heap at known locations (namely the offset
     180             :                  * in n added to toff), so insert offsets from n after
     181             :                  * adding toff into b */
     182             :                 /* note the use of the "restrict" qualifier here: all
     183             :                  * four pointers below point to the same value, but
     184             :                  * only one of them will actually be used, hence we
     185             :                  * still obey the rule for restrict-qualified
     186             :                  * pointers */
     187         642 :                 const unsigned char *restrict tbp = (const unsigned char *) ni.base;
     188             :                 const unsigned short *restrict tsp = (const unsigned short *) ni.base;
     189             : #if SIZEOF_VAR_T == 8
     190             :                 const unsigned int *restrict tip = (const unsigned int *) ni.base;
     191             : #endif
     192             :                 const var_t *restrict tvp = (const var_t *) ni.base;
     193             : 
     194         642 :                 switch (b->twidth) {
     195             :                 case 1:
     196             :                         tp = &tbv;
     197             :                         break;
     198             :                 case 2:
     199             :                         tp = &tsv;
     200             :                         break;
     201             : #if SIZEOF_VAR_T == 8
     202             :                 case 4:
     203             :                         tp = &tiv;
     204             :                         break;
     205             :                 case 8:
     206             :                         tp = &v;
     207             :                         break;
     208             : #else
     209             :                 case 4:
     210             :                         tp = &v;
     211             :                         break;
     212             : #endif
     213             :                 default:
     214           0 :                         assert(0);
     215             :                 }
     216         642 :                 MT_thread_setalgorithm("copy offset values");
     217         642 :                 r = b->batCount;
     218     4297124 :                 while (cnt > 0) {
     219     4296482 :                         cnt--;
     220     4296482 :                         p = canditer_next(ci) - n->hseqbase;
     221     4296482 :                         switch (ni.width) {
     222        1081 :                         case 1:
     223        1081 :                                 v = (var_t) tbp[p] + GDK_VAROFFSET;
     224        1081 :                                 break;
     225       13449 :                         case 2:
     226       13449 :                                 v = (var_t) tsp[p] + GDK_VAROFFSET;
     227       13449 :                                 break;
     228             : #if SIZEOF_VAR_T == 8
     229     4281952 :                         case 4:
     230     4281952 :                                 v = (var_t) tip[p];
     231     4281952 :                                 break;
     232             : #endif
     233           0 :                         default:
     234           0 :                                 v = tvp[p];
     235           0 :                                 break;
     236             :                         }
     237     4296482 :                         v = (var_t) ((size_t) v + toff);
     238     4296482 :                         assert(v >= GDK_VAROFFSET);
     239     4296482 :                         assert((size_t) v < b->tvheap->free);
     240     4296482 :                         switch (b->twidth) {
     241       13449 :                         case 1:
     242       13449 :                                 assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
     243       13449 :                                 ((uint8_t *) b->theap->base)[r++] = (uint8_t) (v - GDK_VAROFFSET);
     244       13449 :                                 break;
     245        1081 :                         case 2:
     246        1081 :                                 assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
     247        1081 :                                 ((uint16_t *) b->theap->base)[r++] = (uint16_t) (v - GDK_VAROFFSET);
     248        1081 :                                 break;
     249             : #if SIZEOF_VAR_T == 8
     250     4281949 :                         case 4:
     251     4281949 :                                 assert(v < ((var_t) 1 << 32));
     252     4281949 :                                 ((uint32_t *) b->theap->base)[r++] = (uint32_t) v;
     253     4281949 :                                 break;
     254             : #endif
     255           3 :                         default:
     256           3 :                                 ((var_t *) b->theap->base)[r++] = v;
     257           3 :                                 break;
     258             :                         }
     259             :                 }
     260        6892 :         } else if (b->tvheap->free < ni.vhfree / 2 ||
     261             :                    GDK_ELIMDOUBLES(b->tvheap)) {
     262             :                 /* if b's string heap is much smaller than n's string
     263             :                  * heap, don't bother checking whether n's string
     264             :                  * values occur in b's string heap; also, if b is
     265             :                  * (still) fully double eliminated, we must continue
     266             :                  * to use the double elimination mechanism */
     267        6822 :                 r = b->batCount;
     268        6822 :                 oid hseq = n->hseqbase;
     269        6822 :                 MT_thread_setalgorithm("insert string values");
     270     7108269 :                 while (cnt > 0) {
     271     7101446 :                         cnt--;
     272     7101446 :                         p = canditer_next(ci) - hseq;
     273     7097236 :                         tp = BUNtvar(ni, p);
     274     7097236 :                         if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
     275           0 :                                 bat_iterator_end(&ni);
     276           0 :                                 return GDK_FAIL;
     277             :                         }
     278     7101447 :                         r++;
     279             :                 }
     280             :         } else {
     281             :                 /* Insert values from n individually into b; however,
     282             :                  * we check whether there is a string in b's string
     283             :                  * heap at the same offset as the string is in n's
     284             :                  * string heap (in case b's string heap is a copy of
     285             :                  * n's).  If this is the case, we just copy the
     286             :                  * offset, otherwise we insert normally.  */
     287          70 :                 r = b->batCount;
     288          70 :                 MT_thread_setalgorithm("insert string values with check");
     289       97689 :                 while (cnt > 0) {
     290       97619 :                         cnt--;
     291       97619 :                         p = canditer_next(ci) - n->hseqbase;
     292       97619 :                         off = BUNtvaroff(ni, p); /* the offset */
     293       97619 :                         tp = ni.vh->base + off; /* the string */
     294       97619 :                         if (off < b->tvheap->free &&
     295       97619 :                             strcmp(b->tvheap->base + off, tp) == 0) {
     296             :                                 /* we found the string at the same
     297             :                                  * offset in b's string heap as it was
     298             :                                  * in n's string heap, so we don't
     299             :                                  * have to insert a new string into b:
     300             :                                  * we can just copy the offset */
     301             :                                 v = (var_t) off;
     302       90712 :                                 switch (b->twidth) {
     303           0 :                                 case 1:
     304           0 :                                         assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
     305           0 :                                         ((uint8_t *) b->theap->base)[r] = (unsigned char) (v - GDK_VAROFFSET);
     306           0 :                                         break;
     307           0 :                                 case 2:
     308           0 :                                         assert(v - GDK_VAROFFSET < ((var_t) 1 << 16));
     309           0 :                                         ((uint16_t *) b->theap->base)[r] = (unsigned short) (v - GDK_VAROFFSET);
     310           0 :                                         break;
     311             : #if SIZEOF_VAR_T == 8
     312       90712 :                                 case 4:
     313       90712 :                                         assert(v < ((var_t) 1 << 32));
     314       90712 :                                         ((uint32_t *) b->theap->base)[r] = (unsigned int) v;
     315       90712 :                                         break;
     316             : #endif
     317           0 :                                 default:
     318           0 :                                         ((var_t *) b->theap->base)[r] = v;
     319           0 :                                         break;
     320             :                                 }
     321             :                         } else {
     322        6907 :                                 if (tfastins_nocheckVAR(b, r, tp) != GDK_SUCCEED) {
     323           0 :                                         bat_iterator_end(&ni);
     324           0 :                                         return GDK_FAIL;
     325             :                                 }
     326             :                         }
     327       97619 :                         r++;
     328             :                 }
     329             :         }
     330      146649 :         BATsetcount(b, oldcnt + ci->ncand);
     331      146655 :         bat_iterator_end(&ni);
     332      146671 :         assert(b->batCapacity >= b->batCount);
     333             :         /* maintain hash */
     334      146671 :         MT_rwlock_wrlock(&b->thashlock);
     335      146730 :         for (r = oldcnt, cnt = BATcount(b); b->thash && r < cnt; r++) {
     336          65 :                 HASHappend_locked(b, r, b->tvheap->base + VarHeapVal(Tloc(b, 0), r, b->twidth));
     337             :         }
     338      146665 :         MT_rwlock_wrunlock(&b->thashlock);
     339      146651 :         return GDK_SUCCEED;
     340             : }
     341             : 
     342             : static gdk_return
     343         279 : append_varsized_bat(BAT *b, BAT *n, struct canditer *ci, bool mayshare)
     344             : {
     345             :         BATiter ni;
     346         279 :         BUN cnt = ci->ncand, r;
     347         279 :         oid hseq = n->hseqbase;
     348             : 
     349             :         /* only transient bats can use some other bat's vheap */
     350         279 :         assert(b->batRole == TRANSIENT || b->tvheap->parentid == b->batCacheid);
     351             :         /* make sure the bats use var_t */
     352         279 :         assert(b->twidth == n->twidth);
     353         279 :         assert(b->twidth == SIZEOF_VAR_T);
     354         279 :         if (cnt == 0)
     355             :                 return GDK_SUCCEED;
     356         279 :         if (cnt > BATcapacity(b) - BATcount(b)) {
     357             :                 /* if needed space exceeds a normal growth extend just
     358             :                  * with what's needed */
     359           5 :                 BUN ncap = BATcount(b) + cnt;
     360           5 :                 BUN grows = BATgrows(b);
     361             : 
     362             :                 if (ncap > grows)
     363             :                         grows = ncap;
     364           5 :                 if (BATextend(b, grows) != GDK_SUCCEED)
     365             :                         return GDK_FAIL;
     366             :         }
     367         279 :         ni = bat_iterator(n);
     368         279 :         if (mayshare &&
     369         279 :             BATcount(b) == 0 &&
     370         164 :             b->batRole == TRANSIENT &&
     371          99 :             n->batRestricted == BAT_READ &&
     372          99 :             b->tvheap != ni.vh) {
     373             :                 /* if b is still empty, in the transient farm, and n
     374             :                  * is read-only, we replace b's vheap with a reference
     375             :                  * to n's */
     376             :                 /* make sure locking happens in a predictable order:
     377             :                  * lowest id first */
     378          99 :                 MT_lock_set(&b->theaplock);
     379          99 :                 if (b->tvheap->parentid != b->batCacheid)
     380           0 :                         BBPunshare(b->tvheap->parentid);
     381          99 :                 BBPshare(ni.vh->parentid);
     382          99 :                 HEAPdecref(b->tvheap, true);
     383          99 :                 HEAPincref(ni.vh);
     384          99 :                 b->tvheap = ni.vh;
     385          99 :                 b->batDirtydesc = true;
     386          99 :                 MT_lock_unset(&b->theaplock);
     387             :         }
     388         279 :         if (b->tvheap == ni.vh) {
     389             :                 /* if b and n use the same vheap, we only need to copy
     390             :                  * the offsets from n to b */
     391         182 :                 if (ci->tpe == cand_dense) {
     392             :                         /* fast memcpy since we copy a consecutive
     393             :                          * chunk of memory */
     394         182 :                         memcpy(Tloc(b, BUNlast(b)),
     395         182 :                                (const var_t *) ni.base + (ci->seq - hseq),
     396         182 :                                cnt << b->tshift);
     397             :                 } else {
     398           0 :                         var_t *restrict dst = (var_t *) Tloc(b, BUNlast(b));
     399           0 :                         const var_t *restrict src = (const var_t *) ni.base;
     400           0 :                         while (cnt > 0) {
     401           0 :                                 cnt--;
     402           0 :                                 *dst++ = src[canditer_next(ci) - hseq];
     403             :                         }
     404             :                 }
     405         182 :                 BATsetcount(b, BATcount(b) + ci->ncand);
     406             :                 /* maintain hash table */
     407         182 :                 MT_rwlock_wrlock(&b->thashlock);
     408         182 :                 for (BUN i = BATcount(b) - ci->ncand;
     409         182 :                      b->thash && i < BATcount(b);
     410           0 :                      i++) {
     411           0 :                         HASHappend_locked(b, i, b->tvheap->base + *(var_t *) Tloc(b, i));
     412             :                 }
     413         182 :                 MT_rwlock_wrunlock(&b->thashlock);
     414         182 :                 bat_iterator_end(&ni);
     415         182 :                 return GDK_SUCCEED;
     416             :         }
     417             :         /* b and n do not share their vheap, so we need to copy data */
     418          97 :         if (b->tvheap->parentid != b->batCacheid) {
     419             :                 /* if b shares its vheap with some other bat, unshare it */
     420          11 :                 Heap *h = GDKmalloc(sizeof(Heap));
     421          11 :                 if (h == NULL) {
     422           0 :                         bat_iterator_end(&ni);
     423           0 :                         return GDK_FAIL;
     424             :                 }
     425          11 :                 *h = (Heap) {
     426          11 :                         .parentid = b->batCacheid,
     427          11 :                         .farmid = BBPselectfarm(b->batRole, b->ttype, varheap),
     428             :                 };
     429          11 :                 strconcat_len(h->filename, sizeof(h->filename),
     430          11 :                               BBP_physical(b->batCacheid), ".theap", NULL);
     431          11 :                 if (HEAPcopy(h, b->tvheap, 0) != GDK_SUCCEED) {
     432           0 :                         bat_iterator_end(&ni);
     433           0 :                         HEAPfree(h, true);
     434           0 :                         GDKfree(h);
     435           0 :                         return GDK_FAIL;
     436             :                 }
     437          11 :                 BBPunshare(b->tvheap->parentid);
     438          11 :                 MT_lock_set(&b->theaplock);
     439          11 :                 HEAPdecref(b->tvheap, false);
     440          11 :                 ATOMIC_INIT(&h->refs, 1);
     441          11 :                 b->tvheap = h;
     442          11 :                 MT_lock_unset(&b->theaplock);
     443             :         }
     444             :         /* copy data from n to b */
     445          97 :         r = BUNlast(b);
     446          97 :         MT_rwlock_wrlock(&b->thashlock);
     447     4200535 :         while (cnt > 0) {
     448     4200439 :                 cnt--;
     449     4200439 :                 BUN p = canditer_next(ci) - hseq;
     450     4200439 :                 const void *t = BUNtvar(ni, p);
     451     4200439 :                 if (tfastins_nocheckVAR(b, r, t) != GDK_SUCCEED) {
     452           0 :                         MT_rwlock_wrunlock(&b->thashlock);
     453           0 :                         bat_iterator_end(&ni);
     454           0 :                         return GDK_FAIL;
     455             :                 }
     456     4200438 :                 if (b->thash)
     457           0 :                         HASHappend_locked(b, r, t);
     458     4200438 :                 r++;
     459             :         }
     460          96 :         MT_rwlock_wrunlock(&b->thashlock);
     461          97 :         BATsetcount(b, r);
     462          97 :         bat_iterator_end(&ni);
     463          97 :         return GDK_SUCCEED;
     464             : }
     465             : 
     466             : static gdk_return
     467         448 : append_msk_bat(BAT *b, BAT *n, struct canditer *ci)
     468             : {
     469         448 :         if (ci->ncand == 0)
     470             :                 return GDK_SUCCEED;
     471         448 :         if (BATextend(b, BATcount(b) + ci->ncand) != GDK_SUCCEED)
     472             :                 return GDK_FAIL;
     473             : 
     474         448 :         MT_lock_set(&b->theaplock);
     475             : 
     476         448 :         uint32_t boff = b->batCount % 32;
     477         448 :         uint32_t *bp = (uint32_t *) b->theap->base + b->batCount / 32;
     478         448 :         b->batCount += ci->ncand;
     479         448 :         b->theap->dirty = true;
     480         448 :         b->theap->free = ((b->batCount + 31) / 32) * 4;
     481         448 :         BATiter ni = bat_iterator(n);
     482         448 :         if (ci->tpe == cand_dense) {
     483             :                 const uint32_t *np;
     484             :                 uint32_t noff, mask;
     485             :                 BUN cnt;
     486         448 :                 noff = (ci->seq - n->hseqbase) % 32;
     487         448 :                 cnt = ci->ncand;
     488         448 :                 np = (const uint32_t *) ni.base + (ci->seq - n->hseqbase) / 32;
     489         448 :                 if (boff == noff) {
     490             :                         /* words of b and n are aligned, so we don't
     491             :                          * need to shift bits around */
     492          46 :                         if (boff + cnt <= 32) {
     493             :                                 /* all new bits within one word */
     494          39 :                                 if (cnt == 32) {
     495           0 :                                         *bp = *np;
     496             :                                 } else {
     497          39 :                                         mask = ((1U << cnt) - 1) << boff;
     498          39 :                                         *bp &= ~mask;
     499          39 :                                         *bp |= *np & mask;
     500             :                                 }
     501             :                         } else {
     502             :                                 /* multiple words of b are affected */
     503           7 :                                 if (boff != 0) {
     504             :                                         /* first fill up the rest of the first
     505             :                                          * word */
     506           0 :                                         mask = ~0U << boff;
     507           0 :                                         *bp &= ~mask;
     508           0 :                                         *bp++ |= *np++ & mask;
     509           0 :                                         cnt -= 32 - boff;
     510             :                                 }
     511           7 :                                 if (cnt >= 32) {
     512             :                                         /* copy an integral number of words fast */
     513           7 :                                         BUN nw = cnt / 32;
     514           7 :                                         memcpy(bp, np, nw*sizeof(int));
     515           7 :                                         bp += nw;
     516           7 :                                         np += nw;
     517           7 :                                         cnt %= 32;
     518             :                                 }
     519           7 :                                 if (cnt > 0) {
     520             :                                         /* do the left over bits */
     521           7 :                                         mask = (1U << cnt) - 1;
     522           7 :                                         *bp = *np & mask;
     523             :                                 }
     524             :                         }
     525         402 :                 } else if (boff > noff) {
     526         402 :                         if (boff + cnt <= 32) {
     527             :                                 /* we only need to copy bits from a
     528             :                                  * single word of n to a single word
     529             :                                  * of b */
     530             :                                 /* boff > 0, so cnt < 32, hence the
     531             :                                  * shift is ok */
     532         359 :                                 mask = (1U << cnt) - 1;
     533         359 :                                 *bp &= ~(mask << boff);
     534         359 :                                 *bp |= (*np & (mask << noff)) << (boff - noff);
     535             :                         } else {
     536             :                                 /* first fill the rest of the last partial
     537             :                                  * word of b, so that's 32-boff bits */
     538          43 :                                 mask = (1U << (32 - boff)) - 1;
     539          43 :                                 *bp &= ~(mask << boff);
     540          43 :                                 *bp++ |= (*np & (mask << noff)) << (boff - noff);
     541          43 :                                 cnt -= 32 - boff;
     542             : 
     543             :                                 /* set boff and noff to the amount we need to
     544             :                                  * shift bits in consecutive words of n around
     545             :                                  * to fit into the next word of b; set mask to
     546             :                                  * the mask of the bottom bits of n that fit
     547             :                                  * in a word of b (and the complement are the
     548             :                                  * top bits that go to another word of b) */
     549             :                                 boff -= noff;
     550          43 :                                 noff = 32 - boff;
     551          43 :                                 mask = (1U << noff) - 1;
     552         258 :                                 while (cnt >= 32) {
     553         215 :                                         *bp = (*np++ & ~mask) >> noff;
     554         215 :                                         *bp++ |= (*np & mask) << boff;
     555         215 :                                         cnt -= 32;
     556             :                                 }
     557          43 :                                 if (cnt > boff) {
     558             :                                         /* the last bits come from two words
     559             :                                          * in n */
     560          13 :                                         *bp = (*np++ & ~mask) >> noff;
     561          13 :                                         cnt -= noff;
     562          13 :                                         mask = (1U << cnt) - 1;
     563          13 :                                         *bp++ |= (*np & mask) << boff;
     564          30 :                                 } else if (cnt > 0) {
     565             :                                         /* the last bits come from a single
     566             :                                          * word in n */
     567          29 :                                         mask = ((1U << cnt) - 1) << noff;
     568          29 :                                         *bp = (*np & mask) >> noff;
     569             :                                 }
     570             :                         }
     571             :                 } else {
     572             :                         /* boff < noff */
     573           0 :                         if (noff + cnt <= 32) {
     574             :                                 /* only need part of the first word of n */
     575           0 :                                 mask = (1U << cnt) - 1;
     576           0 :                                 *bp &= ~(mask << boff);
     577           0 :                                 *bp |= (*np & (mask << noff)) >> (noff - boff);
     578           0 :                         } else if (boff + cnt <= 32) {
     579             :                                 /* only need to fill a single word of
     580             :                                  * b, but from two of n */
     581           0 :                                 if (cnt < 32)
     582           0 :                                         *bp &= ~(((1U << cnt) - 1) << boff);
     583             :                                 else
     584           0 :                                         *bp = 0;
     585           0 :                                 mask = ~((1U << noff) - 1);
     586           0 :                                 *bp |= (*np++ & mask) >> (noff - boff);
     587           0 :                                 cnt -= 32 - noff;
     588           0 :                                 mask = (1U << cnt) - 1;
     589           0 :                                 *bp |= (*np & mask) << (32 - noff);
     590             :                         } else {
     591           0 :                                 if (boff > 0) {
     592             :                                         /* fill the rest of the first word of b */
     593           0 :                                         cnt -= 32 - boff;
     594           0 :                                         *bp &= (1U << boff) - 1;
     595           0 :                                         mask = ~((1U << noff) - 1);
     596           0 :                                         noff -= boff;
     597           0 :                                         boff = 32 - noff;
     598           0 :                                         *bp |= (*np++ & mask) >> noff;
     599           0 :                                         *bp |= (*np & ((1U << noff) - 1)) << boff;
     600             :                                 } else {
     601           0 :                                         boff = 32 - noff;
     602             :                                 }
     603           0 :                                 mask = (1U << noff) - 1;
     604           0 :                                 while (cnt >= 32) {
     605           0 :                                         *bp = (*np++ & ~mask) >> noff;
     606           0 :                                         *bp++ |= (*np & mask) << boff;
     607           0 :                                         cnt -= 32;
     608             :                                 }
     609           0 :                                 if (cnt > 0) {
     610           0 :                                         *bp = (*np++ & ~mask) >> noff;
     611           0 :                                         if (cnt > noff)
     612           0 :                                                 *bp++ |= (*np & mask) << boff;
     613             :                                 }
     614             :                         }
     615             :                 }
     616             :         } else {
     617             :                 oid o;
     618           0 :                 uint32_t v = boff > 0 ? *bp & ((1U << boff) - 1) : 0;
     619             :                 do {
     620           0 :                         for (uint32_t i = boff; i < 32; i++) {
     621           0 :                                 o = canditer_next(ci);
     622           0 :                                 if (is_oid_nil(o))
     623             :                                         break;
     624           0 :                                 o -= n->hseqbase;
     625           0 :                                 v |= (uint32_t) Tmskval(&ni, o - n->hseqbase) << i;
     626             :                         }
     627           0 :                         *bp++ = v;
     628             :                         v = 0;
     629             :                         boff = 0;
     630           0 :                 } while (!is_oid_nil(o));
     631             :         }
     632         448 :         bat_iterator_end(&ni);
     633         448 :         MT_lock_unset(&b->theaplock);
     634         448 :         return GDK_SUCCEED;
     635             : }
     636             : 
     637             : /* Append the contents of BAT n (subject to the optional candidate
     638             :  * list s) to BAT b.  If b is empty, b will get the seqbase of s if it
     639             :  * was passed in, and else the seqbase of n. */
     640             : gdk_return
     641     1339889 : BATappend2(BAT *b, BAT *n, BAT *s, bool force, bool mayshare)
     642             : {
     643             :         struct canditer ci;
     644             :         BUN cnt;
     645             :         BUN r;
     646     1339889 :         oid hseq = n->hseqbase;
     647             :         char buf[64];
     648             :         lng t0 = 0;
     649             : 
     650     1339889 :         if (b == NULL || n == NULL || BATcount(n) == 0) {
     651             :                 return GDK_SUCCEED;
     652             :         }
     653      440116 :         assert(b->theap->parentid == b->batCacheid);
     654             : 
     655      440116 :         TRC_DEBUG_IF(ALGO) {
     656           0 :                 t0 = GDKusec();
     657           0 :                 snprintf(buf, sizeof(buf), ALGOBATFMT, ALGOBATPAR(b));
     658             :         }
     659             : 
     660      440116 :         ALIGNapp(b, force, GDK_FAIL);
     661             : 
     662     1313159 :         if (ATOMstorage(ATOMtype(b->ttype)) != ATOMstorage(ATOMtype(n->ttype))) {
     663           0 :                 GDKerror("Incompatible operands.\n");
     664           0 :                 return GDK_FAIL;
     665             :         }
     666             : 
     667      440116 :         if (BATttype(b) != BATttype(n) &&
     668             :             ATOMtype(b->ttype) != ATOMtype(n->ttype)) {
     669           0 :                 TRC_DEBUG(CHECK_, "Interpreting %s as %s.\n",
     670             :                           ATOMname(BATttype(n)), ATOMname(BATttype(b)));
     671             :         }
     672             : 
     673      440116 :         BATiter ni = bat_iterator(n);
     674             : 
     675      440081 :         cnt = canditer_init(&ci, n, s);
     676      440112 :         if (cnt == 0) {
     677           0 :                 goto doreturn;
     678             :         }
     679             : 
     680      440112 :         if (BUNlast(b) + cnt > BUN_MAX) {
     681           0 :                 bat_iterator_end(&ni);
     682           0 :                 GDKerror("combined BATs too large\n");
     683           0 :                 return GDK_FAIL;
     684             :         }
     685             : 
     686      440112 :         if (b->hseqbase + BATcount(b) + cnt >= GDK_oid_max) {
     687           0 :                 bat_iterator_end(&ni);
     688           0 :                 GDKerror("overflow of head value\n");
     689           0 :                 return GDK_FAIL;
     690             :         }
     691             : 
     692      440112 :         b->batDirtydesc = true;
     693             : 
     694      440112 :         IMPSdestroy(b);         /* imprints do not support updates yet */
     695      440113 :         OIDXdestroy(b);
     696      440117 :         MT_lock_set(&b->theaplock);
     697      440130 :         if (BATcount(b) == 0 || b->tmaxpos != BUN_NONE) {
     698      301675 :                 if (ni.maxpos != BUN_NONE) {
     699       16957 :                         BATiter bi = bat_iterator_nolock(b);
     700       16954 :                         if (BATcount(b) == 0 || ATOMcmp(b->ttype, BUNtail(bi, b->tmaxpos), BUNtail(ni, ni.maxpos)) < 0) {
     701       15350 :                                 if (s == NULL) {
     702       14982 :                                         b->tmaxpos = BATcount(b) + ni.maxpos;
     703             :                                 } else {
     704         368 :                                         b->tmaxpos = BUN_NONE;
     705             :                                 }
     706             :                         }
     707             :                 } else {
     708      284718 :                         b->tmaxpos = BUN_NONE;
     709             :                 }
     710             :         }
     711      440127 :         if (BATcount(b) == 0 || b->tminpos != BUN_NONE) {
     712      305426 :                 if (ni.minpos != BUN_NONE) {
     713       22784 :                         BATiter bi = bat_iterator_nolock(b);
     714       22778 :                         if (BATcount(b) == 0 || ATOMcmp(b->ttype, BUNtail(bi, b->tminpos), BUNtail(ni, ni.minpos)) > 0) {
     715       20335 :                                 if (s == NULL) {
     716       19967 :                                         b->tminpos = BATcount(b) + ni.minpos;
     717             :                                 } else {
     718         368 :                                         b->tminpos = BUN_NONE;
     719             :                                 }
     720             :                         }
     721             :                 } else {
     722      282642 :                         b->tminpos = BUN_NONE;
     723             :                 }
     724             :         }
     725      440121 :         if (cnt > BATcount(b) / GDK_UNIQUE_ESTIMATE_KEEP_FRACTION) {
     726      439927 :                 b->tunique_est = 0;
     727             :         }
     728      440121 :         MT_lock_unset(&b->theaplock);
     729             :         /* load hash so that we can maintain it */
     730      440084 :         (void) BATcheckhash(b);
     731             : 
     732      440085 :         if (b->ttype == TYPE_void) {
     733             :                 /* b does not have storage, keep it that way if we can */
     734        2269 :                 HASHdestroy(b); /* we're not maintaining the hash here */
     735        2269 :                 if (BATtdense(n) && ci.tpe == cand_dense &&
     736        1896 :                     (BATcount(b) == 0 ||
     737        1057 :                      (BATtdense(b) &&
     738        1057 :                       b->tseqbase + BATcount(b) == n->tseqbase + ci.seq - hseq))) {
     739             :                         /* n is also dense and consecutive with b */
     740        1841 :                         if (BATcount(b) == 0)
     741         839 :                                 BATtseqbase(b, n->tseqbase + ci.seq - hseq);
     742        1841 :                         BATsetcount(b, BATcount(b) + cnt);
     743        1841 :                         goto doreturn;
     744             :                 }
     745         428 :                 if ((BATcount(b) == 0 || is_oid_nil(b->tseqbase)) &&
     746          54 :                     ni.type == TYPE_void && is_oid_nil(n->tseqbase)) {
     747             :                         /* both b and n are void/nil */
     748          21 :                         BATtseqbase(b, oid_nil);
     749          21 :                         BATsetcount(b, BATcount(b) + cnt);
     750          21 :                         goto doreturn;
     751             :                 }
     752             :                 /* we need to materialize b; allocate enough capacity */
     753         407 :                 b->batCapacity = BATcount(b) + cnt;
     754         407 :                 if (BATmaterialize(b) != GDK_SUCCEED) {
     755           0 :                         bat_iterator_end(&ni);
     756           0 :                         return GDK_FAIL;
     757             :                 }
     758             :         }
     759             : 
     760      438223 :         r = BUNlast(b);
     761             : 
     762             :         /* property setting */
     763      438223 :         if (BATcount(b) == 0) {
     764      297104 :                 b->tsorted = n->tsorted;
     765      297104 :                 b->trevsorted = n->trevsorted;
     766      297104 :                 b->tseqbase = oid_nil;
     767      297104 :                 b->tnonil = n->tnonil;
     768      297104 :                 b->tnil = n->tnil && cnt == BATcount(n);
     769      297104 :                 if (ci.tpe == cand_dense) {
     770      296550 :                         b->tnosorted = ci.seq - hseq <= n->tnosorted && n->tnosorted < ci.seq + cnt - hseq ? n->tnosorted + hseq - ci.seq : 0;
     771      296550 :                         b->tnorevsorted = ci.seq - hseq <= n->tnorevsorted && n->tnorevsorted < ci.seq + cnt - hseq ? n->tnorevsorted + hseq - ci.seq : 0;
     772      296550 :                         if (BATtdense(n)) {
     773        1598 :                                 b->tseqbase = n->tseqbase + ci.seq - hseq;
     774             :                         }
     775             :                 } else {
     776         554 :                         b->tnosorted = 0;
     777         554 :                         b->tnorevsorted = 0;
     778             :                 }
     779      297104 :                 b->tkey = n->tkey;
     780      297104 :                 if (cnt == BATcount(n)) {
     781      292640 :                         b->tnokey[0] = n->tnokey[0];
     782      292640 :                         b->tnokey[1] = n->tnokey[1];
     783             :                 } else {
     784        4464 :                         b->tnokey[0] = b->tnokey[1] = 0;
     785             :                 }
     786             :         } else {
     787      141119 :                 BUN last = r - 1;
     788      141119 :                 BATiter bi = bat_iterator_nolock(b);
     789      141513 :                 int xx = ATOMcmp(b->ttype,
     790             :                                  BUNtail(ni, ci.seq - hseq),
     791             :                                  BUNtail(bi, last));
     792      141087 :                 if (BATtordered(b) && (!BATtordered(n) || xx < 0)) {
     793       14987 :                         b->tsorted = false;
     794       14987 :                         b->tnosorted = 0;
     795       14987 :                         b->tseqbase = oid_nil;
     796             :                 }
     797      141087 :                 if (BATtrevordered(b) &&
     798       23289 :                     (!BATtrevordered(n) || xx > 0)) {
     799       10710 :                         b->trevsorted = false;
     800       10710 :                         b->tnorevsorted = 0;
     801             :                 }
     802      141087 :                 if (b->tkey &&
     803       33327 :                     (!(BATtordered(b) || BATtrevordered(b)) ||
     804       22904 :                      !n->tkey || xx == 0)) {
     805       13969 :                         BATkey(b, false);
     806             :                 }
     807      141089 :                 if (b->ttype != TYPE_void && b->tsorted && BATtdense(b) &&
     808         921 :                     (!BATtdense(n) ||
     809        1642 :                      ci.tpe != cand_dense ||
     810         821 :                      1 + *(oid *) BUNtloc(bi, last) != BUNtoid(n, ci.seq - hseq))) {
     811         160 :                         b->tseqbase = oid_nil;
     812             :                 }
     813      141089 :                 b->tnonil &= n->tnonil;
     814      273782 :                 b->tnil |= n->tnil && cnt == ni.count;
     815             :         }
     816      438193 :         if (b->ttype == TYPE_str) {
     817      146635 :                 if (insert_string_bat(b, n, &ci, force, mayshare) != GDK_SUCCEED) {
     818           0 :                         bat_iterator_end(&ni);
     819           0 :                         return GDK_FAIL;
     820             :                 }
     821      291558 :         } else if (ATOMvarsized(b->ttype)) {
     822         253 :                 if (append_varsized_bat(b, n, &ci, mayshare) != GDK_SUCCEED) {
     823           0 :                         bat_iterator_end(&ni);
     824           0 :                         return GDK_FAIL;
     825             :                 }
     826      291305 :         } else if (ATOMstorage(b->ttype) == TYPE_msk) {
     827         448 :                 if (append_msk_bat(b, n, &ci) != GDK_SUCCEED) {
     828           0 :                         bat_iterator_end(&ni);
     829           0 :                         return GDK_FAIL;
     830             :                 }
     831             :         } else {
     832      290857 :                 if (cnt > BATcapacity(b) - BATcount(b)) {
     833             :                         /* if needed space exceeds a normal growth
     834             :                          * extend just with what's needed */
     835       12490 :                         BUN ncap = BATcount(b) + cnt;
     836       12490 :                         BUN grows = BATgrows(b);
     837             : 
     838             :                         if (ncap > grows)
     839             :                                 grows = ncap;
     840       12492 :                         if (BATextend(b, grows) != GDK_SUCCEED) {
     841           0 :                                 bat_iterator_end(&ni);
     842           0 :                                 return GDK_FAIL;
     843             :                         }
     844             :                 }
     845      290861 :                 MT_rwlock_wrlock(&b->thashlock);
     846      290823 :                 if (BATatoms[b->ttype].atomFix == NULL &&
     847      290809 :                     b->ttype != TYPE_void &&
     848      290809 :                     ni.type != TYPE_void &&
     849      287797 :                     ci.tpe == cand_dense) {
     850             :                         /* use fast memcpy if we can */
     851           0 :                         memcpy(Tloc(b, BUNlast(b)),
     852      287233 :                                (const char *) ni.base + ((ci.seq - hseq) << ni.shift),
     853      287233 :                                cnt << ni.shift);
     854      287532 :                         for (BUN i = 0; b->thash && i < cnt; i++) {
     855         300 :                                 HASHappend_locked(b, r, Tloc(b, r));
     856         299 :                                 r++;
     857             :                         }
     858             :                 } else {
     859      623845 :                         while (cnt > 0) {
     860      620255 :                                 cnt--;
     861      620255 :                                 BUN p = canditer_next(&ci) - hseq;
     862      620255 :                                 const void *t = BUNtail(ni, p);
     863      620255 :                                 if (tfastins_nocheck(b, r, t) != GDK_SUCCEED) {
     864           0 :                                         MT_rwlock_wrunlock(&b->thashlock);
     865           0 :                                         bat_iterator_end(&ni);
     866           0 :                                         return GDK_FAIL;
     867             :                                 }
     868      620255 :                                 if (b->thash)
     869           0 :                                         HASHappend_locked(b, r, t);
     870      620255 :                                 r++;
     871             :                         }
     872             :                 }
     873      290822 :                 MT_rwlock_wrunlock(&b->thashlock);
     874      290870 :                 BATsetcount(b, b->batCount + ci.ncand);
     875             :         }
     876             : 
     877      440091 :   doreturn:
     878      440091 :         bat_iterator_end(&ni);
     879      440109 :         TRC_DEBUG(ALGO, "b=%s,n=" ALGOBATFMT ",s=" ALGOOPTBATFMT
     880             :                   " -> " ALGOBATFMT " (" LLFMT " usec)\n",
     881             :                   buf, ALGOBATPAR(n), ALGOOPTBATPAR(s), ALGOBATPAR(b),
     882             :                   GDKusec() - t0);
     883             : 
     884             :         return GDK_SUCCEED;
     885             : }
     886             : 
     887             : gdk_return
     888     1339856 : BATappend(BAT *b, BAT *n, BAT *s, bool force)
     889             : {
     890     1339856 :         return BATappend2(b, n, s, force, true);
     891             : }
     892             : 
     893             : gdk_return
     894           4 : BATdel(BAT *b, BAT *d)
     895             : {
     896           4 :         gdk_return (*unfix) (const void *) = BATatoms[b->ttype].atomUnfix;
     897           4 :         void (*atmdel) (Heap *, var_t *) = BATatoms[b->ttype].atomDel;
     898           4 :         BATiter bi = bat_iterator_nolock(b);
     899             : 
     900           4 :         assert(ATOMtype(d->ttype) == TYPE_oid);
     901           4 :         assert(d->tsorted);
     902           4 :         assert(d->tkey);
     903           4 :         if (BATcount(d) == 0)
     904             :                 return GDK_SUCCEED;
     905           4 :         IMPSdestroy(b);
     906           4 :         OIDXdestroy(b);
     907           4 :         HASHdestroy(b);
     908           4 :         PROPdestroy(b);
     909           6 :         if (BATtdense(d)) {
     910             :                 oid o = d->tseqbase;
     911           2 :                 BUN c = BATcount(d);
     912             : 
     913           2 :                 if (o + c <= b->hseqbase)
     914             :                         return GDK_SUCCEED;
     915           2 :                 if (o < b->hseqbase) {
     916           0 :                         c -= b->hseqbase - o;
     917             :                         o = b->hseqbase;
     918             :                 }
     919           2 :                 if (o - b->hseqbase < b->batInserted) {
     920           0 :                         GDKerror("cannot delete committed values\n");
     921           0 :                         return GDK_FAIL;
     922             :                 }
     923           2 :                 if (o + c > b->hseqbase + BATcount(b))
     924           0 :                         c = b->hseqbase + BATcount(b) - o;
     925           2 :                 if (c == 0)
     926             :                         return GDK_SUCCEED;
     927           2 :                 if (unfix || atmdel) {
     928             :                         BUN p = o - b->hseqbase;
     929           0 :                         BUN q = p + c;
     930           0 :                         while (p < q) {
     931           0 :                                 if (unfix && (*unfix)(BUNtail(bi, p)) != GDK_SUCCEED)
     932             :                                         return GDK_FAIL;
     933           0 :                                 if (atmdel)
     934           0 :                                         (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, p));
     935           0 :                                 p++;
     936             :                         }
     937             :                 }
     938           2 :                 if (BATtdense(b) && BATmaterialize(b) != GDK_SUCCEED)
     939             :                         return GDK_FAIL;
     940           2 :                 if (o + c < b->hseqbase + BATcount(b)) {
     941           0 :                         o -= b->hseqbase;
     942           0 :                         if (ATOMstorage(b->ttype) == TYPE_msk) {
     943           0 :                                 BUN n = BATcount(b) - (o + c);
     944             :                                 /* not very efficient, but first see
     945             :                                  * how much this is used */
     946           0 :                                 for (BUN i = 0; i < n; i++)
     947           0 :                                         mskSetVal(b, o + i,
     948           0 :                                                   mskGetVal(b, o + c + i));
     949             :                         } else {
     950           0 :                                 memmove(Tloc(b, o),
     951           0 :                                         Tloc(b, o + c),
     952           0 :                                         Tsize(b) * (BATcount(b) - (o + c)));
     953             :                         }
     954           0 :                         b->theap->dirty = true;
     955             :                         // o += b->hseqbase; // if this were to be used again
     956             :                 }
     957           2 :                 MT_lock_set(&b->theaplock);
     958           2 :                 b->batCount -= c;
     959           2 :                 MT_lock_unset(&b->theaplock);
     960             :         } else {
     961           2 :                 BATiter di = bat_iterator(d);
     962           2 :                 const oid *o = (const oid *) di.base;
     963             :                 const oid *s;
     964           2 :                 BUN c = di.count;
     965             :                 BUN nd = 0;
     966             :                 BUN pos;
     967             :                 char *p = NULL;
     968             : 
     969           2 :                 if (o[c - 1] <= b->hseqbase) {
     970           0 :                         bat_iterator_end(&di);
     971           0 :                         return GDK_SUCCEED;
     972             :                 }
     973           2 :                 while (*o < b->hseqbase) {
     974           0 :                         o++;
     975           0 :                         c--;
     976             :                 }
     977           2 :                 if (*o - b->hseqbase < b->batInserted) {
     978           0 :                         bat_iterator_end(&di);
     979           0 :                         GDKerror("cannot delete committed values\n");
     980           0 :                         return GDK_FAIL;
     981             :                 }
     982           2 :                 if (BATtdense(b) && BATmaterialize(b) != GDK_SUCCEED) {
     983           0 :                         bat_iterator_end(&di);
     984           0 :                         return GDK_FAIL;
     985             :                 }
     986             :                 s = o;
     987           2 :                 pos = *o - b->hseqbase;
     988           2 :                 if (ATOMstorage(b->ttype) != TYPE_msk)
     989           2 :                         p = Tloc(b, pos);
     990           6 :                 while (c > 0 && *o < b->hseqbase + BATcount(b)) {
     991             :                         size_t n;
     992           4 :                         if (unfix)
     993           0 :                                 (*unfix)(BUNtail(bi, *o - b->hseqbase));
     994           4 :                         if (atmdel)
     995           0 :                                 (*atmdel)(b->tvheap, (var_t *) BUNtloc(bi, *o - b->hseqbase));
     996           4 :                         o++;
     997           4 :                         c--;
     998           4 :                         nd++;
     999           4 :                         if (c == 0 || *o - b->hseqbase >= BATcount(b))
    1000           2 :                                 n = b->hseqbase + BATcount(b) - o[-1] - 1;
    1001           2 :                         else if ((oid) (o - s) < *o - *s)
    1002           2 :                                 n = o[0] - o[-1] - 1;
    1003             :                         else
    1004             :                                 n = 0;
    1005           4 :                         if (n > 0) {
    1006           2 :                                 if (ATOMstorage(b->ttype) == TYPE_msk) {
    1007           0 :                                         BUN opos = o[-1] + 1 - b->hseqbase;
    1008             :                                         /* not very efficient, but
    1009             :                                          * first see how much this is
    1010             :                                          * used */
    1011           0 :                                         for (BUN i = 0; i < n; i++) {
    1012           0 :                                                 mskSetVal(b, pos + i,
    1013           0 :                                                           mskGetVal(b, opos + i));
    1014             :                                         }
    1015           0 :                                         pos += n;
    1016             :                                 } else {
    1017           2 :                                         n *= Tsize(b);
    1018           2 :                                         memmove(p,
    1019           2 :                                                 Tloc(b, o[-1] + 1 - b->hseqbase),
    1020             :                                                 n);
    1021           2 :                                         p += n;
    1022             :                                 }
    1023             :                                 s = o;
    1024             :                         }
    1025             :                 }
    1026           2 :                 bat_iterator_end(&di);
    1027           2 :                 MT_lock_set(&b->theaplock);
    1028           2 :                 b->theap->dirty = true;
    1029           2 :                 b->batCount -= nd;
    1030           2 :                 MT_lock_unset(&b->theaplock);
    1031             :         }
    1032           4 :         if (b->batCount <= 1) {
    1033             :                 /* some trivial properties */
    1034           2 :                 b->tkey = true;
    1035           2 :                 b->tsorted = b->trevsorted = true;
    1036           2 :                 if (b->batCount == 0) {
    1037           2 :                         b->tnil = false;
    1038           2 :                         b->tnonil = true;
    1039             :                 }
    1040             :         }
    1041             :         /* not sure about these anymore */
    1042           4 :         MT_lock_set(&b->theaplock);
    1043           4 :         b->tnosorted = b->tnorevsorted = 0;
    1044           4 :         b->tnokey[0] = b->tnokey[1] = 0;
    1045           4 :         b->tminpos = BUN_NONE;
    1046           4 :         b->tmaxpos = BUN_NONE;
    1047           4 :         b->tunique_est = 0.0;
    1048           4 :         MT_lock_unset(&b->theaplock);
    1049             : 
    1050           4 :         return GDK_SUCCEED;
    1051             : }
    1052             : 
    1053             : /*
    1054             :  * Replace all values in b with values from n whose location is given by
    1055             :  * the oid in either p or positions.
    1056             :  * If positions is used, autoincr specifies whether it is the first of a
    1057             :  * dense range of positions or whether it is a full-blown array of
    1058             :  * position.
    1059             :  * If mayappend is set, the position in p/positions may refer to
    1060             :  * locations beyond the end of b.
    1061             :  */
    1062             : static gdk_return
    1063       72377 : BATappend_or_update(BAT *b, BAT *p, const oid *positions, BAT *n,
    1064             :                     bool mayappend, bool autoincr, bool force)
    1065             : {
    1066       72377 :         lng t0 = GDKusec();
    1067       72333 :         oid pos = oid_nil;
    1068             : 
    1069       72333 :         if (b == NULL || b->ttype == TYPE_void || n == NULL) {
    1070             :                 return GDK_SUCCEED;
    1071             :         }
    1072             :         /* either p or positions */
    1073       72333 :         assert((p == NULL) != (positions == NULL));
    1074       72333 :         if (p != NULL) {
    1075       72144 :                 if (BATcount(p) != BATcount(n)) {
    1076           0 :                         GDKerror("update BATs not the same size\n");
    1077           0 :                         return GDK_FAIL;
    1078             :                 }
    1079       72144 :                 if (ATOMtype(p->ttype) != TYPE_oid) {
    1080           0 :                         GDKerror("positions BAT not type OID\n");
    1081           0 :                         return GDK_FAIL;
    1082             :                 }
    1083       72144 :                 if (BATtdense(p)) {
    1084       63863 :                         pos = p->tseqbase;
    1085             :                         positions = &pos;
    1086             :                         autoincr = true;
    1087       63863 :                         p = NULL;
    1088        8281 :                 } else if (p->ttype != TYPE_void) {
    1089        8280 :                         positions = (const oid *) Tloc(p, 0);
    1090             :                         autoincr = false;
    1091             :                 } else {
    1092             :                         autoincr = false;
    1093             :                 }
    1094         189 :         } else if (autoincr) {
    1095         189 :                 pos = *positions;
    1096             :         }
    1097       72333 :         if (BATcount(n) == 0) {
    1098             :                 return GDK_SUCCEED;
    1099             :         }
    1100       22844 :         if (!force && (b->batRestricted != BAT_WRITE || b->batSharecnt > 0)) {
    1101           0 :                 GDKerror("access denied to %s, aborting.\n", BATgetId(b));
    1102           0 :                 return GDK_FAIL;
    1103             :         }
    1104             : 
    1105       22844 :         BATiter bi = bat_iterator_nolock(b);
    1106       22842 :         BATiter ni = bat_iterator(n);
    1107             : 
    1108       22843 :         OIDXdestroy(b);
    1109       22844 :         IMPSdestroy(b);
    1110       22844 :         MT_lock_set(&b->theaplock);
    1111       22843 :         if (ni.count > BATcount(b) / GDK_UNIQUE_ESTIMATE_KEEP_FRACTION) {
    1112       22666 :                 b->tunique_est = 0;
    1113             :         }
    1114       22843 :         BUN minpos = b->tminpos;
    1115       22843 :         BUN maxpos = b->tmaxpos;
    1116       22843 :         MT_lock_unset(&b->theaplock);
    1117             :         /* load hash so that we can maintain it */
    1118       22843 :         (void) BATcheckhash(b);
    1119             : 
    1120       22813 :         b->tsorted = b->trevsorted = false;
    1121       22813 :         b->tnosorted = b->tnorevsorted = 0;
    1122       22813 :         b->tseqbase = oid_nil;
    1123       22813 :         b->tkey = false;
    1124       22813 :         b->tnokey[0] = b->tnokey[1] = 0;
    1125             : 
    1126       22813 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
    1127       22813 :         const void *nil = ATOMnilptr(b->ttype);
    1128       22813 :         oid hseqend = b->hseqbase + BATcount(b);
    1129             :         bool anynil = false;
    1130             :         bool locked = false;
    1131             : 
    1132       22813 :         if (b->tvarsized) {
    1133      285008 :                 for (BUN i = 0; i < ni.count; i++) {
    1134             :                         oid updid;
    1135      282883 :                         if (positions) {
    1136      282866 :                                 updid = autoincr ? pos++ : *positions++;
    1137             :                         } else {
    1138          17 :                                 updid = BUNtoid(p, i);
    1139             :                         }
    1140             : 
    1141      282883 :                         if (updid < b->hseqbase ||
    1142      282883 :                             (!mayappend && updid >= hseqend)) {
    1143           0 :                                 GDKerror("id out of range\n");
    1144           0 :                                 goto bailout;
    1145             :                         }
    1146      282883 :                         updid -= b->hseqbase;
    1147      282883 :                         if (!force && updid < b->batInserted) {
    1148           0 :                                 GDKerror("updating committed value\n");
    1149           0 :                                 goto bailout;
    1150             :                         }
    1151             : 
    1152      282883 :                         const void *new = BUNtvar(ni, i);
    1153             : 
    1154      282883 :                         if (updid >= BATcount(b)) {
    1155         266 :                                 assert(mayappend);
    1156         266 :                                 if (locked) {
    1157           5 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1158             :                                         locked = false;
    1159             :                                 }
    1160         266 :                                 if (BATcount(b) < updid &&
    1161           0 :                                     BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
    1162           0 :                                         bat_iterator_end(&ni);
    1163           0 :                                         return GDK_FAIL;
    1164             :                                 }
    1165         266 :                                 if (BUNappend(b, new, force) != GDK_SUCCEED) {
    1166           0 :                                         bat_iterator_end(&ni);
    1167           0 :                                         return GDK_FAIL;
    1168             :                                 }
    1169          30 :                                 bi = bat_iterator_nolock(b);
    1170      182126 :                                 continue;
    1171             :                         }
    1172             : 
    1173      282617 :                         const void *old = BUNtvar(bi, updid);
    1174             : 
    1175      282617 :                         if (atomcmp(old, new) == 0) {
    1176             :                                 /* replacing with the same value:
    1177             :                                  * nothing to do */
    1178      182096 :                                 continue;
    1179             :                         }
    1180             : 
    1181      100017 :                         bool isnil = atomcmp(new, nil) == 0;
    1182       99623 :                         anynil |= isnil;
    1183       99623 :                         if (b->tnil &&
    1184         523 :                             !anynil &&
    1185         523 :                             atomcmp(old, nil) == 0) {
    1186             :                                 /* if old value is nil and no new
    1187             :                                  * value is, we're not sure anymore
    1188             :                                  * about the nil property, so we must
    1189             :                                  * clear it */
    1190         523 :                                 b->tnil = false;
    1191             :                         }
    1192       99623 :                         b->tnonil &= !isnil;
    1193       99623 :                         b->tnil |= isnil;
    1194       99623 :                         if (maxpos != BUN_NONE) {
    1195         196 :                                 if (!isnil &&
    1196          98 :                                     atomcmp(BUNtvar(bi, maxpos), new) < 0) {
    1197             :                                         /* new value is larger than
    1198             :                                          * previous largest */
    1199             :                                         maxpos = updid;
    1200          88 :                                 } else if (atomcmp(BUNtvar(bi, maxpos), old) == 0 &&
    1201          12 :                                            atomcmp(new, old) != 0) {
    1202             :                                         /* old value is equal to
    1203             :                                          * largest and new value is
    1204             :                                          * smaller, so we don't know
    1205             :                                          * anymore which is the
    1206             :                                          * largest */
    1207             :                                         maxpos = BUN_NONE;
    1208             :                                 }
    1209             :                         }
    1210       99623 :                         if (minpos != BUN_NONE) {
    1211         174 :                                 if (!isnil &&
    1212          87 :                                     atomcmp(BUNtvar(bi, minpos), new) > 0) {
    1213             :                                         /* new value is smaller than
    1214             :                                          * previous smallest */
    1215             :                                         minpos = updid;
    1216          95 :                                 } else if (atomcmp(BUNtvar(bi, minpos), old) == 0 &&
    1217          23 :                                            atomcmp(new, old) != 0) {
    1218             :                                         /* old value is equal to
    1219             :                                          * smallest and new value is
    1220             :                                          * larger, so we don't know
    1221             :                                          * anymore which is the
    1222             :                                          * smallest */
    1223             :                                         minpos = BUN_NONE;
    1224             :                                 }
    1225             :                         }
    1226       99623 :                         if (!locked) {
    1227        1922 :                                 MT_rwlock_wrlock(&b->thashlock);
    1228             :                                 locked = true;
    1229             :                         }
    1230       99623 :                         HASHdelete_locked(b, updid, old);
    1231             : 
    1232             :                         var_t d;
    1233       99606 :                         switch (b->twidth) {
    1234       85211 :                         default: /* only three or four cases possible */
    1235       85211 :                                 d = (var_t) ((uint8_t *) b->theap->base)[updid] + GDK_VAROFFSET;
    1236       85211 :                                 break;
    1237        8558 :                         case 2:
    1238        8558 :                                 d = (var_t) ((uint16_t *) b->theap->base)[updid] + GDK_VAROFFSET;
    1239        8558 :                                 break;
    1240        5818 :                         case 4:
    1241        5818 :                                 d = (var_t) ((uint32_t *) b->theap->base)[updid];
    1242        5818 :                                 break;
    1243             : #if SIZEOF_VAR_T == 8
    1244          19 :                         case 8:
    1245          19 :                                 d = (var_t) ((uint64_t *) b->theap->base)[updid];
    1246          19 :                                 break;
    1247             : #endif
    1248             :                         }
    1249       99606 :                         if (ATOMreplaceVAR(b, &d, new) != GDK_SUCCEED) {
    1250           0 :                                 goto bailout;
    1251             :                         }
    1252       99536 :                         if (b->twidth < SIZEOF_VAR_T &&
    1253       99517 :                             (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 << b->tshift))) {
    1254             :                                 /* doesn't fit in current heap, upgrade it */
    1255          15 :                                 if (GDKupgradevarheap(b, d, 0, MAX(updid, b->batCount)) != GDK_SUCCEED) {
    1256           0 :                                         goto bailout;
    1257             :                                 }
    1258             :                         }
    1259             :                         /* in case ATOMreplaceVAR and/or
    1260             :                          * GDKupgradevarheap replaces a heap, we need to
    1261             :                          * reinitialize the iterator */
    1262       99536 :                         bi = bat_iterator_nolock(b);
    1263      101107 :                         switch (b->twidth) {
    1264       86700 :                         case 1:
    1265       86700 :                                 ((uint8_t *) b->theap->base)[updid] = (uint8_t) (d - GDK_VAROFFSET);
    1266       86700 :                                 break;
    1267        8567 :                         case 2:
    1268        8567 :                                 ((uint16_t *) b->theap->base)[updid] = (uint16_t) (d - GDK_VAROFFSET);
    1269        8567 :                                 break;
    1270        5821 :                         case 4:
    1271        5821 :                                 ((uint32_t *) b->theap->base)[updid] = (uint32_t) d;
    1272        5821 :                                 break;
    1273             : #if SIZEOF_VAR_T == 8
    1274          19 :                         case 8:
    1275          19 :                                 ((uint64_t *) b->theap->base)[updid] = (uint64_t) d;
    1276          19 :                                 break;
    1277             : #endif
    1278             :                         }
    1279      101107 :                         HASHinsert_locked(b, updid, new);
    1280             : 
    1281             :                 }
    1282        2125 :                 if (locked) {
    1283        1917 :                         MT_rwlock_wrunlock(&b->thashlock);
    1284             :                         locked = false;
    1285             :                 }
    1286        2125 :                 MT_lock_set(&b->theaplock);
    1287        2125 :                 b->tvheap->dirty = true;
    1288        2125 :                 MT_lock_unset(&b->theaplock);
    1289       20715 :         } else if (ATOMstorage(b->ttype) == TYPE_msk) {
    1290           0 :                 HASHdestroy(b); /* hash doesn't make sense for msk */
    1291           0 :                 for (BUN i = 0; i < ni.count; i++) {
    1292             :                         oid updid;
    1293           0 :                         if (positions) {
    1294           0 :                                 updid = autoincr ? pos++ : *positions++;
    1295             :                         } else {
    1296           0 :                                 updid = BUNtoid(p, i);
    1297             :                         }
    1298             : 
    1299           0 :                         if (updid < b->hseqbase ||
    1300           0 :                             (!mayappend && updid >= hseqend)) {
    1301           0 :                                 GDKerror("id out of range\n");
    1302           0 :                                 bat_iterator_end(&ni);
    1303           0 :                                 return GDK_FAIL;
    1304             :                         }
    1305           0 :                         updid -= b->hseqbase;
    1306           0 :                         if (!force && updid < b->batInserted) {
    1307           0 :                                 GDKerror("updating committed value\n");
    1308           0 :                                 bat_iterator_end(&ni);
    1309           0 :                                 return GDK_FAIL;
    1310             :                         }
    1311           0 :                         if (updid >= BATcount(b)) {
    1312           0 :                                 assert(mayappend);
    1313           0 :                                 if (BATcount(b) < updid &&
    1314           0 :                                     BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
    1315           0 :                                         bat_iterator_end(&ni);
    1316           0 :                                         return GDK_FAIL;
    1317             :                                 }
    1318           0 :                                 if (BUNappend(b, Tmsk(&ni, i), force) != GDK_SUCCEED) {
    1319           0 :                                         bat_iterator_end(&ni);
    1320           0 :                                         return GDK_FAIL;
    1321             :                                 }
    1322           0 :                                 continue;
    1323             :                         }
    1324           0 :                         mskSetVal(b, updid, Tmskval(&ni, i));
    1325             :                 }
    1326       20715 :         } else if (autoincr) {
    1327       12708 :                 if (pos < b->hseqbase ||
    1328       12538 :                     (!mayappend && pos + ni.count > hseqend)) {
    1329           0 :                         GDKerror("id out of range\n");
    1330           0 :                         bat_iterator_end(&ni);
    1331           0 :                         return GDK_FAIL;
    1332             :                 }
    1333       12708 :                 pos -= b->hseqbase;
    1334       12708 :                 if (!force && pos < b->batInserted) {
    1335           0 :                         GDKerror("updating committed value\n");
    1336           0 :                         bat_iterator_end(&ni);
    1337           0 :                         return GDK_FAIL;
    1338             :                 }
    1339             : 
    1340       12708 :                 if (pos >= BATcount(b)) {
    1341           0 :                         assert(mayappend);
    1342           0 :                         bat_iterator_end(&ni);
    1343           0 :                         if (BATcount(b) < pos &&
    1344           0 :                             BUNappendmulti(b, NULL, (BUN) (pos - BATcount(b)), force) != GDK_SUCCEED) {
    1345             :                                 return GDK_FAIL;
    1346             :                         }
    1347           0 :                         return BATappend(b, n, NULL, force);
    1348             :                 }
    1349       12708 :                 if (pos + ni.count > BATcount(b) &&
    1350           0 :                     BUNappendmulti(b, NULL, (BUN) (pos + ni.count - BATcount(b)), force) != GDK_SUCCEED) {
    1351           0 :                         bat_iterator_end(&ni);
    1352           0 :                         return GDK_FAIL;
    1353             :                 }
    1354             : 
    1355             :                 /* we copy all of n, so if there are nils in n we get
    1356             :                  * nils in b (and else we don't know) */
    1357       12708 :                 b->tnil = n->tnil;
    1358             :                 /* we may not copy over all of b, so we only know that
    1359             :                  * there are no nils in b afterward if there weren't
    1360             :                  * any in either b or n to begin with */
    1361       12708 :                 b->tnonil &= n->tnonil;
    1362             :                 /* if there is no hash, we don't start the loop, if
    1363             :                  * there is only a persisted hash, it will get destroyed
    1364             :                  * in the first iteration, after which there is no hash
    1365             :                  * and the loop ends */
    1366       12708 :                 MT_rwlock_wrlock(&b->thashlock);
    1367             :                 locked = true;
    1368      214812 :                 for (BUN i = pos, j = pos + ni.count; i < j && b->thash; i++)
    1369      202101 :                         HASHdelete_locked(b, i, Tloc(b, i));
    1370       12711 :                 if (n->ttype == TYPE_void) {
    1371          40 :                         assert(b->ttype == TYPE_oid);
    1372          40 :                         oid *o = Tloc(b, pos);
    1373          40 :                         if (is_oid_nil(ni.tseq)) {
    1374             :                                 /* we may or may not overwrite the old
    1375             :                                  * min/max values */
    1376             :                                 minpos = BUN_NONE;
    1377             :                                 maxpos = BUN_NONE;
    1378           0 :                                 for (BUN i = 0, j = ni.count; i < j; i++)
    1379           0 :                                         o[i] = oid_nil;
    1380           0 :                                 b->tnil = true;
    1381             :                         } else {
    1382             :                                 oid v = ni.tseq;
    1383             :                                 /* we know min/max of n, so we know
    1384             :                                  * the new min/max of b if those of n
    1385             :                                  * are smaller/larger than the old */
    1386          40 :                                 if (minpos != BUN_NONE) {
    1387           0 :                                         if (v <= BUNtoid(b, minpos))
    1388             :                                                 minpos = pos;
    1389           0 :                                         else if (pos <= minpos && minpos < pos + ni.count)
    1390             :                                                 minpos = BUN_NONE;
    1391             :                                 }
    1392          40 :                                 if (maxpos != BUN_NONE) {
    1393           0 :                                         if (v + ni.count - 1 >= BUNtoid(b, maxpos))
    1394           0 :                                                 maxpos = pos + ni.count - 1;
    1395           0 :                                         else if (pos <= maxpos && maxpos < pos + ni.count)
    1396             :                                                 maxpos = BUN_NONE;
    1397             :                                 }
    1398        1558 :                                 for (BUN i = 0, j = ni.count; i < j; i++)
    1399        1518 :                                         o[i] = v++;
    1400             :                         }
    1401             :                 } else {
    1402             :                         /* if the extremes of n are at least as
    1403             :                          * extreme as those of b, we can replace b's
    1404             :                          * min/max, else we don't know what b's new
    1405             :                          * min/max are*/
    1406       12786 :                         if (minpos != BUN_NONE && n->tminpos != BUN_NONE &&
    1407         115 :                             atomcmp(BUNtloc(bi, minpos), BUNtail(ni, n->tminpos)) >= 0) {
    1408          80 :                                 minpos = pos + n->tminpos;
    1409             :                         } else {
    1410             :                                 minpos = BUN_NONE;
    1411             :                         }
    1412       12835 :                         if (maxpos != BUN_NONE && n->tmaxpos != BUN_NONE &&
    1413         164 :                             atomcmp(BUNtloc(bi, maxpos), BUNtail(ni, n->tmaxpos)) <= 0) {
    1414         111 :                                 maxpos = pos + n->tmaxpos;
    1415             :                         } else {
    1416             :                                 maxpos = BUN_NONE;
    1417             :                         }
    1418       12671 :                         memcpy(Tloc(b, pos), ni.base,
    1419       12671 :                                ni.count << b->tshift);
    1420             :                 }
    1421             :                 /* either we have a hash that was updated above, or we
    1422             :                  * have no hash; we cannot have the case where there is
    1423             :                  * only a persisted (unloaded) hash since it would have
    1424             :                  * been destroyed above */
    1425       12711 :                 if (b->thash != NULL) {
    1426      202523 :                         for (BUN i = pos, j = pos + ni.count; i < j; i++)
    1427      202104 :                                 HASHinsert_locked(b, i, Tloc(b, i));
    1428             :                 }
    1429       12710 :                 MT_rwlock_wrunlock(&b->thashlock);
    1430             :                 locked = false;
    1431       12712 :                 if (ni.count == BATcount(b)) {
    1432             :                         /* if we replaced all values of b by values
    1433             :                          * from n, we can also copy the min/max
    1434             :                          * properties */
    1435        5460 :                         minpos = n->tminpos;
    1436        5460 :                         maxpos = n->tmaxpos;
    1437        5460 :                         if (BATtdense(n)) {
    1438             :                                 /* replaced all of b with a dense sequence */
    1439          46 :                                 BATtseqbase(b, ni.tseq);
    1440             :                         }
    1441             :                 }
    1442             :         } else {
    1443   116631334 :                 for (BUN i = 0, j = ni.count; i < j; i++) {
    1444             :                         oid updid;
    1445   116623327 :                         if (positions) {
    1446             :                                 /* assert(!autoincr) */
    1447   116623327 :                                 updid = *positions++;
    1448             :                         } else {
    1449           0 :                                 updid = BUNtoid(p, i);
    1450             :                         }
    1451             : 
    1452   116623327 :                         if (updid < b->hseqbase ||
    1453   116623327 :                             (!mayappend && updid >= hseqend)) {
    1454           0 :                                 GDKerror("id out of range\n");
    1455           0 :                                 goto bailout;
    1456             :                         }
    1457   116623327 :                         updid -= b->hseqbase;
    1458   116623327 :                         if (!force && updid < b->batInserted) {
    1459           0 :                                 GDKerror("updating committed value\n");
    1460           0 :                                 goto bailout;
    1461             :                         }
    1462             : 
    1463   116623327 :                         const void *new = BUNtail(ni, i);
    1464             : 
    1465   116596820 :                         if (updid >= BATcount(b)) {
    1466          91 :                                 assert(mayappend);
    1467          91 :                                 if (locked) {
    1468           9 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1469             :                                         locked = false;
    1470             :                                 }
    1471          91 :                                 if (BATcount(b) < updid &&
    1472           0 :                                     BUNappendmulti(b, NULL, (BUN) (updid - BATcount(b)), force) != GDK_SUCCEED) {
    1473           0 :                                         goto bailout;
    1474             :                                 }
    1475          91 :                                 if (BUNappend(b, new, force) != GDK_SUCCEED) {
    1476           0 :                                         bat_iterator_end(&ni);
    1477           0 :                                         return GDK_FAIL;
    1478             :                                 }
    1479          91 :                                 bi = bat_iterator_nolock(b);
    1480          91 :                                 continue;
    1481             :                         }
    1482             : 
    1483   116596729 :                         const void *old = BUNtloc(bi, updid);
    1484   116596729 :                         bool isnil = atomcmp(new, nil) == 0;
    1485   116696643 :                         anynil |= isnil;
    1486   116696643 :                         if (b->tnil &&
    1487        1911 :                             !anynil &&
    1488        1911 :                             atomcmp(old, nil) == 0) {
    1489             :                                 /* if old value is nil and no new
    1490             :                                  * value is, we're not sure anymore
    1491             :                                  * about the nil property, so we must
    1492             :                                  * clear it */
    1493        1911 :                                 b->tnil = false;
    1494             :                         }
    1495   116696643 :                         b->tnonil &= !isnil;
    1496   116696643 :                         b->tnil |= isnil;
    1497   116696643 :                         if (maxpos != BUN_NONE) {
    1498          90 :                                 if (!isnil &&
    1499          43 :                                     atomcmp(BUNtloc(bi, maxpos), new) < 0) {
    1500             :                                         /* new value is larger than
    1501             :                                          * previous largest */
    1502             :                                         maxpos = updid;
    1503          53 :                                 } else if (atomcmp(BUNtloc(bi, maxpos), old) == 0 &&
    1504           6 :                                            atomcmp(new, old) != 0) {
    1505             :                                         /* old value is equal to
    1506             :                                          * largest and new value is
    1507             :                                          * smaller, so we don't know
    1508             :                                          * anymore which is the
    1509             :                                          * largest */
    1510             :                                         maxpos = BUN_NONE;
    1511             :                                 }
    1512             :                         }
    1513   116696643 :                         if (minpos != BUN_NONE) {
    1514          70 :                                 if (!isnil &&
    1515          33 :                                     atomcmp(BUNtloc(bi, minpos), new) > 0) {
    1516             :                                         /* new value is smaller than
    1517             :                                          * previous smallest */
    1518             :                                         minpos = updid;
    1519          40 :                                 } else if (atomcmp(BUNtloc(bi, minpos), old) == 0 &&
    1520           8 :                                            atomcmp(new, old) != 0) {
    1521             :                                         /* old value is equal to
    1522             :                                          * smallest and new value is
    1523             :                                          * larger, so we don't know
    1524             :                                          * anymore which is the
    1525             :                                          * smallest */
    1526             :                                         minpos = BUN_NONE;
    1527             :                                 }
    1528             :                         }
    1529             : 
    1530   116696643 :                         if (!locked) {
    1531        8007 :                                 MT_rwlock_wrlock(&b->thashlock);
    1532             :                                 locked = true;
    1533             :                         }
    1534   116696643 :                         HASHdelete_locked(b, updid, old);
    1535   116683084 :                         switch (b->twidth) {
    1536     1809177 :                         case 1:
    1537     1809177 :                                 ((bte *) b->theap->base)[updid] = * (bte *) new;
    1538     1809177 :                                 break;
    1539       26385 :                         case 2:
    1540       26385 :                                 ((sht *) b->theap->base)[updid] = * (sht *) new;
    1541       26385 :                                 break;
    1542    16743515 :                         case 4:
    1543    16743515 :                                 ((int *) b->theap->base)[updid] = * (int *) new;
    1544    16743515 :                                 break;
    1545    96417834 :                         case 8:
    1546    96417834 :                                 ((lng *) b->theap->base)[updid] = * (lng *) new;
    1547    96417834 :                                 break;
    1548     1686173 :                         case 16:
    1549             : #ifdef HAVE_HGE
    1550     1686173 :                                 ((hge *) b->theap->base)[updid] = * (hge *) new;
    1551             : #else
    1552             :                                 ((uuid *) b->theap->base)[updid] = * (uuid *) new;
    1553             : #endif
    1554     1686173 :                                 break;
    1555           0 :                         default:
    1556           0 :                                 memcpy(BUNtloc(bi, updid), new, ATOMsize(b->ttype));
    1557           0 :                                 break;
    1558             :                         }
    1559   116683084 :                         HASHinsert_locked(b, updid, new);
    1560             :                 }
    1561        8007 :                 if (locked) {
    1562        7998 :                         MT_rwlock_wrunlock(&b->thashlock);
    1563             :                         locked = false;
    1564             :                 }
    1565             :         }
    1566       22844 :         bat_iterator_end(&ni);
    1567       22844 :         MT_lock_set(&b->theaplock);
    1568       22841 :         b->tminpos = minpos;
    1569       22841 :         b->tmaxpos = maxpos;
    1570       22841 :         b->theap->dirty = true;
    1571       22841 :         MT_lock_unset(&b->theaplock);
    1572       22840 :         TRC_DEBUG(ALGO,
    1573             :                   "BATreplace(" ALGOBATFMT "," ALGOOPTBATFMT "," ALGOBATFMT ") " LLFMT " usec\n",
    1574             :                   ALGOBATPAR(b), ALGOOPTBATPAR(p), ALGOBATPAR(n),
    1575             :                   GDKusec() - t0);
    1576             :         return GDK_SUCCEED;
    1577             : 
    1578           0 :   bailout:
    1579           0 :         bat_iterator_end(&ni);
    1580           0 :         if (locked) {
    1581           0 :                 Hash *h = b->thash;
    1582           0 :                 b->thash = NULL;
    1583           0 :                 MT_rwlock_wrunlock(&b->thashlock);
    1584           0 :                 doHASHdestroy(b, h);
    1585             :         }
    1586             :         return GDK_FAIL;
    1587             : }
    1588             : 
    1589             : /* replace values from b at locations specified in p with values in n */
    1590             : gdk_return
    1591       72173 : BATreplace(BAT *b, BAT *p, BAT *n, bool force)
    1592             : {
    1593       72173 :         return BATappend_or_update(b, p, NULL, n, false, false, force);
    1594             : }
    1595             : 
    1596             : /* like BATreplace, but p may specify locations beyond the end of b */
    1597             : gdk_return
    1598          14 : BATupdate(BAT *b, BAT *p, BAT *n, bool force)
    1599             : {
    1600          14 :         return BATappend_or_update(b, p, NULL, n, true, false, force);
    1601             : }
    1602             : 
    1603             : /* like BATreplace, but the positions are given by an array of oid values */
    1604             : gdk_return
    1605           0 : BATreplacepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
    1606             : {
    1607           0 :         return BATappend_or_update(b, NULL, positions, n, false, autoincr, force);
    1608             : }
    1609             : 
    1610             : /* like BATreplace, but the positions are given by an array of oid
    1611             :  * values, and they may specify locations beyond the end of b */
    1612             : gdk_return
    1613         189 : BATupdatepos(BAT *b, const oid *positions, BAT *n, bool autoincr, bool force)
    1614             : {
    1615         189 :         return BATappend_or_update(b, NULL, positions, n, true, autoincr, force);
    1616             : }
    1617             : 
    1618             : /*
    1619             :  *  BAT Selections
    1620             :  * The BAT selectors are among the most heavily used operators.
    1621             :  * Their efficient implementation is therefore mandatory.
    1622             :  *
    1623             :  * BAT slice
    1624             :  * This function returns a horizontal slice from a BAT. It optimizes
    1625             :  * execution by avoiding to copy when the BAT is memory mapped (in
    1626             :  * this case, an independent submap is created) or else when it is
    1627             :  * read-only, then a VIEW bat is created as a result.
    1628             :  *
    1629             :  * If a new copy has to be created, this function takes care to
    1630             :  * preserve void-columns (in this case, the seqbase has to be
    1631             :  * recomputed in the result).
    1632             :  *
    1633             :  * The selected range is excluding the high value.
    1634             :  */
    1635             : BAT *
    1636     4559313 : BATslice(BAT *b, BUN l, BUN h)
    1637             : {
    1638             :         BUN low = l;
    1639             :         BAT *bn = NULL;
    1640             :         BATiter bni;
    1641             :         oid foid;               /* first oid value if oid column */
    1642             : 
    1643     4559313 :         BATcheck(b, NULL);
    1644     4559313 :         if (h > BATcount(b))
    1645             :                 h = BATcount(b);
    1646             :         if (h < l)
    1647             :                 h = l;
    1648             : 
    1649     4559313 :         if (l > BUN_MAX || h > BUN_MAX) {
    1650           0 :                 GDKerror("boundary out of range\n");
    1651           0 :                 goto doreturn;
    1652             :         }
    1653             : 
    1654     4559313 :         if (complex_cand(b)) {
    1655             :                 /* slicing a candidate list with exceptions */
    1656             :                 struct canditer ci;
    1657          91 :                 canditer_init(&ci, NULL, b);
    1658          91 :                 if (b->hseqbase + l >= ci.hseq) {
    1659          91 :                         l = b->hseqbase + l - ci.hseq;
    1660          91 :                         h = b->hseqbase + h - ci.hseq;
    1661             :                 } else {
    1662             :                         l = 0;
    1663           0 :                         if (b->hseqbase + h >= ci.hseq)
    1664           0 :                                 h = b->hseqbase + h - ci.hseq;
    1665             :                         else
    1666             :                                 h = 0;
    1667             :                 }
    1668          91 :                 bn = canditer_slice(&ci, l, h);
    1669          91 :                 goto doreturn;
    1670             :         }
    1671             :         /* If the source BAT is readonly, then we can obtain a VIEW
    1672             :          * that just reuses the memory of the source. */
    1673     4559222 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
    1674             :                 /* forget about slices for bit masks: we can't deal
    1675             :                  * with difference in alignment, so we'll just make a
    1676             :                  * copy */
    1677           0 :                 bn = COLnew((oid) (b->hseqbase + low), b->ttype, h - l, TRANSIENT);
    1678             :                 /* we use BATappend with a candidate list to easily
    1679             :                  * copy the part of b that we need */
    1680           0 :                 BAT *s = BATdense(0, (oid) (b->hseqbase + low), h - l);
    1681           0 :                 if (bn == NULL ||
    1682           0 :                     s == NULL ||
    1683           0 :                     BATappend(bn, b, s, false) != GDK_SUCCEED) {
    1684           0 :                         BBPreclaim(bn);
    1685           0 :                         BBPreclaim(s);
    1686             :                         bn = NULL;
    1687           0 :                         goto doreturn;
    1688             :                 }
    1689           0 :                 BBPunfix(s->batCacheid);
    1690           0 :                 goto doreturn;
    1691     4559222 :         } else if (b->batRestricted == BAT_READ &&
    1692     4477222 :             (!VIEWtparent(b) ||
    1693     1094880 :              BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ)) {
    1694     4477191 :                 bn = VIEWcreate(b->hseqbase + low, b);
    1695     4476628 :                 if (bn == NULL)
    1696           0 :                         goto doreturn;
    1697     4476628 :                 VIEWbounds(b, bn, l, h);
    1698             :         } else {
    1699             :                 /* create a new BAT and put everything into it */
    1700             :                 BUN p = l;
    1701             :                 BUN q = h;
    1702             : 
    1703      143675 :                 bn = COLnew((oid) (b->hseqbase + low), BATtdense(b) ? TYPE_void : b->ttype, h - l, TRANSIENT);
    1704       82030 :                 if (bn == NULL)
    1705           0 :                         goto doreturn;
    1706             : 
    1707       82030 :                 if (bn->ttype == TYPE_void ||
    1708       61622 :                     (!bn->tvarsized &&
    1709       29833 :                      BATatoms[bn->ttype].atomPut == NULL &&
    1710       29833 :                      BATatoms[bn->ttype].atomFix == NULL)) {
    1711       50242 :                         if (bn->ttype) {
    1712       29834 :                                 BATiter bi = bat_iterator(b);
    1713       29834 :                                 memcpy(Tloc(bn, 0), (const char *) bi.base + (p << bi.shift),
    1714       29834 :                                        (q - p) << bn->tshift);
    1715       29834 :                                 bat_iterator_end(&bi);
    1716       29834 :                                 bn->theap->dirty = true;
    1717             :                         }
    1718       50242 :                         BATsetcount(bn, h - l);
    1719             :                 } else {
    1720       31788 :                         BATiter bi = bat_iterator(b);
    1721     1826526 :                         for (; p < q; p++) {
    1722     1794739 :                                 if (bunfastapp(bn, BUNtail(bi, p)) != GDK_SUCCEED) {
    1723           0 :                                         bat_iterator_end(&bi);
    1724           0 :                                         BBPreclaim(bn);
    1725             :                                         bn = NULL;
    1726           0 :                                         goto doreturn;
    1727             :                                 }
    1728             :                         }
    1729       31787 :                         bat_iterator_end(&bi);
    1730             :                 }
    1731       82016 :                 bn->theap->dirty = true;
    1732       82016 :                 bn->tsorted = b->tsorted;
    1733       82016 :                 bn->trevsorted = b->trevsorted;
    1734       82016 :                 bn->tkey = b->tkey;
    1735       82016 :                 bn->tnonil = b->tnonil;
    1736       82016 :                 if (b->tnosorted > l && b->tnosorted < h)
    1737        5062 :                         bn->tnosorted = b->tnosorted - l;
    1738             :                 else
    1739       76954 :                         bn->tnosorted = 0;
    1740       82016 :                 if (b->tnorevsorted > l && b->tnorevsorted < h)
    1741        9314 :                         bn->tnorevsorted = b->tnorevsorted - l;
    1742             :                 else
    1743       72702 :                         bn->tnorevsorted = 0;
    1744       82016 :                 if (b->tnokey[0] >= l && b->tnokey[0] < h &&
    1745       71879 :                     b->tnokey[1] >= l && b->tnokey[1] < h &&
    1746             :                     b->tnokey[0] != b->tnokey[1]) {
    1747         511 :                         bn->tnokey[0] = b->tnokey[0] - l;
    1748         511 :                         bn->tnokey[1] = b->tnokey[1] - l;
    1749             :                 } else {
    1750       81505 :                         bn->tnokey[0] = bn->tnokey[1] = 0;
    1751             :                 }
    1752             :         }
    1753     4556810 :         bn->tnonil = b->tnonil || bn->batCount == 0;
    1754     4556810 :         bn->tnil = false;    /* we just don't know */
    1755     4556810 :         bn->tnosorted = 0;
    1756     4556810 :         bn->tnokey[0] = bn->tnokey[1] = 0;
    1757     4556810 :         bni = bat_iterator_nolock(bn);
    1758     4558773 :         if (BATtdense(b)) {
    1759       24662 :                 BATtseqbase(bn, (oid) (b->tseqbase + low));
    1760     4534111 :         } else if (bn->ttype == TYPE_oid) {
    1761       24915 :                 if (BATcount(bn) == 0) {
    1762           7 :                         BATtseqbase(bn, 0);
    1763       24908 :                 } else if (!is_oid_nil((foid = *(oid *) BUNtloc(bni, 0))) &&
    1764       22360 :                            (BATcount(bn) == 1 ||
    1765       22360 :                             (bn->tkey &&
    1766        8454 :                              bn->tsorted &&
    1767        8454 :                              foid + BATcount(bn) - 1 == *(oid *) BUNtloc(bni, BUNlast(bn) - 1)))) {
    1768        2900 :                         BATtseqbase(bn, foid);
    1769             :                 }
    1770             :         }
    1771     4558898 :         if (bn->batCount <= 1) {
    1772      473841 :                 bn->tsorted = ATOMlinear(b->ttype);
    1773      473841 :                 bn->trevsorted = ATOMlinear(b->ttype);
    1774      473841 :                 BATkey(bn, true);
    1775             :         } else {
    1776     4085057 :                 bn->tsorted = b->tsorted;
    1777     4085057 :                 bn->trevsorted = b->trevsorted;
    1778     7788569 :                 BATkey(bn, BATtkey(b));
    1779             :         }
    1780     4557760 :   doreturn:
    1781     4557760 :         TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",lo=" BUNFMT ",hi=" BUNFMT " -> "
    1782             :                   ALGOOPTBATFMT "\n",
    1783             :                   ALGOBATPAR(b), l, h, ALGOOPTBATPAR(bn));
    1784             :         return bn;
    1785             : }
    1786             : 
    1787             : #define BAT_ORDERED(TPE)                                                \
    1788             :         do {                                                            \
    1789             :                 const TPE *restrict vals = Tloc(b, 0);                  \
    1790             :                 for (BUN q = BUNlast(b), p = 1; p < q; p++) {                \
    1791             :                         if (vals[p - 1] > vals[p]) {                 \
    1792             :                                 b->tnosorted = p;                    \
    1793             :                                 TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
    1794             :                                 goto doreturn;                          \
    1795             :                         } else if (vals[p - 1] < vals[p]) {          \
    1796             :                                 if (!b->trevsorted && b->tnorevsorted == 0) { \
    1797             :                                         b->tnorevsorted = p;         \
    1798             :                                         TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
    1799             :                                 }                                       \
    1800             :                         } else if (!b->tkey && b->tnokey[1] == 0) {       \
    1801             :                                 b->tnokey[0] = p - 1;                        \
    1802             :                                 b->tnokey[1] = p;                    \
    1803             :                                 TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
    1804             :                         }                                               \
    1805             :                 }                                                       \
    1806             :         } while (0)
    1807             : 
    1808             : #define BAT_ORDERED_FP(TPE)                                             \
    1809             :         do {                                                            \
    1810             :                 const TPE *restrict vals = Tloc(b, 0);                  \
    1811             :                 TPE prev = vals[0];                                     \
    1812             :                 bool prevnil = is_##TPE##_nil(prev);                    \
    1813             :                 for (BUN q = BUNlast(b), p = 1; p < q; p++) {                \
    1814             :                         TPE next = vals[p];                             \
    1815             :                         int cmp = prevnil ? -!(prevnil = is_##TPE##_nil(next)) : (prevnil = is_##TPE##_nil(next)) ? 1 : (prev > next) - (prev < next); \
    1816             :                         prev = next;                                    \
    1817             :                         if (cmp > 0) {                                       \
    1818             :                                 b->tnosorted = p;                    \
    1819             :                                 TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
    1820             :                                 goto doreturn;                          \
    1821             :                         } else if (cmp < 0) {                                \
    1822             :                                 if (!b->trevsorted && b->tnorevsorted == 0) { \
    1823             :                                         b->tnorevsorted = p;         \
    1824             :                                         TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b)); \
    1825             :                                 }                                       \
    1826             :                         } else if (!b->tkey && b->tnokey[1] == 0) {       \
    1827             :                                 b->tnokey[0] = p - 1;                        \
    1828             :                                 b->tnokey[1] = p;                    \
    1829             :                                 TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b)); \
    1830             :                         }                                               \
    1831             :                 }                                                       \
    1832             :         } while (0)
    1833             : 
    1834             : /* Return whether the BAT is ordered or not.  If we don't know, invest
    1835             :  * in a scan and record the results in the bat descriptor.  If during
    1836             :  * the scan we happen to find evidence that the BAT is not reverse
    1837             :  * sorted, we record the location.  */
    1838             : bool
    1839      506859 : BATordered(BAT *b)
    1840             : {
    1841      506859 :         lng t0 = GDKusec();
    1842             : 
    1843      506852 :         if (b->ttype == TYPE_void || b->tsorted || BATcount(b) == 0)
    1844             :                 return true;
    1845      363828 :         if (b->tnosorted > 0 || !ATOMlinear(b->ttype))
    1846             :                 return false;
    1847             : 
    1848             :         /* In order that multiple threads don't scan the same BAT at the
    1849             :          * same time (happens a lot with mitosis/mergetable), we use a
    1850             :          * lock.  We reuse the batIdxLock lock for this, not because this
    1851             :          * scanning interferes with heap reference counting, but because
    1852             :          * it's there, and not so likely to be used at the same time. */
    1853      193386 :         MT_lock_set(&b->batIdxLock);
    1854      193374 :         BATiter bi = bat_iterator_nolock(b);
    1855      193379 :         if (!b->tsorted && b->tnosorted == 0) {
    1856      193327 :                 b->batDirtydesc = true;
    1857      308252 :                 switch (ATOMbasetype(b->ttype)) {
    1858       68870 :                 case TYPE_bte:
    1859    88305021 :                         BAT_ORDERED(bte);
    1860             :                         break;
    1861        5450 :                 case TYPE_sht:
    1862      505635 :                         BAT_ORDERED(sht);
    1863             :                         break;
    1864       80239 :                 case TYPE_int:
    1865    76481796 :                         BAT_ORDERED(int);
    1866             :                         break;
    1867       14896 :                 case TYPE_lng:
    1868    49529936 :                         BAT_ORDERED(lng);
    1869             :                         break;
    1870             : #ifdef HAVE_HGE
    1871         610 :                 case TYPE_hge:
    1872        3637 :                         BAT_ORDERED(hge);
    1873             :                         break;
    1874             : #endif
    1875          75 :                 case TYPE_flt:
    1876         161 :                         BAT_ORDERED_FP(flt);
    1877             :                         break;
    1878         331 :                 case TYPE_dbl:
    1879      365618 :                         BAT_ORDERED_FP(dbl);
    1880             :                         break;
    1881       22799 :                 case TYPE_str:
    1882     9477488 :                         for (BUN q = BUNlast(b), p = 1; p < q; p++) {
    1883             :                                 int c;
    1884     9475506 :                                 const char *p1 = BUNtail(bi, p - 1);
    1885     9476557 :                                 const char *p2 = BUNtail(bi, p);
    1886     9475505 :                                 if (p1 == p2)
    1887             :                                         c = 0;
    1888      120072 :                                 else if (p1[0] == '\200') {
    1889         752 :                                         if (p2[0] == '\200')
    1890             :                                                 c = 0;
    1891             :                                         else
    1892             :                                                 c = -1;
    1893      119320 :                                 } else if (p2[0] == '\200')
    1894             :                                         c = 1;
    1895             :                                 else
    1896      118680 :                                         c = strcmp(p1, p2);
    1897      118680 :                                 if (c > 0) {
    1898       20816 :                                         b->tnosorted = p;
    1899       20816 :                                         TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
    1900       20816 :                                         goto doreturn;
    1901     9454689 :                                 } else if (c < 0) {
    1902       99238 :                                         assert(!b->trevsorted);
    1903       99238 :                                         if (b->tnorevsorted == 0) {
    1904       13731 :                                                 b->tnorevsorted = p;
    1905       13731 :                                                 TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
    1906             :                                         }
    1907     9355451 :                                 } else if (b->tnokey[1] == 0) {
    1908        7732 :                                         assert(!b->tkey);
    1909        7732 :                                         b->tnokey[0] = p - 1;
    1910        7732 :                                         b->tnokey[1] = p;
    1911        7732 :                                         TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
    1912             :                                 }
    1913             :                         }
    1914             :                         break;
    1915          57 :                 default: {
    1916          57 :                         int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
    1917         162 :                         for (BUN q = BUNlast(b), p = 1; p < q; p++) {
    1918             :                                 int c;
    1919         152 :                                 if ((c = cmpf(BUNtail(bi, p - 1), BUNtail(bi, p))) > 0) {
    1920          47 :                                         b->tnosorted = p;
    1921          47 :                                         TRC_DEBUG(ALGO, "Fixed nosorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
    1922          47 :                                         goto doreturn;
    1923         105 :                                 } else if (c < 0) {
    1924          40 :                                         if (!b->trevsorted && b->tnorevsorted == 0) {
    1925          21 :                                                 b->tnorevsorted = p;
    1926          21 :                                                 TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT "\n", p, ALGOBATPAR(b));
    1927             :                                         }
    1928          65 :                                 } else if (!b->tkey && b->tnokey[1] == 0) {
    1929           9 :                                         b->tnokey[0] = p - 1;
    1930           9 :                                         b->tnokey[1] = p;
    1931           9 :                                         TRC_DEBUG(ALGO, "Fixed nokey(" BUNFMT "," BUNFMT") for " ALGOBATFMT "\n", p - 1, p, ALGOBATPAR(b));
    1932             :                                 }
    1933             :                         }
    1934             :                         break;
    1935             :                 }
    1936             :                 }
    1937             :                 /* we only get here if we completed the scan; note that
    1938             :                  * if we didn't record evidence about *reverse*
    1939             :                  * sortedness, we know that the BAT is also reverse
    1940             :                  * sorted; similarly, if we didn't record evidence about
    1941             :                  * keyness, we know the BAT is key */
    1942       53859 :                 b->tsorted = true;
    1943       53859 :                 TRC_DEBUG(ALGO, "Fixed sorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
    1944       53882 :                 if (!b->trevsorted && b->tnorevsorted == 0) {
    1945       29798 :                         b->trevsorted = true;
    1946       29798 :                         TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT "\n", ALGOBATPAR(b));
    1947             :                 }
    1948       53882 :                 if (!b->tkey && b->tnokey[1] == 0) {
    1949       15090 :                         b->tkey = true;
    1950       15090 :                         TRC_DEBUG(ALGO, "Fixed key for " ALGOBATFMT "\n", ALGOBATPAR(b));
    1951             :                 }
    1952             :         }
    1953       53934 :   doreturn:
    1954      193295 :         MT_lock_unset(&b->batIdxLock);
    1955      193390 :         return b->tsorted;
    1956             : }
    1957             : 
    1958             : #define BAT_REVORDERED(TPE)                                             \
    1959             :         do {                                                            \
    1960             :                 const TPE *restrict vals = Tloc(b, 0);                  \
    1961             :                 for (BUN q = BUNlast(b), p = 1; p < q; p++) {                \
    1962             :                         if (vals[p - 1] < vals[p]) {                 \
    1963             :                                 b->tnorevsorted = p;                 \
    1964             :                                 TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
    1965             :                                 goto doreturn;                          \
    1966             :                         }                                               \
    1967             :                 }                                                       \
    1968             :         } while (0)
    1969             : 
    1970             : #define BAT_REVORDERED_FP(TPE)                                          \
    1971             :         do {                                                            \
    1972             :                 const TPE *restrict vals = Tloc(b, 0);                  \
    1973             :                 for (BUN q = BUNlast(b), p = 1; p < q; p++) {                \
    1974             :                         TPE prev = vals[p - 1], next = vals[p];         \
    1975             :                         int cmp = is_flt_nil(prev) ? -!is_flt_nil(next) : is_flt_nil(next) ? 1 : (prev > next) - (prev < next);   \
    1976             :                         if (cmp < 0) {                                       \
    1977             :                                 b->tnorevsorted = p;                 \
    1978             :                                 TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0); \
    1979             :                                 goto doreturn;                          \
    1980             :                         }                                               \
    1981             :                 }                                                       \
    1982             :         } while (0)
    1983             : 
    1984             : /* Return whether the BAT is reverse ordered or not.  If we don't
    1985             :  * know, invest in a scan and record the results in the bat
    1986             :  * descriptor.  */
    1987             : bool
    1988      397156 : BATordered_rev(BAT *b)
    1989             : {
    1990      397156 :         lng t0 = GDKusec();
    1991             : 
    1992      397137 :         if (b == NULL || !ATOMlinear(b->ttype))
    1993             :                 return false;
    1994      397137 :         if (BATcount(b) <= 1 || b->trevsorted)
    1995             :                 return true;
    1996      324112 :         if (b->ttype == TYPE_void)
    1997       20527 :                 return is_oid_nil(b->tseqbase);
    1998      303585 :         if (BATtdense(b) || b->tnorevsorted > 0)
    1999             :                 return false;
    2000       64080 :         MT_lock_set(&b->batIdxLock);
    2001       64082 :         BATiter bi = bat_iterator_nolock(b);
    2002       64083 :         if (!b->trevsorted && b->tnorevsorted == 0) {
    2003       64073 :                 b->batDirtydesc = true;
    2004      104307 :                 switch (ATOMbasetype(b->ttype)) {
    2005       20445 :                 case TYPE_bte:
    2006     8009619 :                         BAT_REVORDERED(bte);
    2007             :                         break;
    2008        1502 :                 case TYPE_sht:
    2009      202438 :                         BAT_REVORDERED(sht);
    2010             :                         break;
    2011       29110 :                 case TYPE_int:
    2012      465407 :                         BAT_REVORDERED(int);
    2013             :                         break;
    2014        5758 :                 case TYPE_lng:
    2015      240166 :                         BAT_REVORDERED(lng);
    2016             :                         break;
    2017             : #ifdef HAVE_HGE
    2018         532 :                 case TYPE_hge:
    2019        2431 :                         BAT_REVORDERED(hge);
    2020             :                         break;
    2021             : #endif
    2022          46 :                 case TYPE_flt:
    2023         153 :                         BAT_REVORDERED_FP(flt);
    2024             :                         break;
    2025         174 :                 case TYPE_dbl:
    2026         974 :                         BAT_REVORDERED_FP(dbl);
    2027             :                         break;
    2028        6506 :                 default: {
    2029        6506 :                         int (*cmpf)(const void *, const void *) = ATOMcompare(b->ttype);
    2030      756359 :                         for (BUN q = BUNlast(b), p = 1; p < q; p++) {
    2031      755944 :                                 if (cmpf(BUNtail(bi, p - 1), BUNtail(bi, p)) < 0) {
    2032        6092 :                                         b->tnorevsorted = p;
    2033        6092 :                                         TRC_DEBUG(ALGO, "Fixed norevsorted(" BUNFMT ") for " ALGOBATFMT " (" LLFMT " usec)\n", p, ALGOBATPAR(b), GDKusec() - t0);
    2034        6092 :                                         goto doreturn;
    2035             :                                 }
    2036             :                         }
    2037             :                         break;
    2038             :                 }
    2039             :                 }
    2040        3385 :                 b->trevsorted = true;
    2041        3385 :                 TRC_DEBUG(ALGO, "Fixed revsorted for " ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b), GDKusec() - t0);
    2042             :         }
    2043        3395 :   doreturn:
    2044       64084 :         MT_lock_unset(&b->batIdxLock);
    2045       64086 :         return b->trevsorted;
    2046             : }
    2047             : 
    2048             : /* figure out which sort function is to be called
    2049             :  * stable sort can produce an error (not enough memory available),
    2050             :  * "quick" sort does not produce errors */
    2051             : static gdk_return
    2052     3933556 : do_sort(void *restrict h, void *restrict t, const void *restrict base,
    2053             :         size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
    2054             :         bool stable)
    2055             : {
    2056     3933556 :         if (n <= 1)          /* trivially sorted */
    2057             :                 return GDK_SUCCEED;
    2058     2759086 :         if (stable) {
    2059         183 :                 if (reverse)
    2060           1 :                         return GDKssort_rev(h, t, base, n, hs, ts, tpe);
    2061             :                 else
    2062         182 :                         return GDKssort(h, t, base, n, hs, ts, tpe);
    2063             :         } else {
    2064     2758903 :                 GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
    2065             :         }
    2066     2758917 :         return GDK_SUCCEED;
    2067             : }
    2068             : 
    2069             : /* Sort the bat b according to both o and g.  The stable and reverse
    2070             :  * parameters indicate whether the sort should be stable or descending
    2071             :  * respectively.  The parameter b is required, o and g are optional
    2072             :  * (i.e., they may be NULL).
    2073             :  *
    2074             :  * A sorted copy is returned through the sorted parameter, the new
    2075             :  * ordering is returned through the order parameter, group information
    2076             :  * is returned through the groups parameter.  All three output
    2077             :  * parameters may be NULL.  If they're all NULL, this function does
    2078             :  * nothing.
    2079             :  *
    2080             :  * If o is specified, it is used to first rearrange b according to the
    2081             :  * order specified in o, after which b is sorted taking g into
    2082             :  * account.
    2083             :  *
    2084             :  * If g is specified, it indicates groups which should be individually
    2085             :  * ordered.  Each row of consecutive equal values in g indicates a
    2086             :  * group which is sorted according to stable and reverse.  g is used
    2087             :  * after the order in b was rearranged according to o.
    2088             :  *
    2089             :  * The outputs order and groups can be used in subsequent calls to
    2090             :  * this function.  This can be used if multiple BATs need to be sorted
    2091             :  * together.  The BATs should then be sorted in order of significance,
    2092             :  * and each following call should use the original unordered BAT plus
    2093             :  * the order and groups bat from the previous call.  In this case, the
    2094             :  * sorted BATs are not of much use, so the sorted output parameter
    2095             :  * does not need to be specified.
    2096             :  * Apart from error checking and maintaining reference counts, sorting
    2097             :  * three columns (col1, col2, col3) could look like this with the
    2098             :  * sorted results in (col1s, col2s, col3s):
    2099             :  *      BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
    2100             :  *      BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
    2101             :  *      BATsort(&col3s,  NULL,  NULL, col3, ord2, grp2, false, false, false);
    2102             :  * Note that the "reverse" parameter can be different for each call.
    2103             :  */
    2104             : gdk_return
    2105       37727 : BATsort(BAT **sorted, BAT **order, BAT **groups,
    2106             :         BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
    2107             : {
    2108             :         BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
    2109             :         oid *restrict grps, *restrict ords, prev;
    2110             :         BUN p, q, r;
    2111       37727 :         lng t0 = GDKusec();
    2112             :         bool mkorderidx, orderidxlock = false;
    2113             :         Heap *oidxh = NULL;
    2114             : 
    2115             :         /* we haven't implemented NILs as largest value for stable
    2116             :          * sort, so NILs come first for ascending and last for
    2117             :          * descending */
    2118       37727 :         assert(!stable || reverse == nilslast);
    2119             : 
    2120       37727 :         if (b == NULL) {
    2121           0 :                 GDKerror("b must exist\n");
    2122           0 :                 return GDK_FAIL;
    2123             :         }
    2124       37727 :         if (stable && reverse != nilslast) {
    2125           0 :                 GDKerror("stable sort cannot have reverse != nilslast\n");
    2126           0 :                 return GDK_FAIL;
    2127             :         }
    2128       37727 :         if (!ATOMlinear(b->ttype)) {
    2129           0 :                 GDKerror("type %s cannot be sorted\n", ATOMname(b->ttype));
    2130           0 :                 return GDK_FAIL;
    2131             :         }
    2132       37727 :         if (b->ttype == TYPE_void) {
    2133         126 :                 if (!b->tsorted) {
    2134           0 :                         b->tsorted = true;
    2135           0 :                         b->batDirtydesc = true;
    2136             :                 }
    2137         158 :                 if (b->trevsorted != (is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
    2138           0 :                         b->trevsorted = !b->trevsorted;
    2139           0 :                         b->batDirtydesc = true;
    2140             :                 }
    2141         126 :                 if (b->tkey != (!is_oid_nil(b->tseqbase) || b->batCount <= 1)) {
    2142           0 :                         b->tkey = !b->tkey;
    2143           0 :                         b->batDirtydesc = true;
    2144             :                 }
    2145       37601 :         } else if (b->batCount <= 1) {
    2146       16840 :                 if (!b->tsorted || !b->trevsorted) {
    2147           0 :                         b->tsorted = b->trevsorted = true;
    2148           0 :                         b->batDirtydesc = true;
    2149             :                 }
    2150             :         }
    2151       37727 :         if (o != NULL &&
    2152       19565 :             (ATOMtype(o->ttype) != TYPE_oid || /* oid tail */
    2153       19565 :              BATcount(o) != BATcount(b) ||     /* same size as b */
    2154        9075 :              (o->ttype == TYPE_void &&              /* no nil tail */
    2155        3650 :               BATcount(o) != 0 &&
    2156        3650 :               is_oid_nil(o->tseqbase)))) {
    2157           0 :                 GDKerror("o must have type oid and same size as b\n");
    2158           0 :                 return GDK_FAIL;
    2159             :         }
    2160       37727 :         if (g != NULL &&
    2161       19565 :             (ATOMtype(g->ttype) != TYPE_oid || /* oid tail */
    2162       19565 :              !g->tsorted ||                 /* sorted */
    2163       19565 :              BATcount(o) != BATcount(b) ||     /* same size as b */
    2164        9110 :              (g->ttype == TYPE_void &&              /* no nil tail */
    2165        9110 :               BATcount(g) != 0 &&
    2166        3988 :               is_oid_nil(g->tseqbase)))) {
    2167           0 :                 GDKerror("g must have type oid, sorted on the tail, "
    2168             :                          "and same size as b\n");
    2169           0 :                 return GDK_FAIL;
    2170             :         }
    2171       37727 :         if (sorted == NULL && order == NULL) {
    2172             :                 /* no place to put result, so we're done quickly */
    2173           0 :                 GDKerror("no place to put the result.\n");
    2174           0 :                 return GDK_FAIL;
    2175             :         }
    2176       37727 :         if (g == NULL && !stable) {
    2177             :                 /* pre-ordering doesn't make sense if we're not
    2178             :                  * subsorting and the sort is not stable */
    2179             :                 o = NULL;
    2180             :         }
    2181       37727 :         if (b->tnonil) {
    2182             :                 /* if there are no nils, placement of nils doesn't
    2183             :                  * matter, so set nilslast such that ordered bits can
    2184             :                  * be used */
    2185             :                 nilslast = reverse;
    2186             :         }
    2187       38483 :         if (BATcount(b) <= 1 ||
    2188       20764 :             (reverse == nilslast &&
    2189             :              (reverse ? BATtrevordered(b) : BATtordered(b)) &&
    2190        3527 :              o == NULL && g == NULL &&
    2191        1938 :              (groups == NULL || BATtkey(b) ||
    2192             :               (reverse ? BATtordered(b) : BATtrevordered(b))))) {
    2193             :                 /* trivially (sub)sorted, and either we don't need to
    2194             :                  * return group information, or we can trivially
    2195             :                  * deduce the groups */
    2196       18159 :                 if (sorted) {
    2197       17370 :                         bn = COLcopy(b, b->ttype, false, TRANSIENT);
    2198       17370 :                         if (bn == NULL)
    2199           0 :                                 goto error;
    2200       17370 :                         *sorted = bn;
    2201             :                 }
    2202       18159 :                 if (order) {
    2203       18055 :                         on = BATdense(b->hseqbase, b->hseqbase, BATcount(b));
    2204       18055 :                         if (on == NULL)
    2205           0 :                                 goto error;
    2206       18055 :                         *order = on;
    2207             :                 }
    2208       18159 :                 if (groups) {
    2209        8966 :                         if (BATtkey(b)) {
    2210             :                                 /* singleton groups */
    2211        8359 :                                 gn = BATdense(0, 0, BATcount(b));
    2212        8359 :                                 if (gn == NULL)
    2213           0 :                                         goto error;
    2214             :                         } else {
    2215             :                                 /* single group */
    2216         607 :                                 const oid *o = 0;
    2217         607 :                                 assert(BATcount(b) == 1 ||
    2218             :                                        (BATtordered(b) && BATtrevordered(b)));
    2219         607 :                                 gn = BATconstant(0, TYPE_oid, &o, BATcount(b), TRANSIENT);
    2220         607 :                                 if (gn == NULL)
    2221           0 :                                         goto error;
    2222             :                         }
    2223        8966 :                         *groups = gn;
    2224             :                 }
    2225       18159 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
    2226             :                           ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
    2227             :                           ",reverse=%d,nilslast=%d,stable=%d) = ("
    2228             :                           ALGOOPTBATFMT "," ALGOOPTBATFMT ","
    2229             :                           ALGOOPTBATFMT " -- trivial (" LLFMT
    2230             :                           " usec)\n",
    2231             :                           ALGOBATPAR(b), ALGOOPTBATPAR(o),
    2232             :                           ALGOOPTBATPAR(g), reverse, nilslast, stable,
    2233             :                           ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
    2234             :                           ALGOOPTBATPAR(on), GDKusec() - t0);
    2235       18159 :                 return GDK_SUCCEED;
    2236             :         }
    2237       19568 :         if (VIEWtparent(b)) {
    2238        3426 :                 pb = BBP_cache(VIEWtparent(b));
    2239        3426 :                 if (b->tbaseoff != pb->tbaseoff ||
    2240        2681 :                     BATcount(b) != BATcount(pb) ||
    2241        2472 :                     b->hseqbase != pb->hseqbase ||
    2242        2472 :                     BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)
    2243             :                         pb = NULL;
    2244             :         } else {
    2245             :                 pb = b;
    2246             :         }
    2247             :         /* when we will create an order index if it doesn't already exist */
    2248       19568 :         mkorderidx = (g == NULL && !reverse && !nilslast && pb != NULL && (order || !pb->batTransient));
    2249       19568 :         if (g == NULL && !reverse && !nilslast && pb != NULL) {
    2250        7029 :                 (void) BATcheckorderidx(pb);
    2251        7029 :                 MT_lock_set(&pb->batIdxLock);
    2252        7029 :                 if (pb->torderidx) {
    2253          39 :                         if (!stable || ((oid *) pb->torderidx->base)[2]) {
    2254             :                                 /* we can use the order index */
    2255             :                                 oidxh = pb->torderidx;
    2256          39 :                                 HEAPincref(oidxh);
    2257             :                         }
    2258             :                         mkorderidx = false;
    2259        6990 :                 } else if (b != pb) {
    2260             :                         /* don't build orderidx on parent bat */
    2261             :                         mkorderidx = false;
    2262        5846 :                 } else if (mkorderidx) {
    2263             :                         /* keep lock when going to create */
    2264             :                         orderidxlock = true;
    2265             :                 }
    2266        7029 :                 if (!orderidxlock)
    2267        1255 :                         MT_lock_unset(&pb->batIdxLock);
    2268             :         }
    2269       19568 :         if (g == NULL && o == NULL && !reverse && !nilslast && oidxh != NULL) {
    2270             :                 /* there is an order index that we can use */
    2271          39 :                 on = COLnew(pb->hseqbase, TYPE_oid, BATcount(pb), TRANSIENT);
    2272          39 :                 if (on == NULL)
    2273           0 :                         goto error;
    2274          39 :                 memcpy(Tloc(on, 0), (oid *) oidxh->base + ORDERIDXOFF, BATcount(pb) * sizeof(oid));
    2275          39 :                 BATsetcount(on, BATcount(b));
    2276          39 :                 HEAPdecref(oidxh, false);
    2277             :                 oidxh = NULL;
    2278          39 :                 on->tkey = true;
    2279          39 :                 on->tnil = false;
    2280          39 :                 on->tnonil = true;
    2281          39 :                 on->tsorted = on->trevsorted = false;
    2282          39 :                 on->tseqbase = oid_nil;
    2283          39 :                 if (sorted || groups) {
    2284          37 :                         bn = BATproject(on, b);
    2285          37 :                         if (bn == NULL)
    2286           0 :                                 goto error;
    2287          37 :                         bn->tsorted = true;
    2288          37 :                         if (groups) {
    2289           0 :                                 if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
    2290           0 :                                         goto error;
    2291           0 :                                 if (sorted &&
    2292           0 :                                     (*groups)->tkey &&
    2293             :                                     g == NULL) {
    2294             :                                         /* if new groups bat is key
    2295             :                                          * and since there is no input
    2296             :                                          * groups bat, we know the
    2297             :                                          * result bat is key */
    2298           0 :                                         bn->tkey = true;
    2299             :                                 }
    2300             :                         }
    2301          37 :                         if (sorted)
    2302          37 :                                 *sorted = bn;
    2303             :                         else {
    2304           0 :                                 BBPunfix(bn->batCacheid);
    2305             :                                 bn = NULL;
    2306             :                         }
    2307             :                 }
    2308          39 :                 if (order)
    2309           7 :                         *order = on;
    2310             :                 else {
    2311          32 :                         BBPunfix(on->batCacheid);
    2312             :                         on = NULL;
    2313             :                 }
    2314          39 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o="
    2315             :                           ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
    2316             :                           ",reverse=%d,nilslast=%d,stable=%d) = ("
    2317             :                           ALGOOPTBATFMT "," ALGOOPTBATFMT ","
    2318             :                           ALGOOPTBATFMT " -- orderidx (" LLFMT
    2319             :                           " usec)\n",
    2320             :                           ALGOBATPAR(b), ALGOOPTBATPAR(o),
    2321             :                           ALGOOPTBATPAR(g), reverse, nilslast, stable,
    2322             :                           ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
    2323             :                           ALGOOPTBATPAR(on), GDKusec() - t0);
    2324          39 :                 return GDK_SUCCEED;
    2325       19529 :         } else if (oidxh) {
    2326           0 :                 HEAPdecref(oidxh, false);
    2327             :                 oidxh = NULL;
    2328             :         }
    2329       19529 :         if (o) {
    2330       11518 :                 bn = BATproject(o, b);
    2331       11518 :                 if (bn == NULL)
    2332           0 :                         goto error;
    2333       11518 :                 if (bn->ttype == TYPE_void || isVIEW(bn)) {
    2334        3852 :                         BAT *b2 = COLcopy(bn, ATOMtype(bn->ttype), true, TRANSIENT);
    2335        1926 :                         BBPunfix(bn->batCacheid);
    2336             :                         bn = b2;
    2337             :                 }
    2338             :                 pb = NULL;
    2339             :         } else {
    2340        8011 :                 bn = COLcopy(b, b->ttype, true, TRANSIENT);
    2341             :         }
    2342       19530 :         if (bn == NULL)
    2343           0 :                 goto error;
    2344       19530 :         if (order) {
    2345             :                 /* prepare order bat */
    2346       19349 :                 if (o) {
    2347             :                         /* make copy of input so that we can refine it;
    2348             :                          * copy can be read-only if we take the shortcut
    2349             :                          * below in the case g is "key" */
    2350       11499 :                         on = COLcopy(o, TYPE_oid,
    2351       11499 :                                      g == NULL ||
    2352       11499 :                                      !(g->tkey || g->ttype == TYPE_void),
    2353             :                                      TRANSIENT);
    2354       11499 :                         if (on == NULL)
    2355           0 :                                 goto error;
    2356       11499 :                         BAThseqbase(on, b->hseqbase);
    2357       11498 :                         on->tminpos = BUN_NONE;
    2358       11498 :                         on->tmaxpos = BUN_NONE;
    2359             :                 } else {
    2360             :                         /* create new order */
    2361        7850 :                         on = COLnew(b->hseqbase, TYPE_oid, BATcount(bn), TRANSIENT);
    2362        7850 :                         if (on == NULL)
    2363           0 :                                 goto error;
    2364        7850 :                         ords = (oid *) Tloc(on, 0);
    2365    23286980 :                         for (p = 0, q = BATcount(bn); p < q; p++)
    2366    23279130 :                                 ords[p] = p + b->hseqbase;
    2367        7850 :                         BATsetcount(on, BATcount(bn));
    2368        7850 :                         on->tkey = true;
    2369        7850 :                         on->tnil = false;
    2370        7850 :                         on->tnonil = true;
    2371             :                 }
    2372             :                 /* COLcopy above can create TYPE_void */
    2373       19348 :                 if (on->ttype != TYPE_void) {
    2374       18610 :                         on->tsorted = on->trevsorted = false; /* it won't be sorted */
    2375       18610 :                         on->tseqbase = oid_nil;      /* and hence not dense */
    2376       18610 :                         on->tnosorted = on->tnorevsorted = 0;
    2377             :                 }
    2378       19348 :                 *order = on;
    2379       19348 :                 ords = (oid *) Tloc(on, 0);
    2380             :         } else {
    2381             :                 ords = NULL;
    2382             :         }
    2383       19529 :         if (g) {
    2384       11518 :                 if (g->tkey || g->ttype == TYPE_void) {
    2385             :                         /* if g is "key", all groups are size 1, so no
    2386             :                          * subsorting needed */
    2387        5819 :                         if (sorted) {
    2388        5306 :                                 *sorted = bn;
    2389             :                         } else {
    2390         513 :                                 BBPunfix(bn->batCacheid);
    2391             :                                 bn = NULL;
    2392             :                         }
    2393        5819 :                         if (order) {
    2394        5818 :                                 *order = on;
    2395        5818 :                                 if (o) {
    2396             :                                         /* we can inherit sortedness
    2397             :                                          * after all */
    2398        5818 :                                         on->tsorted = o->tsorted;
    2399        5818 :                                         on->trevsorted = o->trevsorted;
    2400        5818 :                                         if (o->tnosorted)
    2401         246 :                                                 on->tnosorted = o->tnosorted;
    2402        5818 :                                         if (o->tnorevsorted)
    2403          36 :                                                 on->tnorevsorted = o->tnorevsorted;
    2404             :                                 } else {
    2405             :                                         /* we didn't rearrange, so
    2406             :                                          * still sorted */
    2407           0 :                                         on->tsorted = true;
    2408           0 :                                         on->trevsorted = false;
    2409             :                                 }
    2410        5818 :                                 if (BATcount(on) <= 1) {
    2411           0 :                                         on->tsorted = true;
    2412           0 :                                         on->trevsorted = true;
    2413             :                                 }
    2414             :                         }
    2415        5819 :                         if (groups) {
    2416        2958 :                                 gn = COLcopy(g, g->ttype, false, TRANSIENT);
    2417        2958 :                                 if (gn == NULL)
    2418           0 :                                         goto error;
    2419        2958 :                                 *groups = gn;
    2420             :                         }
    2421        5819 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    2422             :                                   ",o=" ALGOOPTBATFMT ",g=" ALGOBATFMT
    2423             :                                   ",reverse=%d,nilslast=%d,stable=%d"
    2424             :                                   ") = (" ALGOOPTBATFMT ","
    2425             :                                   ALGOOPTBATFMT "," ALGOOPTBATFMT
    2426             :                                   " -- key group (" LLFMT " usec)\n",
    2427             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(o),
    2428             :                                   ALGOBATPAR(g), reverse, nilslast,
    2429             :                                   stable, ALGOOPTBATPAR(bn),
    2430             :                                   ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
    2431             :                                   GDKusec() - t0);
    2432        5819 :                         return GDK_SUCCEED;
    2433             :                 }
    2434        5699 :                 assert(g->ttype == TYPE_oid);
    2435        5699 :                 grps = (oid *) Tloc(g, 0);
    2436        5699 :                 prev = grps[0];
    2437        5699 :                 if (BATmaterialize(bn) != GDK_SUCCEED)
    2438           0 :                         goto error;
    2439    36166064 :                 for (r = 0, p = 1, q = BATcount(g); p < q; p++) {
    2440    36160365 :                         if (grps[p] != prev) {
    2441             :                                 /* sub sort [r,p) */
    2442     7839478 :                                 if (do_sort(Tloc(bn, r),
    2443     3919723 :                                             ords ? ords + r : NULL,
    2444     3919755 :                                             bn->tvheap ? bn->tvheap->base : NULL,
    2445     3919755 :                                             p - r, Tsize(bn), ords ? sizeof(oid) : 0,
    2446     3919755 :                                             bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
    2447           0 :                                         goto error;
    2448             :                                 r = p;
    2449     3919755 :                                 prev = grps[p];
    2450             :                         }
    2451             :                 }
    2452             :                 /* sub sort [r,q) */
    2453       11380 :                 if (do_sort(Tloc(bn, r),
    2454        5681 :                             ords ? ords + r : NULL,
    2455        5699 :                             bn->tvheap ? bn->tvheap->base : NULL,
    2456        5699 :                             p - r, Tsize(bn), ords ? sizeof(oid) : 0,
    2457        5699 :                             bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)
    2458           0 :                         goto error;
    2459             :                 /* if single group (r==0) the result is (rev)sorted,
    2460             :                  * otherwise (maybe) not */
    2461        5699 :                 bn->tsorted = r == 0 && !reverse && !nilslast;
    2462       11388 :                 bn->trevsorted = r == 0 && reverse && nilslast;
    2463             :         } else {
    2464             :                 Heap *m = NULL;
    2465             :                 /* only invest in creating an order index if the BAT
    2466             :                  * is persistent */
    2467        8011 :                 if (mkorderidx) {
    2468        5774 :                         assert(orderidxlock);
    2469        5774 :                         if ((m = createOIDXheap(pb, stable)) != NULL &&
    2470             :                             ords == NULL) {
    2471           6 :                                 ords = (oid *) m->base + ORDERIDXOFF;
    2472           6 :                                 if (o && o->ttype != TYPE_void)
    2473           0 :                                         memcpy(ords, Tloc(o, 0), BATcount(o) * sizeof(oid));
    2474           6 :                                 else if (o)
    2475           0 :                                         for (p = 0, q = BATcount(o); p < q; p++)
    2476           0 :                                                 ords[p] = p + o->tseqbase;
    2477             :                                 else
    2478        1161 :                                         for (p = 0, q = BATcount(b); p < q; p++)
    2479        1155 :                                                 ords[p] = p + b->hseqbase;
    2480             :                         }
    2481             :                 }
    2482        8011 :                 if ((reverse != nilslast ||
    2483       15552 :                      (reverse ? !bn->trevsorted : !bn->tsorted)) &&
    2484       15118 :                     (BATmaterialize(bn) != GDK_SUCCEED ||
    2485        7559 :                      do_sort(Tloc(bn, 0),
    2486             :                              ords,
    2487        7559 :                              bn->tvheap ? bn->tvheap->base : NULL,
    2488        7559 :                              BATcount(bn), Tsize(bn), ords ? sizeof(oid) : 0,
    2489        7559 :                              bn->ttype, reverse, nilslast, stable) != GDK_SUCCEED)) {
    2490           0 :                         if (m != NULL) {
    2491           0 :                                 HEAPfree(m, true);
    2492           0 :                                 GDKfree(m);
    2493             :                         }
    2494           0 :                         goto error;
    2495             :                 }
    2496        8011 :                 bn->tsorted = !reverse && !nilslast;
    2497        8011 :                 bn->trevsorted = reverse && nilslast;
    2498        8011 :                 if (m != NULL) {
    2499        5774 :                         assert(orderidxlock);
    2500        5774 :                         if (pb->torderidx == NULL) {
    2501        5774 :                                 pb->batDirtydesc = true;
    2502        5774 :                                 if (ords != (oid *) m->base + ORDERIDXOFF) {
    2503        5768 :                                         memcpy((oid *) m->base + ORDERIDXOFF,
    2504             :                                                ords,
    2505        5768 :                                                BATcount(pb) * sizeof(oid));
    2506             :                                 }
    2507        5774 :                                 ATOMIC_INIT(&m->refs, 1);
    2508        5774 :                                 pb->torderidx = m;
    2509        5774 :                                 persistOIDX(pb);
    2510             :                         } else {
    2511           0 :                                 HEAPfree(m, true);
    2512           0 :                                 GDKfree(m);
    2513             :                         }
    2514             :                 }
    2515             :         }
    2516       13710 :         if (orderidxlock) {
    2517        5774 :                 MT_lock_unset(&pb->batIdxLock);
    2518             :                 orderidxlock = false;
    2519             :         }
    2520       13710 :         bn->theap->dirty = true;
    2521       13710 :         bn->tnosorted = 0;
    2522       13710 :         bn->tnorevsorted = 0;
    2523       13710 :         bn->tnokey[0] = bn->tnokey[1] = 0;
    2524       13710 :         bn->tminpos = BUN_NONE;
    2525       13710 :         bn->tmaxpos = BUN_NONE;
    2526       13710 :         if (groups) {
    2527        8400 :                 if (BATgroup_internal(groups, NULL, NULL, bn, NULL, g, NULL, NULL, true) != GDK_SUCCEED)
    2528           0 :                         goto error;
    2529        8400 :                 if ((*groups)->tkey &&
    2530         954 :                     (g == NULL || (g->tsorted && g->trevsorted))) {
    2531             :                         /* if new groups bat is key and the input
    2532             :                          * group bat has a single value (both sorted
    2533             :                          * and revsorted), we know the result bat is
    2534             :                          * key */
    2535        1895 :                         bn->tkey = true;
    2536             :                 }
    2537             :         }
    2538             : 
    2539       13710 :         if (sorted)
    2540       10674 :                 *sorted = bn;
    2541             :         else {
    2542        3036 :                 BBPunfix(bn->batCacheid);
    2543             :                 bn = NULL;
    2544             :         }
    2545             : 
    2546       13710 :         TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",o=" ALGOOPTBATFMT
    2547             :                   ",g=" ALGOOPTBATFMT ",reverse=%d,nilslast=%d,"
    2548             :                   "stable=%d) = (" ALGOOPTBATFMT "," ALGOOPTBATFMT ","
    2549             :                   ALGOOPTBATFMT " -- %ssort (" LLFMT " usec)\n",
    2550             :                   ALGOBATPAR(b), ALGOOPTBATPAR(o), ALGOOPTBATPAR(g),
    2551             :                   reverse, nilslast, stable, ALGOOPTBATPAR(bn),
    2552             :                   ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on),
    2553             :                   g ? "grouped " : "", GDKusec() - t0);
    2554             :         return GDK_SUCCEED;
    2555             : 
    2556           0 :   error:
    2557           0 :         if (orderidxlock)
    2558           0 :                 MT_lock_unset(&pb->batIdxLock);
    2559           0 :         if (oidxh)
    2560           0 :                 HEAPdecref(oidxh, false);
    2561           0 :         if (bn)
    2562           0 :                 BBPunfix(bn->batCacheid);
    2563           0 :         BBPreclaim(on);
    2564           0 :         if (sorted)
    2565           0 :                 *sorted = NULL;
    2566           0 :         if (order)
    2567           0 :                 *order = NULL;
    2568           0 :         if (groups)
    2569           0 :                 *groups = NULL;
    2570             :         return GDK_FAIL;
    2571             : }
    2572             : 
    2573             : /* return a new BAT of length n with seqbase hseq, and the constant v
    2574             :  * in the tail */
    2575             : BAT *
    2576     1578553 : BATconstant(oid hseq, int tailtype, const void *v, BUN n, role_t role)
    2577             : {
    2578             :         BAT *bn;
    2579             :         void *restrict p;
    2580             :         BUN i;
    2581             :         lng t0 = 0;
    2582             : 
    2583     1578553 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    2584     1578553 :         if (v == NULL)
    2585             :                 return NULL;
    2586     1578553 :         bn = COLnew(hseq, tailtype, n, role);
    2587     1578375 :         if (bn != NULL && n > 0) {
    2588       89782 :                 p = Tloc(bn, 0);
    2589       89782 :                 switch (ATOMstorage(tailtype)) {
    2590          45 :                 case TYPE_void:
    2591             :                         v = &oid_nil;
    2592          45 :                         BATtseqbase(bn, oid_nil);
    2593          45 :                         break;
    2594         254 :                 case TYPE_msk:
    2595         254 :                         if (*(msk*)v) {
    2596           0 :                                 memset(p, 0xFF, 4 * ((n + 31) / 32));
    2597           0 :                                 if (n & 31) {
    2598             :                                         uint32_t *m = p;
    2599           0 :                                         m[n / 32] &= (1U << (n % 32)) - 1;
    2600             :                                 }
    2601             :                         } else
    2602         254 :                                 memset(p, 0x00, 4 * ((n + 31) / 32));
    2603             :                         break;
    2604        7358 :                 case TYPE_bte:
    2605        7358 :                         memset(p, *(bte*)v, n);
    2606        7358 :                         break;
    2607             :                 case TYPE_sht:
    2608      134851 :                         for (i = 0; i < n; i++)
    2609      120342 :                                 ((sht *) p)[i] = *(sht *) v;
    2610             :                         break;
    2611             :                 case TYPE_int:
    2612             :                 case TYPE_flt:
    2613             :                         assert(sizeof(int) == sizeof(flt));
    2614   216499815 :                         for (i = 0; i < n; i++)
    2615   216494785 :                                 ((int *) p)[i] = *(int *) v;
    2616             :                         break;
    2617             :                 case TYPE_lng:
    2618             :                 case TYPE_dbl:
    2619             :                         assert(sizeof(lng) == sizeof(dbl));
    2620   192509553 :                         for (i = 0; i < n; i++)
    2621   192460353 :                                 ((lng *) p)[i] = *(lng *) v;
    2622             :                         break;
    2623             : #ifdef HAVE_HGE
    2624             :                 case TYPE_hge:
    2625    25522813 :                         for (i = 0; i < n; i++)
    2626    25520424 :                                 ((hge *) p)[i] = *(hge *) v;
    2627             :                         break;
    2628             : #endif
    2629             :                 case TYPE_uuid:
    2630      200011 :                         for (i = 0; i < n; i++)
    2631      200006 :                                 ((uuid *) p)[i] = *(uuid *) v;
    2632             :                         break;
    2633       10971 :                 case TYPE_str:
    2634             :                         /* insert the first value, then just copy the
    2635             :                          * offset lots of times */
    2636       10971 :                         if (tfastins_nocheck(bn, 0, v) != GDK_SUCCEED) {
    2637           0 :                                 BBPreclaim(bn);
    2638           0 :                                 return NULL;
    2639             :                         }
    2640             :                         char val[sizeof(var_t)];
    2641       10971 :                         memcpy(val, Tloc(bn, 0), bn->twidth);
    2642       10971 :                         if (bn->twidth == 1 && n > 1) {
    2643             :                                 /* single byte value: we have a
    2644             :                                  * function for that */
    2645        3588 :                                 memset(Tloc(bn, 1), val[0], n - 1);
    2646             :                         } else {
    2647        7383 :                                 char *p = Tloc(bn, 0);
    2648        7383 :                                 for (i = 1; i < n; i++) {
    2649           0 :                                         p += bn->twidth;
    2650           0 :                                         memcpy(p, val, bn->twidth);
    2651             :                                 }
    2652             :                         }
    2653             :                         break;
    2654             :                 default:
    2655         168 :                         for (i = 0; i < n; i++)
    2656         147 :                                 if (tfastins_nocheck(bn, i, v) != GDK_SUCCEED) {
    2657           0 :                                         BBPreclaim(bn);
    2658           0 :                                         return NULL;
    2659             :                                 }
    2660             :                         break;
    2661             :                 }
    2662       89782 :                 bn->theap->dirty = true;
    2663       89782 :                 bn->tnil = n >= 1 && ATOMnilptr(tailtype) && (*ATOMcompare(tailtype))(v, ATOMnilptr(tailtype)) == 0;
    2664       89780 :                 BATsetcount(bn, n);
    2665       89779 :                 bn->tsorted = bn->trevsorted = ATOMlinear(tailtype);
    2666       89779 :                 bn->tnonil = !bn->tnil;
    2667       89779 :                 bn->tkey = BATcount(bn) <= 1;
    2668             :         }
    2669     1578372 :         TRC_DEBUG(ALGO, "-> " ALGOOPTBATFMT " " LLFMT "usec\n",
    2670             :                   ALGOOPTBATPAR(bn), GDKusec() - t0);
    2671             :         return bn;
    2672             : }
    2673             : 
    2674             : /*
    2675             :  * BAT Aggregates
    2676             :  *
    2677             :  * We retain the size() and card() aggregate results in the column
    2678             :  * descriptor.  We would like to have such functionality in an
    2679             :  * extensible way for many aggregates, for DD (1) we do not want to
    2680             :  * change the binary BAT format on disk and (2) aggr and size are the
    2681             :  * most relevant aggregates.
    2682             :  *
    2683             :  * It is all hacked into the aggr[3] records; three adjacent integers
    2684             :  * that were left over in the column record. We refer to these as if
    2685             :  * it where an int aggr[3] array.  The below routines set and retrieve
    2686             :  * the aggregate values from the tail of the BAT, as many
    2687             :  * aggregate-manipulating BAT functions work on tail.
    2688             :  *
    2689             :  * The rules are as follows: aggr[0] contains the alignment ID of the
    2690             :  * column (if set i.e. nonzero).  Hence, if this value is nonzero and
    2691             :  * equal to b->talign, the precomputed aggregate values in
    2692             :  * aggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one
    2693             :  * of them may be set at the time. This is encoded by the value
    2694             :  * int_nil, which cannot occur in these two aggregates.
    2695             :  *
    2696             :  * This was now extended to record the property whether we know there
    2697             :  * is a nil value present by mis-using the highest bits of both
    2698             :  * GDK_AGGR_SIZE and GDK_AGGR_CARD.
    2699             :  */
    2700             : 
    2701             : void
    2702    25031802 : PROPdestroy(BAT *b)
    2703             : {
    2704    25031802 :         PROPrec *p = b->tprops;
    2705             :         PROPrec *n;
    2706             : 
    2707    25031802 :         b->tprops = NULL;
    2708    25031615 :         while (p) {
    2709           0 :                 n = p->next;
    2710           0 :                 VALclear(&p->v);
    2711           0 :                 GDKfree(p);
    2712             :                 p = n;
    2713             :         }
    2714    25031615 : }
    2715             : 
    2716             : ValPtr
    2717           0 : BATgetprop_nolock(BAT *b, enum prop_t idx)
    2718             : {
    2719             :         PROPrec *p;
    2720             : 
    2721           0 :         p = b->tprops;
    2722           0 :         while (p && p->id != idx)
    2723           0 :                 p = p->next;
    2724           0 :         return p ? &p->v : NULL;
    2725             : }
    2726             : 
    2727             : void
    2728           0 : BATrmprop_nolock(BAT *b, enum prop_t idx)
    2729             : {
    2730           0 :         PROPrec *prop = b->tprops, *prev = NULL;
    2731             : 
    2732           0 :         while (prop) {
    2733           0 :                 if (prop->id == idx) {
    2734           0 :                         if (prev)
    2735           0 :                                 prev->next = prop->next;
    2736             :                         else
    2737           0 :                                 b->tprops = prop->next;
    2738           0 :                         VALclear(&prop->v);
    2739           0 :                         GDKfree(prop);
    2740           0 :                         return;
    2741             :                 }
    2742             :                 prev = prop;
    2743           0 :                 prop = prop->next;
    2744             :         }
    2745             : }
    2746             : 
    2747             : ValPtr
    2748           0 : BATsetprop_nolock(BAT *b, enum prop_t idx, int type, const void *v)
    2749             : {
    2750             :         PROPrec *p;
    2751             : 
    2752           0 :         p = b->tprops;
    2753           0 :         while (p && p->id != idx)
    2754           0 :                 p = p->next;
    2755           0 :         if (p == NULL) {
    2756           0 :                 if ((p = GDKmalloc(sizeof(PROPrec))) == NULL) {
    2757             :                         /* properties are hints, so if we can't create
    2758             :                          * one we ignore the error */
    2759           0 :                         GDKclrerr();
    2760           0 :                         return NULL;
    2761             :                 }
    2762           0 :                 p->id = idx;
    2763           0 :                 p->next = b->tprops;
    2764           0 :                 p->v.vtype = 0;
    2765           0 :                 b->tprops = p;
    2766             :         } else {
    2767           0 :                 VALclear(&p->v);
    2768             :         }
    2769           0 :         if (VALinit(&p->v, type, v) == NULL) {
    2770             :                 /* failed to initialize, so remove property */
    2771           0 :                 BATrmprop_nolock(b, idx);
    2772           0 :                 GDKclrerr();
    2773             :                 p = NULL;
    2774             :         }
    2775           0 :         b->batDirtydesc = true;
    2776           0 :         return p ? &p->v : NULL;
    2777             : }
    2778             : 
    2779             : ValPtr
    2780           0 : BATgetprop_try(BAT *b, enum prop_t idx)
    2781             : {
    2782             :         ValPtr p = NULL;
    2783           0 :         if (MT_lock_try(&b->theaplock)) {
    2784             :                 p = BATgetprop_nolock(b, idx);
    2785           0 :                 MT_lock_unset(&b->theaplock);
    2786             :         }
    2787           0 :         return p;
    2788             : }
    2789             : 
    2790             : ValPtr
    2791           0 : BATgetprop(BAT *b, enum prop_t idx)
    2792             : {
    2793             :         ValPtr p;
    2794             : 
    2795           0 :         MT_lock_set(&b->theaplock);
    2796             :         p = BATgetprop_nolock(b, idx);
    2797           0 :         MT_lock_unset(&b->theaplock);
    2798           0 :         return p;
    2799             : }
    2800             : 
    2801             : ValPtr
    2802           0 : BATsetprop(BAT *b, enum prop_t idx, int type, const void *v)
    2803             : {
    2804             :         ValPtr p;
    2805           0 :         MT_lock_set(&b->theaplock);
    2806           0 :         p = BATsetprop_nolock(b, idx, type, v);
    2807           0 :         MT_lock_unset(&b->theaplock);
    2808           0 :         return p;
    2809             : }
    2810             : 
    2811             : void
    2812           0 : BATrmprop(BAT *b, enum prop_t idx)
    2813             : {
    2814           0 :         MT_lock_set(&b->theaplock);
    2815           0 :         BATrmprop_nolock(b, idx);
    2816           0 :         MT_lock_unset(&b->theaplock);
    2817           0 : }
    2818             : 
    2819             : 
    2820             : /*
    2821             :  * The BATcount_no_nil function counts all BUN in a BAT that have a
    2822             :  * non-nil tail value.
    2823             :  */
    2824             : BUN
    2825         315 : BATcount_no_nil(BAT *b, BAT *s)
    2826             : {
    2827             :         BUN cnt = 0;
    2828             :         BUN i, n;
    2829             :         const void *restrict p, *restrict nil;
    2830             :         const char *restrict base;
    2831             :         int t;
    2832             :         int (*cmp)(const void *, const void *);
    2833             :         struct canditer ci;
    2834             :         oid hseq;
    2835             : 
    2836         315 :         BATcheck(b, 0);
    2837             : 
    2838         315 :         hseq = b->hseqbase;
    2839         315 :         n = canditer_init(&ci, b, s);
    2840         315 :         if (b->tnonil)
    2841             :                 return n;
    2842         254 :         BATiter bi = bat_iterator(b);
    2843         254 :         p = bi.base;
    2844         254 :         t = ATOMbasetype(b->ttype);
    2845         254 :         switch (t) {
    2846           1 :         case TYPE_void:
    2847           1 :                 cnt = n * BATtdense(b);
    2848           1 :                 break;
    2849             :         case TYPE_msk:
    2850             :                 cnt = n;
    2851             :                 break;
    2852             :         case TYPE_bte:
    2853          31 :                 for (i = 0; i < n; i++)
    2854          16 :                         cnt += !is_bte_nil(((const bte *) p)[canditer_next(&ci) - hseq]);
    2855             :                 break;
    2856             :         case TYPE_sht:
    2857           2 :                 for (i = 0; i < n; i++)
    2858           1 :                         cnt += !is_sht_nil(((const sht *) p)[canditer_next(&ci) - hseq]);
    2859             :                 break;
    2860             :         case TYPE_int:
    2861     9791987 :                 for (i = 0; i < n; i++)
    2862     9791791 :                         cnt += !is_int_nil(((const int *) p)[canditer_next(&ci) - hseq]);
    2863             :                 break;
    2864             :         case TYPE_lng:
    2865          46 :                 for (i = 0; i < n; i++)
    2866          29 :                         cnt += !is_lng_nil(((const lng *) p)[canditer_next(&ci) - hseq]);
    2867             :                 break;
    2868             : #ifdef HAVE_HGE
    2869             :         case TYPE_hge:
    2870           0 :                 for (i = 0; i < n; i++)
    2871           0 :                         cnt += !is_hge_nil(((const hge *) p)[canditer_next(&ci) - hseq]);
    2872             :                 break;
    2873             : #endif
    2874             :         case TYPE_flt:
    2875           0 :                 for (i = 0; i < n; i++)
    2876           0 :                         cnt += !is_flt_nil(((const flt *) p)[canditer_next(&ci) - hseq]);
    2877             :                 break;
    2878             :         case TYPE_dbl:
    2879          34 :                 for (i = 0; i < n; i++)
    2880          18 :                         cnt += !is_dbl_nil(((const dbl *) p)[canditer_next(&ci) - hseq]);
    2881             :                 break;
    2882             :         case TYPE_uuid:
    2883           4 :                 for (i = 0; i < n; i++)
    2884           3 :                         cnt += !is_uuid_nil(((const uuid *) p)[canditer_next(&ci) - hseq]);
    2885             :                 break;
    2886           7 :         case TYPE_str:
    2887           7 :                 base = bi.vh->base;
    2888           7 :                 switch (bi.width) {
    2889             :                 case 1:
    2890          40 :                         for (i = 0; i < n; i++)
    2891          33 :                                 cnt += base[(var_t) ((const unsigned char *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
    2892             :                         break;
    2893             :                 case 2:
    2894           0 :                         for (i = 0; i < n; i++)
    2895           0 :                                 cnt += base[(var_t) ((const unsigned short *) p)[canditer_next(&ci) - hseq] + GDK_VAROFFSET] != '\200';
    2896             :                         break;
    2897             : #if SIZEOF_VAR_T != SIZEOF_INT
    2898             :                 case 4:
    2899           0 :                         for (i = 0; i < n; i++)
    2900           0 :                                 cnt += base[(var_t) ((const unsigned int *) p)[canditer_next(&ci) - hseq]] != '\200';
    2901             :                         break;
    2902             : #endif
    2903             :                 default:
    2904           0 :                         for (i = 0; i < n; i++)
    2905           0 :                                 cnt += base[((const var_t *) p)[canditer_next(&ci) - hseq]] != '\200';
    2906             :                         break;
    2907             :                 }
    2908             :                 break;
    2909           0 :         default:
    2910           0 :                 nil = ATOMnilptr(t);
    2911           0 :                 cmp = ATOMcompare(t);
    2912           0 :                 if (nil == NULL) {
    2913             :                         cnt = n;
    2914           0 :                 } else if (b->tvarsized) {
    2915           0 :                         base = b->tvheap->base;
    2916           0 :                         for (i = 0; i < n; i++)
    2917           0 :                                 cnt += (*cmp)(nil, base + ((const var_t *) p)[canditer_next(&ci) - hseq]) != 0;
    2918             :                 } else {
    2919           0 :                         for (i = 0, n += i; i < n; i++)
    2920           0 :                                 cnt += (*cmp)(BUNtloc(bi, canditer_next(&ci) - hseq), nil) != 0;
    2921             :                 }
    2922             :                 break;
    2923             :         }
    2924         254 :         if (cnt == BATcount(b)) {
    2925             :                 /* we learned something */
    2926         131 :                 b->tnonil = true;
    2927         131 :                 assert(!b->tnil);
    2928         131 :                 b->tnil = false;
    2929             :         }
    2930         254 :         bat_iterator_end(&bi);
    2931         254 :         return cnt;
    2932             : }

Generated by: LCOV version 1.14