LCOV - code coverage report
Current view: top level - gdk - gdk_bat.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1016 1399 72.6 %
Date: 2021-10-13 02:24:04 Functions: 37 42 88.1 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  * @a M. L. Kersten, P. Boncz, N. Nes
      11             :  * @* BAT Module
      12             :  * In this Chapter we describe the BAT implementation in more detail.
      13             :  * The routines mentioned are primarily meant to simplify the library
      14             :  * implementation.
      15             :  *
      16             :  * @+ BAT Construction
      17             :  * BATs are implemented in several blocks of memory, prepared for disk
      18             :  * storage and easy shipment over a network.
      19             :  *
      20             :  * The BAT starts with a descriptor, which indicates the required BAT
      21             :  * library version and the BAT administration details.  In particular,
      22             :  * it describes the binary relationship maintained and the location of
      23             :  * fields required for storage.
      24             :  *
      25             :  * The general layout of the BAT in this implementation is as follows.
      26             :  * Each BAT comes with a heap for the loc-size buns and, optionally,
      27             :  * with heaps to manage the variable-sized data items of both
      28             :  * dimensions.  The buns are assumed to be stored as loc-size objects.
      29             :  * This is essentially an array of structs to store the associations.
      30             :  * The size is determined at BAT creation time using an upper bound on
      31             :  * the number of elements to be accommodated.  In case of overflow,
      32             :  * its storage space is extended automatically.
      33             :  *
      34             :  * The capacity of a BAT places an upper limit on the number of BUNs
      35             :  * to be stored initially. The actual space set aside may be quite
      36             :  * large.  Moreover, the size is aligned to int boundaries to speed up
      37             :  * access and avoid some machine limitations.
      38             :  *
      39             :  * Initialization of the variable parts relies on type-specific routines
      40             :  * called atomHeap.
      41             :  */
      42             : #include "monetdb_config.h"
      43             : #include "gdk.h"
      44             : #include "gdk_private.h"
      45             : #include "mutils.h"
      46             : 
      47             : #ifdef ALIGN
      48             : #undef ALIGN
      49             : #endif
      50             : #define ALIGN(n,b)      ((b)?(b)*(1+(((n)-1)/(b))):n)
      51             : 
      52             : #define ATOMneedheap(tpe) (BATatoms[tpe].atomHeap != NULL)
      53             : 
      54             : static char *BATstring_t = "t";
      55             : 
      56             : #define default_ident(s)        ((s) == BATstring_t)
      57             : 
      58             : void
      59     4498371 : BATinit_idents(BAT *bn)
      60             : {
      61     4498371 :         bn->tident = BATstring_t;
      62     4498371 : }
      63             : 
      64             : BAT *
      65    12494252 : BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role, uint16_t width)
      66             : {
      67             :         BAT *bn;
      68             : 
      69             :         /*
      70             :          * Alloc space for the BAT and its dependent records.
      71             :          */
      72    12494252 :         assert(tt >= 0);
      73             : 
      74    12494252 :         bn = GDKmalloc(sizeof(BAT));
      75             : 
      76    12491763 :         if (bn == NULL)
      77             :                 return NULL;
      78             : 
      79             :         /*
      80             :          * Fill in basic column info
      81             :          */
      82    12491763 :         *bn = (BAT) {
      83             :                 .hseqbase = hseq,
      84             : 
      85             :                 .ttype = tt,
      86             :                 .tkey = false,
      87             :                 .tnonil = true,
      88             :                 .tnil = false,
      89    12491763 :                 .tsorted = ATOMlinear(tt),
      90             :                 .trevsorted = ATOMlinear(tt),
      91             :                 .tident = BATstring_t,
      92             :                 .tseqbase = oid_nil,
      93             :                 .tminpos = BUN_NONE,
      94             :                 .tmaxpos = BUN_NONE,
      95             :                 .tunique_est = 0.0,
      96             : 
      97             :                 .batRole = role,
      98             :                 .batTransient = true,
      99             :                 .batRestricted = BAT_WRITE,
     100             :         };
     101    12491763 :         if (heapnames && (bn->theap = GDKmalloc(sizeof(Heap))) == NULL) {
     102           0 :                 GDKfree(bn);
     103           0 :                 return NULL;
     104             :         }
     105             : 
     106             :         /*
     107             :          * add to BBP
     108             :          */
     109    12491176 :         if (BBPinsert(bn) == 0) {
     110           0 :                 GDKfree(bn->theap);
     111           0 :                 GDKfree(bn);
     112           0 :                 return NULL;
     113             :         }
     114             :         /*
     115             :          * fill in heap names, so HEAPallocs can resort to disk for
     116             :          * very large writes.
     117             :          */
     118    12493001 :         if (heapnames) {
     119     7994284 :                 assert(bn->theap != NULL);
     120     7994010 :                 *bn->theap = (Heap) {
     121     7994010 :                         .parentid = bn->batCacheid,
     122     7994284 :                         .farmid = BBPselectfarm(role, bn->ttype, offheap),
     123             :                 };
     124             : 
     125     7994010 :                 const char *nme = BBP_physical(bn->batCacheid);
     126     7994010 :                 settailname(bn->theap, nme, tt, width);
     127             : 
     128     7995375 :                 if (ATOMneedheap(tt)) {
     129     1105145 :                         if ((bn->tvheap = GDKmalloc(sizeof(Heap))) == NULL) {
     130           0 :                                 BBPclear(bn->batCacheid, true);
     131           0 :                                 HEAPfree(bn->theap, true);
     132           0 :                                 GDKfree(bn->theap);
     133           0 :                                 GDKfree(bn);
     134             :                                 return NULL;
     135             :                         }
     136     1105099 :                         *bn->tvheap = (Heap) {
     137     1105099 :                                 .parentid = bn->batCacheid,
     138     1105138 :                                 .farmid = BBPselectfarm(role, bn->ttype, varheap),
     139             :                         };
     140     1105099 :                         ATOMIC_INIT(&bn->tvheap->refs, 1);
     141     1105099 :                         strconcat_len(bn->tvheap->filename,
     142             :                                       sizeof(bn->tvheap->filename),
     143             :                                       nme, ".theap", NULL);
     144             :                 }
     145     7994580 :                 ATOMIC_INIT(&bn->theap->refs, 1);
     146             :         } else {
     147     4498717 :                 assert(bn->theap == NULL);
     148             :         }
     149             :         char name[MT_NAME_LEN];
     150    12493297 :         snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
     151    12493297 :         MT_lock_init(&bn->theaplock, name);
     152    12492072 :         snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
     153    12492072 :         MT_lock_init(&bn->batIdxLock, name);
     154    12491766 :         snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
     155    12491766 :         MT_rwlock_init(&bn->thashlock, name);
     156    12492865 :         bn->batDirtydesc = true;
     157    12492865 :         return bn;
     158             : }
     159             : 
     160             : uint8_t
     161    16007255 : ATOMelmshift(int sz)
     162             : {
     163             :         uint8_t sh;
     164    16007255 :         int i = sz >> 1;
     165             : 
     166    32283073 :         for (sh = 0; i != 0; sh++) {
     167    16275818 :                 i >>= 1;
     168             :         }
     169    16007255 :         return sh;
     170             : }
     171             : 
     172             : 
     173             : void
     174     7994941 : BATsetdims(BAT *b)
     175             : {
     176     7994941 :         b->twidth = b->ttype == TYPE_str ? 1 : ATOMsize(b->ttype);
     177     7994941 :         b->tshift = ATOMelmshift(b->twidth);
     178     7994941 :         assert_shift_width(b->tshift, b->twidth);
     179     7994941 :         b->tvarsized = b->ttype == TYPE_void || BATatoms[b->ttype].atomPut != NULL;
     180     7994941 : }
     181             : 
     182             : const char *
     183     3730330 : gettailname(const BAT *b)
     184             : {
     185     3730330 :         if (b->ttype == TYPE_str) {
     186      869547 :                 switch (b->twidth) {
     187      634878 :                 case 1:
     188      634878 :                         return "tail1";
     189      196413 :                 case 2:
     190      196413 :                         return "tail2";
     191             : #if SIZEOF_VAR_T == 8
     192       38256 :                 case 4:
     193       38256 :                         return "tail4";
     194             : #endif
     195             :                 default:
     196             :                         break;
     197             :                 }
     198     2860783 :         }
     199             :         return "tail";
     200             : }
     201             : 
     202             : void
     203     9168333 : settailname(Heap *restrict tail, const char *restrict physnme, int tt, int width)
     204             : {
     205     9168333 :         if (tt == TYPE_str) {
     206     2258716 :                 switch (width) {
     207     1105849 :                 case 1:
     208     1105849 :                         strconcat_len(tail->filename,
     209             :                                       sizeof(tail->filename), physnme,
     210             :                                       ".tail1", NULL);
     211     1105928 :                         return;
     212      114933 :                 case 2:
     213      114933 :                         strconcat_len(tail->filename,
     214             :                                       sizeof(tail->filename), physnme,
     215             :                                       ".tail2", NULL);
     216      114911 :                         return;
     217             : #if SIZEOF_VAR_T == 8
     218       25742 :                 case 4:
     219       25742 :                         strconcat_len(tail->filename,
     220             :                                       sizeof(tail->filename), physnme,
     221             :                                       ".tail4", NULL);
     222       25742 :                         return;
     223             : #endif
     224             :                 default:
     225             :                         break;
     226             :                 }
     227     6909617 :         }
     228     7921809 :         strconcat_len(tail->filename, sizeof(tail->filename), physnme,
     229             :                       ".tail", NULL);
     230             : }
     231             : 
     232             : /*
     233             :  * @- BAT allocation
     234             :  * Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).
     235             :  * We now initialize new BATs with their heapname such that the
     236             :  * modified HEAPalloc/HEAPextend primitives can possibly use memory
     237             :  * mapped files as temporary heap storage.
     238             :  *
     239             :  * In case of huge bats, we want HEAPalloc to write a file to disk,
     240             :  * and memory map it. To make this possible, we must provide it with
     241             :  * filenames.
     242             :  */
     243             : BAT *
     244     7995811 : COLnew_intern(oid hseq, int tt, BUN cap, role_t role, uint16_t width)
     245             : {
     246             :         BAT *bn;
     247             : 
     248     7995811 :         assert(cap <= BUN_MAX);
     249     7995811 :         assert(hseq <= oid_nil);
     250     7995811 :         assert(tt != TYPE_bat);
     251     7995811 :         ERRORcheck((tt < 0) || (tt > GDKatomcnt), "tt error\n", NULL);
     252             : 
     253             :         /* round up to multiple of BATTINY */
     254     7995811 :         if (cap < BUN_MAX - BATTINY)
     255     7995986 :                 cap = (cap + BATTINY - 1) & ~(BATTINY - 1);
     256     7995811 :         if (ATOMstorage(tt) == TYPE_msk) {
     257      176825 :                 if (cap < 8*BATTINY)
     258             :                         cap = 8*BATTINY;
     259             :                 else
     260       45461 :                         cap = (cap + 31) & ~(BUN)31;
     261     7818986 :         } else if (cap < BATTINY)
     262             :                 cap = BATTINY;
     263             :         /* limit the size */
     264             :         if (cap > BUN_MAX)
     265             :                 cap = BUN_MAX;
     266             : 
     267     7995811 :         bn = BATcreatedesc(hseq, tt, true, role, width);
     268     7994464 :         if (bn == NULL)
     269             :                 return NULL;
     270             : 
     271     7994464 :         BATsetdims(bn);
     272     7993711 :         bn->batCapacity = cap;
     273             : 
     274     7993711 :         if (ATOMstorage(tt) == TYPE_msk)
     275      176825 :                 cap /= 8;       /* 8 values per byte */
     276     7816886 :         else if (tt == TYPE_str) {
     277     1101891 :                 if (width != 0) {
     278             :                         /* power of two and not too large */
     279       89728 :                         assert((width & (width - 1)) == 0);
     280       89728 :                         assert(width <= sizeof(var_t));
     281       89728 :                         bn->twidth = width;
     282             :                 }
     283     1101891 :                 settailname(bn->theap, BBP_physical(bn->batCacheid), tt, bn->twidth);
     284             :         }
     285             : 
     286             :         /* alloc the main heaps */
     287     7993653 :         if (tt && HEAPalloc(bn->theap, cap, bn->twidth, ATOMsize(bn->ttype)) != GDK_SUCCEED) {
     288           0 :                 goto bailout;
     289             :         }
     290             : 
     291     7992138 :         if (bn->tvheap && width == 0 && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
     292           0 :                 goto bailout;
     293             :         }
     294     7992990 :         DELTAinit(bn);
     295     7992735 :         if (BBPcacheit(bn, true) != GDK_SUCCEED) {
     296           0 :                 goto bailout;
     297             :         }
     298     7995891 :         TRC_DEBUG(ALGO, "-> " ALGOBATFMT "\n", ALGOBATPAR(bn));
     299             :         return bn;
     300           0 :   bailout:
     301           0 :         BBPclear(bn->batCacheid, true);
     302           0 :         if (bn->theap)
     303           0 :                 HEAPdecref(bn->theap, true);
     304           0 :         if (bn->tvheap)
     305           0 :                 HEAPdecref(bn->tvheap, true);
     306           0 :         MT_lock_destroy(&bn->theaplock);
     307           0 :         MT_lock_destroy(&bn->batIdxLock);
     308           0 :         MT_rwlock_destroy(&bn->thashlock);
     309           0 :         GDKfree(bn);
     310           0 :         return NULL;
     311             : }
     312             : 
     313             : BAT *
     314     7669180 : COLnew(oid hseq, int tt, BUN cap, role_t role)
     315             : {
     316     7669180 :         return COLnew_intern(hseq, tt, cap, role, 0);
     317             : }
     318             : 
     319             : BAT *
     320     2968030 : BATdense(oid hseq, oid tseq, BUN cnt)
     321             : {
     322             :         BAT *bn;
     323             : 
     324     2968030 :         bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
     325     2967871 :         if (bn != NULL) {
     326     2967871 :                 BATtseqbase(bn, tseq);
     327     2967723 :                 BATsetcount(bn, cnt);
     328     2967789 :                 TRC_DEBUG(ALGO, OIDFMT "," OIDFMT "," BUNFMT
     329             :                           "-> " ALGOBATFMT "\n", hseq, tseq, cnt,
     330             :                           ALGOBATPAR(bn));
     331             :         }
     332     2967789 :         return bn;
     333             : }
     334             : 
                       /*
                        * Create a new BAT of tail type tt (role as given) and fill it
                        * with the contents of the file heapfile.
                        *
                        * For string storage the file is read as a sequence of
                        * NUL-terminated strings that must be structurally valid UTF-8;
                        * each string is appended to the BAT.  For all other (fixed-size)
                        * types the file is read verbatim into the tail heap and its size
                        * must be a whole number of atoms.
                        *
                        * Returns the new BAT, or NULL on any error (bad arguments, I/O
                        * failure, allocation failure, malformed input); on error the file
                        * is closed and the partially built BAT is reclaimed.
                        */
     335             : BAT *
     336           0 : BATattach(int tt, const char *heapfile, role_t role)
     337             : {
     338             :         BAT *bn;
     339             :         char *p;
     340             :         size_t m;
     341             :         FILE *f;
     342             : 
     343           0 :         ERRORcheck(tt <= 0 , "bad tail type (<=0)\n", NULL);
     344           0 :         ERRORcheck(ATOMvarsized(tt) && ATOMstorage(tt) != TYPE_str, "bad tail type (varsized and not str)\n", NULL);
     345           0 :         ERRORcheck(heapfile == NULL, "bad heapfile name\n", NULL);
     346             : 
     347           0 :         if ((f = MT_fopen(heapfile, "rb")) == NULL) {
     348           0 :                 GDKsyserror("BATattach: cannot open %s\n", heapfile);
     349             :                 return NULL;
     350             :         }
     351           0 :         if (ATOMstorage(tt) == TYPE_str) {
                               /* string input: n is the length of the string being
                                * collected, m the size of buffer p, s the write
                                * position (p + n), and u the number of UTF-8
                                * continuation bytes still expected */
     352             :                 size_t n;
     353             :                 char *s;
     354             :                 int c, u;
     355             : 
     356           0 :                 if ((bn = COLnew(0, tt, 0, role)) == NULL) {
     357           0 :                         fclose(f);
     358           0 :                         return NULL;
     359             :                 }
     360             :                 m = 4096;
     361             :                 n = 0;
     362             :                 u = 0;
     363           0 :                 s = p = GDKmalloc(m);
     364           0 :                 if (p == NULL) {
     365           0 :                         fclose(f);
     366           0 :                         BBPreclaim(bn);
     367           0 :                         return NULL;
     368             :                 }
     369           0 :                 while ((c = getc(f)) != EOF) {
                                       /* buffer full: grow it by 4096 bytes; the old
                                        * buffer p stays valid until the realloc succeeds */
     370           0 :                         if (n == m) {
     371           0 :                                 m += 4096;
     372           0 :                                 s = GDKrealloc(p, m);
     373           0 :                                 if (s == NULL) {
     374           0 :                                         GDKfree(p);
     375           0 :                                         BBPreclaim(bn);
     376           0 :                                         fclose(f);
     377           0 :                                         return NULL;
     378             :                                 }
     379             :                                 p = s;
     380           0 :                                 s = p + n;
     381             :                         }
     382           0 :                         if (c == '\n' && n > 0 && s[-1] == '\r') {
     383             :                                 /* deal with CR-LF sequence */
     384           0 :                                 s[-1] = c;
     385             :                         } else {
     386           0 :                                 *s++ = c;
     387           0 :                                 n++;
     388             :                         }
                                       /* UTF-8 structure check: a lead byte announces 1-3
                                        * continuation bytes (10xxxxxx); any other byte with
                                        * the high bit set is rejected */
     389           0 :                         if (u) {
     390           0 :                                 if ((c & 0xC0) == 0x80)
     391           0 :                                         u--;
     392             :                                 else
     393           0 :                                         goto notutf8;
     394           0 :                         } else if ((c & 0xF8) == 0xF0)
     395             :                                 u = 3;
     396           0 :                         else if ((c & 0xF0) == 0xE0)
     397             :                                 u = 2;
     398           0 :                         else if ((c & 0xE0) == 0xC0)
     399             :                                 u = 1;
     400           0 :                         else if ((c & 0x80) == 0x80)
     401           0 :                                 goto notutf8;
     402           0 :                         else if (c == 0) {
                                               /* NUL ends the current string: append it
                                                * and restart at the buffer's beginning */
     403           0 :                                 if (BUNappend(bn, p, false) != GDK_SUCCEED) {
     404           0 :                                         BBPreclaim(bn);
     405           0 :                                         fclose(f);
     406           0 :                                         GDKfree(p);
     407           0 :                                         return NULL;
     408             :                                 }
     409             :                                 s = p;
     410             :                                 n = 0;
     411             :                         }
     412             :                 }
     413           0 :                 fclose(f);
     414           0 :                 GDKfree(p);
                               /* bytes left over after the last NUL mean the file
                                * ended in the middle of a string */
     415           0 :                 if (n > 0) {
     416           0 :                         BBPreclaim(bn);
     417           0 :                         GDKerror("last string is not null-terminated\n");
     418           0 :                         return NULL;
     419             :                 }
     420             :         } else {
                               /* fixed-size input: the file is a raw array of atoms */
     421             :                 struct stat st;
     422             :                 int atomsize;
     423             :                 BUN cap;
     424             :                 lng n;
     425             : 
     426           0 :                 if (fstat(fileno(f), &st) < 0) {
     427           0 :                         GDKsyserror("BATattach: cannot stat %s\n", heapfile);
     428           0 :                         fclose(f);
     429             :                         return NULL;
     430             :                 }
     431           0 :                 atomsize = ATOMsize(tt);
     432           0 :                 if (st.st_size % atomsize != 0) {
     433           0 :                         fclose(f);
     434           0 :                         GDKerror("heapfile size not integral number of atoms\n");
     435           0 :                         return NULL;
     436             :                 }
                               /* the resulting number of values must not exceed
                                * BUN_MAX; msk storage holds 8 values per byte */
     437           0 :                 if (ATOMstorage(tt) == TYPE_msk ?
     438             :                     (st.st_size > (off_t) (BUN_MAX / 8)) :
     439           0 :                     ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX)) {
     440           0 :                         fclose(f);
     441           0 :                         GDKerror("heapfile too large\n");
     442           0 :                         return NULL;
     443             :                 }
     444           0 :                 cap = (BUN) (ATOMstorage(tt) == TYPE_msk ?
     445           0 :                              st.st_size * 8 :
     446           0 :                              st.st_size / atomsize);
     447           0 :                 bn = COLnew(0, tt, cap, role);
     448           0 :                 if (bn == NULL) {
     449           0 :                         fclose(f);
     450           0 :                         return NULL;
     451             :                 }
                               /* read the file straight into the tail heap, at most
                                * 1 MiB per fread call; n counts the bytes still missing */
     452           0 :                 p = Tloc(bn, 0);
     453           0 :                 n = (lng) st.st_size;
     454           0 :                 while (n > 0 && (m = fread(p, 1, (size_t) MIN(1024*1024, n), f)) > 0) {
     455           0 :                         p += m;
     456           0 :                         n -= m;
     457             :                 }
     458           0 :                 fclose(f);
     459           0 :                 if (n > 0) {
     460           0 :                         GDKerror("couldn't read the complete file\n");
     461           0 :                         BBPreclaim(bn);
     462           0 :                         return NULL;
     463             :                 }
                               /* the values were not inspected, so only claim the
                                * properties that hold trivially for 0 or 1 values */
     464           0 :                 BATsetcount(bn, cap);
     465           0 :                 bn->tnonil = cap == 0;
     466           0 :                 bn->tnil = false;
     467           0 :                 bn->tseqbase = oid_nil;
     468           0 :                 if (cap > 1) {
     469           0 :                         bn->tsorted = false;
     470           0 :                         bn->trevsorted = false;
     471           0 :                         bn->tkey = false;
     472             :                 } else {
     473           0 :                         bn->tsorted = ATOMlinear(tt);
     474           0 :                         bn->trevsorted = ATOMlinear(tt);
     475           0 :                         bn->tkey = true;
     476             :                 }
     477             :         }
     478             :         return bn;
     479             : 
                       /* common exit for malformed UTF-8 in the string branch */
     480           0 :   notutf8:
     481           0 :         fclose(f);
     482           0 :         BBPreclaim(bn);
     483           0 :         GDKfree(p);
     484           0 :         GDKerror("input is not UTF-8\n");
     485           0 :         return NULL;
     486             : }
     487             : 
     488             : /*
     489             :  * If the BAT runs out of storage for BUNS it will reallocate space.
     490             :  * For memory mapped BATs we simply extend the administration after
     491             :  * having an assurance that the BAT still can be safely stored away.
     492             :  */
     493             : BUN
     494       20133 : BATgrows(BAT *b)
     495             : {
     496             :         BUN oldcap, newcap;
     497             : 
     498       20133 :         BATcheck(b, 0);
     499             : 
     500       20133 :         newcap = oldcap = BATcapacity(b);
     501       20133 :         if (newcap < BATTINY)
     502             :                 newcap = 2 * BATTINY;
     503       20117 :         else if (newcap < 10 * BATTINY)
     504       17384 :                 newcap = 4 * newcap;
     505        2733 :         else if (newcap < 50 * BATTINY)
     506        2152 :                 newcap = 2 * newcap;
     507         581 :         else if ((double) newcap * BATMARGIN <= (double) BUN_MAX)
     508         581 :                 newcap = (BUN) ((double) newcap * BATMARGIN);
     509             :         else
     510             :                 newcap = BUN_MAX;
     511       20133 :         if (newcap == oldcap) {
     512           0 :                 if (newcap <= BUN_MAX - 10)
     513           0 :                         newcap += 10;
     514             :                 else
     515             :                         newcap = BUN_MAX;
     516             :         }
     517       20133 :         if (ATOMstorage(b->ttype) == TYPE_msk) /* round up to multiple of 32 */
     518           0 :                 newcap = (newcap + 31) & ~(BUN)31;
     519             :         return newcap;
     520             : }
     521             : 
     522             : /*
     523             :  * The routine should ensure that the BAT keeps its location in the
     524             :  * BAT buffer.
     525             :  *
     526             :  * Overflow in the other heaps are dealt with in the atom routines.
     527             :  * Here we merely copy their references into the new administration
     528             :  * space.
     529             :  */
     530             : gdk_return
     531       38645 : BATextend(BAT *b, BUN newcap)
     532             : {
     533             :         size_t theap_size;
     534             : 
     535       38645 :         assert(newcap <= BUN_MAX);
     536       38645 :         BATcheck(b, GDK_FAIL);
     537             :         /*
     538             :          * The main issue is to properly predict the new BAT size.
     539             :          * storage overflow. The assumption taken is that capacity
     540             :          * overflow is rare. It is changed only when the position of
     541             :          * the next available BUN surpasses the free area marker.  Be
     542             :          * aware that the newcap should be greater than the old value,
     543             :          * otherwise you may easily corrupt the administration of
     544             :          * malloc.
     545             :          */
     546       38645 :         if (newcap <= BATcapacity(b)) {
     547             :                 return GDK_SUCCEED;
     548             :         }
     549             : 
     550       31167 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
     551         899 :                 newcap = (newcap + 31) & ~(BUN)31; /* round up to multiple of 32 */
     552         899 :                 theap_size = (size_t) (newcap / 8); /* in bytes */
     553             :         } else {
     554       30268 :                 theap_size = (size_t) newcap << b->tshift;
     555             :         }
     556       31167 :         b->batCapacity = newcap;
     557             : 
     558       31167 :         if (b->theap->base) {
     559       31150 :                 TRC_DEBUG(HEAP, "HEAPgrow in BATextend %s %zu %zu\n",
     560             :                           b->theap->filename, b->theap->size, theap_size);
     561       31150 :                 return HEAPgrow(&b->theaplock, &b->theap, theap_size, b->batRestricted == BAT_READ);
     562             :         }
     563             :         return GDK_SUCCEED;
     564             : }
     565             : 
     566             : 
     567             : 
     568             : /*
     569             :  * @+ BAT destruction
     570             :  * BATclear quickly removes all elements from a BAT. It must respect
     571             :  * the transaction rules; so stable elements must be moved to the
     572             :  * "deleted" section of the BAT (they cannot be fully deleted
     573             :  * yet). For the elements that really disappear, we must free
     574             :  * heapspace and unfix the atoms if they have fix/unfix handles. As an
     575             :  * optimization, in the case of no stable elements, we quickly empty
     576             :  * the heaps by copying a standard small empty image over them.
     577             :  */
     578             : gdk_return
     579        2310 : BATclear(BAT *b, bool force)
     580             : {
     581             :         BUN p, q;
     582             : 
     583        2310 :         BATcheck(b, GDK_FAIL);
     584             : 
     585        2310 :         if (!force && b->batInserted > 0) {
     586           0 :                 GDKerror("cannot clear committed BAT\n");
     587           0 :                 return GDK_FAIL;
     588             :         }
     589             : 
     590             :         /* kill all search accelerators */
     591        2310 :         HASHdestroy(b);
     592        2310 :         IMPSdestroy(b);
     593        2310 :         OIDXdestroy(b);
     594        2310 :         PROPdestroy(b);
     595             : 
     596             :         /* we must dispose of all inserted atoms */
     597        2310 :         MT_lock_set(&b->theaplock);
     598        2310 :         if (force && BATatoms[b->ttype].atomDel == NULL) {
     599        2303 :                 assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
     600             :                 /* no stable elements: we do a quick heap clean */
     601             :                 /* need to clean heap which keeps data even though the
     602             :                    BUNs got removed. This means reinitialize when
     603             :                    free > 0
     604             :                 */
     605        2303 :                 if (b->tvheap && b->tvheap->free > 0) {
     606          21 :                         Heap *th = GDKmalloc(sizeof(Heap));
     607             : 
     608          21 :                         if (th == NULL) {
     609           0 :                                 MT_lock_unset(&b->theaplock);
     610           0 :                                 return GDK_FAIL;
     611             :                         }
     612          21 :                         *th = (Heap) {
     613          21 :                                 .farmid = b->tvheap->farmid,
     614             :                         };
     615          21 :                         strcpy_len(th->filename, b->tvheap->filename, sizeof(th->filename));
     616          21 :                         if (ATOMheap(b->ttype, th, 0) != GDK_SUCCEED) {
     617           0 :                                 MT_lock_unset(&b->theaplock);
     618           0 :                                 return GDK_FAIL;
     619             :                         }
     620          21 :                         ATOMIC_INIT(&th->refs, 1);
     621          21 :                         th->parentid = b->tvheap->parentid;
     622          21 :                         th->dirty = true;
     623          21 :                         HEAPdecref(b->tvheap, false);
     624          21 :                         b->tvheap = th;
     625             :                 }
     626             :         } else {
     627             :                 /* do heap-delete of all inserted atoms */
     628           7 :                 void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
     629             : 
     630             :                 /* TYPE_str has no del method, so we shouldn't get here */
     631           7 :                 assert(tatmdel == NULL || b->twidth == sizeof(var_t));
     632           7 :                 if (tatmdel) {
     633           0 :                         BATiter bi = bat_iterator_nolock(b);
     634             : 
     635           0 :                         for (p = b->batInserted, q = BUNlast(b); p < q; p++)
     636           0 :                                 (*tatmdel)(b->tvheap, (var_t*) BUNtloc(bi,p));
     637           0 :                         b->tvheap->dirty = true;
     638             :                 }
     639             :         }
     640             : 
     641        2310 :         if (force)
     642        2303 :                 b->batInserted = 0;
     643        2310 :         b->batCount = 0;
     644        2310 :         if (b->ttype == TYPE_void)
     645           0 :                 b->batCapacity = 0;
     646        2310 :         BAThseqbase(b, 0);
     647        2310 :         BATtseqbase(b, ATOMtype(b->ttype) == TYPE_oid ? 0 : oid_nil);
     648        2310 :         b->batDirtydesc = true;
     649        2310 :         b->theap->dirty = true;
     650        2310 :         BATsettrivprop(b);
     651        2310 :         b->tnosorted = b->tnorevsorted = 0;
     652        2310 :         b->tnokey[0] = b->tnokey[1] = 0;
     653        2310 :         b->tminpos = BUN_NONE;
     654        2310 :         b->tmaxpos = BUN_NONE;
     655        2310 :         b->tunique_est = 0.0;
     656        2310 :         MT_lock_unset(&b->theaplock);
     657        2310 :         return GDK_SUCCEED;
     658             : }
     659             : 
     660             : /* free a cached BAT; leave the bat descriptor cached */
     661             : void
     662     4762565 : BATfree(BAT *b)
     663             : {
     664     4762565 :         if (b == NULL)
     665             :                 return;
     666             : 
     667             :         /* deallocate all memory for a bat */
     668     4762565 :         if (b->tident && !default_ident(b->tident))
     669           1 :                 GDKfree(b->tident);
     670     4762565 :         b->tident = BATstring_t;
     671     4762565 :         MT_rwlock_rdlock(&b->thashlock);
     672             :         BUN nunique = BUN_NONE;
     673     4762413 :         if (b->thash && b->thash != (Hash *) 1) {
     674        6665 :                 nunique = b->thash->nunique;
     675             :         }
     676     4762413 :         MT_rwlock_rdunlock(&b->thashlock);
     677     4762250 :         HASHfree(b);
     678     4761999 :         IMPSfree(b);
     679     4761798 :         OIDXfree(b);
     680     4761641 :         MT_lock_set(&b->theaplock);
     681     4762069 :         if (nunique != BUN_NONE) {
     682        6665 :                 b->tunique_est = (double) nunique;
     683             :         }
     684     4762069 :         if (b->theap) {
     685       93230 :                 assert(ATOMIC_GET(&b->theap->refs) == 1);
     686       93230 :                 assert(b->theap->parentid == b->batCacheid);
     687       93230 :                 HEAPfree(b->theap, false);
     688             :         }
     689     4762119 :         if (b->tvheap) {
     690       24354 :                 assert(ATOMIC_GET(&b->tvheap->refs) == 1);
     691       24354 :                 assert(b->tvheap->parentid == b->batCacheid);
     692       24354 :                 HEAPfree(b->tvheap, false);
     693             :         }
     694     4762119 :         MT_lock_unset(&b->theaplock);
     695             : }
     696             : 
     697             : /* free a cached BAT descriptor */
     698             : void
     699    12516394 : BATdestroy(BAT *b)
     700             : {
     701    12516394 :         if (b->tident && !default_ident(b->tident))
     702         663 :                 GDKfree(b->tident);
     703    12515414 :         b->tident = BATstring_t;
     704    12515414 :         if (b->tvheap) {
     705             :                 ATOMIC_DESTROY(&b->tvheap->refs);
     706     1178366 :                 GDKfree(b->tvheap);
     707             :         }
     708    12515424 :         PROPdestroy(b);
     709    12514582 :         MT_lock_destroy(&b->theaplock);
     710    12514954 :         MT_lock_destroy(&b->batIdxLock);
     711    12514209 :         MT_rwlock_destroy(&b->thashlock);
     712    12516404 :         if (b->theap) {
     713             :                 ATOMIC_DESTROY(&b->theap->refs);
     714     7847903 :                 GDKfree(b->theap);
     715             :         }
     716    12516083 :         GDKfree(b);
     717    12516804 : }
     718             : 
     719             : /*
     720             :  * @+ BAT copying
     721             :  *
     722             :  * BAT copying is an often used operation. So it deserves attention.
     723             :  * When making a copy of a BAT, the following aspects are of
     724             :  * importance:
     725             :  *
     726             :  * - the requested head and tail types. The purpose of the copy may be
     727             :  *   to slightly change these types (e.g. void <-> oid). We may also
     728             :  *   remap between types as long as they share the same
     729             :  *   ATOMstorage(type), i.e. the types have the same physical
     730             :  *   implementation. We may even want to allow 'dirty' tricks such as
     731             :  *   viewing a flt-column suddenly as int.
     732             :  *
     733             :  *   To allow such changes, the desired column type is a
     734             :  *   parameter of COLcopy.
     735             :  *
     736             :  * - access mode. If we want a read-only copy of a read-only BAT, a
     737             :  *   VIEW may do (in this case, the user may be after just an
     738             :  *   independent BAT header and id). This is indicated by the
     739             :  *   parameter (writable = FALSE).
     740             :  *
     741             :  *   In other cases, we really want an independent physical copy
     742             :  *   (writable = TRUE).  Changing the mode to BAT_WRITE will be a
     743             :  *   zero-cost operation if the BAT was copied with (writable = TRUE).
     744             :  *
     745             :  * In GDK, the result is a BAT that is BAT_WRITE iff (writable ==
     746             :  * TRUE).
     747             :  *
     748             :  * When a VIEW is returned, the copy is a logical view on the original,
     749             :  * which ensures that the original cannot be modified or destroyed
     750             :  * (which could affect the shared heaps).
     751             :  */
     752             : static bool
     753         378 : wrongtype(int t1, int t2)
     754             : {
     755             :         /* check if types are compatible. be extremely forgiving */
     756         378 :         if (t1 != TYPE_void) {
     757         378 :                 t1 = ATOMtype(ATOMstorage(t1));
     758         378 :                 t2 = ATOMtype(ATOMstorage(t2));
     759         378 :                 if (t1 != t2) {
     760         327 :                         if (ATOMvarsized(t1) ||
     761         327 :                             ATOMvarsized(t2) ||
     762         327 :                             t1 == TYPE_msk || t2 == TYPE_msk ||
     763         327 :                             ATOMsize(t1) != ATOMsize(t2) ||
     764         327 :                             BATatoms[t1].atomFix ||
     765         327 :                             BATatoms[t2].atomFix)
     766           0 :                                 return true;
     767             :                 }
     768             :         }
     769             :         return false;
     770             : }
     771             : 
     772             : /*
     773             :  * There are four main implementation cases:
     774             :  * (1) we are allowed to return a view (zero effort),
     775             :  * (2) the result is void,void (zero effort),
     776             :  * (3) we can copy the heaps (memcopy, or even VM page sharing)
     777             :  * (4) we must insert BUN-by-BUN into the result (fallback)
     778             :  * The latter case is still optimized for the case that the result
     779             :  * is bat[void,T] for a simple fixed-size type T. In that case we
     780             :  * do inline array[T] inserts.
     781             :  */
     782             : BAT *
     783       83380 : COLcopy(BAT *b, int tt, bool writable, role_t role)
     784             : {
     785             :         bool slowcopy = false;
     786             :         BAT *bn = NULL;
     787             :         BATiter bi;
     788             : 
     789       83380 :         BATcheck(b, NULL);
     790       83380 :         assert(tt != TYPE_bat);
     791             : 
     792             :         /* maybe a bit ugly to change the requested bat type?? */
     793       83380 :         if (b->ttype == TYPE_void && !writable)
     794             :                 tt = TYPE_void;
     795             : 
     796       83380 :         if (tt != b->ttype && wrongtype(tt, b->ttype)) {
     797           0 :                 GDKerror("wrong tail-type requested\n");
     798           0 :                 return NULL;
     799             :         }
     800             : 
     801       83380 :         bi = bat_iterator(b);
     802             : 
     803             :         /* first try case (1); create a view, possibly with different
     804             :          * atom-types */
     805       83381 :         if (!writable &&
     806       83381 :             role == TRANSIENT &&
     807       32888 :             b->batRestricted == BAT_READ &&
     808       24471 :             ATOMstorage(b->ttype) != TYPE_msk && /* no view on TYPE_msk */
     809       24471 :             (!VIEWtparent(b) ||
     810        4495 :              BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ)) {
     811       24471 :                 bn = VIEWcreate(b->hseqbase, b);
     812       24471 :                 if (bn == NULL) {
     813           0 :                         bat_iterator_end(&bi);
     814           0 :                         return NULL;
     815             :                 }
     816       24471 :                 if (tt != bn->ttype) {
     817          48 :                         bn->ttype = tt;
     818          48 :                         bn->tvarsized = ATOMvarsized(tt);
     819          48 :                         bn->tseqbase = ATOMtype(tt) == TYPE_oid ? bi.tseq : oid_nil;
     820             :                 }
     821             :         } else {
     822             :                 /* check whether we need case (4); BUN-by-BUN copy (by
     823             :                  * setting slowcopy to false) */
     824       58910 :                 if (ATOMsize(tt) != ATOMsize(bi.type)) {
     825             :                         /* oops, void materialization */
     826             :                         slowcopy = true;
     827       58582 :                 } else if (BATatoms[tt].atomFix) {
     828             :                         /* oops, we need to fix/unfix atoms */
     829             :                         slowcopy = true;
     830       58582 :                 } else if (bi.h && bi.h->parentid != b->batCacheid) {
     831             :                         /* extra checks needed for views */
     832        5582 :                         if (BATcapacity(BBP_cache(bi.h->parentid)) > bi.count + bi.count)
     833             :                                 /* reduced slice view: do not copy too
     834             :                                  * much garbage */
     835             :                                 slowcopy = true;
     836             :                 }
     837             : 
     838       58910 :                 bn = COLnew_intern(b->hseqbase, tt, bi.count, role, bi.width);
     839       58911 :                 if (bn == NULL) {
     840           0 :                         bat_iterator_end(&bi);
     841           0 :                         return NULL;
     842             :                 }
     843       58911 :                 if (bn->tvheap != NULL && bn->tvheap->base == NULL) {
     844             :                         /* this combination can happen since the last
     845             :                          * argument of COLnew_intern not being zero
     846             :                          * triggers a skip in the allocation of the
     847             :                          * tvheap */
     848       39331 :                         if (ATOMheap(bn->ttype, bn->tvheap, bn->batCapacity) != GDK_SUCCEED) {
     849           0 :                                 bat_iterator_end(&bi);
     850           0 :                                 BBPreclaim(bn);
     851           0 :                                 return NULL;
     852             :                         }
     853             :                 }
     854             : 
     855       58908 :                 if (tt == TYPE_void) {
     856             :                         /* case (2): a void,void result => nothing to
     857             :                          * copy! */
     858         731 :                         bn->theap->free = 0;
     859       58177 :                 } else if (!slowcopy) {
     860             :                         /* case (3): just copy the heaps */
     861       52563 :                         if (bn->tvheap && HEAPextend(bn->tvheap, bi.vhfree, true) != GDK_SUCCEED) {
     862           0 :                                 goto bunins_failed;
     863             :                         }
     864       52564 :                         memcpy(bn->theap->base, bi.base, bi.count << bi.shift);
     865       52564 :                         bn->theap->free = bi.count << bi.shift;
     866       52564 :                         bn->theap->dirty = true;
     867       52564 :                         if (bn->tvheap) {
     868       38872 :                                 memcpy(bn->tvheap->base, bi.vh->base, bi.vhfree);
     869       38872 :                                 bn->tvheap->free = bi.vhfree;
     870       38872 :                                 bn->tvheap->dirty = true;
     871             :                         }
     872             : 
     873             :                         /* make sure we use the correct capacity */
     874       52564 :                         if (ATOMstorage(bn->ttype) == TYPE_msk)
     875           0 :                                 bn->batCapacity = (BUN) (bn->theap->size * 8);
     876       52564 :                         else if (bn->ttype)
     877       52564 :                                 bn->batCapacity = (BUN) (bn->theap->size >> bn->tshift);
     878             :                         else
     879           0 :                                 bn->batCapacity = 0;
     880        5613 :                 } else if (BATatoms[tt].atomFix || tt != TYPE_void || ATOMextern(tt)) {
     881             :                         /* case (4): one-by-one BUN insert (really slow) */
     882             :                         BUN p, q, r = 0;
     883             : 
     884      150290 :                         BATloop(b, p, q) {
     885      144677 :                                 const void *t = BUNtail(bi, p);
     886             : 
     887      144677 :                                 if (bunfastapp_nocheck(bn, t) != GDK_SUCCEED) {
     888           0 :                                         goto bunins_failed;
     889             :                                 }
     890      144676 :                                 r++;
     891             :                         }
     892        5613 :                         bn->theap->dirty |= bi.count > 0;
     893             :                 } else if (tt != TYPE_void && bi.type == TYPE_void) {
     894             :                         /* case (4): optimized for unary void
     895             :                          * materialization */
     896             :                         oid cur = bi.tseq, *dst = (oid *) Tloc(bn, 0);
     897             :                         const oid inc = !is_oid_nil(cur);
     898             : 
     899             :                         bn->theap->free = bi.count * sizeof(oid);
     900             :                         bn->theap->dirty |= bi.count > 0;
     901             :                         for (BUN p = 0; p < bi.count; p++) {
     902             :                                 dst[p] = cur;
     903             :                                 cur += inc;
     904             :                         }
     905             :                 } else if (ATOMstorage(bi.type) == TYPE_msk) {
     906             :                         /* convert number of bits to number of bytes,
     907             :                          * and round the latter up to a multiple of
     908             :                          * 4 (copy in units of 4 bytes) */
     909             :                         bn->theap->free = (bi.count + 7) / 8;
     910             :                         bn->theap->free = (bn->theap->free + 3) & ~(size_t)3;
     911             :                         bn->theap->dirty |= bi.count > 0;
     912             :                         memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
     913             :                 } else {
     914             :                         /* case (4): optimized for simple array copy */
     915             :                         bn->theap->free = bi.count << bn->tshift;
     916             :                         bn->theap->dirty |= bi.count > 0;
     917             :                         memcpy(Tloc(bn, 0), bi.base, bn->theap->free);
     918             :                 }
     919             :                 /* copy all properties (size+other) from the source bat */
     920       58908 :                 BATsetcount(bn, bi.count);
     921             :         }
     922             :         /* set properties (note that types may have changed in the copy) */
     923      166433 :         if (ATOMtype(tt) == ATOMtype(bi.type)) {
     924       83329 :                 if (ATOMtype(tt) == TYPE_oid) {
     925       15639 :                         BATtseqbase(bn, bi.tseq);
     926             :                 } else {
     927       67690 :                         BATtseqbase(bn, oid_nil);
     928             :                 }
     929      109761 :                 BATkey(bn, BATtkey(b));
     930       83329 :                 bn->tsorted = BATtordered(b);
     931       83329 :                 bn->trevsorted = BATtrevordered(b);
     932       83329 :                 bn->batDirtydesc = true;
     933       83329 :                 bn->tnorevsorted = b->tnorevsorted;
     934       83329 :                 if (b->tnokey[0] != b->tnokey[1]) {
     935         894 :                         bn->tnokey[0] = b->tnokey[0];
     936         894 :                         bn->tnokey[1] = b->tnokey[1];
     937             :                 } else {
     938       82435 :                         bn->tnokey[0] = bn->tnokey[1] = 0;
     939             :                 }
     940       83329 :                 bn->tnosorted = b->tnosorted;
     941       83329 :                 bn->tnonil = b->tnonil;
     942       83329 :                 bn->tnil = b->tnil;
     943       83329 :                 bn->tminpos = bi.minpos;
     944       83329 :                 bn->tmaxpos = bi.maxpos;
     945       83329 :                 bn->tunique_est = bi.unique_est;
     946          51 :         } else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
     947          51 :                    ATOMcompare(tt) == ATOMcompare(b->ttype)) {
     948          51 :                 BUN h = BUNlast(b);
     949          51 :                 bn->tsorted = b->tsorted;
     950          51 :                 bn->trevsorted = b->trevsorted;
     951          51 :                 if (b->tkey)
     952          12 :                         BATkey(bn, true);
     953          51 :                 bn->tnonil = b->tnonil;
     954          51 :                 bn->tnil = b->tnil;
     955          51 :                 if (b->tnosorted > 0 && b->tnosorted < h)
     956           2 :                         bn->tnosorted = b->tnosorted;
     957             :                 else
     958          49 :                         bn->tnosorted = 0;
     959          51 :                 if (b->tnorevsorted > 0 && b->tnorevsorted < h)
     960           3 :                         bn->tnorevsorted = b->tnorevsorted;
     961             :                 else
     962          48 :                         bn->tnorevsorted = 0;
     963          51 :                 if (b->tnokey[0] < h &&
     964          51 :                     b->tnokey[1] < h &&
     965             :                     b->tnokey[0] != b->tnokey[1]) {
     966           0 :                         bn->tnokey[0] = b->tnokey[0];
     967           0 :                         bn->tnokey[1] = b->tnokey[1];
     968             :                 } else {
     969          51 :                         bn->tnokey[0] = bn->tnokey[1] = 0;
     970             :                 }
     971          51 :                 bn->tminpos = bi.minpos;
     972          51 :                 bn->tmaxpos = bi.maxpos;
     973          51 :                 bn->tunique_est = bi.unique_est;
     974             :         } else {
     975           0 :                 bn->tsorted = bn->trevsorted = false; /* set based on count later */
     976           0 :                 bn->tnonil = bn->tnil = false;
     977           0 :                 bn->tnosorted = bn->tnorevsorted = 0;
     978           0 :                 bn->tnokey[0] = bn->tnokey[1] = 0;
     979             :         }
     980       83380 :         if (BATcount(bn) <= 1) {
     981       41139 :                 bn->tsorted = ATOMlinear(b->ttype);
     982       41139 :                 bn->trevsorted = ATOMlinear(b->ttype);
     983       41139 :                 bn->tkey = true;
     984             :         }
     985       83380 :         if (!writable)
     986       32889 :                 bn->batRestricted = BAT_READ;
     987       83380 :         TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n",
     988             :                   ALGOBATPAR(b), ALGOBATPAR(bn));
     989       83380 :         bat_iterator_end(&bi);
     990       83380 :         return bn;
     991           0 :       bunins_failed:
     992           0 :         bat_iterator_end(&bi);
     993           0 :         BBPreclaim(bn);
     994           0 :         return NULL;
     995             : }
     996             : 
     997             : /* Append an array of values of length count to the bat.  For
     998             :  * fixed-sized values, `values' is an array of values, for
     999             :  * variable-sized values, `values' is an array of pointers to values.
    1000             :  * If values equals NULL, count times nil will be appended. */
    1001             : gdk_return
    1002    54087396 : BUNappendmulti(BAT *b, const void *values, BUN count, bool force)
    1003             : {
    1004             :         BUN p;
    1005             : 
    1006    54087396 :         BATcheck(b, GDK_FAIL);
    1007             : 
    1008    54087396 :         assert(!VIEWtparent(b));
    1009             : 
    1010    54087396 :         if (count == 0)
    1011             :                 return GDK_SUCCEED;
    1012             : 
    1013    54085927 :         p = BUNlast(b);         /* insert at end */
    1014    54085927 :         if (p == BUN_MAX || BATcount(b) + count >= BUN_MAX) {
    1015           0 :                 GDKerror("bat too large\n");
    1016           0 :                 return GDK_FAIL;
    1017             :         }
    1018             : 
    1019    54085927 :         ALIGNapp(b, force, GDK_FAIL);
    1020    54085927 :         b->batDirtydesc = true;
    1021             : 
    1022    54085927 :         if (b->ttype == TYPE_void && BATtdense(b)) {
    1023             :                 const oid *ovals = values;
    1024           0 :                 bool dense = b->batCount == 0 || (ovals != NULL && b->tseqbase + 1 == ovals[0]);
    1025           0 :                 if (ovals) {
    1026           0 :                         for (BUN i = 1; dense && i < count; i++) {
    1027           0 :                                 dense = ovals[i - 1] + 1 == ovals[i];
    1028             :                         }
    1029             :                 }
    1030           0 :                 if (dense) {
    1031           0 :                         if (b->batCount == 0)
    1032           0 :                                 b->tseqbase = ovals ? ovals[0] : oid_nil;
    1033           0 :                         BATsetcount(b, BATcount(b) + count);
    1034           0 :                         return GDK_SUCCEED;
    1035             :                 } else {
    1036             :                         /* we need to materialize b; allocate enough capacity */
    1037           0 :                         b->batCapacity = BATcount(b) + count;
    1038           0 :                         if (BATmaterialize(b) != GDK_SUCCEED)
    1039             :                                 return GDK_FAIL;
    1040             :                 }
    1041             :         }
    1042             : 
    1043    54085927 :         if (unshare_varsized_heap(b) != GDK_SUCCEED) {
    1044             :                 return GDK_FAIL;
    1045             :         }
    1046             : 
    1047    54084092 :         if (BATcount(b) + count > BATcapacity(b)) {
    1048             :                 /* if needed space exceeds a normal growth extend just
    1049             :                  * with what's needed */
    1050             :                 BUN ncap = BATcount(b) + count;
    1051        7266 :                 BUN grows = BATgrows(b);
    1052             : 
    1053             :                 if (ncap > grows)
    1054             :                         grows = ncap;
    1055        7266 :                 gdk_return rc = BATextend(b, grows);
    1056        7266 :                 if (rc != GDK_SUCCEED)
    1057             :                         return rc;
    1058             :         }
    1059             : 
    1060    54084092 :         if (count > BATcount(b) / GDK_UNIQUE_ESTIMATE_KEEP_FRACTION) {
    1061    12613245 :                 MT_lock_set(&b->theaplock);
    1062    12625730 :                 b->tunique_est = 0;
    1063    12625730 :                 MT_lock_unset(&b->theaplock);
    1064             :         }
    1065    54096552 :         b->theap->dirty = true;
    1066    54096552 :         const void *t = b->ttype == TYPE_msk ? &(msk){false} : ATOMnilptr(b->ttype);
    1067    54096552 :         if (b->ttype == TYPE_oid) {
    1068             :                 /* spend extra effort on oid (possible candidate list) */
    1069      250257 :                 if (values == NULL || is_oid_nil(((oid *) values)[0])) {
    1070          37 :                         b->tnil = true;
    1071          37 :                         b->tnonil = false;
    1072          37 :                         b->tsorted = false;
    1073          37 :                         b->trevsorted = false;
    1074          37 :                         b->tkey = false;
    1075          37 :                         b->tseqbase = oid_nil;
    1076             :                 } else {
    1077      250220 :                         if (b->batCount == 0) {
    1078        7607 :                                 b->tsorted = true;
    1079        7607 :                                 b->trevsorted = true;
    1080        7607 :                                 b->tkey = true;
    1081        7607 :                                 b->tseqbase = count == 1 ? ((oid *) values)[0] : oid_nil;
    1082        7607 :                                 b->tnil = false;
    1083        7607 :                                 b->tnonil = true;
    1084             :                         } else {
    1085      242613 :                                 if (!is_oid_nil(b->tseqbase) &&
    1086       97877 :                                     (count > 1 ||
    1087       97877 :                                      b->tseqbase + b->batCount != ((oid *) values)[0]))
    1088        1696 :                                         b->tseqbase = oid_nil;
    1089      242613 :                                 if (b->tsorted && ((oid *) b->theap->base)[b->batCount - 1] > ((oid *) values)[0]) {
    1090         318 :                                         b->tsorted = false;
    1091         318 :                                         if (b->tnosorted == 0)
    1092         318 :                                                 b->tnosorted = b->batCount;
    1093             :                                 }
    1094      242613 :                                 if (b->trevsorted && ((oid *) b->theap->base)[b->batCount - 1] < ((oid *) values)[0]) {
    1095        6765 :                                         b->trevsorted = false;
    1096        6765 :                                         if (b->tnorevsorted == 0)
    1097        6765 :                                                 b->tnorevsorted = b->batCount;
    1098             :                                 }
    1099      242613 :                                 if (b->tkey) {
    1100      237583 :                                         if (((oid *) b->theap->base)[b->batCount - 1] == ((oid *) values)[0]) {
    1101          20 :                                                 b->tkey = false;
    1102          20 :                                                 if (b->tnokey[1] == 0) {
    1103          20 :                                                         b->tnokey[0] = b->batCount - 1;
    1104          20 :                                                         b->tnokey[1] = b->batCount;
    1105             :                                                 }
    1106      237563 :                                         } else if (!b->tsorted && !b->trevsorted)
    1107         539 :                                                 b->tkey = false;
    1108             :                                 }
    1109             :                         }
    1110      250220 :                         for (BUN i = 1; i < count; i++) {
    1111           0 :                                 if (is_oid_nil(((oid *) values)[i])) {
    1112           0 :                                         b->tnil = true;
    1113           0 :                                         b->tnonil = false;
    1114           0 :                                         b->tsorted = false;
    1115           0 :                                         b->trevsorted = false;
    1116           0 :                                         b->tkey = false;
    1117           0 :                                         b->tseqbase = oid_nil;
    1118           0 :                                         break;
    1119             :                                 }
    1120           0 :                                 if (((oid *) values)[i - 1] == ((oid *) values)[i]) {
    1121           0 :                                         b->tkey = false;
    1122           0 :                                         if (b->tnokey[1] == 0) {
    1123           0 :                                                 b->tnokey[0] = b->batCount + i - 1;
    1124           0 :                                                 b->tnokey[1] = b->batCount + i;
    1125             :                                         }
    1126           0 :                                 } else if (((oid *) values)[i - 1] > ((oid *) values)[i]) {
    1127           0 :                                         b->tsorted = false;
    1128           0 :                                         if (b->tnosorted == 0)
    1129           0 :                                                 b->tnosorted = b->batCount + i;
    1130           0 :                                         if (!b->trevsorted)
    1131           0 :                                                 b->tkey = false;
    1132             :                                 } else {
    1133           0 :                                         if (((oid *) values)[i - 1] + 1 != ((oid *) values)[i])
    1134           0 :                                                 b->tseqbase = oid_nil;
    1135           0 :                                         b->trevsorted = false;
    1136           0 :                                         if (b->tnorevsorted == 0)
    1137           0 :                                                 b->tnorevsorted = b->batCount + i;
    1138           0 :                                         if (!b->tsorted)
    1139           0 :                                                 b->tkey = false;
    1140             :                                 }
    1141             :                         }
    1142             :                 }
    1143    53846295 :         } else if (!ATOMlinear(b->ttype)) {
    1144       17993 :                 b->tnil = b->tnonil = false;
    1145       17993 :                 b->tsorted = b->trevsorted = b->tkey = false;
    1146    53828302 :         } else if (b->batCount == 0) {
    1147      348855 :                 if (values == NULL) {
    1148           0 :                         b->tsorted = b->trevsorted = true;
    1149           0 :                         b->tkey = count == 1;
    1150           0 :                         b->tnil = true;
    1151           0 :                         b->tnonil = false;
    1152             :                 } else {
    1153      348855 :                         b->tsorted = b->trevsorted = b->tkey = count == 1;
    1154      348855 :                         b->tnil = b->tnonil = false;
    1155             :                 }
    1156             :         } else {
    1157    53479447 :                 b->tnil = values == NULL;
    1158    53479447 :                 b->tnonil = false;
    1159    53479447 :                 b->tsorted = b->trevsorted = b->tkey = false;
    1160             :         }
    1161    54096552 :         MT_rwlock_wrlock(&b->thashlock);
    1162   108252971 :         if (values && b->ttype) {
    1163    54124690 :                 int (*atomcmp) (const void *, const void *) = ATOMcompare(b->ttype);
    1164    54124690 :                 const void *atomnil = ATOMnilptr(b->ttype);
    1165    54124690 :                 MT_lock_set(&b->theaplock);
    1166    54119858 :                 BUN minpos = b->tminpos;
    1167    54119858 :                 BUN maxpos = b->tmaxpos;
    1168    54119858 :                 MT_lock_unset(&b->theaplock);
    1169             :                 const void *minvalp = NULL, *maxvalp = NULL;
    1170    54124435 :                 BATiter bi = bat_iterator_nolock(b);
    1171    54120396 :                 if (minpos != BUN_NONE)
    1172    50172376 :                         minvalp = BUNtail(bi, minpos);
    1173    54120396 :                 if (maxpos != BUN_NONE)
    1174    50197418 :                         maxvalp = BUNtail(bi, maxpos);
    1175    54120396 :                 if (b->tvarsized) {
    1176    20917673 :                         const void *vbase = b->tvheap->base;
    1177    41854576 :                         for (BUN i = 0; i < count; i++) {
    1178    20939574 :                                 t = ((void **) values)[i];
    1179    20939574 :                                 gdk_return rc = tfastins_nocheckVAR(b, p, t);
    1180    20939633 :                                 if (rc != GDK_SUCCEED) {
    1181           0 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1182           0 :                                         return rc;
    1183             :                                 }
    1184    20939633 :                                 if (vbase != b->tvheap->base) {
    1185             :                                         /* tvheap changed location, so
    1186             :                                          * pointers may need to be
    1187             :                                          * updated (not if they were
    1188             :                                          * initialized from t below, but
    1189             :                                          * we don't know) */
    1190        2498 :                                         bi = bat_iterator_nolock(b);
    1191        2498 :                                         vbase = b->tvheap->base;
    1192        2498 :                                         if (minpos != BUN_NONE)
    1193        2042 :                                                 minvalp = BUNtvar(bi, minpos);
    1194        2498 :                                         if (maxpos != BUN_NONE)
    1195        2042 :                                                 maxvalp = BUNtvar(bi, maxpos);
    1196             :                                 }
    1197    20939633 :                                 if (b->thash) {
    1198        8664 :                                         HASHappend_locked(b, p, t);
    1199             :                                 }
    1200    20939633 :                                 if (atomcmp(t, atomnil) != 0) {
    1201    19560360 :                                         if (p == 0) {
    1202             :                                                 minpos = maxpos = 0;
    1203             :                                                 minvalp = maxvalp = t;
    1204             :                                         } else {
    1205    37691012 :                                                 if (minpos != BUN_NONE &&
    1206    18297842 :                                                     atomcmp(minvalp, t) > 0) {
    1207             :                                                         minpos = p;
    1208             :                                                         minvalp = t;
    1209             :                                                 }
    1210    37687798 :                                                 if (maxpos != BUN_NONE &&
    1211    18295707 :                                                     atomcmp(maxvalp, t) < 0) {
    1212             :                                                         maxpos = p;
    1213             :                                                         maxvalp = t;
    1214             :                                                 }
    1215             :                                         }
    1216             :                                 }
    1217    20936903 :                                 p++;
    1218             :                         }
    1219    33202723 :                 } else if (ATOMstorage(b->ttype) == TYPE_msk) {
    1220             :                         minpos = maxpos = BUN_NONE;
    1221             :                         minvalp = maxvalp = NULL;
    1222       35986 :                         for (BUN i = 0; i < count; i++) {
    1223       17993 :                                 t = (void *) ((char *) values + (i << b->tshift));
    1224       17993 :                                 mskSetVal(b, p, *(msk *) t);
    1225       17993 :                                 p++;
    1226             :                         }
    1227             :                 } else {
    1228    66422699 :                         for (BUN i = 0; i < count; i++) {
    1229    33254986 :                                 t = (void *) ((char *) values + (i << b->tshift));
    1230    33254986 :                                 gdk_return rc = tfastins_nocheckFIX(b, p, t);
    1231    33249051 :                                 if (rc != GDK_SUCCEED) {
    1232           0 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1233           0 :                                         return rc;
    1234             :                                 }
    1235    33249051 :                                 if (b->thash) {
    1236     1096604 :                                         HASHappend_locked(b, p, t);
    1237             :                                 }
    1238    33249051 :                                 if (atomcmp(t, atomnil) != 0) {
    1239    32411803 :                                         if (p == 0) {
    1240             :                                                 minpos = maxpos = 0;
    1241             :                                                 minvalp = maxvalp = t;
    1242             :                                         } else {
    1243    64090805 :                                                 if (minpos != BUN_NONE &&
    1244    31862013 :                                                     atomcmp(minvalp, t) > 0) {
    1245             :                                                         minpos = p;
    1246             :                                                         minvalp = t;
    1247             :                                                 }
    1248    64112216 :                                                 if (maxpos != BUN_NONE &&
    1249    31886739 :                                                     atomcmp(maxvalp, t) < 0) {
    1250             :                                                         maxpos = p;
    1251             :                                                         maxvalp = t;
    1252             :                                                 }
    1253             :                                         }
    1254             :                                 }
    1255    33237969 :                                 p++;
    1256             :                         }
    1257             :                 }
    1258    54100708 :                 MT_lock_set(&b->theaplock);
    1259    54121914 :                 b->tminpos = minpos;
    1260    54121914 :                 b->tmaxpos = maxpos;
    1261    54121914 :                 MT_lock_unset(&b->theaplock);
    1262             :         } else {
    1263           0 :                 for (BUN i = 0; i < count; i++) {
    1264           0 :                         gdk_return rc = tfastins_nocheck(b, p, t);
    1265          14 :                         if (rc != GDK_SUCCEED) {
    1266           0 :                                 MT_rwlock_wrunlock(&b->thashlock);
    1267           0 :                                 return rc;
    1268             :                         }
    1269          14 :                         if (b->thash) {
    1270           0 :                                 HASHappend_locked(b, p, t);
    1271             :                         }
    1272          14 :                         p++;
    1273             :                 }
    1274             :         }
    1275    54129235 :         MT_rwlock_wrunlock(&b->thashlock);
    1276    54121718 :         BATsetcount(b, p);
    1277             : 
    1278    54119596 :         IMPSdestroy(b); /* no support for inserts in imprints yet */
    1279    54108122 :         OIDXdestroy(b);
    1280    54092017 :         return GDK_SUCCEED;
    1281             : }
    1282             : 
    1283             : /* Append a single value to the bat by calling BUNappendmulti with a count of 1; the result of BUNappendmulti is returned unchanged. */
    1284             : gdk_return
    1285    40553035 : BUNappend(BAT *b, const void *t, bool force)
    1286             : {
    1287    40553035 :         return BUNappendmulti(b, b->ttype && b->tvarsized ? (const void *) &t : (const void *) t, 1, force); /* for a non-void variable-sized column the address of t is passed, since BUNappendmulti then reads its values argument as an array of value pointers; otherwise t itself is passed as the value buffer */
    1288             : }
    1289             : 
    1290             : gdk_return /* BUNdelete: remove the value with oid o from b; returns GDK_SUCCEED when o is outside b's oid range (nothing to do) or the value was removed, GDK_FAIL when the position is below b->batInserted or when ATOMunfix/BATmaterialize fails */
    1291           4 : BUNdelete(BAT *b, oid o)
    1292             : {
    1293             :         BUN p; /* position of o relative to b->hseqbase */
    1294           4 :         BATiter bi = bat_iterator_nolock(b);
    1295             :         const void *val; /* first the value being deleted, later the value of the last BUN */
    1296             : 
    1297           4 :         assert(!is_oid_nil(b->hseqbase) || BATcount(b) == 0);
    1298           4 :         if (o < b->hseqbase || o >= b->hseqbase + BATcount(b)) {
    1299             :                 /* value already not there: o lies outside
    1300             :                  * [hseqbase, hseqbase + count) */
    1301             :         }
    1302           4 :         assert(BATcount(b) > 0); /* follows from "if" above */
    1303           4 :         p = o - b->hseqbase;
    1304           4 :         if (p < b->batInserted) { /* positions below batInserted are refused with the error below */
    1305           0 :                 GDKerror("cannot delete committed value\n");
    1306           0 :                 return GDK_FAIL;
    1307             :         }
    1308           4 :         b->batDirtydesc = true;
    1309           4 :         val = BUNtail(bi, p);
    1310             :         /* writing the values should be locked, reading could be done
    1311             :          * unlocked (since we're the only thread that should be changing
    1312             :          * anything) */
    1313           4 :         MT_lock_set(&b->theaplock);
    1314           4 :         if (b->tmaxpos == p) /* the deleted position can no longer be the recorded max position */
    1315           1 :                 b->tmaxpos = BUN_NONE;
    1316           4 :         if (b->tminpos == p) /* likewise for the recorded min position */
    1317           0 :                 b->tminpos = BUN_NONE;
    1318           4 :         MT_lock_unset(&b->theaplock);
    1319           4 :         if (ATOMunfix(b->ttype, val) != GDK_SUCCEED)
    1320             :                 return GDK_FAIL; /* NOTE(review): batDirtydesc and tminpos/tmaxpos have already been modified when this path is taken */
    1321           4 :         HASHdelete(b, p, val);
    1322           4 :         ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(bi, p));
    1323           4 :         if (p != BUNlast(b) - 1 &&
    1324           2 :             (b->ttype != TYPE_void || BATtdense(b))) {
    1325             :                 /* replace to-be-deleted BUN with last BUN; materialize
    1326             :                  * void column before doing so */
    1327           2 :                 if (b->ttype == TYPE_void &&
    1328           0 :                     BATmaterialize(b) != GDK_SUCCEED)
    1329             :                         return GDK_FAIL;
    1330           2 :                 if (ATOMstorage(b->ttype) == TYPE_msk) {
    1331           0 :                         msk mval = mskGetVal(b, BUNlast(b) - 1); /* value of the last BUN */
    1332           0 :                         HASHdelete(b, BUNlast(b) - 1, &mval);
    1333           0 :                         mskSetVal(b, p, mval); /* move it into the freed position */
    1334             :                         /* don't leave garbage */
    1335           0 :                         mskClr(b, BUNlast(b) - 1);
    1336           0 :                         HASHinsert(b, p, &mval);
    1337             :                 } else {
    1338           2 :                         val = Tloc(b, BUNlast(b) - 1); /* val now refers to the last BUN */
    1339           2 :                         HASHdelete(b, BUNlast(b) - 1, val);
    1340           2 :                         memcpy(Tloc(b, p), val, Tsize(b)); /* copy the last BUN over the deleted one */
    1341           2 :                         HASHinsert(b, p, val);
    1342           2 :                         MT_lock_set(&b->theaplock);
    1343           2 :                         if (b->tminpos == BUNlast(b) - 1) /* a recorded min/max position that referred to the last BUN now refers to p */
    1344           0 :                                 b->tminpos = p;
    1345           2 :                         if (b->tmaxpos == BUNlast(b) - 1)
    1346           1 :                                 b->tmaxpos = p;
    1347           2 :                         MT_lock_unset(&b->theaplock);
    1348             :                 }
    1349             :                 /* no longer sorted */
    1350           2 :                 b->tsorted = b->trevsorted = false;
    1351           2 :                 b->theap->dirty = true;
    1352             :         }
    1353           4 :         if (b->tnosorted >= p) /* reset tnosorted/tnorevsorted to 0 when they are at or beyond the deleted position */
    1354           0 :                 b->tnosorted = 0;
    1355           4 :         if (b->tnorevsorted >= p)
    1356           1 :                 b->tnorevsorted = 0;
    1357           4 :         MT_lock_set(&b->theaplock);
    1358           4 :         b->batCount--;
    1359           4 :         if (BATcount(b) < GDK_UNIQUE_ESTIMATE_KEEP_FRACTION) /* the unique estimate is dropped once the count falls below this constant */
    1360           4 :                 b->tunique_est = 0;
    1361           4 :         MT_lock_unset(&b->theaplock);
    1362           4 :         if (b->batCount <= 1) {
    1363             :                 /* some trivial properties */
    1364           0 :                 b->tkey = true;
    1365           0 :                 b->tsorted = b->trevsorted = true;
    1366           0 :                 b->tnosorted = b->tnorevsorted = 0;
    1367           0 :                 if (b->batCount == 0) { /* an empty bat is marked as containing no nils */
    1368           0 :                         b->tnil = false;
    1369           0 :                         b->tnonil = true;
    1370             :                 }
    1371             :         }
    1372           4 :         IMPSdestroy(b);
    1373           4 :         OIDXdestroy(b);
    1374           4 :         return GDK_SUCCEED;
    1375             : }
    1376             : 
    1377             : /* @-  BUN replace
    1378             :  * The last operation in this context is BUN replace. It assumes that
    1379             :  * the header denotes a key. The old value association is destroyed
    1380             :  * (if it exists in the first place) and the new value takes its
    1381             :  * place.
    1382             :  *
    1383             :  * In order to make updates on void columns workable, replaces on them
    1384             :  * are always done in-place. Performing them without bun-movements
    1385             :  * greatly simplifies the problem. The 'downside' is that when
    1386             :  * transaction management has to be performed, replaced values should
    1387             :  * be saved explicitly.
    1388             :  */
    1389             : static gdk_return
    1390      657770 : BUNinplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force, bool autoincr)
    1391             : {
    1392      657770 :         BUN last = BUNlast(b) - 1;
    1393      657770 :         BATiter bi = bat_iterator_nolock(b);
    1394             :         int tt;
    1395             :         BUN prv, nxt;
    1396             :         const void *val;
    1397             : 
    1398             :         /* zap alignment info */
    1399      657475 :         if (!force && (b->batRestricted != BAT_WRITE || b->batSharecnt > 0)) {
    1400           0 :                 GDKerror("access denied to %s, aborting.\n",
    1401             :                          BATgetId(b));
    1402           0 :                 return GDK_FAIL;
    1403             :         }
    1404      657475 :         MT_lock_set(&b->theaplock);
    1405      657841 :         if (b->ttype == TYPE_void) {
    1406           0 :                 PROPdestroy(b);
    1407           0 :                 b->tminpos = BUN_NONE;
    1408           0 :                 b->tmaxpos = BUN_NONE;
    1409           0 :                 b->tunique_est = 0.0;
    1410      657841 :         } else if (count > BATcount(b) / GDK_UNIQUE_ESTIMATE_KEEP_FRACTION) {
    1411      316993 :                 b->tunique_est = 0;
    1412             :         }
    1413      657841 :         BUN minpos = b->tminpos;
    1414      657841 :         BUN maxpos = b->tmaxpos;
    1415      657841 :         MT_lock_unset(&b->theaplock);
    1416      657750 :         MT_rwlock_wrlock(&b->thashlock);
    1417     1315287 :         for (BUN i = 0; i < count; i++) {
    1418      657886 :                 BUN p = autoincr ? positions[0] - b->hseqbase + i : positions[i] - b->hseqbase;
    1419      657645 :                 const void *t = b->ttype && b->tvarsized ?
    1420      802368 :                         ((const void **) values)[i] :
    1421      513404 :                         (const void *) ((const char *) values + (i << b->tshift));
    1422     1310911 :                 const bool isnil = ATOMlinear(b->ttype) &&
    1423      653112 :                         ATOMcmp(b->ttype, t, ATOMnilptr(b->ttype)) == 0;
    1424             : 
    1425             :                 /* retrieve old value, but if this comes from the
    1426             :                  * logger, we need to deal with offsets that point
    1427             :                  * outside of the valid vheap */
    1428      657799 :                 if (b->tvarsized) {
    1429      144498 :                         if (b->ttype) {
    1430      144498 :                                 size_t off = BUNtvaroff(bi, p);
    1431      144498 :                                 if (off < bi.vhfree)
    1432      144479 :                                         val = bi.vh->base + off;
    1433             :                                 else
    1434             :                                         val = NULL; /* bad offset */
    1435             :                         } else {
    1436           0 :                                 val = BUNtpos(bi, p);
    1437             :                         }
    1438             :                 } else {
    1439      513301 :                         val = BUNtloc(bi, p);
    1440             :                 }
    1441             : 
    1442      657780 :                 if (val) {
    1443      657780 :                         if (ATOMcmp(b->ttype, val, t) == 0)
    1444      236386 :                                 continue; /* nothing to do */
    1445      421364 :                         if (!isnil &&
    1446           2 :                             b->tnil &&
    1447           2 :                             ATOMcmp(b->ttype, val, ATOMnilptr(b->ttype)) == 0) {
    1448             :                                 /* if old value is nil and new value
    1449             :                                  * isn't, we're not sure anymore about
    1450             :                                  * the nil property, so we must clear
    1451             :                                  * it */
    1452           0 :                                 b->tnil = false;
    1453             :                         }
    1454      421364 :                         if (b->ttype != TYPE_void) {
    1455      421337 :                                 if (maxpos != BUN_NONE) {
    1456      241074 :                                         if (!isnil && ATOMcmp(b->ttype, BUNtail(bi, maxpos), t) < 0) {
    1457             :                                                 /* new value is larger
    1458             :                                                  * than previous
    1459             :                                                  * largest */
    1460             :                                                 maxpos = p;
    1461      200668 :                                         } else if (maxpos == p && ATOMcmp(b->ttype, BUNtail(bi, maxpos), t) != 0) {
    1462             :                                                 /* old value is equal to
    1463             :                                                  * largest and new value
    1464             :                                                  * is smaller or nil (see
    1465             :                                                  * above), so we don't
    1466             :                                                  * know anymore which is
    1467             :                                                  * the largest */
    1468             :                                                 maxpos = BUN_NONE;
    1469             :                                         }
    1470             :                                 }
    1471      421337 :                                 if (minpos != BUN_NONE) {
    1472      170128 :                                         if (!isnil && ATOMcmp(b->ttype, BUNtail(bi, minpos), t) > 0) {
    1473             :                                                 /* new value is smaller
    1474             :                                                  * than previous
    1475             :                                                  * smallest */
    1476             :                                                 minpos = p;
    1477      169765 :                                         } else if (minpos == p && ATOMcmp(b->ttype, BUNtail(bi, minpos), t) != 0) {
    1478             :                                                 /* old value is equal to
    1479             :                                                  * smallest and new value
    1480             :                                                  * is larger or nil (see
    1481             :                                                  * above), so we don't
    1482             :                                                  * know anymore which is
    1483             :                                                  * the smallest */
    1484             :                                                 minpos = BUN_NONE;
    1485             :                                         }
    1486             :                                 }
    1487             :                         }
    1488      421364 :                         HASHdelete_locked(b, p, val);   /* first delete old value from hash */
    1489             :                 } else {
    1490             :                         /* out of range old value, so the properties and
    1491             :                          * hash cannot be trusted */
    1492          19 :                         PROPdestroy(b);
    1493           0 :                         Hash *hs = b->thash;
    1494           0 :                         if (hs) {
    1495           0 :                                 b->thash = NULL;
    1496           0 :                                 doHASHdestroy(b, hs);
    1497             :                         }
    1498           0 :                         MT_lock_set(&b->theaplock);
    1499             :                         minpos = BUN_NONE;
    1500             :                         maxpos = BUN_NONE;
    1501           0 :                         b->tunique_est = 0.0;
    1502           0 :                         MT_lock_unset(&b->theaplock);
    1503             :                 }
    1504      421343 :                 OIDXdestroy(b);
    1505      421401 :                 IMPSdestroy(b);
    1506             : 
    1507      496969 :                 if (b->tvarsized && b->ttype) {
    1508             :                         var_t _d;
    1509             :                         ptr _ptr;
    1510       75535 :                         _ptr = BUNtloc(bi, p);
    1511       75535 :                         switch (b->twidth) {
    1512        7557 :                         default:        /* only three or four cases possible */
    1513        7557 :                                 _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
    1514        7557 :                                 break;
    1515       62893 :                         case 2:
    1516       62893 :                                 _d = (var_t) * (uint16_t *) _ptr + GDK_VAROFFSET;
    1517       62893 :                                 break;
    1518        5085 :                         case 4:
    1519        5085 :                                 _d = (var_t) * (uint32_t *) _ptr;
    1520        5085 :                                 break;
    1521             : #if SIZEOF_VAR_T == 8
    1522           0 :                         case 8:
    1523           0 :                                 _d = (var_t) * (uint64_t *) _ptr;
    1524           0 :                                 break;
    1525             : #endif
    1526             :                         }
    1527       75535 :                         if (ATOMreplaceVAR(b, &_d, t) != GDK_SUCCEED) {
    1528           0 :                                 MT_rwlock_wrunlock(&b->thashlock);
    1529           0 :                                 return GDK_FAIL;
    1530             :                         }
    1531       75528 :                         if (b->twidth < SIZEOF_VAR_T &&
    1532       75528 :                             (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 << b->tshift))) {
    1533             :                                 /* doesn't fit in current heap, upgrade it */
    1534          55 :                                 if (GDKupgradevarheap(b, _d, 0, bi.count) != GDK_SUCCEED) {
    1535           0 :                                         MT_rwlock_wrunlock(&b->thashlock);
    1536           0 :                                         return GDK_FAIL;
    1537             :                                 }
    1538             :                         }
    1539             :                         /* reinitialize iterator after possible heap upgrade */
    1540       75528 :                         bi = bat_iterator_nolock(b);
    1541       75537 :                         _ptr = BUNtloc(bi, p);
    1542       75537 :                         switch (b->twidth) {
    1543        7502 :                         default:        /* only three or four cases possible */
    1544        7502 :                                 * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET);
    1545        7502 :                                 break;
    1546       62950 :                         case 2:
    1547       62950 :                                 * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET);
    1548       62950 :                                 break;
    1549        5085 :                         case 4:
    1550        5085 :                                 * (uint32_t *) _ptr = (uint32_t) _d;
    1551        5085 :                                 break;
    1552             : #if SIZEOF_VAR_T == 8
    1553           0 :                         case 8:
    1554           0 :                                 * (uint64_t *) _ptr = (uint64_t) _d;
    1555           0 :                                 break;
    1556             : #endif
    1557             :                         }
    1558      345897 :                 } else if (ATOMstorage(b->ttype) == TYPE_msk) {
    1559        4601 :                         mskSetVal(b, p, * (msk *) t);
    1560             :                 } else {
    1561      341296 :                         assert(BATatoms[b->ttype].atomPut == NULL);
    1562      341296 :                         if (ATOMfix(b->ttype, t) != GDK_SUCCEED ||
    1563      341298 :                             ATOMunfix(b->ttype, BUNtloc(bi, p)) != GDK_SUCCEED) {
    1564           0 :                                 MT_rwlock_wrunlock(&b->thashlock);
    1565           0 :                                 return GDK_FAIL;
    1566             :                         }
    1567      341302 :                         switch (ATOMsize(b->ttype)) {
    1568             :                         case 0:      /* void */
    1569             :                                 break;
    1570       17086 :                         case 1:
    1571       17086 :                                 ((bte *) b->theap->base)[p] = * (bte *) t;
    1572       17086 :                                 break;
    1573        6187 :                         case 2:
    1574        6187 :                                 ((sht *) b->theap->base)[p] = * (sht *) t;
    1575        6187 :                                 break;
    1576      172899 :                         case 4:
    1577      172899 :                                 ((int *) b->theap->base)[p] = * (int *) t;
    1578      172899 :                                 break;
    1579      145130 :                         case 8:
    1580      145130 :                                 ((lng *) b->theap->base)[p] = * (lng *) t;
    1581      145130 :                                 break;
    1582           0 :                         case 16:
    1583             : #ifdef HAVE_HGE
    1584           0 :                                 ((hge *) b->theap->base)[p] = * (hge *) t;
    1585             : #else
    1586             :                                 ((uuid *) b->theap->base)[p] = * (uuid *) t;
    1587             : #endif
    1588           0 :                                 break;
    1589           0 :                         default:
    1590           0 :                                 memcpy(BUNtloc(bi, p), t, ATOMsize(b->ttype));
    1591           0 :                                 break;
    1592             :                         }
    1593             :                 }
    1594             : 
    1595      421440 :                 HASHinsert_locked(b, p, t);     /* insert new value into hash */
    1596             : 
    1597      421394 :                 tt = b->ttype;
    1598      421394 :                 prv = p > 0 ? p - 1 : BUN_NONE;
    1599      421394 :                 nxt = p < last ? p + 1 : BUN_NONE;
    1600             : 
    1601      421394 :                 if (BATtordered(b)) {
    1602         503 :                         if (prv != BUN_NONE &&
    1603          47 :                             ATOMcmp(tt, t, BUNtail(bi, prv)) < 0) {
    1604           1 :                                 b->tsorted = false;
    1605           1 :                                 b->tnosorted = p;
    1606         485 :                         } else if (nxt != BUN_NONE &&
    1607          30 :                                    ATOMcmp(tt, t, BUNtail(bi, nxt)) > 0) {
    1608          28 :                                 b->tsorted = false;
    1609          28 :                                 b->tnosorted = nxt;
    1610         427 :                         } else if (b->ttype != TYPE_void && BATtdense(b)) {
    1611           0 :                                 if (prv != BUN_NONE &&
    1612           0 :                                     1 + * (oid *) BUNtloc(bi, prv) != * (oid *) t) {
    1613           0 :                                         b->tseqbase = oid_nil;
    1614           0 :                                 } else if (nxt != BUN_NONE &&
    1615           0 :                                            * (oid *) BUNtloc(bi, nxt) != 1 + * (oid *) t) {
    1616           0 :                                         b->tseqbase = oid_nil;
    1617           0 :                                 } else if (prv == BUN_NONE &&
    1618           0 :                                            nxt == BUN_NONE) {
    1619           0 :                                         b->tseqbase = * (oid *) t;
    1620             :                                 }
    1621             :                         }
    1622      420938 :                 } else if (b->tnosorted >= p)
    1623        1012 :                         b->tnosorted = 0;
    1624      421394 :                 if (BATtrevordered(b)) {
    1625         604 :                         if (prv != BUN_NONE &&
    1626          99 :                             ATOMcmp(tt, t, BUNtail(bi, prv)) > 0) {
    1627           0 :                                 b->trevsorted = false;
    1628           0 :                                 b->tnorevsorted = p;
    1629         505 :                         } else if (nxt != BUN_NONE &&
    1630           0 :                                    ATOMcmp(tt, t, BUNtail(bi, nxt)) < 0) {
    1631           0 :                                 b->trevsorted = false;
    1632           0 :                                 b->tnorevsorted = nxt;
    1633             :                         }
    1634      420889 :                 } else if (b->tnorevsorted >= p)
    1635         845 :                         b->tnorevsorted = 0;
    1636      421394 :                 if (((b->ttype != TYPE_void) & b->tkey) && b->batCount > 1) {
    1637         286 :                         BATkey(b, false);
    1638      421108 :                 } else if (!b->tkey && (b->tnokey[0] == p || b->tnokey[1] == p))
    1639         789 :                         b->tnokey[0] = b->tnokey[1] = 0;
    1640      421394 :                 if (b->tnonil && ATOMstorage(b->ttype) != TYPE_msk)
    1641       92557 :                         b->tnonil = t && ATOMcmp(b->ttype, t, ATOMnilptr(b->ttype)) != 0;
    1642             :         }
    1643      657401 :         MT_rwlock_wrunlock(&b->thashlock);
    1644      657820 :         MT_lock_set(&b->theaplock);
    1645      657314 :         b->tminpos = minpos;
    1646      657314 :         b->tmaxpos = maxpos;
    1647      657314 :         b->theap->dirty = true;
    1648      657314 :         if (b->tvheap)
    1649      144390 :                 b->tvheap->dirty = true;
    1650      657314 :         MT_lock_unset(&b->theaplock);
    1651             : 
    1652      657668 :         return GDK_SUCCEED;
    1653             : }
    1654             : 
    1655             : /* Replace multiple values given by their positions with the given values.  Thin wrapper around BUNinplacemulti; GDK_FAIL is the failure value handed to BATcheck and is also returned when BATmaterialize does not succeed. */
    1656             : gdk_return
    1657      156062 : BUNreplacemulti(BAT *b, const oid *positions, const void *values, BUN count, bool force)
    1658             : {
    1659      156062 :         BATcheck(b, GDK_FAIL);
    1660             : 
    1661      156062 :         if (b->ttype == TYPE_void && BATmaterialize(b) != GDK_SUCCEED) /* BATmaterialize is called on a void tail before anything is written */
    1662             :                 return GDK_FAIL;
    1663             : 
    1664      156062 :         return BUNinplacemulti(b, positions, values, count, force, false); /* final false: presumably "positions is an array, not an auto-incremented start" -- TODO confirm against BUNinplacemulti */
    1665             : }
    1666             : 
    1667             : /* Replace multiple values starting from a given position with the given
    1668             :  * values.  A void tail is passed through BATmaterialize first; the single start position is then handed to BUNinplacemulti by address with the final argument set to true. */
    1669             : gdk_return
    1670      498309 : BUNreplacemultiincr(BAT *b, oid position, const void *values, BUN count, bool force)
    1671             : {
    1672      498309 :         BATcheck(b, GDK_FAIL);
    1673             : 
    1674      498309 :         if (b->ttype == TYPE_void && BATmaterialize(b) != GDK_SUCCEED)
    1675             :                 return GDK_FAIL;
    1676             : 
    1677      498309 :         return BUNinplacemulti(b, &position, values, count, force, true); /* final true: presumably tells BUNinplacemulti to step on from the one position given -- TODO confirm */
    1678             : }
    1679             : 
    1680             : gdk_return /* single-value replace: forwards one id and one value to BUNreplacemulti with count 1 */
    1681      156062 : BUNreplace(BAT *b, oid id, const void *t, bool force)
    1682             : {
    1683      156062 :         return BUNreplacemulti(b, &id, b->ttype && b->tvarsized ? (const void *) &t : t, 1, force); /* non-void var-sized tail: pass the address of the value pointer (a one-element pointer array), otherwise the value pointer itself.  NOTE(review): b is dereferenced here before BUNreplacemulti gets to run its BATcheck */
    1684             : }
    1685             : 
    1686             : /* very much like BUNreplace, but this doesn't make any changes if the
    1687             :  * tail column is void.  Fails with GDK_FAIL (after GDKerror) when id lies outside hseqbase .. hseqbase + BATcount(b) - 1. */
    1688             : gdk_return
    1689        3451 : void_inplace(BAT *b, oid id, const void *val, bool force)
    1690             : {
    1691        3451 :         assert(id >= b->hseqbase && id < b->hseqbase + BATcount(b));
    1692             :         if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) { /* same range test as the assert, repeated as a runtime check for builds where assert is compiled out */
    1693             :                 GDKerror("id out of range\n");
    1694             :                 return GDK_FAIL;
    1695             :         }
    1696        3451 :         if (b->ttype == TYPE_void)
    1697             :                 return GDK_SUCCEED; /* void tail: report success without calling BUNinplacemulti */
    1698        3451 :         return BUNinplacemulti(b, &id, b->ttype && b->tvarsized ? (const void *) &val : (const void *) val, 1, force, false); /* non-void var-sized tail: pass the address of the value pointer, otherwise the value pointer itself; no BATcheck or BATmaterialize on this path */
    1699             : }
    1700             : 
    1701             : /*
    1702             :  * @- BUN Lookup
    1703             :  * Location of a BUN using a value should use the available indexes to
    1704             :  * speed up access. If indexes are lacking then a hash index is
    1705             :  * constructed under the assumption that 1) multiple access to the BAT
    1706             :  * can be expected and 2) building the hash is only slightly more
    1707             :  * expensive than the full linear scan.  BUN_NONE is returned if no
    1708             :  * such element could be found.  In those cases where the type is
    1709             :  * known and a hash index is available, one should use the inline
    1710             :  * functions to speed-up processing.
    1711             :  */
    1712             : static BUN /* lookup by plain scan: position of the first tail value BATloop encounters that compares equal to v, else BUN_NONE */
    1713           0 : slowfnd(BAT *b, const void *v)
    1714             : {
    1715           0 :         BATiter bi = bat_iterator(b);
    1716             :         BUN p, q;
    1717           0 :         int (*cmp)(const void *, const void *) = ATOMcompare(b->ttype); /* comparison function obtained for the tail type */
    1718             : 
    1719           0 :         BATloop(b, p, q) {
    1720           0 :                 if ((*cmp)(v, BUNtail(bi, p)) == 0) {
    1721           0 :                         bat_iterator_end(&bi); /* both exit paths end the iterator started above */
    1722           0 :                         return p;
    1723             :                 }
    1724             :         }
    1725           0 :         bat_iterator_end(&bi);
    1726           0 :         return BUN_NONE;
    1727             : }
    1728             : 
    1729             : static BUN /* find a bit equal to v in a msk tail by reading the tail heap as 32-bit words (32 values per word); BUN_NONE if no word holds one */
    1730           0 : mskfnd(BAT *b, msk v)
    1731             : {
    1732             :         BUN p, q;
    1733             : 
    1734           0 :         if (v) { /* v is a msk passed by value, so it is tested directly (it is not a pointer) */
    1735             :                 /* find a 1 value */
    1736           0 :                 for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) { /* q = number of words, rounded up.  NOTE(review): bits past BATcount(b) in the last word are examined too -- assumes they cannot produce a hit, TODO confirm */
    1737           0 :                         if (((uint32_t *) b->theap->base)[p] != 0) {
    1738             :                                 /* there's at least one 1 bit */
    1739           0 :                                 return p * 32 + candmask_lobit(((uint32_t *) b->theap->base)[p]); /* candmask_lobit: presumably the index of the lowest set bit */
    1740             :                         }
    1741             :                 }
    1742             :         } else {
    1743             :                 /* find a 0 value */
    1744           0 :                 for (p = 0, q = (BATcount(b) + 31) / 32; p < q; p++) {
    1745           0 :                         if (((uint32_t *) b->theap->base)[p] != ~0U) {
    1746             :                                 /* there's at least one 0 bit */
    1747           0 :                                 return p * 32 + candmask_lobit(~((uint32_t *) b->theap->base)[p]); /* invert the word so the 0 bit being sought becomes a set bit */
    1748             :                         }
    1749             :                 }
    1750             :         }
    1751             :         return BUN_NONE;
    1752             : }
    1753             : 
    1754             : BUN /* find a position in b whose tail value equals v; BUN_NONE when nothing matches, v is NULL or b is empty (BUN_NONE is also the failure value handed to BATcheck) */
    1755      483850 : BUNfnd(BAT *b, const void *v)
    1756             : {
    1757             :         BUN r = BUN_NONE;
    1758             :         BATiter bi;
    1759             : 
    1760      483850 :         BATcheck(b, BUN_NONE);
    1761      483850 :         if (!v || BATcount(b) == 0)
    1762             :                 return r;
    1763      364283 :         if (complex_cand(b)) { /* b is handled as a candidate list: v is read as an oid and looked up with canditer_search */
    1764             :                 struct canditer ci;
    1765           0 :                 canditer_init(&ci, NULL, b);
    1766           0 :                 return canditer_search(&ci, * (const oid *) v, false);
    1767             :         }
    1768      364283 :         if (BATtvoid(b))
    1769       70264 :                 return BUNfndVOID(b, v); /* void tail: delegated to BUNfndVOID */
    1770      294019 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
    1771           0 :                 return mskfnd(b, *(msk*)v); /* msk storage: v points at a msk, which is passed on by value */
    1772             :         }
    1773      294019 :         if (!BATcheckhash(b)) { /* BATcheckhash reports no hash: use SORTfnd if the tail is ordered in either direction */
    1774        2958 :                 if (BATordered(b) || BATordered_rev(b))
    1775        2304 :                         return SORTfnd(b, v);
    1776             :         }
    1777      291715 :         if (BAThash(b) == GDK_SUCCEED) { /* hash lookup path; a BAThash failure drops through to the slow scan below */
    1778      291715 :                 bi = bat_iterator(b); /* outside of hashlock */
    1779      291715 :                 MT_rwlock_rdlock(&b->thashlock);
    1780      291715 :                 if (b->thash == NULL) { /* hash is gone by the time the read lock is held: release lock and iterator, then fall back */
    1781           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    1782           0 :                         bat_iterator_end(&bi);
    1783           0 :                         goto hashfnd_failed;
    1784             :                 }
    1785      567370 :                 switch (ATOMbasetype(b->ttype)) { /* one HASHloop variant per base type; in each case the first break is the loop body (stop at the first hit, leaving it in r), the second break leaves the switch */
    1786           0 :                 case TYPE_bte:
    1787           0 :                         HASHloop_bte(bi, b->thash, r, v)
    1788             :                                 break;
    1789             :                         break;
    1790           0 :                 case TYPE_sht:
    1791           0 :                         HASHloop_sht(bi, b->thash, r, v)
    1792             :                                 break;
    1793             :                         break;
    1794         212 :                 case TYPE_int:
    1795         297 :                         HASHloop_int(bi, b->thash, r, v)
    1796             :                                 break;
    1797             :                         break;
    1798           0 :                 case TYPE_flt:
    1799           0 :                         HASHloop_flt(bi, b->thash, r, v)
    1800             :                                 break;
    1801             :                         break;
    1802           0 :                 case TYPE_dbl:
    1803           0 :                         HASHloop_dbl(bi, b->thash, r, v)
    1804             :                                 break;
    1805             :                         break;
    1806       16060 :                 case TYPE_lng:
    1807       25063 :                         HASHloop_lng(bi, b->thash, r, v)
    1808             :                                 break;
    1809             :                         break;
    1810             : #ifdef HAVE_HGE
    1811           0 :                 case TYPE_hge:
    1812           0 :                         HASHloop_hge(bi, b->thash, r, v)
    1813             :                                 break;
    1814             :                         break;
    1815             : #endif
    1816           0 :                 case TYPE_uuid:
    1817           0 :                         HASHloop_uuid(bi, b->thash, r, v)
    1818             :                                 break;
    1819             :                         break;
    1820      275443 :                 case TYPE_str:
    1821      372536 :                         HASHloop_str(bi, b->thash, r, v)
    1822             :                                 break;
    1823             :                         break;
    1824           0 :                 default:
    1825           0 :                         HASHloop(bi, b->thash, r, v)
    1826             :                                 break;
    1827             :                         break;
    1828             :                 }
    1829      291715 :                 MT_rwlock_rdunlock(&b->thashlock);
    1830      291715 :                 bat_iterator_end(&bi);
    1831      291715 :                 return r; /* presumably BUN_NONE when the hash loop ran out without a hit -- depends on the HASHloop macros, TODO confirm */
    1832             :         }
    1833           0 :   hashfnd_failed:
    1834             :         /* can't build hash table, search the slow way */
    1835           0 :         GDKclrerr(); /* presumably clears the error state left by the failed hash attempt before the fallback runs */
    1836           0 :         return slowfnd(b, v);
    1837             : }
    1838             : 
    1839             : /*
    1840             :  * @+ BAT Property Management
    1841             :  *
    1842             :  * The function BATcount returns the number of active elements in a
    1843             :  * BAT.  Counting is type independent.  It can be implemented quickly,
    1844             :  * because the system ensures a dense BUN list.
    1845             :  */
    1846             : void /* record cnt as the capacity of b; only the descriptor field is written here */
    1847     4477804 : BATsetcapacity(BAT *b, BUN cnt)
    1848             : {
    1849     4477804 :         b->batCapacity = cnt;
    1850     4477804 :         assert(b->batCount <= cnt); /* the capacity may not be smaller than the current count */
    1851     4477804 : }
    1852             : 
    1853             : void /* set the count of b to cnt and adjust heap bookkeeping and the sortedness/key hints to match; all updates happen while holding b->theaplock */
    1854    73255733 : BATsetcount(BAT *b, BUN cnt)
    1855             : {
    1856             :         /* head column is always VOID, and some head properties never change */
    1857    73255733 :         assert(!is_oid_nil(b->hseqbase));
    1858    73255733 :         assert(cnt <= BUN_MAX);
    1859             : 
    1860    73255733 :         MT_lock_set(&b->theaplock);
    1861    73252678 :         b->batCount = cnt;
    1862    73252678 :         b->batDirtydesc = true;
    1863    73252678 :         b->theap->dirty |= b->ttype != TYPE_void && b->theap->parentid == b->batCacheid && cnt > 0; /* heap is flagged dirty only for a non-void, non-empty BAT whose heap has this BAT as parentid */
    1864    73252678 :         if (b->theap->parentid == b->batCacheid) /* only the BAT named as the heap's parent updates the heap's free size */
    1865    68778045 :                 b->theap->free = tailsize(b, cnt);
    1866    73252678 :         if (b->ttype == TYPE_void) /* for a void tail the capacity simply follows the count */
    1867     6557340 :                 b->batCapacity = cnt;
    1868    73252678 :         if (cnt <= 1) { /* at most one value: both sorted flags take the value of ATOMlinear(ttype) and the "not sorted at" positions are reset */
    1869    12130433 :                 b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
    1870    12130433 :                 b->tnosorted = b->tnorevsorted = 0;
    1871             :         }
    1872             :         /* if the BAT was made smaller, we need to zap some values */
    1873    73252678 :         if (b->tnosorted >= BUNlast(b))
    1874    10573671 :                 b->tnosorted = 0;
    1875    73252678 :         if (b->tnorevsorted >= BUNlast(b))
    1876    10579047 :                 b->tnorevsorted = 0;
    1877    73252678 :         if (b->tnokey[0] >= BUNlast(b) || b->tnokey[1] >= BUNlast(b)) { /* the two tnokey entries are reset together as soon as either is out of range */
    1878    10573434 :                 b->tnokey[0] = 0;
    1879    10573434 :                 b->tnokey[1] = 0;
    1880             :         }
    1881    73252678 :         if (b->ttype == TYPE_void) { /* void tail: the properties are derived from tseqbase and cnt alone */
    1882     6558284 :                 b->tsorted = true;
    1883     6558284 :                 if (is_oid_nil(b->tseqbase)) { /* nil seqbase: flagged as containing nil, reverse sorted, and key only with at most one value */
    1884     3429399 :                         b->tkey = cnt <= 1;
    1885     3429399 :                         b->trevsorted = true;
    1886     3429399 :                         b->tnil = true;
    1887     3429399 :                         b->tnonil = false;
    1888             :                 } else { /* non-nil seqbase: key, free of nil, and reverse sorted only with at most one value */
    1889     3128885 :                         b->tkey = true;
    1890     3128885 :                         b->trevsorted = cnt <= 1;
    1891     3128885 :                         b->tnil = false;
    1892     3128885 :                         b->tnonil = true;
    1893             :                 }
    1894             :         }
    1895    73252678 :         assert(b->batCapacity >= cnt);
    1896    73252678 :         MT_lock_unset(&b->theaplock);
    1897    73255431 : }
    1898             : 
    1899             : /*
    1900             :  * The key and name properties can be changed at any time.  Keyed
    1901             :  * dimensions are automatically supported by an auxiliary hash-based
    1902             :  * access structure to speed up searching. Turning off the key
    1903             :  * integrity property does not cause the index to disappear. It can
    1904             :  * still be used to speed-up retrieval. The routine BATkey sets the
    1905             :  * key property (b->tkey) of the tail column to flag; it returns GDK_FAIL without changing anything when the request contradicts a void column.
    1906             :  */
    1907             : gdk_return
    1908     4695960 : BATkey(BAT *b, bool flag)
    1909             : {
    1910     4695960 :         BATcheck(b, GDK_FAIL);
    1911     4695960 :         if (b->ttype == TYPE_void) { /* void columns: reject requests that cannot hold, as the two error messages state */
    1912       23464 :                 if (BATtdense(b) && !flag) {
    1913           0 :                         GDKerror("dense column must be unique.\n");
    1914           0 :                         return GDK_FAIL;
    1915             :                 }
    1916       23464 :                 if (is_oid_nil(b->tseqbase) && flag && b->batCount > 1) {
    1917           0 :                         GDKerror("void column cannot be unique.\n");
    1918           0 :                         return GDK_FAIL;
    1919             :                 }
    1920             :         }
    1921     4695960 :         if (b->tkey != flag) /* the descriptor is marked dirty only when the flag actually changes */
    1922      215730 :                 b->batDirtydesc = true;
    1923     4695960 :         b->tkey = flag;
    1924     4695960 :         if (!flag) {
    1925     3760441 :                 b->tseqbase = oid_nil; /* switching key off also resets the tail sequence base to nil */
    1926             :         } else
    1927      935519 :                 b->tnokey[0] = b->tnokey[1] = 0; /* switching key on resets both tnokey entries (presumably the recorded positions of a duplicate pair -- TODO confirm) */
    1928             :         gdk_return rc = GDK_SUCCEED;
    1929     4695960 :         if (flag && VIEWtparent(b)) {
    1930             :                 /* if a view is key, then so is the parent if the two
    1931             :                  * are aligned */
    1932      811229 :                 BAT *bp = BBP_cache(VIEWtparent(b));
    1933      811229 :                 MT_lock_set(&bp->theaplock);
    1934     1284955 :                 if (BATcount(b) == BATcount(bp) && /* the test used here: same count, same atom type, parent not yet key, and either both tails void with equal tseqbase or the view is empty */
    1935      473759 :                     ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
    1936      473764 :                     !BATtkey(bp) &&
    1937       12859 :                     ((BATtvoid(b) && BATtvoid(bp) && b->tseqbase == bp->tseqbase) ||
    1938             :                      BATcount(b) == 0))
    1939       10959 :                         rc = BATkey(bp, true); /* recursive call on the parent, made while bp->theaplock is held; its result becomes this call's result */
    1940      811193 :                 MT_lock_unset(&bp->theaplock);
    1941             :         }
    1942             :         return rc;
    1943             : }
    1944             : 
    1945             : void /* set the head sequence base of b to o; a NULL b is silently ignored */
    1946     1749536 : BAThseqbase(BAT *b, oid o)
    1947             : {
    1948     1749536 :         if (b != NULL) {
    1949     1749536 :                 assert(o <= GDK_oid_max);    /* i.e., not oid_nil */
    1950     1749536 :                 assert(o + BATcount(b) <= GDK_oid_max); /* o plus the count must also stay within GDK_oid_max */
    1951     1749536 :                 if (b->hseqbase != o) { /* descriptor is marked dirty only on an actual change */
    1952      197133 :                         b->batDirtydesc = true;
    1953      197133 :                         b->hseqbase = o;
    1954             :                 }
    1955             :         }
    1956     1749536 : }
    1957             : 
    1958             : void /* set the tail sequence base of b to o (oid_nil is allowed) and, for a void tail, derive the sorted/key/nil properties from it; a NULL b is ignored */
    1959     4315180 : BATtseqbase(BAT *b, oid o)
    1960             : {
    1961     4315180 :         assert(o <= oid_nil);
    1962     4315180 :         if (b == NULL)
    1963             :                 return;
    1964     4315180 :         assert(is_oid_nil(o) || o + BATcount(b) <= GDK_oid_max);
    1965     4315180 :         if (b->tseqbase != o) { /* descriptor is marked dirty whenever o differs from the stored value, even on the branch below that stores oid_nil */
    1966     3259343 :                 b->batDirtydesc = true;
    1967             :         }
    1968     4315180 :         if (ATOMtype(b->ttype) == TYPE_oid) { /* only tails whose ATOMtype is oid store o; for any other type o must be oid_nil (see the else branch) */
    1969     3311855 :                 b->tseqbase = o;
    1970             : 
    1971             :                 /* adapt keyness */
    1972     3311855 :                 if (BATtvoid(b)) {
    1973     3274721 :                         b->tsorted = true;
    1974     3274721 :                         if (is_oid_nil(o)) { /* nil seqbase on a void tail: key only with at most one value, nonil only when empty, nil when non-empty */
    1975          68 :                                 b->tkey = b->batCount <= 1;
    1976          68 :                                 b->tnonil = b->batCount == 0;
    1977          68 :                                 b->tnil = b->batCount > 0;
    1978          68 :                                 b->trevsorted = true;
    1979          68 :                                 b->tnosorted = b->tnorevsorted = 0;
    1980          68 :                                 if (!b->tkey) { /* not key means at least two values here: positions 0 and 1 are stored in tnokey */
    1981           0 :                                         b->tnokey[0] = 0;
    1982           0 :                                         b->tnokey[1] = 1;
    1983             :                                 } else {
    1984          68 :                                         b->tnokey[0] = b->tnokey[1] = 0;
    1985             :                                 }
    1986             :                         } else { /* non-nil seqbase on a void tail: key, nonil, and reverse sorted only with at most one value */
    1987     3274653 :                                 if (!b->tkey) {
    1988        7052 :                                         b->tkey = true;
    1989        7052 :                                         b->tnokey[0] = b->tnokey[1] = 0;
    1990             :                                 }
    1991     3274653 :                                 b->tnonil = true;
    1992     3274653 :                                 b->tnil = false;
    1993     3274653 :                                 b->trevsorted = b->batCount <= 1;
    1994     3274653 :                                 if (!b->trevsorted)
    1995       20985 :                                         b->tnorevsorted = 1; /* position 1 is recorded as the place where reverse order breaks */
    1996             :                         }
    1997             :                 }
    1998             :         } else {
    1999     1003325 :                 assert(o == oid_nil);
    2000     1003325 :                 b->tseqbase = oid_nil;
    2001             :         }
    2002             : }
    2003             : 
    2004             : gdk_return /* set b->tident: a GDKstrdup copy of tnme, or BATstring_t when tnme is NULL */
    2005       22699 : BATroles(BAT *b, const char *tnme)
    2006             : {
    2007       22699 :         if (b == NULL)
    2008             :                 return GDK_SUCCEED; /* a NULL b is treated as a successful no-op */
    2009       22699 :         if (b->tident && !default_ident(b->tident)) /* the previous name is freed unless default_ident() recognizes it */
    2010           0 :                 GDKfree(b->tident);
    2011       22699 :         if (tnme)
    2012         664 :                 b->tident = GDKstrdup(tnme);
    2013             :         else
    2014       22035 :                 b->tident = BATstring_t;
    2015       22699 :         return b->tident ? GDK_SUCCEED : GDK_FAIL; /* GDK_FAIL when tident ended up NULL (GDKstrdup returned NULL); the old name has already been freed by then */
    2016             : }
    2017             : 
    2018             : /*
    2019             :  * @- Change the BAT access permissions (read, append, write)
    2020             :  * Regrettably, BAT access-permissions, persistent status and memory
    2021             :  * map modes interact in ways that make one's brain sizzle. This
    2022             :  * makes BATsetaccess and TMcommit (where a change in BAT persistence
    2023             :  * mode is made permanent) points in which the memory map status of
    2024             :  * bats needs to be carefully re-assessed and ensured.
    2025             :  *
    2026             :  * Another complication is the fact that during commit, concurrent
    2027             :  * users may access the heaps, such that the simple solution
    2028             :  * unmap;re-map is out of the question.
    2029             :  * Even worse, it is not possible to even rename an open mmap file in
    2030             :  * Windows. For this purpose, we dropped the old .priv scheme, which
    2031             :  * relied on file moves. Now, the file that is opened with mmap is
    2032             :  * always the X file, in case of newstorage=STORE_PRIV, we save in a
    2033             :  * new file X.new
    2034             :  *
    2035             :  * we must consider the following dimensions:
    2036             :  *
    2037             :  * persistence:
    2038             :  *     not simply the current persistence mode but whether the bat *was*
    2039             :  *     present at the last commit point (BBP status & BBPEXISTING).
    2040             :  *     The crucial issue is namely whether we must guarantee recovery
    2041             :  *     to a previous sane state.
    2042             :  *
    2043             :  * access:
    2044             :  *     whether the BAT is BAT_READ or BAT_WRITE. Note that BAT_APPEND
    2045             :  *     is usually the same as BAT_READ (as our concern is only data pages
    2046             :  *     that already existed at the last commit).
    2047             :  *
    2048             :  * storage:
    2049             :  *     the current way the heap file X is memory-mapped;
    2050             :  *     STORE_MMAP uses direct mapping (so dirty pages may be flushed
    2051             :  *     at any time to disk), STORE_PRIV uses copy-on-write.
    2052             :  *
    2053             :  * newstorage:
    2054             :  *     the current save-regime. STORE_MMAP calls msync() on the heap X,
    2055             :  *     whereas STORE_PRIV writes the *entire* heap in a file: X.new
    2056             :  *     If a BAT is loaded from disk, the field newstorage is used
    2057             :  *     to set storage as well (so before change-access and commit-
    2058             :  *     persistence mayhem, we always have newstorage=storage).
    2059             :  *
    2060             :  * change-access:
    2061             :  *     what happens if the bat-access mode is changed from
    2062             :  *     BAT_READ into BAT_WRITE (or vice versa).
    2063             :  *
    2064             :  * commit-persistence:
    2065             :  *     what happens during commit if the bat-persistence mode was
    2066             :  *     changed (from TRANSIENT into PERSISTENT, or vice versa).
    2067             :  *
    2068             :  * this is the scheme:
    2069             :  *
    2070             :  *  persistence access    newstorage storage    change-access commit-persistence
    2071             :  *  =========== ========= ========== ========== ============= ==================
    2072             :  * 0 transient  BAT_READ  STORE_MMAP STORE_MMAP =>2           =>4
    2073             :  * 1 transient  BAT_READ  STORE_PRIV STORE_PRIV =>3           =>5
    2074             :  * 2 transient  BAT_WRITE STORE_MMAP STORE_MMAP =>0           =>6+
    2075             :  * 3 transient  BAT_WRITE STORE_PRIV STORE_PRIV =>1           =>7
    2076             :  * 4 persistent BAT_READ  STORE_MMAP STORE_MMAP =>6+          =>0
    2077             :  * 5 persistent BAT_READ  STORE_PRIV STORE_PRIV =>7           =>1
    2078             :  * 6 persistent BAT_WRITE STORE_PRIV STORE_MMAP del X.new=>4+ del X.new;=>2+
    2079             :  * 7 persistent BAT_WRITE STORE_PRIV STORE_PRIV =>5           =>3
    2080             :  *
    2081             :  * exception states:
    2082             :  * a transient  BAT_READ  STORE_PRIV STORE_MMAP =>b           =>c
    2083             :  * b transient  BAT_WRITE STORE_PRIV STORE_MMAP =>a           =>6
    2084             :  * c persistent BAT_READ  STORE_PRIV STORE_MMAP =>6           =>a
    2085             :  *
    2086             :  * (+) indicates that we must ensure that the heap gets saved in its new mode
    2087             :  *
    2088             :  * Note that we now allow a heap with save-regime STORE_PRIV that was
    2089             :  * actually mapped STORE_MMAP. In effect, the potential corruption of
    2090             :  * the X file is compensated by writing out full X.new files that take
    2091             :  * precedence.  When transitioning out of this state towards one with
    2092             :  * both storage regime and OS as STORE_MMAP we need to move the X.new
    2093             :  * files into the backup directory. Then msync the X file and (on
    2094             :  * success) remove the X.new; see backup_new().
    2095             :  *
    2096             :  * Exception states are only reachable if the commit fails and those
    2097             :  * new persistent bats have already been processed (but never become
    2098             :  * part of a committed state). In that case a transition 2=>6 may end
    2099             :  * up 2=>b.  Exception states a and c are reachable from b.
    2100             :  *
    2101             :  * Errors in HEAPchangeaccess() can be handled atomically inside the
    2102             :  * routine.  The work on changing mmap modes HEAPcommitpersistence()
    2103             :  * is done during the BBPsync() for all bats that are newly persistent
    2104             :  * (BBPNEW). After the TMcommit(), it is done for those bats that are
    2105             :  * no longer persistent after the commit (BBPDELETED), only if it
    2106             :  * succeeds.  Such transient bats cannot be processed before the
    2107             :  * commit, because the commit may fail and then the more unsafe
    2108             :  * transient mmap modes would be present on a persistent bat.
    2109             :  *
    2110             :  * See dirty_bat() in BBPsync() -- gdk_bbp.c and epilogue() in
    2111             :  * gdk_tm.c.
    2112             :  *
    2113             :  * Including the exception states, we have 11 of the 16
    2114             :  * combinations. As for the 5 avoided states, all four
    2115             :  * (persistence,access) states with (STORE_MMAP,STORE_PRIV) are
    2116             :  * omitted (this would amount to an msync() save regime on a
    2117             :  * copy-on-write heap -- which does not work). The remaining avoided
    2118             :  * state is the patently unsafe
    2119             :  * (persistent,BAT_WRITE,STORE_MMAP,STORE_MMAP).
    2120             :  *
    2121             :  * Note that after a server restart exception states are gone, as on
    2122             :  * BAT loads the saved descriptor is inspected again (which will
    2123             :  * reproduce the state at the last succeeded commit).
    2124             :  *
    2125             :  * To avoid exception states, a TMsubcommit protocol would need to be
    2126             :  * used which is too heavy for BATsetaccess().
    2127             :  *
    2128             :  * Note that this code is not about making heaps mmap-ed in the first
    2129             :  * place.  It is just about determining which flavor of mmap should be
    2130             :  * used. The MAL user is oblivious of such details.
    2131             :  */
    2132             : 
    2133             : /* rather than deleting X.new, we comply with the commit protocol and
    2134             :  * move it to backup storage
                     :  *
                     :  * hp is the heap whose .new file is handled; the BATDIR and BAKDIR
                     :  * paths are built from hp->farmid and hp->filename.  When lock is
                     :  * true, GDKtmLock is taken around the file system operations;
                     :  * when false it is not touched here (presumably the caller holds
                     :  * it already -- TODO confirm).
                     :  *
                     :  * Cases, based on the two MT_stat() results:
                     :  * - X.new in BATDIR but not in BAKDIR: renamed into BAKDIR;
                     :  * - X.new in both: the BATDIR copy is removed;
                     :  * - no X.new in BATDIR: nothing to do, success.
                     :  * Returns GDK_FAIL when a path could not be built or when the
                     :  * rename/remove reported an error, GDK_SUCCEED otherwise. */
    2135             : static gdk_return
    2136           0 : backup_new(Heap *hp, bool lock)
    2137             : {
    2138             :         int batret, bakret, ret = -1;        /* ret stays -1 (failure) when a path is NULL */
    2139             :         char *batpath, *bakpath;
    2140             :         struct stat st;
    2141             : 
    2142             :         /* check for an existing X.new in BATDIR and BAKDIR (the only
                     :          * two locations this function looks at) */
    2143           0 :         batpath = GDKfilepath(hp->farmid, BATDIR, hp->filename, ".new");
    2144           0 :         bakpath = GDKfilepath(hp->farmid, BAKDIR, hp->filename, ".new");
    2145           0 :         if (batpath != NULL && bakpath != NULL) {
    2146             :                 /* file actions here interact with the global commits */
    2147           0 :                 if (lock)
    2148           0 :                         MT_lock_set(&GDKtmLock);
    2149             : 
    2150             :                 batret = MT_stat(batpath, &st);
    2151             :                 bakret = MT_stat(bakpath, &st);
    2152             : 
    2153           0 :                 if (batret == 0 && bakret) {
    2154             :                         /* no backup yet, so move the existing X.new there out
    2155             :                          * of the way */
    2156           0 :                         if ((ret = MT_rename(batpath, bakpath)) < 0)
    2157           0 :                                 GDKsyserror("backup_new: rename %s to %s failed\n",
    2158             :                                             batpath, bakpath);
    2159           0 :                         TRC_DEBUG(IO_, "rename(%s,%s) = %d\n", batpath, bakpath, ret);
    2160           0 :                 } else if (batret == 0) {
    2161             :                         /* there is a backup already; just remove the X.new */
    2162           0 :                         if ((ret = MT_remove(batpath)) != 0)
    2163           0 :                                 GDKsyserror("backup_new: remove %s failed\n", batpath);
    2164           0 :                         TRC_DEBUG(IO_, "remove(%s) = %d\n", batpath, ret);
    2165             :                 } else {
    2166             :                         ret = 0;     /* MT_stat() on the BATDIR path failed: nothing to move */
    2167             :                 }
    2168           0 :                 if (lock)
    2169           0 :                         MT_lock_unset(&GDKtmLock);
    2170             :         }
    2171           0 :         GDKfree(batpath);
    2172           0 :         GDKfree(bakpath);
    2173           0 :         return ret ? GDK_FAIL : GDK_SUCCEED;
    2174             : }
    2175             : /* ACCESSMODE(wr,rd): destination mode handed to HEAPchangeaccess():
                     :  * BAT_WRITE when wr is true, else BAT_READ when rd is true, else
                     :  * -1, for which HEAPchangeaccess() returns the current newstorage
                     :  * unchanged. */
    2176             : #define ACCESSMODE(wr,rd) ((wr)?BAT_WRITE:(rd)?BAT_READ:-1)
    2177             : 
    2178             : /* compute the save regime (newstorage) a heap should get when the
                     :  * access mode of its bat changes to dstmode (BAT_WRITE, BAT_READ,
                     :  * or -1 for no relevant change).  Both directions are handled
                     :  * here, not only readonly to writable.  The state numbers in the
                     :  * comments refer to the table in the long comment earlier in
                     :  * this file.
                     :  *
                     :  * hp->newstorage itself is not assigned here: the new value is
                     :  * returned and the caller stores it.  hp->dirty may be set as a
                     :  * side effect.  In the 6=>4 case backup_new() is called (taking
                     :  * GDKtmLock) and its failure is reported as STORE_INVALID. */
    2179             : static storage_t
    2180     5678708 : HEAPchangeaccess(Heap *hp, int dstmode, bool existing)
    2181             : {
    2182     5678708 :         if (hp->base == NULL || hp->newstorage == STORE_MEM || !existing || dstmode == -1)
    2183     5678708 :                 return hp->newstorage;       /* 0<=>2,1<=>3,a<=>b */
    2184             : 
    2185           0 :         if (dstmode == BAT_WRITE) {
    2186           0 :                 if (hp->storage != STORE_PRIV)
    2187           0 :                         hp->dirty = true;    /* exception c does not make it dirty */
    2188           0 :                 return STORE_PRIV;      /* 4=>6,5=>7,c=>6 persistent BAT_WRITE needs STORE_PRIV */
    2189             :         }
    2190           0 :         if (hp->storage == STORE_MMAP) {     /* 6=>4 */
    2191           0 :                 hp->dirty = true;
    2192           0 :                 return backup_new(hp, true) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP;        /* only called for existing bats */
    2193             :         }
    2194             :         return hp->storage;  /* 7=>5 */
    2195             : }
    2196             : 
    2197             : /* heap changes persistence mode (at commit point)
                     :  *
                     :  * Returns the save regime (newstorage) the heap should have after
                     :  * the change; the caller stores it.  existing selects the
                     :  * direction (true: the bat will become transient, false: it will
                     :  * become persistent) and writable says whether the heap must be
                     :  * treated as writable.  hp->dirty may be set as a side effect.
                     :  * In the 6=>2 case backup_new() is called without taking
                     :  * GDKtmLock, and its failure is reported as STORE_INVALID.  The
                     :  * state numbers refer to the table in the long comment earlier
                     :  * in this file. */
    2198             : static storage_t
    2199      232746 : HEAPcommitpersistence(Heap *hp, bool writable, bool existing)
    2200             : {
    2201      232746 :         if (existing) {         /* existing, ie will become transient */
    2202       33264 :                 if (hp->storage == STORE_MMAP && hp->newstorage == STORE_PRIV && writable) {      /* 6=>2 */
    2203           0 :                         hp->dirty = true;
    2204           0 :                         return backup_new(hp, false) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP;       /* only called for existing bats */
    2205             :                 }
    2206       33264 :                 return hp->newstorage;       /* 4=>0,5=>1,7=>3,c=>a no change */
    2207             :         }
    2208             :         /* !existing, ie will become persistent */
    2209      199482 :         if (hp->newstorage == STORE_MEM)
    2210             :                 return hp->newstorage;       /* STORE_MEM heaps keep their regime */
    2211        1800 :         if (hp->newstorage == STORE_MMAP && !writable)
    2212             :                 return STORE_MMAP;      /* 0=>4 STORE_MMAP */
    2213             : 
    2214           0 :         if (hp->newstorage == STORE_MMAP)
    2215           0 :                 hp->dirty = true;    /* 2=>6 */
    2216             :         return STORE_PRIV;      /* 1=>5,2=>6,3=>7,a=>c,b=>6 states */
    2217             : }
    2218             : 
    2219             : /* ATOMappendpriv(t, h): true when the storage type of atom type t
                     :  * is not TYPE_str.  The argument h is currently unused, since the
                     :  * GDK_ELIMDOUBLES(h) term is commented out.  The callers in this
                     :  * file combine it with a BAT_APPEND test on the bat when deciding
                     :  * how to treat the tvheap. */
    2220             : #define ATOMappendpriv(t, h) (ATOMstorage(t) != TYPE_str /*|| GDK_ELIMDOUBLES(h) */)
    2221             : 
    2222             : /* change the heap modes at a commit
                     :  *
                     :  * For the tail heap (only when b->ttype is non-zero) and for the
                     :  * tvheap (when present) the new save regime is computed with
                     :  * HEAPcommitpersistence().  existing is passed through to it
                     :  * unchanged.  If either result is STORE_INVALID nothing is stored
                     :  * and GDK_FAIL is returned.  Otherwise, when at least one regime
                     :  * differs from the current newstorage, the descriptor is marked
                     :  * dirty and both computed regimes are stored.  Returns
                     :  * GDK_SUCCEED in all non-failing cases. */
    2223             : gdk_return
    2224      196339 : BATcheckmodes(BAT *b, bool existing)
    2225             : {
    2226             :         storage_t m1 = STORE_MEM, m3 = STORE_MEM;    /* m1: tail heap, m3: tvheap */
    2227             :         bool dirty = false, wr;
    2228             : 
    2229      196339 :         BATcheck(b, GDK_FAIL);
    2230             : 
    2231      196339 :         wr = (b->batRestricted == BAT_WRITE);
    2232      196339 :         if (b->ttype) {
    2233      196339 :                 m1 = HEAPcommitpersistence(b->theap, wr, existing);
    2234      196339 :                 dirty |= (b->theap->newstorage != m1);
    2235             :         }
    2236             : 
    2237      196339 :         if (b->tvheap) {
    2238       36407 :                 bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
    2239       36407 :                 m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);   /* tvheap counts as writable for BAT_WRITE, or BAT_APPEND with ta */
    2240       36407 :                 dirty |= (b->tvheap->newstorage != m3);
    2241             :         }
    2242      196339 :         if (m1 == STORE_INVALID || m3 == STORE_INVALID)
    2243             :                 return GDK_FAIL;
    2244             : 
    2245      196339 :         if (dirty) {
    2246           0 :                 b->batDirtydesc = true;
    2247           0 :                 b->theap->newstorage = m1;   /* NOTE(review): when b->ttype is 0 this stores the initial STORE_MEM -- confirm intended */
    2248           0 :                 if (b->tvheap)
    2249           0 :                         b->tvheap->newstorage = m3;
    2250             :         }
    2251             :         return GDK_SUCCEED;
    2252             : }
    2253             : /* BATsetaccess: change the access mode of b to newmode and adjust
                     :  * the save regime (newstorage) of its heaps accordingly.
                     :  *
                     :  * Returns the bat that carries the requested mode, or NULL on
                     :  * failure.  The result is not necessarily b itself: when b is a
                     :  * view or has a non-zero batSharecnt and newmode is not BAT_READ,
                     :  * a TRANSIENT copy is made with COLcopy(), BBPunfix() is called
                     :  * on b, and the copy is used from then on.  On the failure paths
                     :  * in the body BBPunfix() is also called on the bat before NULL
                     :  * is returned (NOTE(review): the reference counting contract is
                     :  * inferred from these BBPunfix() calls -- confirm with callers).
                     :  *
                     :  * For bats with the BBPEXISTING status bit the change is saved
                     :  * with BBPsave(); if that fails, mode, dirty flag and both
                     :  * newstorage values are rolled back. */
    2254             : BAT *
    2255     6635772 : BATsetaccess(BAT *b, restrict_t newmode)
    2256             : {
    2257             :         restrict_t bakmode;
    2258             :         bool bakdirty;
    2259             : 
    2260     6635772 :         BATcheck(b, NULL);
    2261     6635772 :         if ((isVIEW(b) || b->batSharecnt) && newmode != BAT_READ) {
    2262           0 :                 BAT *bn = COLcopy(b, b->ttype, true, TRANSIENT);
    2263           0 :                 BBPunfix(b->batCacheid);
    2264           0 :                 if (bn == NULL)
    2265             :                         return NULL;
    2266             :                 b = bn;
    2267             :         }
    2268     6635772 :         bakmode = (restrict_t) b->batRestricted;     /* kept for the roll back below */
    2269     6635772 :         bakdirty = b->batDirtydesc;
    2270     6635772 :         if (bakmode != newmode) {
    2271     4675362 :                 bool existing = (BBP_status(b->batCacheid) & BBPEXISTING) != 0;
    2272             :                 bool wr = (newmode == BAT_WRITE);    /* entering BAT_WRITE */
    2273     4675362 :                 bool rd = (bakmode == BAT_WRITE);    /* leaving BAT_WRITE */
    2274             :                 storage_t m1, m3 = STORE_MEM;        /* new regimes: tail heap, tvheap */
    2275             :                 storage_t b1, b3 = STORE_MEM;        /* old regimes, for the roll back */
    2276             : 
    2277     4675362 :                 b1 = b->theap->newstorage;
    2278     4684904 :                 m1 = HEAPchangeaccess(b->theap, ACCESSMODE(wr, rd), existing);
    2279     4674661 :                 if (b->tvheap) {     /* NOTE(review): in the call below wr && ta
                     :                                       * can never be true, because wr requires
                     :                                       * newmode == BAT_WRITE while ta requires
                     :                                       * newmode == BAT_APPEND; BATcheckmodes()
                     :                                       * passes wr || ta for the same heap.
                     :                                       * Confirm which one is intended. */
    2280     1003706 :                         bool ta = (newmode == BAT_APPEND && ATOMappendpriv(b->ttype, b->tvheap));
    2281     1003706 :                         b3 = b->tvheap->newstorage;
    2282     2007375 :                         m3 = HEAPchangeaccess(b->tvheap, ACCESSMODE(wr && ta, rd && ta), existing);
    2283             :                 }
    2284     4675319 :                 if (m1 == STORE_INVALID || m3 == STORE_INVALID) {
    2285     1341818 :                         BBPunfix(b->batCacheid);
    2286     1342440 :                         return NULL;
    2287             :                 }
    2288             : 
    2289             :                 /* set new access mode and mmap modes */
    2290     3333501 :                 b->batRestricted = (unsigned int) newmode;
    2291     3333501 :                 b->batDirtydesc = true;
    2292     3333501 :                 b->theap->newstorage = m1;
    2293     3333501 :                 if (b->tvheap)
    2294      946154 :                         b->tvheap->newstorage = m3;
    2295             : 
    2296     3333501 :                 if (existing && BBPsave(b) != GDK_SUCCEED) {
    2297             :                         /* roll back all changes */
    2298           0 :                         b->batRestricted = (unsigned int) bakmode;
    2299           0 :                         b->batDirtydesc = bakdirty;
    2300           0 :                         b->theap->newstorage = b1;
    2301           0 :                         if (b->tvheap)
    2302           0 :                                 b->tvheap->newstorage = b3;
    2303           0 :                         BBPunfix(b->batCacheid);
    2304           0 :                         return NULL;
    2305             :                 }
    2306             :         }
    2307             :         return b;
    2308             : }
    2309             : /* BATgetaccess: return the access mode stored in b->batRestricted,
                     :  * cast to restrict_t.  The value 3 is asserted not to occur.
                     :  * BATcheck() is given BAT_WRITE as its second argument
                     :  * (presumably the value returned for a NULL b -- the macro is
                     :  * not visible here). */
    2310             : restrict_t
    2311           0 : BATgetaccess(BAT *b)
    2312             : {
    2313           0 :         BATcheck(b, BAT_WRITE /* 0 */);
    2314           0 :         assert(b->batRestricted != 3); /* only valid restrict_t values */
    2315           0 :         return (restrict_t) b->batRestricted;
    2316             : }
    2317             : 
    2318             : /*
    2319             :  * @- change BAT persistency (persistent,session,transient)
    2320             :  * In the past, we prevented BATS with certain types from being saved at all:
    2321             :  * - BATs of BATs, as having recursive bats creates cascading
    2322             :  *   complexities in commits/aborts.
    2323             :  * - any atom with refcounts, as the BBP has no overview of such
    2324             :  *   user-defined refcounts.
    2325             :  * - pointer types, as the values they point to are bound to be transient.
    2326             :  *
    2327             :  * However, nowadays we do allow such saves, as the BBP swapping
    2328             :  * mechanism was altered to be able to save transient bats temporarily
    2329             :  * to disk in order to make room.  Thus, we must be able to save any
    2330             :  * transient BAT to disk.
    2331             :  *
    2332             :  * What we don't allow is to make such bats persistent.
    2333             :  *
    2334             :  * Although the persistent state does influence the allowed mmap
    2335             :  * modes, this only goes for the *real* committed persistent
    2336             :  * state. Making the bat persistent with BATmode does not matter for
    2337             :  * the heap modes until the commit point is reached. So we do not need
    2338             :  * to do anything with heap modes yet at this point.
    2339             :  */
    2340             : #define check_type(tp)                                                  \
    2341             :         do {    /* uses the variable b of the expanding function */     \
    2342             :                 if (ATOMisdescendant((tp), TYPE_ptr) ||                 \
    2343             :                     BATatoms[tp].atomUnfix ||                           \
    2344             :                     BATatoms[tp].atomFix) {                             \
    2345             :                         GDKerror("%s type implies that %s[%s] "               \
    2346             :                                  "cannot be made persistent.\n",      \
    2347             :                                  ATOMname(tp), BATgetId(b),             \
    2348             :                                  ATOMname(b->ttype));                        \
    2349             :                         return GDK_FAIL; /* leaves the expanding function */ \
    2350             :                 }                                                       \
    2351             :         } while (0)
    2352             : /* BATmode: switch b between transient and persistent.
                     :  *
                     :  * transient is the requested state; nothing is changed when it
                     :  * already equals b->batTransient.  Making a bat persistent is
                     :  * refused with GDK_FAIL when b->batRole is TRANSIENT or when
                     :  * check_type() rejects the tail type.  Otherwise the logical
                     :  * reference is retained or released, and then, under
                     :  * GDKswapLock(bid), the BBP status bits (BBPNEW, BBPEXISTING,
                     :  * BBPDELETED, BBPPERSISTENT, BBPTMP) and b->batTransient are
                     :  * updated.
                     :  *
                     :  * Returns GDK_SUCCEED, or GDK_FAIL as described above. */
    2353             : gdk_return
    2354      113808 : BATmode(BAT *b, bool transient)
    2355             : {
    2356      113808 :         BATcheck(b, GDK_FAIL);
    2357             : 
    2358             :         /* can only make a bat PERSISTENT if its role is already
    2359             :          * PERSISTENT */
    2360      113808 :         assert(transient || b->batRole == PERSISTENT);
    2361             :         /* cannot make a view PERSISTENT */
    2362      113808 :         assert(transient || !isVIEW(b));
    2363             : 
    2364      113808 :         if (b->batRole == TRANSIENT && !transient) {
    2365           0 :                 GDKerror("cannot change mode of BAT in TRANSIENT farm.\n");
    2366           0 :                 return GDK_FAIL;
    2367             :         }
    2368             : 
    2369      113808 :         if (transient != b->batTransient) {
    2370      113784 :                 bat bid = b->batCacheid;
    2371             : 
    2372      113784 :                 if (!transient) {
    2373       83174 :                         check_type(b->ttype);        /* may return GDK_FAIL from this function */
    2374             :                 }
    2375             : 
    2376             :                 /* persistent BATs get a logical reference */
    2377      113784 :                 if (!transient) {
    2378       83174 :                         BBPretain(bid);
    2379       30610 :                 } else if (!b->batTransient) {       /* given the enclosing test this holds whenever transient is true */
    2380       30610 :                         BBPrelease(bid);
    2381             :                 }
    2382      113784 :                 MT_lock_set(&GDKswapLock(bid));
    2383      113784 :                 if (!transient) {
    2384       83174 :                         if (BBP_status(bid) & BBPDELETED) {
    2385           0 :                                 BBP_status_on(bid, BBPEXISTING);
    2386           0 :                                 BBP_status_off(bid, BBPDELETED);
    2387             :                         } else
    2388       83174 :                                 BBP_status_on(bid, BBPNEW);
    2389       30610 :                 } else if (!b->batTransient) {
    2390       30610 :                         if (!(BBP_status(bid) & BBPNEW))
    2391       30607 :                                 BBP_status_on(bid, BBPDELETED);
    2392       30610 :                         BBP_status_off(bid, BBPPERSISTENT);
    2393             :                 }
    2394             :                 /* session bats or persistent bats that did not
    2395             :                  * witness a commit yet may have been saved */
    2396      113784 :                 if (b->batCopiedtodisk) {
    2397       28085 :                         if (!transient) {
    2398           0 :                                 BBP_status_off(bid, BBPTMP);
    2399             :                         } else {
    2400             :                                 /* TMcommit must remove it to
    2401             :                                  * guarantee free space */
    2402       28085 :                                 BBP_status_on(bid, BBPTMP);
    2403             :                         }
    2404             :                 }
    2405      113784 :                 b->batTransient = transient;
    2406      113784 :                 MT_lock_unset(&GDKswapLock(bid));
    2407             :         }
    2408             :         return GDK_SUCCEED;
    2409             : }
    2410             : 
    2411             : /* BATassertProps checks whether properties are set correctly.  Under
    2412             :  * no circumstances will it change any properties.  Note that the
    2413             :  * "nil" property is not actually used anywhere, but it is checked. */
    2414             : 
    2415             : #ifdef NDEBUG
    2416             : /* assertions are disabled, turn failing tests into a message
                     :  *
                     :  * The replacement still evaluates the test expression and, when
                     :  * it is false, reports the stringified test through
                     :  * TRC_CRITICAL_ENDIF; the whole expression is cast to void.  It
                     :  * applies to the assert() uses that follow this point. */
    2417             : #undef assert
    2418             : #define assert(test)    ((void) ((test) || (TRC_CRITICAL_ENDIF(BAT_, "Assertion `%s' failed\n", #test), 0)))
    2419             : #endif
    2420             : 
    2421             : /* Assert that properties are set correctly.
    2422             :  *
    2423             :  * A BAT can have a bunch of properties set.  Mostly, the property
    2424             :  * bits are set if we *know* the property holds, and not set if we
    2425             :  * don't know whether the property holds (or if we know it doesn't
    2426             :  * hold).  All properties are per column.
    2427             :  *
    2428             :  * The properties currently maintained are:
    2429             :  *
    2430             :  * seqbase      Only valid for TYPE_oid and TYPE_void columns: each
    2431             :  *              value in the column is exactly one more than the
    2432             :  *              previous value, starting at position 0 with the value
    2433             :  *              stored in this property.
    2434             :  *              This implies sorted, key, nonil (which therefore need
    2435             :  *              to be set).
    2436             :  * nil          There is at least one NIL value in the column.
    2437             :  * nonil        There are no NIL values in the column.
    2438             :  * key          All values in the column are distinct.
    2439             :  * sorted       The column is sorted (ascending).  If also revsorted,
    2440             :  *              then all values are equal.
    2441             :  * revsorted    The column is reversely sorted (descending).  If
    2442             :  *              also sorted, then all values are equal.
    2443             :  * nosorted     BUN position which proves not sorted (given position
    2444             :  *              and one before are not ordered correctly).
    2445             :  * norevsorted  BUN position which proves not revsorted (given position
    2446             :  *              and one before are not ordered correctly).
    2447             :  * nokey        Pair of BUN positions that prove not all values are
    2448             :  *              distinct (i.e. values at given locations are equal).
    2449             :  *
    2450             :  * Note that the functions BATtseqbase and BATkey also set more
    2451             :  * properties than you might suspect.  When setting properties on a
    2452             :  * newly created and filled BAT, you may want to first make sure the
    2453             :  * batCount is set correctly (e.g. by calling BATsetcount), then use
    2454             :  * BATtseqbase and BATkey, and finally set the other properties.
    2455             :  */
    2456             : 
    2457             : void
    2458    33141054 : BATassertProps(BAT *b)
    2459             : {
    2460             :         unsigned bbpstatus;
    2461             :         BUN p, q;
    2462             :         int (*cmpf)(const void *, const void *);
    2463             :         int cmp;
    2464             :         const void *prev = NULL, *valp, *nilp;
    2465             :         char filename[sizeof(b->theap->filename)];
    2466             : 
    2467             :         /* do the complete check within a lock */
    2468    33141054 :         MT_lock_set(&b->theaplock);
    2469             : 
    2470             :         /* general BAT sanity */
    2471    33143822 :         assert(b != NULL);
    2472    33143822 :         assert(b->batCacheid > 0);
    2473    33143822 :         assert(b->batCount >= b->batInserted);
    2474             : 
    2475             :         /* headless */
    2476    33143822 :         assert(b->hseqbase <= GDK_oid_max); /* non-nil seqbase */
    2477    33143822 :         assert(b->hseqbase + BATcount(b) <= GDK_oid_max);
    2478             : 
    2479    33143822 :         bbpstatus = BBP_status(b->batCacheid);
    2480             :         /* only at most one of BBPDELETED, BBPEXISTING, BBPNEW may be set */
    2481    33143822 :         assert(((bbpstatus & BBPDELETED) != 0) +
    2482             :                ((bbpstatus & BBPEXISTING) != 0) +
    2483             :                ((bbpstatus & BBPNEW) != 0) <= 1);
    2484             : 
    2485    33143822 :         assert(b->ttype >= TYPE_void);
    2486    33143822 :         assert(b->ttype < GDKatomcnt);
    2487    33143822 :         assert(b->ttype != TYPE_bat);
    2488    33143822 :         assert(isVIEW(b) ||
    2489             :                b->ttype == TYPE_void ||
    2490             :                BBPfarms[b->theap->farmid].roles & (1 << b->batRole));
    2491    33143822 :         assert(isVIEW(b) ||
    2492             :                b->tvheap == NULL ||
    2493             :                (BBPfarms[b->tvheap->farmid].roles & (1 << b->batRole)));
    2494             : 
    2495    33143822 :         cmpf = ATOMcompare(b->ttype);
    2496    33143822 :         nilp = ATOMnilptr(b->ttype);
    2497             : 
    2498    33143822 :         assert(b->theap->free >= tailsize(b, BUNlast(b)));
    2499    33143822 :         if (b->ttype != TYPE_void) {
    2500    30121190 :                 assert(b->batCount <= b->batCapacity);
    2501    30121190 :                 assert(b->theap->size >= b->theap->free);
    2502    30121190 :                 if (ATOMstorage(b->ttype) == TYPE_msk) {
    2503             :                         /* 32 values per 4-byte word (that's not the
    2504             :                          * same as 8 values per byte...) */
    2505        2594 :                         assert(b->theap->size >= 4 * ((b->batCapacity + 31) / 32));
    2506             :                 } else
    2507    30118596 :                         assert(b->theap->size >> b->tshift >= b->batCapacity);
    2508             :         }
    2509    33143822 :         strconcat_len(filename, sizeof(filename),
    2510    33143822 :                       BBP_physical(b->theap->parentid),
    2511     3706582 :                       b->ttype == TYPE_str ? b->twidth == 1 ? ".tail1" : b->twidth == 2 ? ".tail2" :
    2512             : #if SIZEOF_VAR_T == 8
    2513      122383 :                       b->twidth == 4 ? ".tail4" :
    2514             : #endif
    2515             :                       ".tail" : ".tail",
    2516             :                       NULL);
    2517    33142363 :         assert(strcmp(b->theap->filename, filename) == 0);
    2518    33142363 :         if (b->tvheap) {
    2519     3824404 :                 strconcat_len(filename, sizeof(filename),
    2520     3824404 :                               BBP_physical(b->tvheap->parentid),
    2521             :                               ".theap",
    2522             :                               NULL);
    2523     3824384 :                 assert(strcmp(b->tvheap->filename, filename) == 0);
    2524             :         }
    2525             : 
    2526             :         /* void and str imply varsized */
    2527    33142343 :         if (b->ttype == TYPE_void ||
    2528    30120179 :             ATOMstorage(b->ttype) == TYPE_str)
    2529     6729858 :                 assert(b->tvarsized);
    2530             :         /* other "known" types are not varsized */
    2531    33142343 :         if (ATOMstorage(b->ttype) > TYPE_void &&
    2532             :             ATOMstorage(b->ttype) < TYPE_str)
    2533    26411695 :                 assert(!b->tvarsized);
    2534             :         /* shift and width have a particular relationship */
    2535    33142343 :         if (ATOMstorage(b->ttype) == TYPE_str)
    2536     3707427 :                 assert(b->twidth >= 1 && b->twidth <= ATOMsize(b->ttype));
    2537             :         else
    2538    29434916 :                 assert(b->twidth == ATOMsize(b->ttype));
    2539    33142343 :         assert(b->tseqbase <= oid_nil);
    2540             :         /* only oid/void columns can be dense */
    2541    33142343 :         assert(is_oid_nil(b->tseqbase) || b->ttype == TYPE_oid || b->ttype == TYPE_void);
    2542             :         /* a column cannot both have and not have NILs */
    2543    33142343 :         assert(!b->tnil || !b->tnonil);
    2544    33142343 :         if (b->ttype == TYPE_void) {
    2545     3023532 :                 assert(b->tshift == 0);
    2546     3023532 :                 assert(b->twidth == 0);
    2547     3023532 :                 assert(b->tsorted);
    2548     3023532 :                 if (is_oid_nil(b->tseqbase)) {
    2549         688 :                         assert(b->tvheap == NULL);
    2550         688 :                         assert(BATcount(b) == 0 || !b->tnonil);
    2551         688 :                         assert(BATcount(b) <= 1 || !b->tkey);
    2552         688 :                         assert(b->trevsorted);
    2553             :                 } else {
    2554     3022844 :                         if (b->tvheap != NULL) {
    2555             :                                 /* candidate list with exceptions */
    2556      113943 :                                 assert(b->batRole == TRANSIENT);
    2557      113943 :                                 assert(b->tvheap->free <= b->tvheap->size);
    2558      113943 :                                 assert(b->tvheap->free >= sizeof(ccand_t));
    2559      113943 :                                 assert((negoid_cand(b) && ccand_free(b) % SIZEOF_OID == 0) || mask_cand(b));
    2560      113943 :                                 if (negoid_cand(b) && ccand_free(b) > 0) {
    2561             :                                         const oid *oids = (const oid *) ccand_first(b);
    2562           2 :                                         q = ccand_free(b) / SIZEOF_OID;
    2563             :                                         assert(oids != NULL);
    2564           2 :                                         assert(b->tseqbase + BATcount(b) + q <= GDK_oid_max);
    2565             :                                         /* exceptions within range */
    2566           2 :                                         assert(oids[0] >= b->tseqbase);
    2567           2 :                                         assert(oids[q - 1] < b->tseqbase + BATcount(b) + q);
    2568             :                                         /* exceptions sorted */
    2569           2 :                                         for (p = 1; p < q; p++)
    2570           0 :                                                 assert(oids[p - 1] < oids[p]);
    2571             :                                 }
    2572             :                         }
    2573     3022844 :                         assert(b->tseqbase + b->batCount <= GDK_oid_max);
    2574     3022844 :                         assert(BATcount(b) == 0 || !b->tnil);
    2575     3022844 :                         assert(BATcount(b) <= 1 || !b->trevsorted);
    2576     3022844 :                         assert(b->tkey);
    2577     3022844 :                         assert(b->tnonil);
    2578             :                 }
    2579     3023532 :                 MT_lock_unset(&b->theaplock);
    2580    24832811 :                 return;
    2581             :         }
    2582             : 
    2583    30118811 :         BATiter bi  = bat_iterator_nolock(b);
    2584             : 
    2585    30117566 :         if (BATtdense(b)) {
    2586     1035105 :                 assert(b->tseqbase + b->batCount <= GDK_oid_max);
    2587     1035105 :                 assert(b->ttype == TYPE_oid);
    2588     1035105 :                 assert(b->tsorted);
    2589     1035105 :                 assert(b->tkey);
    2590     1035105 :                 assert(b->tnonil);
    2591     1035105 :                 if ((q = b->batCount) != 0) {
    2592      267495 :                         const oid *o = (const oid *) Tloc(b, 0);
    2593      267495 :                         assert(*o == b->tseqbase);
    2594    32157296 :                         for (p = 1; p < q; p++)
    2595    31889801 :                                 assert(o[p - 1] + 1 == o[p]);
    2596             :                 }
    2597     1035105 :                 MT_lock_unset(&b->theaplock);
    2598     1035105 :                 return;
    2599             :         }
    2600    29082461 :         assert(1 << b->tshift == b->twidth);
    2601             :         /* only linear atoms can be sorted */
    2602    29082461 :         assert(!b->tsorted || ATOMlinear(b->ttype));
    2603    29082461 :         assert(!b->trevsorted || ATOMlinear(b->ttype));
    2604    29082461 :         if (ATOMlinear(b->ttype)) {
    2605    29078889 :                 assert(b->tnosorted == 0 ||
    2606             :                        (b->tnosorted > 0 &&
    2607             :                         b->tnosorted < b->batCount));
    2608    29078889 :                 assert(!b->tsorted || b->tnosorted == 0);
    2609    29078889 :                 if (!b->tsorted &&
    2610    11091829 :                     b->tnosorted > 0 &&
    2611    11091829 :                     b->tnosorted < b->batCount)
    2612    11091829 :                         assert(cmpf(BUNtail(bi, b->tnosorted - 1),
    2613             :                                     BUNtail(bi, b->tnosorted)) > 0);
    2614    29078884 :                 assert(b->tnorevsorted == 0 ||
    2615             :                        (b->tnorevsorted > 0 &&
    2616             :                         b->tnorevsorted < b->batCount));
    2617    29078884 :                 assert(!b->trevsorted || b->tnorevsorted == 0);
    2618    29078884 :                 if (!b->trevsorted &&
    2619     7889466 :                     b->tnorevsorted > 0 &&
    2620     7889466 :                     b->tnorevsorted < b->batCount)
    2621     7889621 :                         assert(cmpf(BUNtail(bi, b->tnorevsorted - 1),
    2622             :                                     BUNtail(bi, b->tnorevsorted)) < 0);
    2623             :         }
    2624             :         /* if tkey property set, both tnokey values must be 0 */
    2625    29082092 :         assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0));
    2626    29082092 :         if (!b->tkey && (b->tnokey[0] != 0 || b->tnokey[1] != 0)) {
    2627             :                 /* if tkey not set and tnokey indicates a proof of
    2628             :                  * non-key-ness, make sure the tnokey values are in
    2629             :                  * range and indeed provide a proof */
    2630      208981 :                 assert(b->tnokey[0] != b->tnokey[1]);
    2631      208981 :                 assert(b->tnokey[0] < b->batCount);
    2632      208981 :                 assert(b->tnokey[1] < b->batCount);
    2633      208981 :                 assert(cmpf(BUNtail(bi, b->tnokey[0]),
    2634             :                             BUNtail(bi, b->tnokey[1])) == 0);
    2635             :         }
    2636             :         /* var heaps must have sane sizes */
    2637    29082040 :         assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size);
    2638             : 
    2639    29082040 :         if (!b->tkey && !b->tsorted && !b->trevsorted &&
    2640    21632900 :             !b->tnonil && !b->tnil) {
    2641             :                 /* nothing more to prove */
    2642    20773516 :                 MT_lock_unset(&b->theaplock);
    2643    20773383 :                 return;
    2644             :         }
    2645             : 
    2646     8308524 :         PROPDEBUG { /* only do a scan if property checking is requested */
    2647             :                 const void *maxval = NULL;
    2648             :                 const void *minval = NULL;
    2649             :                 bool seenmax = false, seenmin = false;
    2650             :                 bool seennil = false;
    2651             : 
    2652     8306100 :                 if (b->tmaxpos != BUN_NONE) {
    2653      577381 :                         assert(b->tmaxpos < BATcount(b));
    2654      577381 :                         maxval = BUNtail(bi, b->tmaxpos);
    2655             :                 }
    2656     8306100 :                 if (b->tminpos != BUN_NONE) {
    2657      553211 :                         assert(b->tminpos < BATcount(b));
    2658      553211 :                         minval = BUNtail(bi, b->tminpos);
    2659             :                 }
    2660     8306100 :                 if (ATOMstorage(b->ttype) == TYPE_msk) {
    2661             :                         /* for now, don't do extra checks for bit mask */
    2662             :                         ;
    2663     8303543 :                 } else if (b->tsorted || b->trevsorted || !b->tkey) {
    2664             :                         /* if sorted (either way), or we don't have to
    2665             :                          * prove uniqueness, we can do a simple
    2666             :                          * scan */
    2667             :                         /* only call compare function if we have to */
    2668     8088470 :                         bool cmpprv = b->tsorted | b->trevsorted | b->tkey;
    2669     8088470 :                         bool cmpnil = b->tnonil | b->tnil;
    2670             : 
    2671  2477324012 :                         BATloop(b, p, q) {
    2672  2469437639 :                                 valp = BUNtail(bi, p);
    2673  2469437639 :                                 bool isnil = cmpf(valp, nilp) == 0;
    2674  2436013439 :                                 assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
    2675  2436013439 :                                 assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
    2676  2436013439 :                                 if (maxval && !isnil) {
    2677   222027358 :                                         cmp = cmpf(maxval, valp);
    2678   222026090 :                                         assert(cmp >= 0);
    2679   222026090 :                                         seenmax |= cmp == 0;
    2680             :                                 }
    2681  2436012171 :                                 if (minval && !isnil) {
    2682   185872868 :                                         cmp = cmpf(minval, valp);
    2683   185874360 :                                         assert(cmp <= 0);
    2684   185874360 :                                         seenmin |= cmp == 0;
    2685             :                                 }
    2686  2436013663 :                                 if (prev && cmpprv) {
    2687  1408784781 :                                         cmp = cmpf(prev, valp);
    2688  1442205464 :                                         assert(!b->tsorted || cmp <= 0);
    2689  1442205464 :                                         assert(!b->trevsorted || cmp >= 0);
    2690  1442205464 :                                         assert(!b->tkey || cmp != 0);
    2691             :                                 }
    2692  2469434346 :                                 if (cmpnil) {
    2693  2175808688 :                                         assert(!b->tnonil || !isnil);
    2694  2175808688 :                                         if (isnil) {
    2695             :                                                 /* we found a nil:
    2696             :                                                  * we're done checking
    2697             :                                                  * for them */
    2698             :                                                 seennil = true;
    2699             :                                                 cmpnil = 0;
    2700      265760 :                                                 if (!cmpprv && maxval == NULL && minval == NULL) {
    2701             :                                                         /* we were
    2702             :                                                          * only
    2703             :                                                          * checking
    2704             :                                                          * for nils,
    2705             :                                                          * so nothing
    2706             :                                                          * more to
    2707             :                                                          * do */
    2708             :                                                         break;
    2709             :                                                 }
    2710             :                                         }
    2711             :                                 }
    2712             :                                 prev = valp;
    2713             :                         }
    2714             :                 } else {        /* b->tkey && !b->tsorted && !b->trevsorted */
    2715             :                         /* we need to check for uniqueness the hard
    2716             :                          * way (i.e. using a hash table) */
    2717      215073 :                         const char *nme = BBP_physical(b->batCacheid);
    2718             :                         Hash *hs = NULL;
    2719             :                         BUN mask;
    2720             : 
    2721      215073 :                         if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
    2722           0 :                                 TRC_WARNING(BAT_, "Cannot allocate hash table\n");
    2723           0 :                                 goto abort_check;
    2724             :                         }
    2725      215047 :                         if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs->heaplink.filename) ||
    2726      215033 :                             snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs->heapbckt.filename)) {
    2727             :                                 /* cannot happen, see comment in gdk.h
    2728             :                                  * about sizes near definition of
    2729             :                                  * BBPINIT */
    2730           0 :                                 GDKfree(hs);
    2731           0 :                                 TRC_CRITICAL(BAT_, "Heap filename is too large\n");
    2732           0 :                                 goto abort_check;
    2733             :                         }
    2734      215083 :                         if (ATOMsize(b->ttype) == 1)
    2735             :                                 mask = (BUN) 1 << 8;
    2736      215083 :                         else if (ATOMsize(b->ttype) == 2)
    2737             :                                 mask = (BUN) 1 << 16;
    2738             :                         else
    2739      215079 :                                 mask = HASHmask(b->batCount);
    2740      215083 :                         if ((hs->heaplink.farmid = BBPselectfarm(
    2741      215062 :                                      TRANSIENT, b->ttype, hashheap)) < 0 ||
    2742      215062 :                             (hs->heapbckt.farmid = BBPselectfarm(
    2743      430152 :                                     TRANSIENT, b->ttype, hashheap)) < 0 ||
    2744      215062 :                             HASHnew(hs, b->ttype, BUNlast(b),
    2745             :                                     mask, BUN_NONE, false) != GDK_SUCCEED) {
    2746           1 :                                 GDKfree(hs);
    2747           0 :                                 TRC_WARNING(BAT_, "Cannot allocate hash table\n");
    2748           0 :                                 goto abort_check;
    2749             :                         }
    2750    51763384 :                         BATloop(b, p, q) {
    2751             :                                 BUN hb;
    2752             :                                 BUN prb;
    2753    51548538 :                                 valp = BUNtail(bi, p);
    2754    51548538 :                                 bool isnil = cmpf(valp, nilp) == 0;
    2755    51650701 :                                 assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
    2756    51650701 :                                 assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
    2757    51650701 :                                 if (maxval && !isnil) {
    2758     2073477 :                                         cmp = cmpf(maxval, valp);
    2759     2072281 :                                         assert(cmp >= 0);
    2760     2072281 :                                         seenmax |= cmp == 0;
    2761             :                                 }
    2762    51649505 :                                 if (minval && !isnil) {
    2763     3113693 :                                         cmp = cmpf(minval, valp);
    2764     3113431 :                                         assert(cmp <= 0);
    2765     3113431 :                                         seenmin |= cmp == 0;
    2766             :                                 }
    2767    51649243 :                                 prb = HASHprobe(hs, valp);
    2768    59275148 :                                 for (hb = HASHget(hs, prb);
    2769             :                                      hb != BUN_NONE;
    2770     7308119 :                                      hb = HASHgetlink(hs, hb))
    2771     7395661 :                                         if (cmpf(valp, BUNtail(bi, hb)) == 0)
    2772           0 :                                                 assert(!b->tkey);
    2773    51879487 :                                 HASHputlink(hs, p, HASHget(hs, prb));
    2774    51680050 :                                 HASHput(hs, prb, p);
    2775    51548313 :                                 assert(!b->tnonil || !isnil);
    2776    51548313 :                                 seennil |= isnil;
    2777             :                         }
    2778      214846 :                         HEAPfree(&hs->heaplink, true);
    2779      215093 :                         HEAPfree(&hs->heapbckt, true);
    2780      215094 :                         GDKfree(hs);
    2781             :                 }
    2782     8302828 :           abort_check:
    2783     8302828 :                 GDKclrerr();
    2784     8297690 :                 assert(maxval == NULL || seenmax);
    2785     8297690 :                 assert(minval == NULL || seenmin);
    2786     8297690 :                 assert(!b->tnil || seennil);
    2787             :         }
    2788     8300114 :         MT_lock_unset(&b->theaplock);
    2789             : }
    2790             : 

Generated by: LCOV version 1.14