LCOV - code coverage report
Current view: top level - gdk - gdk_bat.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 947 1235 76.7 %
Date: 2020-06-29 20:00:14 Functions: 33 36 91.7 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2020 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  * @a M. L. Kersten, P. Boncz, N. Nes
      11             :  * @* BAT Module
      12             :  * In this Chapter we describe the BAT implementation in more detail.
      13             :  * The routines mentioned are primarily meant to simplify the library
      14             :  * implementation.
      15             :  *
      16             :  * @+ BAT Construction
      17             :  * BATs are implemented in several blocks of memory, prepared for disk
      18             :  * storage and easy shipment over a network.
      19             :  *
      20             :  * The BAT starts with a descriptor, which indicates the required BAT
      21             :  * library version and the BAT administration details.  In particular,
      22             :  * it describes the binary relationship maintained and the location of
      23             :  * fields required for storage.
      24             :  *
      25             :  * The general layout of the BAT in this implementation is as follows.
      26             :  * Each BAT comes with a heap for the loc-size buns and, optionally,
      27             :  * with heaps to manage the variable-sized data items of both
      28             :  * dimensions.  The buns are assumed to be stored as loc-size objects.
      29             :  * This is essentially an array of structs to store the associations.
      30             :  * The size is determined at BAT creation time using an upper bound on
      31             :  * the number of elements to be accommodated.  In case of overflow,
      32             :  * its storage space is extended automatically.
      33             :  *
      34             :  * The capacity of a BAT places an upper limit on the number of BUNs
      35             :  * to be stored initially. The actual space set aside may be quite
      36             :  * large.  Moreover, the size is aligned to int boundaries to speed up
      37             :  * access and avoid some machine limitations.
      38             :  *
      39             :  * Initialization of the variable parts relies on type-specific routines
      40             :  * called atomHeap.
      41             :  */
      42             : #include "monetdb_config.h"
      43             : #include "gdk.h"
      44             : #include "gdk_private.h"
      45             : 
      46             : #ifdef ALIGN
      47             : #undef ALIGN
      48             : #endif
      49             : #define ALIGN(n,b)      ((b)?(b)*(1+(((n)-1)/(b))):n)
      50             : 
      51             : #define ATOMneedheap(tpe) (BATatoms[tpe].atomHeap != NULL)
      52             : 
      53             : static char *BATstring_t = "t";
      54             : 
      55             : #define default_ident(s)        ((s) == BATstring_t)
      56             : 
      57             : void
      58      689455 : BATinit_idents(BAT *bn)
      59             : {
      60      689455 :         bn->tident = BATstring_t;
      61      689455 : }
      62             : 
      63             : BAT *
      64     6785810 : BATcreatedesc(oid hseq, int tt, bool heapnames, role_t role)
      65             : {
      66     6785810 :         BAT *bn;
      67             : 
      68             :         /*
      69             :          * Alloc space for the BAT and its dependent records.
      70             :          */
      71     6785810 :         assert(tt >= 0);
      72             : 
      73     6785810 :         bn = GDKzalloc(sizeof(BAT));
      74             : 
      75     6779180 :         if (bn == NULL)
      76             :                 return NULL;
      77             : 
      78             :         /*
      79             :          * Fill in basic column info
      80             :          */
      81     6779180 :         bn->hseqbase = hseq;
      82             : 
      83     6779180 :         bn->ttype = tt;
      84     6779180 :         bn->tkey = false;
      85     6779180 :         bn->tnonil = true;
      86     6779180 :         bn->tnil = false;
      87     6779180 :         bn->tsorted = bn->trevsorted = ATOMlinear(tt);
      88     6779180 :         bn->tident = BATstring_t;
      89     6779180 :         bn->tseqbase = oid_nil;
      90     6779180 :         bn->tprops = NULL;
      91             : 
      92     6779180 :         bn->batRole = role;
      93     6779180 :         bn->batTransient = true;
      94             :         /*
      95             :          * add to BBP
      96             :          */
      97     6779180 :         if (BBPinsert(bn) == 0) {
      98           0 :                 GDKfree(bn);
      99           0 :                 return NULL;
     100             :         }
     101             :         /*
     102             :         * Default zero for order oid index
     103             :         */
     104     6783960 :         bn->torderidx = NULL;
     105             :         /*
     106             :          * fill in heap names, so HEAPallocs can resort to disk for
     107             :          * very large writes.
     108             :          */
     109     6783960 :         assert(bn->batCacheid > 0);
     110             : 
     111     6783960 :         const char *nme = BBP_physical(bn->batCacheid);
     112     6783960 :         strconcat_len(bn->theap.filename, sizeof(bn->theap.filename),
     113             :                       nme, ".tail", NULL);
     114     6782470 :         bn->theap.farmid = BBPselectfarm(role, bn->ttype, offheap);
     115     6783850 :         if (heapnames && ATOMneedheap(tt)) {
     116      667886 :                 if ((bn->tvheap = (Heap *) GDKzalloc(sizeof(Heap))) == NULL)
     117           0 :                         goto bailout;
     118      667897 :                 strconcat_len(bn->tvheap->filename,
     119             :                               sizeof(bn->tvheap->filename),
     120             :                               nme, ".theap", NULL);
     121      667811 :                 bn->tvheap->parentid = bn->batCacheid;
     122      667811 :                 bn->tvheap->farmid = BBPselectfarm(role, bn->ttype, varheap);
     123             :         }
     124     6783760 :         char name[MT_NAME_LEN];
     125     6783760 :         snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
     126     6783760 :         MT_lock_init(&bn->batIdxLock, name);
     127     6783560 :         bn->batDirtydesc = true;
     128     6783560 :         return bn;
     129           0 :       bailout:
     130           0 :         BBPclear(bn->batCacheid);
     131           0 :         if (tt)
     132           0 :                 HEAPfree(&bn->theap, true);
     133           0 :         if (bn->tvheap) {
     134           0 :                 HEAPfree(bn->tvheap, true);
     135           0 :                 GDKfree(bn->tvheap);
     136             :         }
     137           0 :         GDKfree(bn);
     138           0 :         return NULL;
     139             : }
     140             : 
     141             : uint8_t
     142    12236200 : ATOMelmshift(int sz)
     143             : {
     144    12236200 :         uint8_t sh;
     145    12236200 :         int i = sz >> 1;
     146             : 
     147    28672900 :         for (sh = 0; i != 0; sh++) {
     148    16436700 :                 i >>= 1;
     149             :         }
     150    12236200 :         return sh;
     151             : }
     152             : 
     153             : 
     154             : void
     155     6095630 : BATsetdims(BAT *b)
     156             : {
     157     6095630 :         b->twidth = b->ttype == TYPE_str ? 1 : ATOMsize(b->ttype);
     158     6095630 :         b->tshift = ATOMelmshift(Tsize(b));
     159     6095630 :         assert_shift_width(b->tshift, b->twidth);
     160     6095630 :         b->tvarsized = b->ttype == TYPE_void || BATatoms[b->ttype].atomPut != NULL;
     161     6095630 : }
     162             : 
     163             : /*
     164             :  * @- BAT allocation
     165             :  * Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).
     166             :  * We now initialize new BATs with their heapname such that the
     167             :  * modified HEAPalloc/HEAPextend primitives can possibly use memory
     168             :  * mapped files as temporary heap storage.
     169             :  *
     170             :  * In case of huge bats, we want HEAPalloc to write a file to disk,
     171             :  * and memory map it. To make this possible, we must provide it with
     172             :  * filenames.
     173             :  */
     174             : BAT *
     175     6096150 : COLnew(oid hseq, int tt, BUN cap, role_t role)
     176             : {
     177     6096150 :         BAT *bn;
     178             : 
     179     6096150 :         assert(cap <= BUN_MAX);
     180     6096150 :         assert(hseq <= oid_nil);
     181     6096150 :         assert(tt != TYPE_bat);
     182     6096150 :         ERRORcheck((tt < 0) || (tt > GDKatomcnt), "tt error\n", NULL);
     183             : 
     184             :         /* round up to multiple of BATTINY */
     185     6096150 :         if (cap < BUN_MAX - BATTINY)
     186     6096130 :                 cap = (cap + BATTINY - 1) & ~(BATTINY - 1);
     187     6096150 :         if (cap < BATTINY)
     188             :                 cap = BATTINY;
     189             :         /* limit the size */
     190             :         if (cap > BUN_MAX)
     191             :                 cap = BUN_MAX;
     192             : 
     193     6096150 :         bn = BATcreatedesc(hseq, tt, tt != TYPE_void, role);
     194     6095000 :         if (bn == NULL)
     195             :                 return NULL;
     196             : 
     197     6095000 :         BATsetdims(bn);
     198     6095900 :         bn->batCapacity = cap;
     199             : 
     200             :         /* alloc the main heaps */
     201     6095900 :         if (tt && HEAPalloc(&bn->theap, cap, bn->twidth) != GDK_SUCCEED) {
     202           0 :                 goto bailout;
     203             :         }
     204             : 
     205     6093560 :         if (bn->tvheap && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
     206           0 :                 GDKfree(bn->tvheap);
     207           0 :                 goto bailout;
     208             :         }
     209     6094390 :         DELTAinit(bn);
     210     6094980 :         if (BBPcacheit(bn, true) != GDK_SUCCEED) {
     211           0 :                 GDKfree(bn->tvheap);
     212           0 :                 goto bailout;
     213             :         }
     214     6095620 :         TRC_DEBUG(ALGO, "-> " ALGOBATFMT "\n", ALGOBATPAR(bn));
     215             :         return bn;
     216           0 :   bailout:
     217           0 :         BBPclear(bn->batCacheid);
     218           0 :         HEAPfree(&bn->theap, true);
     219           0 :         MT_lock_destroy(&bn->batIdxLock);
     220           0 :         GDKfree(bn);
     221           0 :         return NULL;
     222             : }
     223             : 
     224             : BAT *
     225     1838770 : BATdense(oid hseq, oid tseq, BUN cnt)
     226             : {
     227     1838770 :         BAT *bn;
     228             : 
     229     1838770 :         bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
     230     1838620 :         if (bn != NULL) {
     231     1838620 :                 BATtseqbase(bn, tseq);
     232     1838540 :                 BATsetcount(bn, cnt);
     233     1838470 :                 TRC_DEBUG(ALGO, OIDFMT "," OIDFMT "," BUNFMT
     234             :                           "-> " ALGOBATFMT "\n", hseq, tseq, cnt,
     235             :                           ALGOBATPAR(bn));
     236             :         }
     237     1838470 :         return bn;
     238             : }
     239             : 
     240             : BAT *
     241          11 : BATattach(int tt, const char *heapfile, role_t role)
     242             : {
     243          11 :         BAT *bn;
     244          11 :         char *p;
     245          11 :         size_t m;
     246          11 :         FILE *f;
     247             : 
     248          11 :         ERRORcheck(tt <= 0 , "bad tail type (<=0)\n", NULL);
     249          11 :         ERRORcheck(ATOMvarsized(tt) && ATOMstorage(tt) != TYPE_str, "bad tail type (varsized and not str)\n", NULL);
     250          11 :         ERRORcheck(heapfile == NULL, "bad heapfile name\n", NULL);
     251             : 
     252          11 :         if ((f = fopen(heapfile, "rb")) == NULL) {
     253           0 :                 GDKsyserror("BATattach: cannot open %s\n", heapfile);
     254           0 :                 return NULL;
     255             :         }
     256          11 :         if (ATOMstorage(tt) == TYPE_str) {
     257           0 :                 size_t n;
     258           0 :                 char *s;
     259           0 :                 int c, u;
     260             : 
     261           0 :                 if ((bn = COLnew(0, tt, 0, role)) == NULL) {
     262           0 :                         fclose(f);
     263           0 :                         return NULL;
     264             :                 }
     265           0 :                 m = 4096;
     266           0 :                 n = 0;
     267           0 :                 u = 0;
     268           0 :                 s = p = GDKmalloc(m);
     269           0 :                 if (p == NULL) {
     270           0 :                         fclose(f);
     271           0 :                         BBPreclaim(bn);
     272           0 :                         return NULL;
     273             :                 }
     274           0 :                 while ((c = getc(f)) != EOF) {
     275           0 :                         if (n == m) {
     276           0 :                                 m += 4096;
     277           0 :                                 s = GDKrealloc(p, m);
     278           0 :                                 if (s == NULL) {
     279           0 :                                         GDKfree(p);
     280           0 :                                         BBPreclaim(bn);
     281           0 :                                         fclose(f);
     282           0 :                                         return NULL;
     283             :                                 }
     284           0 :                                 p = s;
     285           0 :                                 s = p + n;
     286             :                         }
     287           0 :                         if (c == '\n' && n > 0 && s[-1] == '\r') {
     288             :                                 /* deal with CR-LF sequence */
     289           0 :                                 s[-1] = c;
     290             :                         } else {
     291           0 :                                 *s++ = c;
     292           0 :                                 n++;
     293             :                         }
     294           0 :                         if (u) {
     295           0 :                                 if ((c & 0xC0) == 0x80)
     296           0 :                                         u--;
     297             :                                 else
     298           0 :                                         goto notutf8;
     299           0 :                         } else if ((c & 0xF8) == 0xF0)
     300             :                                 u = 3;
     301           0 :                         else if ((c & 0xF0) == 0xE0)
     302             :                                 u = 2;
     303           0 :                         else if ((c & 0xE0) == 0xC0)
     304             :                                 u = 1;
     305           0 :                         else if ((c & 0x80) == 0x80)
     306           0 :                                 goto notutf8;
     307           0 :                         else if (c == 0) {
     308           0 :                                 if (BUNappend(bn, p, false) != GDK_SUCCEED) {
     309           0 :                                         BBPreclaim(bn);
     310           0 :                                         fclose(f);
     311           0 :                                         GDKfree(p);
     312           0 :                                         return NULL;
     313             :                                 }
     314             :                                 s = p;
     315             :                                 n = 0;
     316             :                         }
     317             :                 }
     318           0 :                 fclose(f);
     319           0 :                 GDKfree(p);
     320           0 :                 if (n > 0) {
     321           0 :                         BBPreclaim(bn);
     322           0 :                         GDKerror("last string is not null-terminated\n");
     323           0 :                         return NULL;
     324             :                 }
     325             :         } else {
     326          11 :                 struct stat st;
     327          11 :                 unsigned int atomsize;
     328          11 :                 BUN cap;
     329          11 :                 lng n;
     330             : 
     331          11 :                 if (fstat(fileno(f), &st) < 0) {
     332           0 :                         GDKsyserror("BATattach: cannot stat %s\n", heapfile);
     333           0 :                         fclose(f);
     334           0 :                         return NULL;
     335             :                 }
     336          11 :                 atomsize = ATOMsize(tt);
     337          11 :                 if (st.st_size % atomsize != 0) {
     338           0 :                         fclose(f);
     339           0 :                         GDKerror("heapfile size not integral number of atoms\n");
     340           0 :                         return NULL;
     341             :                 }
     342          11 :                 if ((size_t) (st.st_size / atomsize) > (size_t) BUN_MAX) {
     343           0 :                         fclose(f);
     344           0 :                         GDKerror("heapfile too large\n");
     345           0 :                         return NULL;
     346             :                 }
     347          11 :                 cap = (BUN) (st.st_size / atomsize);
     348          11 :                 bn = COLnew(0, tt, cap, role);
     349          11 :                 if (bn == NULL) {
     350           0 :                         fclose(f);
     351           0 :                         return NULL;
     352             :                 }
     353          11 :                 p = Tloc(bn, 0);
     354          11 :                 n = (lng) st.st_size;
     355          33 :                 while (n > 0 && (m = fread(p, 1, (size_t) MIN(1024*1024, n), f)) > 0) {
     356          11 :                         p += m;
     357          11 :                         n -= m;
     358             :                 }
     359          11 :                 fclose(f);
     360          11 :                 if (n > 0) {
     361           0 :                         GDKerror("couldn't read the complete file\n");
     362           0 :                         BBPreclaim(bn);
     363           0 :                         return NULL;
     364             :                 }
     365          11 :                 BATsetcount(bn, cap);
     366          11 :                 bn->tnonil = cap == 0;
     367          11 :                 bn->tnil = false;
     368          11 :                 bn->tseqbase = oid_nil;
     369          11 :                 if (cap > 1) {
     370          11 :                         bn->tsorted = false;
     371          11 :                         bn->trevsorted = false;
     372          11 :                         bn->tkey = false;
     373             :                 } else {
     374           0 :                         bn->tsorted = true;
     375           0 :                         bn->trevsorted = true;
     376           0 :                         bn->tkey = true;
     377             :                 }
     378             :         }
     379             :         return bn;
     380             : 
     381           0 :   notutf8:
     382           0 :         fclose(f);
     383           0 :         BBPreclaim(bn);
     384           0 :         GDKfree(p);
     385           0 :         GDKerror("input is not UTF-8\n");
     386           0 :         return NULL;
     387             : }
     388             : 
     389             : /*
     390             :  * If the BAT runs out of storage for BUNS it will reallocate space.
     391             :  * For memory mapped BATs we simply extend the administration after
     392             :  * having an assurance that the BAT still can be safely stored away.
     393             :  */
     394             : BUN
     395       43133 : BATgrows(BAT *b)
     396             : {
     397       43133 :         BUN oldcap, newcap;
     398             : 
     399       43133 :         BATcheck(b, 0);
     400             : 
     401       43133 :         newcap = oldcap = BATcapacity(b);
     402       43133 :         if (newcap < BATTINY)
     403             :                 newcap = 2 * BATTINY;
     404       43131 :         else if (newcap < 10 * BATTINY)
     405       39337 :                 newcap = 4 * newcap;
     406        3794 :         else if (newcap < 50 * BATTINY)
     407        3029 :                 newcap = 2 * newcap;
     408         765 :         else if ((double) newcap * BATMARGIN <= (double) BUN_MAX)
     409         765 :                 newcap = (BUN) ((double) newcap * BATMARGIN);
     410             :         else
     411             :                 newcap = BUN_MAX;
     412       43133 :         if (newcap == oldcap) {
     413           0 :                 if (newcap <= BUN_MAX - 10)
     414           0 :                         newcap += 10;
     415             :                 else
     416             :                         newcap = BUN_MAX;
     417             :         }
     418             :         return newcap;
     419             : }
     420             : 
     421             : /*
     422             :  * The routine should ensure that the BAT keeps its location in the
     423             :  * BAT buffer.
     424             :  *
     425             :  * Overflows in the other heaps are dealt with in the atom routines.
     426             :  * Here we merely copy their references into the new administration
     427             :  * space.
     428             :  */
     429             : gdk_return
     430     3456720 : BATextend(BAT *b, BUN newcap)
     431             : {
     432     3456720 :         size_t theap_size = newcap;
     433             : 
     434     3456720 :         assert(newcap <= BUN_MAX);
     435     3456720 :         BATcheck(b, GDK_FAIL);
     436             :         /*
     437             :          * The main issue is to properly predict the new BAT size and avoid
     438             :          * storage overflow. The assumption taken is that capacity
     439             :          * overflow is rare. It is changed only when the position of
     440             :          * the next available BUN surpasses the free area marker.  Be
     441             :          * aware that the newcap should be greater than the old value,
     442             :          * otherwise you may easily corrupt the administration of
     443             :          * malloc.
     444             :          */
     445     3456720 :         if (newcap <= BATcapacity(b)) {
     446             :                 return GDK_SUCCEED;
     447             :         }
     448             : 
     449     3456680 :         b->batCapacity = newcap;
     450             : 
     451     3456680 :         theap_size *= Tsize(b);
     452     3456680 :         if (b->theap.base) {
     453     3456680 :                 TRC_DEBUG(HEAP, "HEAPextend in BATextend %s %zu %zu\n",
     454             :                           b->theap.filename, b->theap.size, theap_size);
     455     3456680 :                 if (HEAPextend(&b->theap, theap_size, b->batRestricted == BAT_READ) != GDK_SUCCEED)
     456           0 :                         return GDK_FAIL;
     457             :         }
     458             :         return GDK_SUCCEED;
     459             : }
     460             : 
     461             : 
     462             : 
     463             : /*
     464             :  * @+ BAT destruction
     465             :  * BATclear quickly removes all elements from a BAT. It must respect
     466             :  * the transaction rules; so stable elements must be moved to the
     467             :  * "deleted" section of the BAT (they cannot be fully deleted
     468             :  * yet). For the elements that really disappear, we must free
     469             :  * heapspace and unfix the atoms if they have fix/unfix handles. As an
     470             :  * optimization, in the case of no stable elements, we quickly empty
     471             :  * the heaps by copying a standard small empty image over them.
     472             :  */
     473             : gdk_return
     474      281621 : BATclear(BAT *b, bool force)
     475             : {
     476      281621 :         BUN p, q;
     477             : 
     478      281621 :         BATcheck(b, GDK_FAIL);
     479             : 
     480      281621 :         if (!force && b->batInserted > 0) {
     481           0 :                 GDKerror("cannot clear committed BAT\n");
     482           0 :                 return GDK_FAIL;
     483             :         }
     484             : 
     485             :         /* kill all search accelerators */
     486      281621 :         HASHdestroy(b);
     487      281621 :         IMPSdestroy(b);
     488      281621 :         OIDXdestroy(b);
     489      281621 :         PROPdestroy(b);
     490             : 
     491             :         /* we must dispose of all inserted atoms */
     492      281621 :         if (force && BATatoms[b->ttype].atomDel == NULL) {
     493      279999 :                 assert(b->tvheap == NULL || b->tvheap->parentid == b->batCacheid);
     494             :                 /* no stable elements: we do a quick heap clean */
     495             :                 /* need to clean heap which keeps data even though the
     496             :                    BUNs got removed. This means reinitialize when
     497             :                    free > 0
     498             :                 */
     499      279999 :                 if (b->tvheap && b->tvheap->free > 0) {
     500       88645 :                         Heap th;
     501             : 
     502       88645 :                         th = (Heap) {
     503       88645 :                                 .farmid = b->tvheap->farmid,
     504             :                         };
     505       88645 :                         strcpy_len(th.filename, b->tvheap->filename, sizeof(th.filename));
     506       88645 :                         if (ATOMheap(b->ttype, &th, 0) != GDK_SUCCEED)
     507           0 :                                 return GDK_FAIL;
     508       88645 :                         th.parentid = b->tvheap->parentid;
     509       88645 :                         th.dirty = true;
     510       88645 :                         HEAPfree(b->tvheap, false);
     511       88645 :                         *b->tvheap = th;
     512             :                 }
     513             :         } else {
     514             :                 /* do heap-delete of all inserted atoms */
     515        1622 :                 void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel;
     516             : 
     517             :                 /* TYPE_str has no del method, so we shouldn't get here */
     518        1622 :                 assert(tatmdel == NULL || b->twidth == sizeof(var_t));
     519        1622 :                 if (tatmdel) {
     520           1 :                         BATiter bi = bat_iterator(b);
     521             : 
     522           2 :                         for (p = b->batInserted, q = BUNlast(b); p < q; p++)
     523           1 :                                 (*tatmdel)(b->tvheap, (var_t*) BUNtloc(bi,p));
     524           1 :                         b->tvheap->dirty = true;
     525             :                 }
     526             :         }
     527             : 
     528      281621 :         if (force)
     529      280000 :                 b->batInserted = 0;
     530      281621 :         BATsetcount(b,0);
     531      281621 :         BAThseqbase(b, 0);
     532      281621 :         BATtseqbase(b, ATOMtype(b->ttype) == TYPE_oid ? 0 : oid_nil);
     533      281621 :         b->batDirtydesc = true;
     534      281621 :         b->theap.dirty = true;
     535      281621 :         BATsettrivprop(b);
     536      281621 :         b->tnosorted = b->tnorevsorted = 0;
     537      281621 :         b->tnokey[0] = b->tnokey[1] = 0;
     538      281621 :         return GDK_SUCCEED;
     539             : }
     540             : 
     541             : /* free a cached BAT; leave the bat descriptor cached */
     542             : void
     543     1172940 : BATfree(BAT *b)
     544             : {
     545     1172940 :         if (b == NULL)
     546             :                 return;
     547             : 
     548             :         /* deallocate all memory for a bat */
     549     1172940 :         assert(b->batCacheid > 0);
     550     1172940 :         if (b->tident && !default_ident(b->tident))
     551           8 :                 GDKfree(b->tident);
     552     1172940 :         b->tident = BATstring_t;
     553     1172940 :         PROPdestroy(b);
     554     1172900 :         HASHfree(b);
     555     1172730 :         IMPSfree(b);
     556     1172670 :         OIDXfree(b);
     557     1172650 :         if (b->ttype)
     558     1171780 :                 HEAPfree(&b->theap, false);
     559             :         else
     560         867 :                 assert(!b->theap.base);
     561     1172750 :         if (b->tvheap) {
     562       87211 :                 assert(b->tvheap->parentid == b->batCacheid);
     563       87211 :                 HEAPfree(b->tvheap, false);
     564             :         }
     565             : }
     566             : 
     567             : /* free a cached BAT descriptor
                     :  *
                     :  * Releases the descriptor memory itself: a non-default tident
                     :  * string, the tvheap Heap structure (with GDKfree, i.e. the
                     :  * structure only; no HEAPfree is done here), the properties
                     :  * (PROPdestroy), the batIdxLock, and finally b.  b is
                     :  * dereferenced unconditionally, so unlike BATfree it must not be
                     :  * NULL, and it must not be used after this call. */
     568             : void
     569     6804230 : BATdestroy(BAT *b)
     570             : {
     571     6804230 :         if (b->tident && !default_ident(b->tident))
     572         682 :                 GDKfree(b->tident);
     573     6804570 :         b->tident = BATstring_t;
     574     6804570 :         if (b->tvheap)
     575      679814 :                 GDKfree(b->tvheap);
     576     6804630 :         PROPdestroy(b);
     577     6804150 :         MT_lock_destroy(&b->batIdxLock);
     578     6803210 :         GDKfree(b);
     579     6808130 : }
     580             : 
     581             : /*
     582             :  * @+ BAT copying
     583             :  *
     584             :  * BAT copying is an often used operation. So it deserves attention.
     585             :  * When making a copy of a BAT, the following aspects are of
     586             :  * importance:
     587             :  *
     588             :  * - the requested column type. The purpose of the copy may be to
     589             :  *   slightly change this type (e.g. void <-> oid). We may also
     590             :  *   remap between types as long as they share the same
     591             :  *   ATOMstorage(type), i.e. the types have the same physical
     592             :  *   implementation. We may even want to allow 'dirty' tricks such as
     593             :  *   viewing a flt-column suddenly as int.
     594             :  *
     595             :  *   To allow such changes, the desired column type is a
     596             :  *   parameter of COLcopy.
     597             :  *
     598             :  * - access mode. If we want a read-only copy of a read-only BAT, a
     599             :  *   VIEW may do (in this case, the user may be after just an
     600             :  *   independent BAT header and id). This is indicated by the
     601             :  *   parameter (writable = FALSE).
     602             :  *
     603             :  *   In other cases, we really want an independent physical copy
     604             :  *   (writable = TRUE).  Changing the mode to BAT_WRITE will be a
     605             :  *   zero-cost operation if the BAT was copied with (writable = TRUE).
     606             :  *
     607             :  * In GDK, the result is a BAT that is BAT_WRITE iff (writable ==
     608             :  * TRUE).
     609             :  *
     610             :  * When a VIEW is returned instead of a physical copy, the result is
     611             :  * a logical view on the original, which ensures that the original
     612             :  * cannot be modified or destroyed (which could affect the shared heaps).
     613             :  */
     614             : static void
     615      898696 : heapmove(Heap *dst, Heap *src) /* replace the contents of *dst by those of *src */
     616             : {
     617      898696 :         HEAPfree(dst, false); /* first release what dst currently holds */
     618      898697 :         *dst = *src; /* plain struct copy: *src is left untouched, so both now hold the same field values */
     619       53747 : }
     620             : 
     621             : static bool
     622         243 : wrongtype(int t1, int t2)
     623             : {
     624             :         /* check if types are compatible. be extremely forgiving
                     :          *
                     :          * Returns true (= incompatible) only when t1 is not
                     :          * TYPE_void, the two types still differ after mapping both
                     :          * through ATOMtype(ATOMstorage(.)), and at least one of the
                     :          * following holds: either type is variable-sized, their
                     :          * ATOMsize differs, or either type has an atomFix function
                     :          * set in BATatoms.  In every other case, including
                     :          * t1 == TYPE_void, the result is false. */
     625         243 :         if (t1 != TYPE_void) {
     626         243 :                 t1 = ATOMtype(ATOMstorage(t1));
     627         243 :                 t2 = ATOMtype(ATOMstorage(t2));
     628         243 :                 if (t1 != t2) {
     629         208 :                         if (ATOMvarsized(t1) ||
     630         208 :                             ATOMvarsized(t2) ||
     631         208 :                             ATOMsize(t1) != ATOMsize(t2) ||
     632         208 :                             BATatoms[t1].atomFix ||
     633         208 :                             BATatoms[t2].atomFix)
     634           0 :                                 return true;
     635             :                 }
     636             :         }
     637             :         return false;
     638             : }
     639             : 
     640             : /*
     641             :  * There are four main implementation cases:
     642             :  * (1) we are allowed to return a view (zero effort),
     643             :  * (2) the result is void,void (zero effort),
     644             :  * (3) we can copy the heaps (memcopy, or even VM page sharing)
     645             :  * (4) we must insert BUN-by-BUN into the result (fallback)
     646             :  * The latter case is still optimized for the case that the result
     647             :  * is bat[void,T] for a simple fixed-size type T. In that case we
     648             :  * do inline array[T] inserts.
                     :  *
                     :  * COLcopy(b, tt, writable, role) returns a BAT holding the tail
                     :  * values of b with tail type tt, created for the given role.  If b
                     :  * is a void column and writable is false, tt is overridden to
                     :  * TYPE_void.  The result is marked BAT_READ when writable is false.
                     :  *
                     :  * NULL is returned when the BATcheck on b fails, when tt differs
                     :  * from b->ttype and is rejected by wrongtype() (an error is
                     :  * reported through GDKerror), or when creating or filling the
                     :  * result fails; in the latter cases a partially built result is
                     :  * handed to BBPreclaim.
                     :  *
                     :  * After the data has been copied, the sortedness, key and nil
                     :  * properties are taken over from b as far as the relation between
                     :  * tt and b->ttype allows, and are reset otherwise.
     649             :  */
     650             : /* TODO make it simpler, ie copy per column */
     651             : BAT *
     652      857955 : COLcopy(BAT *b, int tt, bool writable, role_t role)
     653             : {
     654      857955 :         BUN bunstocopy = BUN_NONE;
     655      857955 :         BUN cnt;
     656      857955 :         BAT *bn = NULL;
     657             : 
     658      857955 :         BATcheck(b, NULL);
     659      857955 :         assert(tt != TYPE_bat);
     660      857955 :         cnt = b->batCount;
     661             : 
     662             :         /* maybe a bit ugly to change the requested bat type?? */
     663      857955 :         if (b->ttype == TYPE_void && !writable)
     664        1923 :                 tt = TYPE_void;
     665             : 
     666      857955 :         if (tt != b->ttype && wrongtype(tt, b->ttype)) {
     667           0 :                 GDKerror("wrong tail-type requested\n");
     668           0 :                 return NULL;
     669             :         }
     670             : 
     671             :         /* first try case (1); create a view, possibly with different
     672             :          * atom-types */
     673      857955 :         if (role == b->batRole &&
     674       43600 :             b->batRestricted == BAT_READ &&
     675       24947 :             (!VIEWtparent(b) ||
     676       24947 :              BBP_cache(VIEWtparent(b))->batRestricted == BAT_READ) &&
     677             :             !writable) {
     678        9524 :                 bn = VIEWcreate(b->hseqbase, b);
     679        9524 :                 if (bn == NULL)
     680             :                         return NULL;
     681        9524 :                 if (tt != bn->ttype) {
     682          20 :                         bn->ttype = tt;
     683          20 :                         bn->tvarsized = ATOMvarsized(tt);
     684          20 :                         bn->tseqbase = ATOMtype(tt) == TYPE_oid ? b->tseqbase : oid_nil;
     685             :                 }
     686             :         } else {
     687             :                 /* check whether we need case (4); BUN-by-BUN copy (by
     688             :                  * setting bunstocopy != BUN_NONE) */
     689      848431 :                 if (ATOMsize(tt) != ATOMsize(b->ttype)) {
     690             :                         /* oops, void materialization */
     691             :                         bunstocopy = cnt;
     692      848223 :                 } else if (BATatoms[tt].atomFix) {
     693             :                         /* oops, we need to fix/unfix atoms */
     694             :                         bunstocopy = cnt;
     695      848223 :                 } else if (isVIEW(b)) {
     696             :                         /* extra checks needed for views */
     697        2840 :                         bat tp = VIEWtparent(b);
     698             : 
     699        2840 :                         if (tp != 0 && BATcapacity(BBP_cache(tp)) > cnt + cnt)
     700             :                                 /* reduced slice view: do not copy too
     701             :                                  * much garbage */
     702        2429 :                                 bunstocopy = cnt;
     703             :                 }
     704             : 
     705      848431 :                 bn = COLnew(b->hseqbase, tt, MAX(1, bunstocopy == BUN_NONE ? 0 : bunstocopy), role); /* requested capacity is at least 1 */
     706      848431 :                 if (bn == NULL)
     707             :                         return NULL;
     708             : 
     709      848431 :                 if (bn->tvarsized && bn->ttype && bunstocopy == BUN_NONE) {
     710       53747 :                         bn->tshift = b->tshift;
     711       53747 :                         bn->twidth = b->twidth;
     712       53747 :                         if (HEAPextend(&bn->theap, BATcapacity(bn) << bn->tshift, true) != GDK_SUCCEED)
     713           0 :                                 goto bunins_failed;
     714             :                 }
     715             : 
     716      848431 :                 if (tt == TYPE_void) {
     717             :                         /* case (2): a void,void result => nothing to
     718             :                          * copy! */
     719         843 :                         bn->theap.free = 0;
     720      847588 :                 } else if (bunstocopy == BUN_NONE) {
     721             :                         /* case (3): just copy the heaps; if possible
     722             :                          * with copy-on-write VM support */
     723      844951 :                         Heap bthp, thp;
     724             : 
     725     1689900 :                         bthp = (Heap) {
     726      844951 :                                 .farmid = BBPselectfarm(role, b->ttype, offheap),
     727             :                         };
     728     1689900 :                         thp = (Heap) {
     729      844951 :                                 .farmid = BBPselectfarm(role, b->ttype, varheap),
     730             :                         };
     731      844951 :                         strconcat_len(bthp.filename, sizeof(bthp.filename),
     732      844951 :                                       BBP_physical(bn->batCacheid),
     733             :                                       ".tail", NULL);
     734      844951 :                         strconcat_len(thp.filename, sizeof(thp.filename),
     735      844951 :                                       BBP_physical(bn->batCacheid),
     736             :                                       ".theap", NULL);
     737      844951 :                         if ((b->ttype && HEAPcopy(&bthp, &b->theap) != GDK_SUCCEED) ||
     738      844949 :                             (bn->tvheap && HEAPcopy(&thp, b->tvheap) != GDK_SUCCEED)) {
     739           0 :                                 HEAPfree(&thp, true);
     740           0 :                                 HEAPfree(&bthp, true);
     741           0 :                                 BBPreclaim(bn);
     742           0 :                                 return NULL;
     743             :                         }
     744             :                         /* succeeded; replace dummy small heaps by the
     745             :                          * real ones */
     746      844949 :                         heapmove(&bn->theap, &bthp);
     747      844950 :                         thp.parentid = bn->batCacheid;
     748      844950 :                         if (bn->tvheap)
     749       53747 :                                 heapmove(bn->tvheap, &thp);
     750             : 
     751             :                         /* make sure we use the correct capacity */
     752      844950 :                         bn->batCapacity = (BUN) (bn->ttype ? bn->theap.size >> bn->tshift : 0);
     753        5274 :                 } else if (BATatoms[tt].atomFix || tt != TYPE_void || ATOMextern(tt)) {
     754             :                         /* case (4): one-by-one BUN insert (really slow)
                     :                          *
                     :                          * NOTE(review): tt == TYPE_void was already
                     :                          * handled by the first branch of this chain, so
                     :                          * the tt != TYPE_void test above is always true
                     :                          * here.  As a consequence the two "optimized"
                     :                          * case (4) branches that follow can never be
                     :                          * reached (the hit-count column of this listing
                     :                          * is empty for them), and every BUN-by-BUN copy
                     :                          * goes through this loop. */
     755        2637 :                         BUN p, q, r = 0;
     756        2637 :                         BATiter bi = bat_iterator(b);
     757             : 
     758      198462 :                         BATloop(b, p, q) {
     759      205301 :                                 const void *t = BUNtail(bi, p);
     760             : 
     761      391650 :                                 if (bunfastapp_nocheck(bn, r, t, Tsize(bn)) != GDK_SUCCEED)
     762           0 :                                         goto bunins_failed;
     763      195825 :                                 r++;
     764             :                         }
     765        2637 :                         bn->theap.dirty |= bunstocopy > 0;
     766             :                 } else if (tt != TYPE_void && b->ttype == TYPE_void) {
     767             :                         /* case (4): optimized for unary void
     768             :                          * materialization */
     769             :                         oid cur = b->tseqbase, *dst = (oid *) bn->theap.base;
     770             :                         oid inc = !is_oid_nil(cur);
     771             : 
     772             :                         bn->theap.free = bunstocopy * sizeof(oid);
     773             :                         bn->theap.dirty |= bunstocopy > 0;
     774             :                         while (bunstocopy--) {
     775             :                                 *dst++ = cur;
     776             :                                 cur += inc;
     777             :                         }
     778             :                 } else {
     779             :                         /* case (4): optimized for simple array copy */
     780             :                         bn->theap.free = bunstocopy * Tsize(bn);
     781             :                         bn->theap.dirty |= bunstocopy > 0;
     782             :                         memcpy(Tloc(bn, 0), Tloc(b, 0), bn->theap.free);
     783             :                 }
     784             :                 /* copy all properties (size+other) from the source bat */
     785      848430 :                 BATsetcount(bn, cnt);
     786             :         }
     787             :         /* set properties (note that types may have changed in the copy) */
     788     1715700 :         if (ATOMtype(tt) == ATOMtype(b->ttype)) {
     789      857919 :                 if (ATOMtype(tt) == TYPE_oid) {
     790       31508 :                         BATtseqbase(bn, b->tseqbase);
     791             :                 } else {
     792      826411 :                         BATtseqbase(bn, oid_nil);
     793             :                 }
     794     1537250 :                 BATkey(bn, BATtkey(b));
     795      857920 :                 bn->tsorted = BATtordered(b);
     796      857920 :                 bn->trevsorted = BATtrevordered(b);
     797      857920 :                 bn->batDirtydesc = true;
     798      857920 :                 bn->tnorevsorted = b->tnorevsorted;
     799      857920 :                 if (b->tnokey[0] != b->tnokey[1]) {
     800      377468 :                         bn->tnokey[0] = b->tnokey[0];
     801      377468 :                         bn->tnokey[1] = b->tnokey[1];
     802             :                 } else {
     803      480452 :                         bn->tnokey[0] = bn->tnokey[1] = 0;
     804             :                 }
     805      857920 :                 bn->tnosorted = b->tnosorted;
     806      857920 :                 bn->tnonil = b->tnonil;
     807      857920 :                 bn->tnil = b->tnil;
     808          35 :         } else if (ATOMstorage(tt) == ATOMstorage(b->ttype) &&
     809          35 :                    ATOMcompare(tt) == ATOMcompare(b->ttype)) {
     810          35 :                 BUN h = BUNlast(b); /* the position hints below are only copied when they lie before h */
     811          35 :                 bn->tsorted = b->tsorted;
     812          35 :                 bn->trevsorted = b->trevsorted;
     813          35 :                 if (b->tkey)
     814          15 :                         BATkey(bn, true);
     815          35 :                 bn->tnonil = b->tnonil;
     816          35 :                 bn->tnil = b->tnil;
     817          35 :                 if (b->tnosorted > 0 && b->tnosorted < h)
     818           2 :                         bn->tnosorted = b->tnosorted;
     819             :                 else
     820          33 :                         bn->tnosorted = 0;
     821          35 :                 if (b->tnorevsorted > 0 && b->tnorevsorted < h)
     822           3 :                         bn->tnorevsorted = b->tnorevsorted;
     823             :                 else
     824          32 :                         bn->tnorevsorted = 0;
     825          35 :                 if (b->tnokey[0] < h &&
     826          35 :                     b->tnokey[1] < h &&
     827             :                     b->tnokey[0] != b->tnokey[1]) {
     828           0 :                         bn->tnokey[0] = b->tnokey[0];
     829           0 :                         bn->tnokey[1] = b->tnokey[1];
     830             :                 } else {
     831          35 :                         bn->tnokey[0] = bn->tnokey[1] = 0;
     832             :                 }
     833             :         } else {
     834           0 :                 bn->tsorted = bn->trevsorted = false; /* set based on count later */
     835           0 :                 bn->tnonil = bn->tnil = false;
     836           0 :                 bn->tnosorted = bn->tnorevsorted = 0;
     837           0 :                 bn->tnokey[0] = bn->tnokey[1] = 0;
     838             :         }
     839      857955 :         if (BATcount(bn) <= 1) { /* at most one value: key, and sorted both ways if the type is linear */
     840      213500 :                 bn->tsorted = ATOMlinear(b->ttype);
     841      213500 :                 bn->trevsorted = ATOMlinear(b->ttype);
     842      213500 :                 bn->tkey = true;
     843             :         }
     844      857955 :         if (!writable)
     845       21347 :                 bn->batRestricted = BAT_READ;
     846      857955 :         TRC_DEBUG(ALGO, "COLcopy(" ALGOBATFMT ")=" ALGOBATFMT "\n",
     847             :                   ALGOBATPAR(b), ALGOBATPAR(bn));
     848             :         return bn;
     849           0 :       bunins_failed:
     850           0 :         BBPreclaim(bn);
     851           0 :         return NULL;
     852             : }
     853             : 
     854             : #ifdef HAVE_HGE
     855             : #define un_move_sz16(src, dst, sz)                      \
     856             :                 if (sz == 16) {                         \
     857             :                         * (hge *) dst = * (hge *) src;  \
     858             :                 } else
     859             : #else /* !HAVE_HGE: no 16-byte case */
     860             : #define un_move_sz16(src, dst, sz)
     861             : #endif /* HAVE_HGE */
     862             : /* Helper macros for moving fixed-size values.
                     :  *
                     :  * un_move(src, dst, sz) copies sz bytes from src to dst: sizes 8
                     :  * and 4 (and 16 when HAVE_HGE is defined) are copied with a single
                     :  * lng, int or hge assignment, any other size greater than 0 is
                     :  * copied byte by byte, and nothing is copied otherwise.
                     :  * un_move_sz16, defined above, expands to an "if (sz == 16) ...
                     :  * else" prefix (or to nothing without HAVE_HGE); its dangling else
                     :  * chains into the "if (sz == 8)" test of un_move.
                     :  *
                     :  * acc_move(l, p), defined after un_move, exchanges the values at
                     :  * positions l and p of the tail column (Tloc) through a 16-byte
                     :  * temporary.  It refers to the names b and ts, which are not macro
                     :  * parameters and therefore must be in scope where the macro is
                     :  * used; it asserts ts <= 16. */
     863             : #define un_move(src, dst, sz)                           \
     864             :         do {                                            \
     865             :                 un_move_sz16(src,dst,sz)                \
     866             :                 if (sz == 8) {                          \
     867             :                         * (lng *) dst = * (lng *) src;  \
     868             :                 } else if (sz == 4) {                   \
     869             :                         * (int *) dst = * (int *) src;  \
     870             :                 } else if (sz > 0) {                 \
     871             :                         char *_dst = (char *) dst;      \
     872             :                         char *_src = (char *) src;      \
     873             :                         char *_end = _src + sz;         \
     874             :                                                         \
     875             :                         while (_src < _end)          \
     876             :                                 *_dst++ = *_src++;      \
     877             :                 }                                       \
     878             :         } while (0)
     879             : #define acc_move(l, p)                                                  \
     880             :         do {                                                            \
     881             :                 char tmp[16];                                           \
     882             :                 /* avoid compiler warning: dereferencing type-punned pointer \
     883             :                  * will break strict-aliasing rules */                  \
     884             :                 char *tmpp = tmp;                                       \
     885             :                                                                         \
     886             :                 assert(ts <= 16);                                    \
     887             :                                                                         \
     888             :                 /* move first to tmp */                                 \
     889             :                 un_move(Tloc(b, l), tmpp, ts);                          \
     890             :                 /* move delete to first */                              \
     891             :                 un_move(Tloc(b, p), Tloc(b, l), ts);                    \
     892             :                 /* move first to deleted */                             \
     893             :                 un_move(tmpp, Tloc(b, p), ts);                          \
     894             :         } while (0)
     895             : /* setcolprops(b, x): update the cached tail properties of b for a
                     :  * value x that is to be stored at position BUNlast(b).  The value
                     :  * itself is not stored here.  x may only be NULL when the column
                     :  * type is TYPE_void.
                     :  *
                     :  * - Empty BAT: sortedness (from ATOMlinear), key and nil/nonil
                     :  *   flags are initialised; tseqbase is taken from x for void
                     :  *   columns (if x is given) and for oid columns; for non-nil values
                     :  *   of linear types both GDK_MIN_VALUE and GDK_MAX_VALUE are set to
                     :  *   x.  The function then returns.
                     :  * - Non-empty TYPE_void BAT: x is not used; only the flags that
                     :  *   follow from the existing tseqbase are adjusted, after which the
                     :  *   function returns.
                     :  * - Linear types: x is compared with the value at the last
                     :  *   position, and tkey/tnokey, tsorted/tnosorted,
                     :  *   trevsorted/tnorevsorted, the min/max properties and a dense
                     :  *   tseqbase are updated accordingly.
                     :  * - Other types: none of the above is touched.
                     :  * In the last two cases a nil x finally sets tnil and clears
                     :  * tnonil. */
     896             : static void
     897    37371000 : setcolprops(BAT *b, const void *x)
     898             : {
     899    74741000 :         bool isnil = b->ttype != TYPE_void &&
     900    37371800 :                 ATOMcmp(b->ttype, x, ATOMnilptr(b->ttype)) == 0;
     901    37369200 :         BATiter bi;
     902    37369200 :         BUN pos;
     903    37369200 :         const void *prv;
     904    37369200 :         int cmp;
     905             : 
     906             :         /* x may only be NULL if the column type is VOID */
     907    37369200 :         assert(x != NULL || b->ttype == TYPE_void);
     908    37369200 :         if (b->batCount == 0) {
     909             :                 /* first value */
     910      450033 :                 b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
     911      450033 :                 b->tnosorted = b->tnorevsorted = 0;
     912      450033 :                 b->tkey = true;
     913      450033 :                 b->tnokey[0] = b->tnokey[1] = 0;
     914      450033 :                 if (b->ttype == TYPE_void) {
     915          17 :                         if (x) {
     916          17 :                                 b->tseqbase = * (const oid *) x;
     917             :                         }
     918          17 :                         b->tnil = is_oid_nil(b->tseqbase);
     919          17 :                         b->tnonil = !b->tnil;
     920             :                 } else {
     921      450016 :                         b->tnil = isnil;
     922      450016 :                         b->tnonil = !isnil;
     923      450016 :                         if (b->ttype == TYPE_oid) {
     924       13820 :                                 b->tseqbase = * (const oid *) x;
     925             :                         }
     926      450016 :                         if (!isnil && ATOMlinear(b->ttype)) {
     927      436427 :                                 BATsetprop(b, GDK_MAX_VALUE, b->ttype, x);
     928      436469 :                                 BATsetprop(b, GDK_MIN_VALUE, b->ttype, x);
     929             :                         }
     930             :                 }
     931      450074 :                 return;
     932    36919200 :         } else if (b->ttype == TYPE_void) {
     933             :                 /* not the first value in a VOID column: we keep the
     934             :                  * seqbase, and x is not used, so only some properties
     935             :                  * are affected */
     936           0 :                 if (!is_oid_nil(b->tseqbase)) {
     937           0 :                         if (b->trevsorted) {
     938           0 :                                 b->tnorevsorted = BUNlast(b);
     939           0 :                                 b->trevsorted = false;
     940             :                         }
     941           0 :                         b->tnil = false;
     942           0 :                         b->tnonil = true;
     943             :                 } else {
     944           0 :                         if (b->tkey) {
     945           0 :                                 b->tnokey[0] = 0;
     946           0 :                                 b->tnokey[1] = BUNlast(b);
     947           0 :                                 b->tkey = false;
     948             :                         }
     949           0 :                         b->tnil = true;
     950           0 :                         b->tnonil = false;
     951             :                 }
     952           0 :                 return;
     953    36919200 :         } else if (ATOMlinear(b->ttype)) {
     954    36921600 :                 PROPrec *prop;
     955             : 
     956    36921600 :                 bi = bat_iterator(b);
     957    36921600 :                 pos = BUNlast(b); /* position the new value will get */
     958    36921600 :                 prv = BUNtail(bi, pos - 1); /* value currently at the last position */
     959    36921600 :                 cmp = ATOMcmp(b->ttype, prv, x); /* compares prv with x, in that order */
     960             : 
     961    36921700 :                 if (b->tkey &&
     962     5384710 :                     (cmp == 0 || /* definitely not KEY */
     963     5384710 :                      (b->batCount > 1 && /* can't guarantee KEY if unordered */
     964     5229150 :                       ((b->tsorted && cmp > 0) ||
     965     5191250 :                        (b->trevsorted && cmp < 0) ||
     966     5171690 :                        (!b->tsorted && !b->trevsorted))))) {
     967      189926 :                         b->tkey = false;
     968      189926 :                         if (cmp == 0) { /* the two equal values are at pos - 1 and pos */
     969      109762 :                                 b->tnokey[0] = pos - 1;
     970      109762 :                                 b->tnokey[1] = pos;
     971             :                         }
     972             :                 }
     973    36921700 :                 if (b->tsorted) {
     974    11494700 :                         if (cmp > 0) {
     975             :                                 /* out of order */
     976      115846 :                                 b->tsorted = false;
     977      115846 :                                 b->tnosorted = pos;
     978    11378800 :                         } else if (cmp < 0 && !isnil) {
     979             :                                 /* new largest value */
     980     5611910 :                                 BATsetprop(b, GDK_MAX_VALUE, b->ttype, x);
     981             :                         }
     982    25427000 :                 } else if (!isnil &&
     983    50670100 :                            (prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL &&
     984    25310500 :                            ATOMcmp(b->ttype, VALptr(&prop->v), x) < 0) {
     985      351578 :                         BATsetprop(b, GDK_MAX_VALUE, b->ttype, x);
     986             :                 }
     987    36922600 :                 if (b->trevsorted) {
     988    15254900 :                         if (cmp < 0) {
     989             :                                 /* out of order */
     990      144954 :                                 b->trevsorted = false;
     991      144954 :                                 b->tnorevsorted = pos;
     992             :                                 /* if there is a nil in the BAT, it is
     993             :                                  * the smallest, but that doesn't
     994             :                                  * count for the property, so the new
     995             :                                  * value may still be smaller than the
     996             :                                  * smallest non-nil so far */
     997      144954 :                                 if (!b->tnonil && !isnil &&
     998         890 :                                     (prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL &&
     999         136 :                                     ATOMcmp(b->ttype, VALptr(&prop->v), x) > 0) {
    1000          37 :                                         BATsetprop(b, GDK_MIN_VALUE, b->ttype, x);
    1001             :                                 }
    1002    15110000 :                         } else if (cmp > 0 && !isnil) {
    1003             :                                 /* new smallest value */
    1004       74018 :                                 BATsetprop(b, GDK_MIN_VALUE, b->ttype, x);
    1005             :                         }
    1006    21667700 :                 } else if (!isnil &&
    1007    43079200 :                            (prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL &&
    1008    21479000 :                            ATOMcmp(b->ttype, VALptr(&prop->v), x) > 0) {
    1009       47644 :                         BATsetprop(b, GDK_MIN_VALUE, b->ttype, x);
    1010             :                 }
    1011    36922400 :                 if (BATtdense(b) && (cmp >= 0 || * (const oid *) prv + 1 != * (const oid *) x)) { /* x is not the successor of prv: no longer dense */
    1012        5003 :                         assert(b->ttype == TYPE_oid);
    1013        5003 :                         b->tseqbase = oid_nil;
    1014             :                 }
    1015             :         }
    1016    36920000 :         if (isnil) {
    1017      352035 :                 b->tnonil = false;
    1018      352035 :                 b->tnil = true;
    1019             :         }
    1020             : }
    1021             : 
    1022             : /*
    1023             :  * @+ BUNappend
    1024             :  * The BUNappend function can be used to add a single value to void
    1025             :  * and oid headed bats. The new head value will be a unique number,
    1026             :  * (max(bat)+1).
    1027             :  */
    1028             : gdk_return /* GDK_SUCCEED after a successful append; GDK_FAIL on every error path below */
    1029    37372500 : BUNappend(BAT *b, const void *t, bool force) /* append the single tail value t at the end of b */
    1030             : {
    1031    37372500 :         BUN p;
    1032    37372500 :         size_t tsize = 0; /* var-heap size before the append; stays 0 unless b has both a hash and a var heap */
    1033             : 
    1034    37372500 :         BATcheck(b, GDK_FAIL); /* macro defined elsewhere; presumably returns GDK_FAIL for a NULL b -- confirm */
    1035             : 
    1036    37372500 :         assert(!VIEWtparent(b)); /* b must not be a view on another BAT's tail */
    1037             : 
    1038    37372500 :         p = BUNlast(b);         /* insert at end */
    1039    37372500 :         if (p == BUN_MAX || b->batCount == BUN_MAX) { /* no room for one more BUN */
    1040           0 :                 GDKerror("bat too large\n");
    1041           0 :                 return GDK_FAIL;
    1042             :         }
    1043             : 
    1044    37372500 :         ALIGNapp(b, force, GDK_FAIL); /* macro defined elsewhere; NOTE(review): presumably the write-access check that force can override -- confirm */
    1045    37372500 :         b->batDirtydesc = true;
    1046    37372500 :         if (b->thash && b->tvheap) /* remember the var-heap size so a change can be detected after the insert */
    1047       61103 :                 tsize = b->tvheap->size;
    1048             : 
    1049    37372500 :         if (b->ttype == TYPE_void && BATtdense(b)) { /* void tail that is currently dense */
    1050           0 :                 if (b->batCount == 0) { /* first value defines the sequence base */
    1051           0 :                         b->tseqbase = * (const oid *) t;
    1052           0 :                 } else if (is_oid_nil(* (oid *) t) || /* NOTE(review): this cast drops const, unlike the two neighbouring reads of t */
    1053           0 :                            b->tseqbase + b->batCount != *(const oid *) t) { /* nil or non-consecutive value: materialize the tail */
    1054           0 :                         if (BATmaterialize(b) != GDK_SUCCEED)
    1055             :                                 return GDK_FAIL;
    1056             :                 }
    1057             :         }
    1058             : 
    1059    37372500 :         if (unshare_string_heap(b) != GDK_SUCCEED) { /* helper defined elsewhere; presumably gives b a private string heap before writing -- confirm */
    1060             :                 return GDK_FAIL;
    1061             :         }
    1062             : 
    1063    37371900 :         setcolprops(b, t); /* helper defined elsewhere; presumably adjusts the column properties for the new value -- confirm */
    1064             : 
    1065    37369400 :         if (b->ttype != TYPE_void) { /* non-void tail: actually store the value */
    1066    37369400 :                 if (bunfastapp(b, t) != GDK_SUCCEED)
    1067             :                         return GDK_FAIL;
    1068    37369000 :                 b->theap.dirty = true;
    1069             :         } else { /* tail is still void: only the count grows */
    1070          17 :                 BATsetcount(b, b->batCount + 1);
    1071             :         }
    1072             : 
    1073             : 
    1074    37369000 :         IMPSdestroy(b); /* no support for inserts in imprints yet */
    1075    37369100 :         OIDXdestroy(b);
    1076    37367700 :         BATrmprop(b, GDK_NUNIQUE); /* dropped here; set again below when a hash survives the insert */
    1077             : #if 0           /* enable if we have more properties than just min/max */
    1078             :         PROPrec *prop;
    1079             :         do {
    1080             :                 for (prop = b->tprops; prop; prop = prop->next)
    1081             :                         if (prop->id != GDK_MAX_VALUE &&
    1082             :                             prop->id != GDK_MIN_VALUE &&
    1083             :                             prop->id != GDK_HASH_BUCKETS) {
    1084             :                                 BATrmprop(b, prop->id);
    1085             :                                 break;
    1086             :                         }
    1087             :         } while (prop);
    1088             : #endif
    1089    37371800 :         if (b->thash) { /* an existing hash is updated rather than destroyed */
    1090      217236 :                 HASHins(b, p, t);
    1091      217236 :                 if (b->thash) /* re-checked: the hash can be gone after HASHins (this listing records fewer hits on the next line) */
    1092      217225 :                         BATsetprop(b, GDK_NUNIQUE,
    1093      217225 :                                    TYPE_oid, &(oid){b->thash->nunique});
    1094      217236 :                 if (tsize && tsize != b->tvheap->size) /* tsize != 0 implies b->tvheap was non-NULL when sampled above */
    1095         201 :                         HEAPwarm(b->tvheap); /* var heap changed size during the append */
    1096             :         }
    1097             :         return GDK_SUCCEED;
    1098             : }
    1099             : 
    1100             : gdk_return /* GDK_SUCCEED (also when o is out of range); GDK_FAIL for a committed value or a failed helper */
    1101           4 : BUNdelete(BAT *b, oid o) /* delete the BUN with oid o by overwriting it with the last BUN and decrementing the count */
    1102             : {
    1103           4 :         BUN p;
    1104           4 :         BATiter bi = bat_iterator(b); /* NOTE(review): b is used without a NULL check here (no BATcheck as in BUNappend) */
    1105           4 :         const void *val;
    1106           4 :         PROPrec *prop;
    1107             : 
    1108           4 :         assert(!is_oid_nil(b->hseqbase) || BATcount(b) == 0);
    1109           4 :         if (o < b->hseqbase || o >= b->hseqbase + BATcount(b)) {
    1110             :                 /* value already not there */
    1111             :                 return GDK_SUCCEED;
    1112             :         }
    1113           4 :         assert(BATcount(b) > 0); /* follows from "if" above */
    1114           4 :         p = o - b->hseqbase; /* position of o within b */
    1115           4 :         if (p < b->batInserted) { /* positions below batInserted are treated as committed */
    1116           0 :                 GDKerror("cannot delete committed value\n");
    1117           0 :                 return GDK_FAIL;
    1118             :         }
    1119           4 :         b->batDirtydesc = true;
    1120           4 :         val = BUNtail(bi, p); /* the value being deleted */
    1121           4 :         if (ATOMcmp(b->ttype, ATOMnilptr(b->ttype), val) != 0) { /* non-nil value: cached min/max may depend on it */
    1122           4 :                 if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL
    1123           4 :                     && ATOMcmp(b->ttype, VALptr(&prop->v), val) >= 0) /* NOTE(review): a valid maximum always compares >= val, so the property is dropped on every non-nil delete; == 0 may have been intended -- confirm */
    1124           4 :                         BATrmprop(b, GDK_MAX_VALUE);
    1125           4 :                 if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL
    1126           1 :                     && ATOMcmp(b->ttype, VALptr(&prop->v), val) <= 0) /* NOTE(review): same remark as for the maximum: a valid minimum always compares <= val */
    1127           1 :                         BATrmprop(b, GDK_MIN_VALUE);
    1128             :         }
    1129           4 :         if (ATOMunfix(b->ttype, val) != GDK_SUCCEED)
    1130             :                 return GDK_FAIL;
    1131           4 :         ATOMdel(b->ttype, b->tvheap, (var_t *) BUNtloc(bi, p)); /* helper defined elsewhere; presumably releases the value's var-heap storage -- confirm */
    1132           4 :         if (p != BUNlast(b) - 1 && /* not the last BUN: the hole must be filled */
    1133           2 :             (b->ttype != TYPE_void || BATtdense(b))) {
    1134             :                 /* replace to-be-deleted BUN with last BUN; materialize
    1135             :                  * void column before doing so */
    1136           2 :                 if (b->ttype == TYPE_void &&
    1137           0 :                     BATmaterialize(b) != GDK_SUCCEED)
    1138             :                         return GDK_FAIL;
    1139           2 :                 memcpy(Tloc(b, p), Tloc(b, BUNlast(b) - 1), Tsize(b)); /* copy the last BUN's tail slot into position p */
    1140             :                 /* no longer sorted */
    1141           2 :                 b->tsorted = b->trevsorted = false;
    1142           2 :                 b->theap.dirty = true;
    1143             :         }
    1144           4 :         if (b->tnosorted >= p) /* forget a recorded not-sorted position at or beyond p */
    1145           3 :                 b->tnosorted = 0;
    1146           4 :         if (b->tnorevsorted >= p) /* likewise for the not-reverse-sorted position */
    1147           1 :                 b->tnorevsorted = 0;
    1148           4 :         b->batCount--; /* NOTE(review): theap.free is left untouched here, whereas BATsetcount recomputes it -- confirm this is intended */
    1149           4 :         if (b->batCount <= 1) {
    1150             :                 /* some trivial properties */
    1151           0 :                 b->tkey = true;
    1152           0 :                 b->tsorted = b->trevsorted = true;
    1153           0 :                 b->tnosorted = b->tnorevsorted = 0;
    1154           0 :                 if (b->batCount == 0) { /* empty BAT: no nils present */
    1155           0 :                         b->tnil = false;
    1156           0 :                         b->tnonil = true;
    1157             :                 }
    1158             :         }
    1159           4 :         IMPSdestroy(b); /* imprints, order index, hash and unique count are all discarded after a delete */
    1160           4 :         OIDXdestroy(b);
    1161           4 :         HASHdestroy(b);
    1162           4 :         BATrmprop(b, GDK_NUNIQUE);
    1163             : #if 0           /* enable if we have more properties than just min/max */
    1164             :         do {
    1165             :                 for (prop = b->tprops; prop; prop = prop->next)
    1166             :                         if (prop->id != GDK_MAX_VALUE &&
    1167             :                             prop->id != GDK_MIN_VALUE &&
    1168             :                             prop->id != GDK_HASH_BUCKETS) {
    1169             :                                 BATrmprop(b, prop->id);
    1170             :                                 break;
    1171             :                         }
    1172             :         } while (prop);
    1173             : #endif
    1174           4 :         return GDK_SUCCEED;
    1175             : }
    1176             : 
    1177             : /* @-  BUN replace
    1178             :  * The last operation in this context is BUN replace. It assumes that
    1179             :  * the header denotes a key. The old value association is destroyed
    1180             :  * (if it exists in the first place) and the new value takes its
    1181             :  * place.
    1182             :  *
    1183             :  * In order to make updates on void columns workable; replaces on them
    1184             :  * are always done in-place. Performing them without bun-movements
    1185             :  * greatly simplifies the problem. The 'downside' is that when
    1186             :  * transaction management has to be performed, replaced values should
    1187             :  * be saved explicitly.
    1188             :  */
    1189             : gdk_return /* GDK_SUCCEED once the value is stored; GDK_FAIL on denied access or a failed helper */
    1190        8107 : BUNinplace(BAT *b, BUN p, const void *t, bool force) /* overwrite the tail value at position p with t and repair the column properties */
    1191             : {
    1192        8107 :         BUN last = BUNlast(b) - 1; /* position of the last BUN */
    1193        8107 :         BATiter bi = bat_iterator(b);
    1194        8107 :         int tt;
    1195        8107 :         BUN prv, nxt;
    1196        8107 :         const void *val;
    1197             : 
    1198        8107 :         assert(p >= b->batInserted || force); /* positions below batInserted may only be overwritten with force */
    1199             : 
    1200             :         /* uncommitted BUN elements */
    1201             : 
    1202             :         /* zap alignment info */
    1203        8107 :         if (!force && (b->batRestricted != BAT_WRITE || b->batSharecnt > 0)) { /* without force: b must be writable and have a zero share count */
    1204           0 :                 GDKerror("access denied to %s, aborting.\n",
    1205             :                          BATgetId(b));
    1206           0 :                 return GDK_FAIL;
    1207             :         }
    1208        8107 :         val = BUNtail(bi, p);   /* old value */
    1209        8107 :         if (b->tnil &&
    1210           0 :             ATOMcmp(b->ttype, val, ATOMnilptr(b->ttype)) == 0 &&
    1211           0 :             ATOMcmp(b->ttype, t, ATOMnilptr(b->ttype)) != 0) {
    1212             :                 /* if old value is nil and new value isn't, we're not
    1213             :                  * sure anymore about the nil property, so we must
    1214             :                  * clear it */
    1215           0 :                 b->tnil = false;
    1216             :         }
    1217        8107 :         HASHdestroy(b); /* the hash is discarded, not updated in place */
    1218       16214 :         if (b->ttype != TYPE_void && ATOMlinear(b->ttype)) { /* non-void ATOMlinear type: maintain the cached min/max properties */
    1219        8107 :                 PROPrec *prop;
    1220             : 
    1221        8107 :                 if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL) {
    1222        9910 :                         if (ATOMcmp(b->ttype, t, ATOMnilptr(b->ttype)) != 0 &&
    1223        4955 :                             ATOMcmp(b->ttype, VALptr(&prop->v), t) < 0) {
    1224             :                                 /* new value is larger than previous
    1225             :                                  * largest */
    1226        3229 :                                 BATsetprop(b, GDK_MAX_VALUE, b->ttype, t);
    1227        3450 :                         } else if (ATOMcmp(b->ttype, t, val) != 0 &&
    1228        1724 :                                    ATOMcmp(b->ttype, VALptr(&prop->v), val) == 0) {
    1229             :                                 /* old value is equal to largest and
    1230             :                                  * new value is smaller (see above),
    1231             :                                  * so we don't know anymore which is
    1232             :                                  * the largest */
    1233         170 :                                 BATrmprop(b, GDK_MAX_VALUE);
    1234             :                         }
    1235             :                 }
    1236        8107 :                 if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL) {
    1237        1422 :                         if (ATOMcmp(b->ttype, t, ATOMnilptr(b->ttype)) != 0 &&
    1238         711 :                             ATOMcmp(b->ttype, VALptr(&prop->v), t) > 0) {
    1239             :                                 /* new value is smaller than previous
    1240             :                                  * smallest */
    1241         167 :                                 BATsetprop(b, GDK_MIN_VALUE, b->ttype, t);
    1242        1087 :                         } else if (ATOMcmp(b->ttype, t, val) != 0 &&
    1243         543 :                                    ATOMcmp(b->ttype, VALptr(&prop->v), val) <= 0) { /* NOTE(review): tests <= 0 although the comment below says "equal" and the MAX branch uses == 0; a valid minimum always satisfies <= 0 -- confirm */
    1244             :                                 /* old value is equal to smallest and
    1245             :                                  * new value is larger (see above), so
    1246             :                                  * we don't know anymore which is the
    1247             :                                  * smallest */
    1248         543 :                                 BATrmprop(b, GDK_MIN_VALUE);
    1249             :                         }
    1250             :                 }
    1251        8107 :                 BATrmprop(b, GDK_NUNIQUE);
    1252             : #if 0           /* enable if we have more properties than just min/max */
    1253             :                 do {
    1254             :                         for (prop = b->tprops; prop; prop = prop->next)
    1255             :                                 if (prop->id != GDK_MAX_VALUE &&
    1256             :                                     prop->id != GDK_MIN_VALUE &&
    1257             :                                     prop->id != GDK_HASH_BUCKETS) {
    1258             :                                         BATrmprop(b, prop->id);
    1259             :                                         break;
    1260             :                                 }
    1261             :                 } while (prop);
    1262             : #endif
    1263             :         } else { /* void or non-linear tail: drop every property */
    1264           0 :                 PROPdestroy(b);
    1265             :         }
    1266        8107 :         OIDXdestroy(b);
    1267        8107 :         IMPSdestroy(b);
    1268        8127 :         if (b->tvarsized && b->ttype) { /* var-sized tail with a non-zero type: the slot holds an offset into tvheap */
    1269          20 :                 var_t _d;
    1270          20 :                 ptr _ptr;
    1271          20 :                 _ptr = BUNtloc(bi, p);
    1272          20 :                 switch (b->twidth) { /* decode the stored offset; 1- and 2-byte slots are biased by GDK_VAROFFSET */
    1273          16 :                 default:        /* only three or four cases possible */
    1274          16 :                         _d = (var_t) * (uint8_t *) _ptr + GDK_VAROFFSET;
    1275          16 :                         break;
    1276           4 :                 case 2:
    1277           4 :                         _d = (var_t) * (uint16_t *) _ptr + GDK_VAROFFSET;
    1278           4 :                         break;
    1279           0 :                 case 4:
    1280           0 :                         _d = (var_t) * (uint32_t *) _ptr;
    1281           0 :                         break;
    1282             : #if SIZEOF_VAR_T == 8
    1283           0 :                 case 8:
    1284           0 :                         _d = (var_t) * (uint64_t *) _ptr;
    1285           0 :                         break;
    1286             : #endif
    1287             :                 }
    1288          20 :                 if (ATOMreplaceVAR(b->ttype, b->tvheap, &_d, t) != GDK_SUCCEED) /* helper defined elsewhere; _d is passed by address, presumably to receive the new offset -- confirm */
    1289           0 :                         return GDK_FAIL;
    1290          20 :                 if (b->twidth < SIZEOF_VAR_T &&
    1291          20 :                     (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 << (8 * b->twidth))) { /* offset no longer representable in twidth bytes */
    1292             :                         /* doesn't fit in current heap, upgrade it */
    1293           1 :                         if (GDKupgradevarheap(b, _d, false, b->batRestricted == BAT_READ) != GDK_SUCCEED)
    1294             :                                 return GDK_FAIL;
    1295             :                 }
    1296          20 :                 _ptr = BUNtloc(bi, p); /* fetched again: the slot width can differ after the upgrade (this listing's hit counts differ between the two switches) */
    1297          20 :                 switch (b->twidth) { /* encode the offset back into the slot, mirroring the decode above */
    1298          15 :                 default:        /* only three or four cases possible */
    1299          15 :                         * (uint8_t *) _ptr = (uint8_t) (_d - GDK_VAROFFSET);
    1300          15 :                         break;
    1301           5 :                 case 2:
    1302           5 :                         * (uint16_t *) _ptr = (uint16_t) (_d - GDK_VAROFFSET);
    1303           5 :                         break;
    1304           0 :                 case 4:
    1305           0 :                         * (uint32_t *) _ptr = (uint32_t) _d;
    1306           0 :                         break;
    1307             : #if SIZEOF_VAR_T == 8
    1308           0 :                 case 8:
    1309           0 :                         * (uint64_t *) _ptr = (uint64_t) _d;
    1310           0 :                         break;
    1311             : #endif
    1312             :                 }
    1313             :         } else { /* all other tails: the value itself is written into theap */
    1314        8087 :                 assert(BATatoms[b->ttype].atomPut == NULL);
    1315        8087 :                 if (ATOMfix(b->ttype, t) != GDK_SUCCEED) /* fix the new value before unfixing the old one */
    1316             :                         return GDK_FAIL;
    1317        8087 :                 if (ATOMunfix(b->ttype, BUNtloc(bi, p)) != GDK_SUCCEED)
    1318             :                         return GDK_FAIL;
    1319        8087 :                 switch (ATOMsize(b->ttype)) { /* typed store for the common sizes, memcpy for anything else */
    1320             :                 case 0:      /* void */
    1321             :                         break;
    1322         334 :                 case 1:
    1323         334 :                         ((bte *) b->theap.base)[p] = * (bte *) t;
    1324         334 :                         break;
    1325        1313 :                 case 2:
    1326        1313 :                         ((sht *) b->theap.base)[p] = * (sht *) t;
    1327        1313 :                         break;
    1328        2081 :                 case 4:
    1329        2081 :                         ((int *) b->theap.base)[p] = * (int *) t;
    1330        2081 :                         break;
    1331        4359 :                 case 8:
    1332        4359 :                         ((lng *) b->theap.base)[p] = * (lng *) t;
    1333        4359 :                         break;
    1334             : #ifdef HAVE_HGE
    1335           0 :                 case 16:
    1336           0 :                         ((hge *) b->theap.base)[p] = * (hge *) t;
    1337           0 :                         break;
    1338             : #endif
    1339           0 :                 default:
    1340           0 :                         memcpy(BUNtloc(bi, p), t, ATOMsize(b->ttype));
    1341           0 :                         break;
    1342             :                 }
    1343             :         }
    1344             : 
    1345        8107 :         tt = b->ttype;
    1346        8107 :         prv = p > 0 ? p - 1 : BUN_NONE; /* neighbours of p; BUN_NONE at either end of the BAT */
    1347        8107 :         nxt = p < last ? p + 1 : BUN_NONE;
    1348             : 
    1349        8107 :         if (BATtordered(b)) { /* tail was sorted: compare t with both neighbours */
    1350        6193 :                 if (prv != BUN_NONE &&
    1351        2918 :                     ATOMcmp(tt, t, BUNtail(bi, prv)) < 0) {
    1352         165 :                         b->tsorted = false;
    1353         165 :                         b->tnosorted = p; /* record where the order breaks */
    1354        3122 :                 } else if (nxt != BUN_NONE &&
    1355          12 :                            ATOMcmp(tt, t, BUNtail(bi, nxt)) > 0) {
    1356           9 :                         b->tsorted = false;
    1357           9 :                         b->tnosorted = nxt;
    1358        3101 :                 } else if (b->ttype != TYPE_void && BATtdense(b)) { /* still sorted: check whether the dense oid sequence survives */
    1359           0 :                         if (prv != BUN_NONE &&
    1360           0 :                             1 + * (oid *) BUNtloc(bi, prv) != * (oid *) t) {
    1361           0 :                                 b->tseqbase = oid_nil;
    1362           0 :                         } else if (nxt != BUN_NONE &&
    1363           0 :                                    * (oid *) BUNtloc(bi, nxt) != 1 + * (oid *) t) {
    1364           0 :                                 b->tseqbase = oid_nil;
    1365           0 :                         } else if (prv == BUN_NONE &&
    1366           0 :                                    nxt == BUN_NONE) { /* single BUN: t becomes the new sequence base */
    1367           0 :                                 b->tseqbase = * (oid *) t;
    1368             :                         }
    1369             :                 }
    1370        4832 :         } else if (b->tnosorted >= p) /* not sorted: a recorded break position at or beyond p is forgotten */
    1371          23 :                 b->tnosorted = 0;
    1372        8107 :         if (BATtrevordered(b)) { /* same checks for reverse order */
    1373        5222 :                 if (prv != BUN_NONE &&
    1374        2433 :                     ATOMcmp(tt, t, BUNtail(bi, prv)) > 0) {
    1375         181 :                         b->trevsorted = false;
    1376         181 :                         b->tnorevsorted = p;
    1377        4683 :                 } else if (nxt != BUN_NONE &&
    1378        2075 :                            ATOMcmp(tt, t, BUNtail(bi, nxt)) < 0) {
    1379           0 :                         b->trevsorted = false;
    1380           0 :                         b->tnorevsorted = nxt;
    1381             :                 }
    1382        5318 :         } else if (b->tnorevsorted >= p)
    1383         404 :                 b->tnorevsorted = 0;
    1384        8107 :         if (((b->ttype != TYPE_void) & b->tkey) && b->batCount > 1) { /* key column with several BUNs: uniqueness is cleared, not re-verified; NOTE(review): uses bitwise & on the two flags */
    1385         113 :                 BATkey(b, false); /* NOTE(review): the gdk_return result is ignored */
    1386        7994 :         } else if (!b->tkey && (b->tnokey[0] == p || b->tnokey[1] == p)) /* p was one of the two recorded tnokey positions */
    1387          21 :                 b->tnokey[0] = b->tnokey[1] = 0;
    1388        8107 :         if (b->tnonil)
    1389        8107 :                 b->tnonil = t && ATOMcmp(b->ttype, t, ATOMnilptr(b->ttype)) != 0; /* NOTE(review): t has already been dereferenced above, so the NULL test looks redundant */
    1390        8107 :         b->theap.dirty = true;
    1391        8107 :         if (b->tvheap)
    1392          20 :                 b->tvheap->dirty = true;
    1393             : 
    1394             :         return GDK_SUCCEED;
    1395             : }
    1396             : 
    1397             : /* very much like void_inplace, except this materializes a void tail
    1398             :  * column if necessary */
    1399             : gdk_return /* GDK_SUCCEED (also for an out-of-range id); GDK_FAIL for a NULL t or a failed helper */
    1400        2065 : BUNreplace(BAT *b, oid id, const void *t, bool force) /* replace the tail value of the BUN with oid id by t */
    1401             : {
    1402        2065 :         BATcheck(b, GDK_FAIL); /* macro defined elsewhere; presumably returns GDK_FAIL for a NULL b -- confirm */
    1403        2065 :         if (t == NULL) {
    1404           0 :                 GDKerror("tail value is nil"); /* NOTE(review): the test is for a NULL pointer, not a nil value, and the message lacks the trailing newline used by the other GDKerror calls here */
    1405           0 :                 return GDK_FAIL;
    1406             :         }
    1407             : 
    1408        2065 :         if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) /* id not present: nothing to replace, reported as success */
    1409             :                 return GDK_SUCCEED;
    1410             : 
    1411        2065 :         if (b->ttype == TYPE_void) {
    1412             :                 /* no need to materialize if value doesn't change */
    1413           0 :                 if (is_oid_nil(b->tseqbase) || /* NOTE(review): with a nil seqbase this returns success without looking at t -- confirm intended */
    1414           0 :                     b->tseqbase + id - b->hseqbase == *(const oid *) t) /* t equals the value the void column already implies */
    1415             :                         return GDK_SUCCEED;
    1416           0 :                 if (BATmaterialize(b) != GDK_SUCCEED)
    1417             :                         return GDK_FAIL;
    1418             :         }
    1419             : 
    1420        2065 :         return BUNinplace(b, id - b->hseqbase, t, force); /* oid converted to a position */
    1421             : }
    1422             : 
    1423             : /* very much like BUNreplace, but this doesn't make any changes if the
    1424             :  * tail column is void */
    1425             : gdk_return /* GDK_FAIL for an out-of-range id; GDK_SUCCEED for a void tail; otherwise whatever BUNinplace returns */
    1426        1728 : void_inplace(BAT *b, oid id, const void *val, bool force) /* replace the tail value at oid id by val, leaving void tails untouched */
    1427             : {
    1428        1728 :         assert(id >= b->hseqbase && id < b->hseqbase + BATcount(b));
    1429        1728 :         if (id < b->hseqbase || id >= b->hseqbase + BATcount(b)) { /* same range test as the assert: only reachable when asserts are compiled out */
    1430             :                 GDKerror("id out of range\n");
    1431             :                 return GDK_FAIL;
    1432             :         }
    1433        1728 :         if (b->ttype == TYPE_void) /* void tail: deliberately a no-op, unlike BUNreplace */
    1434             :                 return GDK_SUCCEED;
    1435        1728 :         return BUNinplace(b, id - b->hseqbase, val, force); /* oid converted to a position */
    1436             : }
    1437             : 
    1438             : /*
    1439             :  * @- BUN Lookup
    1440             :  * Location of a BUN using a value should use the available indexes to
    1441             :  * speed up access. If indexes are lacking then a hash index is
    1442             :  * constructed under the assumption that 1) multiple access to the BAT
    1443             :  * can be expected and 2) building the hash is only slightly more
    1444             :  * expensive than the full linear scan.  BUN_NONE is returned if no
    1445             :  * such element could be found.  In those cases where the type is
    1446             :  * known and a hash index is available, one should use the inline
    1447             :  * functions to speed-up processing.
    1448             :  */
    1449             : static BUN /* position of the first BUN whose tail compares equal to v, or BUN_NONE */
    1450           0 : slowfnd(BAT *b, const void *v) /* linear scan over b; used by BUNfnd as its hashfnd_failed fallback */
    1451             : {
    1452           0 :         BATiter bi = bat_iterator(b);
    1453           0 :         BUN p, q;
    1454           0 :         int (*cmp)(const void *, const void *) = ATOMcompare(b->ttype); /* comparison function of the tail type */
    1455             : 
    1456           0 :         BATloop(b, p, q) { /* macro defined elsewhere; presumably iterates p over all BUNs of b -- confirm */
    1457           0 :                 if ((*cmp)(v, BUNtail(bi, p)) == 0)
    1458           0 :                         return p;
    1459             :         }
    1460             :         return BUN_NONE;
    1461             : }
    1462             : 
    1463             : BUN /* position of a BUN whose tail equals v, or BUN_NONE when there is none (or b / v is NULL) */
    1464     1534620 : BUNfnd(BAT *b, const void *v) /* look up value v in the tail of b using the cheapest method available */
    1465             : {
    1466     1534620 :         BUN r = BUN_NONE;
    1467     1534620 :         BATiter bi;
    1468             : 
    1469     1534620 :         BATcheck(b, BUN_NONE); /* macro defined elsewhere; presumably returns BUN_NONE for a NULL b -- confirm */
    1470     1534620 :         if (!v)
    1471             :                 return r;
    1472     1534620 :         if (b->ttype == TYPE_void && b->tvheap != NULL) { /* void tail that carries a var heap: searched through a candidate iterator */
    1473           0 :                 struct canditer ci;
    1474           0 :                 canditer_init(&ci, NULL, b);
    1475           0 :                 return canditer_search(&ci, * (const oid *) v, false);
    1476             :         }
    1477     1534620 :         if (BATtvoid(b)) /* remaining void-like tails are handled by BUNfndVOID */
    1478       77809 :                 return BUNfndVOID(b, v);
    1479     1456810 :         if (!BATcheckhash(b)) { /* BATcheckhash false: use SORTfnd when the tail is sorted in either direction */
    1480      247949 :                 if (BATordered(b) || BATordered_rev(b))
    1481      247551 :                         return SORTfnd(b, v);
    1482             :         }
    1483     1209260 :         bi = bat_iterator(b);
    1484     1851420 :         switch (ATOMbasetype(b->ttype)) { /* hash lookup through the macro matching the base type; r receives the result */
    1485           0 :         case TYPE_bte:
    1486           0 :                 HASHfnd_bte(r, bi, v);
    1487             :                 break;
    1488           0 :         case TYPE_sht:
    1489           0 :                 HASHfnd_sht(r, bi, v);
    1490             :                 break;
    1491           0 :         case TYPE_int:
    1492           0 :                 HASHfnd_int(r, bi, v);
    1493             :                 break;
    1494           0 :         case TYPE_flt:
    1495           0 :                 HASHfnd_flt(r, bi, v);
    1496             :                 break;
    1497           0 :         case TYPE_dbl:
    1498           0 :                 HASHfnd_dbl(r, bi, v);
    1499             :                 break;
    1500      567099 :         case TYPE_lng:
    1501     1394730 :                 HASHfnd_lng(r, bi, v);
    1502             :                 break;
    1503             : #ifdef HAVE_HGE
    1504           0 :         case TYPE_hge:
    1505           0 :                 HASHfnd_hge(r, bi, v);
    1506             :                 break;
    1507             : #endif
    1508      642163 :         case TYPE_str:
    1509     8508460 :                 HASHfnd_str(r, bi, v);
    1510             :                 break;
    1511           0 :         default: /* generic lookup for every other type */
    1512           0 :                 HASHfnd(r, bi, v);
    1513             :         }
    1514             :         return r;
    1515           0 :   hashfnd_failed: /* no goto to this label is visible in this function; presumably the HASHfnd* macros jump here -- confirm */
    1516             :         /* can't build hash table, search the slow way */
    1517           0 :         return slowfnd(b, v);
    1518             : }
    1519             : 
    1520             : /*
    1521             :  * @+ BAT Property Management
    1522             :  *
    1523             :  * The function BATcount returns the number of active elements in a
    1524             :  * BAT.  Counting is type independent.  It can be implemented quickly,
    1525             :  * because the system ensures a dense BUN list.
    1526             :  */
    1527             : void
    1528      702231 : BATsetcapacity(BAT *b, BUN cnt) /* record cnt as the capacity of b; only the descriptor field is written here */
    1529             : {
    1530      702231 :         b->batCapacity = cnt;
    1531      702231 :         assert(b->batCount <= cnt); /* capacity may not drop below the current count (checked after the store) */
    1532      702231 : }
    1533             : 
    1534             : void
    1535    18797200 : BATsetcount(BAT *b, BUN cnt) /* set the number of BUNs in b to cnt and bring the dependent descriptor fields in line */
    1536             : {
    1537             :         /* head column is always VOID, and some head properties never change */
    1538    18797200 :         assert(!is_oid_nil(b->hseqbase));
    1539    18797200 :         assert(cnt <= BUN_MAX);
    1540             : 
    1541    18797200 :         b->batCount = cnt;
    1542    18797200 :         b->batDirtydesc = true;
    1543    18797200 :         b->theap.free = tailsize(b, cnt); /* used part of the tail heap follows the count */
    1544    18797200 :         if (b->ttype == TYPE_void) /* for a void tail the capacity simply tracks the count */
    1545     3747220 :                 b->batCapacity = cnt;
    1546    18797200 :         if (cnt <= 1) { /* zero or one BUN: sorted both ways whenever the type is ATOMlinear */
    1547     8888350 :                 b->tsorted = b->trevsorted = ATOMlinear(b->ttype);
    1548     8888350 :                 b->tnosorted = b->tnorevsorted = 0;
    1549             :         }
    1550             :         /* if the BAT was made smaller, we need to zap some values */
    1551    18797200 :         if (b->tnosorted >= BUNlast(b))
    1552     8053250 :                 b->tnosorted = 0;
    1553    18797200 :         if (b->tnorevsorted >= BUNlast(b))
    1554     8050560 :                 b->tnorevsorted = 0;
    1555    18797200 :         if (b->tnokey[0] >= BUNlast(b) || b->tnokey[1] >= BUNlast(b)) { /* the two tnokey positions are reset together */
    1556     8042020 :                 b->tnokey[0] = 0;
    1557     8042020 :                 b->tnokey[1] = 0;
    1558             :         }
    1559    18797200 :         if (b->ttype == TYPE_void) { /* void tail: the properties follow from seqbase and count alone */
    1560     3747280 :                 b->tsorted = true;
    1561     3747280 :                 if (is_oid_nil(b->tseqbase)) { /* nil seqbase: flagged as containing nil, key only with at most one BUN */
    1562     1901520 :                         b->tkey = cnt <= 1;
    1563     1901520 :                         b->trevsorted = true;
    1564     1901520 :                         b->tnil = true;
    1565     1901520 :                         b->tnonil = false;
    1566             :                 } else { /* non-nil seqbase: key, nil-free, reverse-sorted only with at most one BUN */
    1567     1845750 :                         b->tkey = true;
    1568     1845750 :                         b->trevsorted = cnt <= 1;
    1569     1845750 :                         b->tnil = false;
    1570     1845750 :                         b->tnonil = true;
    1571             :                 }
    1572             :         }
    1573    18797200 :         assert(b->batCapacity >= cnt);
    1574    18797200 : }
    1575             : 
    1576             : /*
    1577             :  * The key and name properties can be changed at any time.  Keyed
    1578             :  * dimensions are automatically supported by an auxiliary hash-based
    1579             :  * access structure to speed up searching. Turning off the key
    1580             :  * integrity property does not cause the index to disappear. It can
    1581             :  * still be used to speed-up retrieval. The routine BATkey sets the
    1582             :  * key property of the association head.
    1583             :  */
    1584             : gdk_return /* GDK_FAIL when flag contradicts a void column's nature; otherwise GDK_SUCCEED or the result of the parent call */
    1585     1788960 : BATkey(BAT *b, bool flag) /* set (flag) or clear (!flag) the tkey property of b */
    1586             : {
    1587     1788960 :         BATcheck(b, GDK_FAIL); /* macro defined elsewhere; presumably returns GDK_FAIL for a NULL b -- confirm */
    1588     1788960 :         assert(b->batCacheid > 0);
    1589     1788960 :         if (b->ttype == TYPE_void) {
    1590       13789 :                 if (BATtdense(b) && !flag) { /* a dense column cannot be declared non-key */
    1591           0 :                         GDKerror("dense column must be unique.\n");
    1592           0 :                         return GDK_FAIL;
    1593             :                 }
    1594       13789 :                 if (is_oid_nil(b->tseqbase) && flag && b->batCount > 1) { /* void column with nil seqbase and several BUNs cannot be key */
    1595           0 :                         GDKerror("void column cannot be unique.\n");
    1596           0 :                         return GDK_FAIL;
    1597             :                 }
    1598             :         }
    1599     1788960 :         if (b->tkey != flag) /* descriptor is marked dirty only on an actual change */
    1600      253512 :                 b->batDirtydesc = true;
    1601     1788960 :         b->tkey = flag;
    1602     1788960 :         if (!flag) { /* clearing key also clears the sequence base */
    1603     1193090 :                 b->tseqbase = oid_nil;
    1604             :         } else /* setting key clears the two tnokey positions */
    1605      595872 :                 b->tnokey[0] = b->tnokey[1] = 0;
    1606     1788960 :         if (flag && VIEWtparent(b)) {
    1607             :                 /* if a view is key, then so is the parent if the two
    1608             :                  * are aligned */
    1609      242862 :                 BAT *bp = BBP_cache(VIEWtparent(b)); /* NOTE(review): bp is dereferenced below without a NULL check -- presumably the parent is always loaded while a view exists; confirm */
    1610      296368 :                 if (BATcount(b) == BATcount(bp) &&
    1611       53506 :                     ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
    1612       53507 :                     !BATtkey(bp) &&
    1613           5 :                     ((BATtvoid(b) && BATtvoid(bp) && b->tseqbase == bp->tseqbase) ||
    1614             :                      BATcount(b) == 0))
    1615           5 :                         return BATkey(bp, true); /* recursive call on the parent */
    1616             :         }
    1617             :         return GDK_SUCCEED;
    1618             : }
    1619             : 
    1620             : void
    1621     1667120 : BAThseqbase(BAT *b, oid o)
    1622             : {
    1623     1667120 :         if (b != NULL) {
    1624     1667120 :                 assert(o <= GDK_oid_max);    /* i.e., not oid_nil */
    1625     1667120 :                 assert(o + BATcount(b) <= GDK_oid_max);
    1626     1667120 :                 assert(b->batCacheid > 0);
    1627     1667120 :                 if (b->hseqbase != o) {
    1628      305540 :                         b->batDirtydesc = true;
    1629      305540 :                         b->hseqbase = o;
    1630             :                 }
    1631             :         }
    1632     1667120 : }
    1633             : 
    1634             : void
    1635     3740870 : BATtseqbase(BAT *b, oid o)
    1636             : {
    1637     3740870 :         assert(o <= oid_nil);
    1638     3740870 :         if (b == NULL)
    1639             :                 return;
    1640     3740870 :         assert(is_oid_nil(o) || o + BATcount(b) <= GDK_oid_max);
    1641     3740870 :         assert(b->batCacheid > 0);
    1642     3740870 :         if (b->tseqbase != o) {
    1643     1986220 :                 b->batDirtydesc = true;
    1644             :         }
    1645     3740870 :         if (ATOMtype(b->ttype) == TYPE_oid) {
    1646     2088590 :                 b->tseqbase = o;
    1647             : 
    1648             :                 /* adapt keyness */
    1649     2088590 :                 if (BATtvoid(b)) {
    1650     2055610 :                         b->tsorted = true;
    1651     2055610 :                         if (is_oid_nil(o)) {
    1652         134 :                                 b->tkey = b->batCount <= 1;
    1653         134 :                                 b->tnonil = b->batCount == 0;
    1654         134 :                                 b->tnil = b->batCount > 0;
    1655         134 :                                 b->trevsorted = true;
    1656         134 :                                 b->tnosorted = b->tnorevsorted = 0;
    1657         134 :                                 if (!b->tkey) {
    1658           0 :                                         b->tnokey[0] = 0;
    1659           0 :                                         b->tnokey[1] = 1;
    1660             :                                 } else {
    1661         134 :                                         b->tnokey[0] = b->tnokey[1] = 0;
    1662             :                                 }
    1663             :                         } else {
    1664     2055480 :                                 if (!b->tkey) {
    1665       44317 :                                         b->tkey = true;
    1666       44317 :                                         b->tnokey[0] = b->tnokey[1] = 0;
    1667             :                                 }
    1668     2055480 :                                 b->tnonil = true;
    1669     2055480 :                                 b->tnil = false;
    1670     2055480 :                                 b->trevsorted = b->batCount <= 1;
    1671     2055480 :                                 if (!b->trevsorted)
    1672       17479 :                                         b->tnorevsorted = 1;
    1673             :                         }
    1674             :                 }
    1675             :         } else {
    1676     1652280 :                 assert(o == oid_nil);
    1677     1652280 :                 b->tseqbase = oid_nil;
    1678             :         }
    1679             : }
    1680             : 
    1681             : gdk_return
    1682       24160 : BATroles(BAT *b, const char *tnme)
    1683             : {
    1684       24160 :         if (b == NULL)
    1685             :                 return GDK_SUCCEED;
    1686       24160 :         if (b->tident && !default_ident(b->tident))
    1687           0 :                 GDKfree(b->tident);
    1688       24160 :         if (tnme)
    1689         690 :                 b->tident = GDKstrdup(tnme);
    1690             :         else
    1691       23470 :                 b->tident = BATstring_t;
    1692       24160 :         return b->tident ? GDK_SUCCEED : GDK_FAIL;
    1693             : }
    1694             : 
    1695             : /*
    1696             :  * @- Change the BAT access permissions (read, append, write)
    1697             :  * Regrettably, BAT access-permissions, persistent status and memory
    1698             :  * map modes interact in ways that make one's brain sizzle. This
    1699             :  * makes BATsetaccess and TMcommit (where a change in BAT persistence
    1700             :  * mode is made permanent) points in which the memory map status of
    1701             :  * bats needs to be carefully re-assessed and ensured.
    1702             :  *
    1703             :  * Another complication is the fact that during commit, concurrent
    1704             :  * users may access the heaps, such that the simple solution
    1705             :  * unmap;re-map is out of the question.
    1706             :  * Even worse, it is not possible to even rename an open mmap file in
    1707             :  * Windows. For this purpose, we dropped the old .priv scheme, which
    1708             :  * relied on file moves. Now, the file that is opened with mmap is
    1709             :  * always the X file, in case of newstorage=STORE_PRIV, we save in a
    1710             :  * new file X.new
    1711             :  *
    1712             :  * we must consider the following dimensions:
    1713             :  *
    1714             :  * persistence:
    1715             :  *     not simply the current persistence mode but whether the bat *was*
    1716             :  *     present at the last commit point (BBP status & BBPEXISTING).
    1717             :  *     The crucial issue is namely whether we must guarantee recovery
    1718             :  *     to a previous sane state.
    1719             :  *
    1720             :  * access:
    1721             :  *     whether the BAT is BAT_READ or BAT_WRITE. Note that BAT_APPEND
    1722             :  *     is usually the same as BAT_READ (as our concern is only data pages
    1723             :  *     that already existed at the last commit).
    1724             :  *
    1725             :  * storage:
    1726             :  *     the current way the heap file X is memory-mapped;
    1727             :  *     STORE_MMAP uses direct mapping (so dirty pages may be flushed
    1728             :  *     at any time to disk), STORE_PRIV uses copy-on-write.
    1729             :  *
    1730             :  * newstorage:
    1731             :  *     the current save-regime. STORE_MMAP calls msync() on the heap X,
    1732             :  *     whereas STORE_PRIV writes the *entire* heap in a file: X.new
    1733             :  *     If a BAT is loaded from disk, the field newstorage is used
    1734             :  *     to set storage as well (so before change-access and commit-
    1735             :  *     persistence mayhem, we always have newstorage=storage).
    1736             :  *
    1737             :  * change-access:
    1738             :  *     what happens if the bat-access mode is changed from
    1739             :  *     BAT_READ into BAT_WRITE (or vice versa).
    1740             :  *
    1741             :  * commit-persistence:
    1742             :  *     what happens during commit if the bat-persistence mode was
    1743             :  *     changed (from TRANSIENT into PERSISTENT, or vice versa).
    1744             :  *
    1745             :  * this is the scheme:
    1746             :  *
    1747             :  *  persistence access    newstorage storage    change-access commit-persistence
    1748             :  *  =========== ========= ========== ========== ============= ==================
    1749             :  * 0 transient  BAT_READ  STORE_MMAP STORE_MMAP =>2           =>4
    1750             :  * 1 transient  BAT_READ  STORE_PRIV STORE_PRIV =>3           =>5
    1751             :  * 2 transient  BAT_WRITE STORE_MMAP STORE_MMAP =>0           =>6+
    1752             :  * 3 transient  BAT_WRITE STORE_PRIV STORE_PRIV =>1           =>7
    1753             :  * 4 persistent BAT_READ  STORE_MMAP STORE_MMAP =>6+          =>0
    1754             :  * 5 persistent BAT_READ  STORE_PRIV STORE_PRIV =>7           =>1
    1755             :  * 6 persistent BAT_WRITE STORE_PRIV STORE_MMAP del X.new=>4+ del X.new;=>2+
    1756             :  * 7 persistent BAT_WRITE STORE_PRIV STORE_PRIV =>5           =>3
    1757             :  *
    1758             :  * exception states:
    1759             :  * a transient  BAT_READ  STORE_PRIV STORE_MMAP =>b           =>c
    1760             :  * b transient  BAT_WRITE STORE_PRIV STORE_MMAP =>a           =>6
    1761             :  * c persistent BAT_READ  STORE_PRIV STORE_MMAP =>6           =>a
    1762             :  *
    1763             :  * (+) indicates that we must ensure that the heap gets saved in its new mode
    1764             :  *
    1765             :  * Note that we now allow a heap with save-regime STORE_PRIV that was
    1766             :  * actually mapped STORE_MMAP. In effect, the potential corruption of
    1767             :  * the X file is compensated by writing out full X.new files that take
    1768             :  * precedence.  When transitioning out of this state towards one with
    1769             :  * both storage regime and OS as STORE_MMAP we need to move the X.new
    1770             :  * files into the backup directory. Then msync the X file and (on
    1771             :  * success) remove the X.new; see backup_new().
    1772             :  *
    1773             :  * Exception states are only reachable if the commit fails and those
    1774             :  * new persistent bats have already been processed (but never become
    1775             :  * part of a committed state). In that case a transition 2=>6 may end
    1776             :  * up 2=>b.  Exception states a and c are reachable from b.
    1777             :  *
    1778             :  * Errors in HEAPchangeaccess() can be handled atomically inside the
    1779             :  * routine.  The work on changing mmap modes HEAPcommitpersistence()
    1780             :  * is done during the BBPsync() for all bats that are newly persistent
    1781             :  * (BBPNEW). After the TMcommit(), it is done for those bats that are
    1782             :  * no longer persistent after the commit (BBPDELETED), only if it
    1783             :  * succeeds.  Such transient bats cannot be processed before the
    1784             :  * commit, because the commit may fail and then the more unsafe
    1785             :  * transient mmap modes would be present on a persistent bat.
    1786             :  *
    1787             :  * See dirty_bat() in BBPsync() -- gdk_bbp.c and epilogue() in
    1788             :  * gdk_tm.c.
    1789             :  *
    1790             :  * Including the exception states, we have 11 of the 16
    1791             :  * combinations. As for the 5 avoided states, all four
    1792             :  * (persistence,access) states with (STORE_MMAP,STORE_PRIV) are
    1793             :  * omitted (this would amount to an msync() save regime on a
    1794             :  * copy-on-write heap -- which does not work). The remaining avoided
    1795             :  * state is the patently unsafe
    1796             :  * (persistent,BAT_WRITE,STORE_MMAP,STORE_MMAP).
    1797             :  *
    1798             :  * Note that after a server restart exception states are gone, as on
    1799             :  * BAT loads the saved descriptor is inspected again (which will
    1800             :  * reproduce the state at the last succeeded commit).
    1801             :  *
    1802             :  * To avoid exception states, a TMsubcommit protocol would need to be
    1803             :  * used which is too heavy for BATsetaccess().
    1804             :  *
    1805             :  * Note that this code is not about making heaps mmap-ed in the first
    1806             :  * place.  It is just about determining which flavor of mmap should be
    1807             :  * used. The MAL user is oblivious of such details.
    1808             :  */
    1809             : 
    1810             : /* rather than deleting X.new, we comply with the commit protocol and
    1811             :  * move it to backup storage */
    1812             : static gdk_return
    1813           0 : backup_new(Heap *hp, int lockbat)
    1814             : {
    1815           0 :         int batret, bakret, xx, ret = 0;
    1816           0 :         char *batpath, *bakpath;
    1817           0 :         struct stat st;
    1818             : 
    1819             :         /* file actions here interact with the global commits */
    1820           0 :         for (xx = 0; xx <= lockbat; xx++)
    1821           0 :                 MT_lock_set(&GDKtrimLock(xx));
    1822             : 
    1823             :         /* check for an existing X.new in BATDIR and BAKDIR */
    1824           0 :         batpath = GDKfilepath(hp->farmid, BATDIR, hp->filename, ".new");
    1825           0 :         bakpath = GDKfilepath(hp->farmid, BAKDIR, hp->filename, ".new");
    1826           0 :         if (batpath == NULL || bakpath == NULL) {
    1827           0 :                 ret = -1;
    1828           0 :                 goto bailout;
    1829             :         }
    1830           0 :         batret = stat(batpath, &st);
    1831           0 :         bakret = stat(bakpath, &st);
    1832             : 
    1833           0 :         if (batret == 0 && bakret) {
    1834             :                 /* no backup yet, so move the existing X.new there out
    1835             :                  * of the way */
    1836           0 :                 if ((ret = rename(batpath, bakpath)) < 0)
    1837           0 :                         GDKsyserror("backup_new: rename %s to %s failed\n",
    1838             :                                     batpath, bakpath);
    1839           0 :                 TRC_DEBUG(IO_, "rename(%s,%s) = %d\n", batpath, bakpath, ret);
    1840           0 :         } else if (batret == 0) {
    1841             :                 /* there is a backup already; just remove the X.new */
    1842           0 :                 if ((ret = remove(batpath)) != 0)
    1843           0 :                         GDKsyserror("backup_new: remove %s failed\n", batpath);
    1844           0 :                 TRC_DEBUG(IO_, "remove(%s) = %d\n", batpath, ret);
    1845             :         }
    1846           0 :   bailout:
    1847           0 :         GDKfree(batpath);
    1848           0 :         GDKfree(bakpath);
    1849           0 :         for (xx = lockbat; xx >= 0; xx--)
    1850           0 :                 MT_lock_unset(&GDKtrimLock(xx));
    1851           0 :         return ret ? GDK_FAIL : GDK_SUCCEED;
    1852             : }
    1853             : 
    1854             : #define ACCESSMODE(wr,rd) ((wr)?BAT_WRITE:(rd)?BAT_READ:-1)
    1855             : 
    1856             : /* transition heap between readonly and writable access (either direction) */
    1857             : static storage_t
    1858      278303 : HEAPchangeaccess(Heap *hp, int dstmode, bool existing)
    1859             : {
    1860      278303 :         if (hp->base == NULL || hp->newstorage == STORE_MEM || !existing || dstmode == -1)
    1861      278303 :                 return hp->newstorage;       /* 0<=>2,1<=>3,a<=>b */
    1862             : 
    1863           0 :         if (dstmode == BAT_WRITE) {
    1864           0 :                 if (hp->storage != STORE_PRIV)
    1865           0 :                         hp->dirty = true;    /* exception c does not make it dirty */
    1866           0 :                 return STORE_PRIV;      /* 4=>6,5=>7,c=>6 persistent BAT_WRITE needs STORE_PRIV */
    1867             :         }
    1868           0 :         if (hp->storage == STORE_MMAP) {     /* 6=>4 */
    1869           0 :                 hp->dirty = true;
    1870           0 :                 return backup_new(hp, BBP_THREADMASK) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP;      /* only called for existing bats */
    1871             :         }
    1872             :         return hp->storage;  /* 7=>5 */
    1873             : }
    1874             : 
    1875             : /* heap changes persistence mode (at commit point) */
    1876             : static storage_t
    1877      140597 : HEAPcommitpersistence(Heap *hp, bool writable, bool existing)
    1878             : {
    1879      140597 :         if (existing) {         /* existing, ie will become transient */
    1880        3613 :                 if (hp->storage == STORE_MMAP && hp->newstorage == STORE_PRIV && writable) {      /* 6=>2 */
    1881           0 :                         hp->dirty = true;
    1882           0 :                         return backup_new(hp, -1) != GDK_SUCCEED ? STORE_INVALID : STORE_MMAP;  /* only called for existing bats */
    1883             :                 }
    1884        3613 :                 return hp->newstorage;       /* 4=>0,5=>1,7=>3,c=>a no change */
    1885             :         }
    1886             :         /* !existing, ie will become persistent */
    1887      136984 :         if (hp->newstorage == STORE_MEM)
    1888             :                 return hp->newstorage;
    1889         988 :         if (hp->newstorage == STORE_MMAP && !writable)
    1890             :                 return STORE_MMAP;      /* 0=>4 STORE_MMAP */
    1891             : 
    1892          96 :         if (hp->newstorage == STORE_MMAP)
    1893          48 :                 hp->dirty = true;    /* 2=>6 */
    1894             :         return STORE_PRIV;      /* 1=>5,2=>6,3=>7,a=>c,b=>6 states */
    1895             : }
    1896             : 
    1897             : 
    1898             : #define ATOMappendpriv(t, h) (ATOMstorage(t) != TYPE_str || GDK_ELIMDOUBLES(h))
    1899             : 
    1900             : /* change the heap modes at a commit */
    1901             : gdk_return
    1902      114935 : BATcheckmodes(BAT *b, bool existing)
    1903             : {
    1904      114935 :         bool wr = (b->batRestricted == BAT_WRITE);
    1905      114935 :         storage_t m1 = STORE_MEM, m3 = STORE_MEM;
    1906      114935 :         bool dirty = false;
    1907             : 
    1908      114935 :         BATcheck(b, GDK_FAIL);
    1909             : 
    1910      114935 :         if (b->ttype) {
    1911      114935 :                 m1 = HEAPcommitpersistence(&b->theap, wr, existing);
    1912      114935 :                 dirty |= (b->theap.newstorage != m1);
    1913             :         }
    1914             : 
    1915      114935 :         if (b->tvheap) {
    1916       25662 :                 bool ta = (b->batRestricted == BAT_APPEND) && ATOMappendpriv(b->ttype, b->tvheap);
    1917       25662 :                 m3 = HEAPcommitpersistence(b->tvheap, wr || ta, existing);
    1918       25662 :                 dirty |= (b->tvheap->newstorage != m3);
    1919             :         }
    1920      114935 :         if (m1 == STORE_INVALID || m3 == STORE_INVALID)
    1921             :                 return GDK_FAIL;
    1922             : 
    1923      114935 :         if (dirty) {
    1924          48 :                 b->batDirtydesc = true;
    1925          48 :                 b->theap.newstorage = m1;
    1926          48 :                 if (b->tvheap)
    1927           0 :                         b->tvheap->newstorage = m3;
    1928             :         }
    1929             :         return GDK_SUCCEED;
    1930             : }
    1931             : 
    1932             : gdk_return
    1933    17064900 : BATsetaccess(BAT *b, restrict_t newmode)
    1934             : {
    1935    17064900 :         restrict_t bakmode;
    1936    17064900 :         bool bakdirty;
    1937             : 
    1938    17064900 :         BATcheck(b, GDK_FAIL);
    1939    17064900 :         if (isVIEW(b) && newmode != BAT_READ) {
    1940          72 :                 if (VIEWreset(b) != GDK_SUCCEED)
    1941             :                         return GDK_FAIL;
    1942             :         }
    1943    17064900 :         bakmode = (restrict_t) b->batRestricted;
    1944    17064900 :         bakdirty = b->batDirtydesc;
    1945    17064900 :         if (bakmode != newmode || (b->batSharecnt && newmode != BAT_READ)) {
    1946      205355 :                 bool existing = (BBP_status(b->batCacheid) & BBPEXISTING) != 0;
    1947      205355 :                 bool wr = (newmode == BAT_WRITE);
    1948      205355 :                 bool rd = (bakmode == BAT_WRITE);
    1949      205355 :                 storage_t m1, m3 = STORE_MEM;
    1950      205355 :                 storage_t b1, b3 = STORE_MEM;
    1951             : 
    1952      205355 :                 if (b->batSharecnt && newmode != BAT_READ) {
    1953           0 :                         TRC_DEBUG(BAT_, "%s has %d views; try creating a copy\n", BATgetId(b), b->batSharecnt);
    1954           0 :                         GDKerror("%s has %d views\n",
    1955             :                                  BATgetId(b), b->batSharecnt);
    1956           0 :                         return GDK_FAIL;
    1957             :                 }
    1958             : 
    1959      205355 :                 b1 = b->theap.newstorage;
    1960      214453 :                 m1 = HEAPchangeaccess(&b->theap, ACCESSMODE(wr, rd), existing);
    1961      205358 :                 if (b->tvheap) {
    1962       72966 :                         bool ta = (newmode == BAT_APPEND && ATOMappendpriv(b->ttype, b->tvheap));
    1963       72966 :                         b3 = b->tvheap->newstorage;
    1964      144134 :                         m3 = HEAPchangeaccess(b->tvheap, ACCESSMODE(wr && ta, rd && ta), existing);
    1965             :                 }
    1966      205341 :                 if (m1 == STORE_INVALID || m3 == STORE_INVALID)
    1967             :                         return GDK_FAIL;
    1968             : 
    1969             :                 /* set new access mode and mmap modes */
    1970      205341 :                 b->batRestricted = (unsigned int) newmode;
    1971      205341 :                 b->batDirtydesc = true;
    1972      205341 :                 b->theap.newstorage = m1;
    1973      205341 :                 if (b->tvheap)
    1974       72958 :                         b->tvheap->newstorage = m3;
    1975             : 
    1976      205341 :                 if (existing && BBPsave(b) != GDK_SUCCEED) {
    1977             :                         /* roll back all changes */
    1978           0 :                         b->batRestricted = (unsigned int) bakmode;
    1979           0 :                         b->batDirtydesc = bakdirty;
    1980           0 :                         b->theap.newstorage = b1;
    1981           0 :                         if (b->tvheap)
    1982           0 :                                 b->tvheap->newstorage = b3;
    1983           0 :                         return GDK_FAIL;
    1984             :                 }
    1985             :         }
    1986             :         return GDK_SUCCEED;
    1987             : }
    1988             : 
    1989             : restrict_t
    1990           0 : BATgetaccess(BAT *b)
    1991             : {
    1992           0 :         BATcheck(b, BAT_WRITE /* 0 */);
    1993           0 :         assert(b->batRestricted != 3); /* only valid restrict_t values */
    1994           0 :         return (restrict_t) b->batRestricted;
    1995             : }
    1996             : 
    1997             : /*
    1998             :  * @- change BAT persistency (persistent,session,transient)
    1999             :  * In the past, we prevented BATS with certain types from being saved at all:
    2000             :  * - BATs of BATs, as having recursive bats creates cascading
    2001             :  *   complexities in commits/aborts.
    2002             :  * - any atom with refcounts, as the BBP has no overview of such
    2003             :  *   user-defined refcounts.
    2004             :  * - pointer types, as the values they point to are bound to be transient.
    2005             :  *
    2006             :  * However, nowadays we do allow such saves, as the BBP swapping
    2007             :  * mechanism was altered to be able to save transient bats temporarily
    2008             :  * to disk in order to make room.  Thus, we must be able to save any
    2009             :  * transient BAT to disk.
    2010             :  *
    2011             :  * What we don't allow is to make such bats persistent.
    2012             :  *
    2013             :  * Although the persistent state does influence the allowed mmap
    2014             :  * modes, this only goes for the *real* committed persistent
    2015             :  * state. Making the bat persistent with BATmode does not matter for
    2016             :  * the heap modes until the commit point is reached. So we do not need
    2017             :  * to do anything with heap modes yet at this point.
    2018             :  */
    2019             : #define check_type(tp)                                                  \
    2020             :         do {                                                            \
    2021             :                 if (ATOMisdescendant((tp), TYPE_ptr) ||                 \
    2022             :                     BATatoms[tp].atomUnfix ||                           \
    2023             :                     BATatoms[tp].atomFix) {                             \
    2024             :                         GDKerror("%s type implies that %s[%s] "               \
    2025             :                                  "cannot be made persistent.\n",      \
    2026             :                                  ATOMname(tp), BATgetId(b),             \
    2027             :                                  ATOMname(b->ttype));                        \
    2028             :                         return GDK_FAIL;                                \
    2029             :                 }                                                       \
    2030             :         } while (0)
    2031             : 
    2032             : gdk_return
    2033       65633 : BATmode(BAT *b, bool transient)
    2034             : {
    2035       65633 :         BATcheck(b, GDK_FAIL);
    2036             : 
    2037             :         /* can only make a bat PERSISTENT if its role is already
    2038             :          * PERSISTENT */
    2039       65633 :         assert(transient || b->batRole == PERSISTENT);
    2040             : 
    2041       65633 :         if (b->batRole == TRANSIENT && !transient) {
    2042           0 :                 GDKerror("cannot change mode of BAT in TRANSIENT farm.\n");
    2043           0 :                 return GDK_FAIL;
    2044             :         }
    2045             : 
    2046       65633 :         if (transient != b->batTransient) {
    2047       65389 :                 bat bid = b->batCacheid;
    2048             : 
    2049       65389 :                 if (!transient) {
    2050       55941 :                         check_type(b->ttype);
    2051             :                 }
    2052             : 
    2053       65389 :                 if (!transient && isVIEW(b)) {
    2054           0 :                         if (VIEWreset(b) != GDK_SUCCEED) {
    2055             :                                 return GDK_FAIL;
    2056             :                         }
    2057             :                 }
    2058             :                 /* persistent BATs get a logical reference */
    2059       65389 :                 if (!transient) {
    2060       55941 :                         BBPretain(bid);
    2061        9448 :                 } else if (!b->batTransient) {
    2062        9448 :                         BBPrelease(bid);
    2063             :                 }
    2064       65389 :                 MT_lock_set(&GDKswapLock(bid));
    2065       65389 :                 if (!transient) {
    2066       55941 :                         if (!(BBP_status(bid) & BBPDELETED))
    2067       55941 :                                 BBP_status_on(bid, BBPNEW, "BATmode");
    2068             :                         else
    2069           0 :                                 BBP_status_on(bid, BBPEXISTING, "BATmode");
    2070       55941 :                         BBP_status_off(bid, BBPDELETED, "BATmode");
    2071        9448 :                 } else if (!b->batTransient) {
    2072        9448 :                         if (!(BBP_status(bid) & BBPNEW))
    2073        9447 :                                 BBP_status_on(bid, BBPDELETED, "BATmode");
    2074        9448 :                         BBP_status_off(bid, BBPPERSISTENT, "BATmode");
    2075             :                 }
    2076             :                 /* session bats or persistent bats that did not
    2077             :                  * witness a commit yet may have been saved */
    2078       65389 :                 if (b->batCopiedtodisk) {
    2079        9623 :                         if (!transient) {
    2080         176 :                                 BBP_status_off(bid, BBPTMP, "BATmode");
    2081             :                         } else {
    2082             :                                 /* TMcommit must remove it to
    2083             :                                  * guarantee free space */
    2084        9447 :                                 BBP_status_on(bid, BBPTMP, "BATmode");
    2085             :                         }
    2086             :                 }
    2087       65389 :                 b->batTransient = transient;
    2088       65389 :                 MT_lock_unset(&GDKswapLock(bid));
    2089             :         }
    2090             :         return GDK_SUCCEED;
    2091             : }
    2092             : 
    2093             : /* BATassertProps checks whether properties are set correctly.  Under
    2094             :  * no circumstances will it change any properties.  Note that the
    2095             :  * "nil" property is not actually used anywhere, but it is checked. */
    2096             : 
    2097             : #ifdef NDEBUG
    2098             : /* assertions are disabled (NDEBUG is defined): replace assert so that a failing test is turned into a message via TRC_CRITICAL_ENDIF, which is passed the stringified test; note that, as written, the test expression itself is still evaluated */
    2099             : #undef assert
    2100             : #define assert(test)    ((void) ((test) || (TRC_CRITICAL_ENDIF(BAT_, "Assertion `%s' failed\n", #test), 0)))
    2101             : #endif
    2102             : 
    2103             : /* Assert that properties are set correctly.
    2104             :  *
    2105             :  * A BAT can have a bunch of properties set.  Mostly, the property
    2106             :  * bits are set if we *know* the property holds, and not set if we
    2107             :  * don't know whether the property holds (or if we know it doesn't
    2108             :  * hold).  All properties are per column.
    2109             :  *
    2110             :  * The properties currently maintained are:
    2111             :  *
    2112             :  * seqbase      Only valid for TYPE_oid and TYPE_void columns: each
    2113             :  *              value in the column is exactly one more than the
    2114             :  *              previous value, starting at position 0 with the value
    2115             :  *              stored in this property.
    2116             :  *              This implies sorted, key, nonil (which therefore need
    2117             :  *              to be set).
    2118             :  * nil          There is at least one NIL value in the column.
    2119             :  * nonil        There are no NIL values in the column.
    2120             :  * key          All values in the column are distinct.
    2121             :  * sorted       The column is sorted (ascending).  If also revsorted,
    2122             :  *              then all values are equal.
    2123             :  * revsorted    The column is reversely sorted (descending).  If
    2124             :  *              also sorted, then all values are equal.
    2125             :  * nosorted     BUN position that proves the column is not sorted (the
    2126             :  *              value at this position and the one before it are out of order).
    2127             :  * norevsorted  BUN position that proves the column is not revsorted (the
    2128             :  *              value at this position and the one before it are out of order).
    2129             :  * nokey        Pair of BUN positions that prove that not all values are
    2130             :  *              distinct (i.e. the values at the given positions are equal).
    2131             :  *
    2132             :  * Note that the functions BATtseqbase and BATkey also set more
    2133             :  * properties than you might suspect.  When setting properties on a
    2134             :  * newly created and filled BAT, you may want to first make sure the
    2135             :  * batCount is set correctly (e.g. by calling BATsetcount), then use
    2136             :  * BATtseqbase and BATkey, and finally set the other properties.
    2137             :  */
    2138             : 
    2139             : void
    2140    43497400 : BATassertProps(BAT *b) /* Check, by means of assert(), that the descriptor fields and property flags of b are mutually consistent and, inside the PROPDEBUG block, that they agree with the actual tail values.  Returns nothing. */
    2141             : {
    2142    43497400 :         unsigned bbpstatus;
    2143    43497400 :         BATiter bi = bat_iterator(b); /* NOTE(review): evaluated before the b != NULL assertion below */
    2144    43497400 :         BUN p, q;
    2145    43497400 :         int (*cmpf)(const void *, const void *);
    2146    43497400 :         int cmp;
    2147    43497400 :         const void *prev = NULL, *valp, *nilp;
    2148             : 
    2149             :         /* general BAT sanity */
    2150    43497400 :         assert(b != NULL);
    2151    43497400 :         assert(b->batCacheid > 0);
    2152    43497400 :         assert(b->batCount >= b->batInserted);
    2153             : 
    2154             :         /* headless */
    2155    43497400 :         assert(b->hseqbase <= GDK_oid_max); /* non-nil seqbase */
    2156    43497400 :         assert(b->hseqbase + BATcount(b) <= GDK_oid_max);
    2157             : 
    2158    43497400 :         bbpstatus = BBP_status(b->batCacheid);
    2159             :         /* only at most one of BBPDELETED, BBPEXISTING, BBPNEW may be set */
    2160    43497400 :         assert(((bbpstatus & BBPDELETED) != 0) +
    2161             :                ((bbpstatus & BBPEXISTING) != 0) +
    2162             :                ((bbpstatus & BBPNEW) != 0) <= 1);
    2163             : 
    2164    43497400 :         assert(b != NULL); /* NOTE(review): repeats the b != NULL check made at the top of the function */
    2165    43497400 :         assert(b->ttype >= TYPE_void);
    2166    43497400 :         assert(b->ttype < GDKatomcnt);
    2167    43497400 :         assert(b->ttype != TYPE_bat);
    2168    43497400 :         assert(isVIEW(b) || /* unless b is a view or void, the roles bitmask of the tail heap's farm must contain the bit for b->batRole */
    2169             :                b->ttype == TYPE_void ||
    2170             :                BBPfarms[b->theap.farmid].roles & (1 << b->batRole));
    2171    43497400 :         assert(isVIEW(b) || /* same roles check for the var heap, if there is one */
    2172             :                b->tvheap == NULL ||
    2173             :                (BBPfarms[b->tvheap->farmid].roles & (1 << b->batRole)));
    2174             : 
    2175    43497400 :         cmpf = ATOMcompare(b->ttype); /* comparison function used for all value comparisons below */
    2176    43497400 :         nilp = ATOMnilptr(b->ttype); /* a value counts as nil below when cmpf(value, nilp) == 0 */
    2177             : 
    2178    43497400 :         assert(b->theap.free >= tailsize(b, BUNlast(b)));
    2179    43497400 :         if (b->ttype != TYPE_void) {
    2180    41738900 :                 assert(b->batCount <= b->batCapacity);
    2181    41738900 :                 assert(b->theap.size >= b->theap.free);
    2182    41738900 :                 assert(b->theap.size >> b->tshift >= b->batCapacity);
    2183             :         }
    2184             : 
    2185             :         /* void and str imply varsized */
    2186    43497400 :         if (b->ttype == TYPE_void ||
    2187    41738400 :             ATOMstorage(b->ttype) == TYPE_str)
    2188     4708800 :                 assert(b->tvarsized);
    2189             :         /* other "known" types are not varsized */
    2190    43497400 :         if (ATOMstorage(b->ttype) > TYPE_void &&
    2191             :             ATOMstorage(b->ttype) < TYPE_str)
    2192    38788700 :                 assert(!b->tvarsized);
    2193             :         /* shift and width have a particular relationship */
    2194    43497400 :         if (ATOMstorage(b->ttype) == TYPE_str)
    2195     2949840 :                 assert(b->twidth >= 1 && b->twidth <= ATOMsize(b->ttype));
    2196             :         else
    2197    40547500 :                 assert(b->twidth == ATOMsize(b->ttype));
    2198    43497400 :         assert(b->tseqbase <= oid_nil);
    2199             :         /* only oid/void columns can be dense */
    2200    43497400 :         assert(is_oid_nil(b->tseqbase) || b->ttype == TYPE_oid || b->ttype == TYPE_void);
    2201             :         /* a column cannot both have and not have NILs */
    2202    43497400 :         assert(!b->tnil || !b->tnonil);
    2203    43497400 :         if (b->ttype == TYPE_void) {
    2204     1756260 :                 assert(b->tshift == 0);
    2205     1756260 :                 assert(b->twidth == 0);
    2206     1756260 :                 assert(b->tsorted);
    2207     1756260 :                 if (is_oid_nil(b->tseqbase)) {
    2208       10805 :                         assert(b->tvheap == NULL);
    2209       10805 :                         assert(BATcount(b) == 0 || !b->tnonil);
    2210       10805 :                         assert(BATcount(b) <= 1 || !b->tkey);
    2211       10805 :                         assert(b->trevsorted);
    2212             :                 } else {
    2213     1745450 :                         if (b->tvheap != NULL) {
    2214             :                                 /* candidate list with exceptions */
    2215        2443 :                                 assert(b->batRole == TRANSIENT);
    2216        2443 :                                 assert(b->tvheap->free <= b->tvheap->size);
    2217        2443 :                                 assert(b->tvheap->free % SIZEOF_OID == 0);
    2218        2443 :                                 if (b->tvheap->free > 0) {
    2219        2443 :                                         const oid *oids = (const oid *) b->tvheap->base;
    2220        2443 :                                         q = b->tvheap->free / SIZEOF_OID; /* number of exception oids stored in the var heap */
    2221        2443 :                                         assert(oids != NULL);
    2222        2443 :                                         assert(b->tseqbase + BATcount(b) + q <= GDK_oid_max);
    2223             :                                         /* exceptions within range */
    2224        2443 :                                         assert(oids[0] >= b->tseqbase);
    2225        2443 :                                         assert(oids[q - 1] < b->tseqbase + BATcount(b) + q);
    2226             :                                         /* exceptions sorted */
    2227      272792 :                                         for (p = 1; p < q; p++)
    2228      270349 :                                                 assert(oids[p - 1] < oids[p]);
    2229             :                                 }
    2230             :                         }
    2231     1745450 :                         assert(b->tseqbase + b->batCount <= GDK_oid_max);
    2232     1745450 :                         assert(BATcount(b) == 0 || !b->tnil);
    2233     1745450 :                         assert(BATcount(b) <= 1 || !b->trevsorted);
    2234     1745450 :                         assert(b->tkey);
    2235     1745450 :                         assert(b->tnonil);
    2236             :                 }
    2237     3152370 :                 return; /* void columns: no further checks are made */
    2238             :         }
    2239    41741100 :         if (BATtdense(b)) {
    2240      812919 :                 assert(b->tseqbase + b->batCount <= GDK_oid_max);
    2241      812919 :                 assert(b->ttype == TYPE_oid);
    2242      812919 :                 assert(b->tsorted);
    2243      812919 :                 assert(b->tkey);
    2244      812919 :                 assert(b->tnonil);
    2245      812919 :                 if ((q = b->batCount) != 0) {
    2246      302732 :                         const oid *o = (const oid *) Tloc(b, 0);
    2247      302732 :                         assert(*o == b->tseqbase); /* first value equals the seqbase ... */
    2248   189945000 :                         for (p = 1; p < q; p++)
    2249   189642000 :                                 assert(o[p - 1] + 1 == o[p]); /* ... and every next value is exactly one more than the previous */
    2250             :                 }
    2251             :                 return; /* dense columns: every value was checked in the loop above */
    2252             :         }
    2253    40928200 :         assert(1 << b->tshift == b->twidth); /* width must be exactly 2 to the power tshift */
    2254             :         /* only linear atoms can be sorted */
    2255    40928200 :         assert(!b->tsorted || ATOMlinear(b->ttype));
    2256    40928200 :         assert(!b->trevsorted || ATOMlinear(b->ttype));
    2257    40928200 :         if (ATOMlinear(b->ttype)) {
    2258    40926700 :                 assert(b->tnosorted == 0 ||
    2259             :                        (b->tnosorted > 0 &&
    2260             :                         b->tnosorted < b->batCount));
    2261    40926700 :                 assert(!b->tsorted || b->tnosorted == 0);
    2262    40926700 :                 if (!b->tsorted &&
    2263    30524200 :                     b->tnosorted > 0 &&
    2264             :                     b->tnosorted < b->batCount)
    2265    30524300 :                         assert(cmpf(BUNtail(bi, b->tnosorted - 1), /* the recorded position must really show a descent */
    2266             :                                     BUNtail(bi, b->tnosorted)) > 0);
    2267    40926700 :                 assert(b->tnorevsorted == 0 ||
    2268             :                        (b->tnorevsorted > 0 &&
    2269             :                         b->tnorevsorted < b->batCount));
    2270    40926700 :                 assert(!b->trevsorted || b->tnorevsorted == 0);
    2271    40926700 :                 if (!b->trevsorted &&
    2272    13559400 :                     b->tnorevsorted > 0 &&
    2273    13559400 :                     b->tnorevsorted < b->batCount)
    2274    13559600 :                         assert(cmpf(BUNtail(bi, b->tnorevsorted - 1), /* the recorded position must really show an ascent */
    2275             :                                     BUNtail(bi, b->tnorevsorted)) < 0);
    2276             :         }
    2277             :         /* if tkey property set, both tnokey values must be 0 */
    2278    40927900 :         assert(!b->tkey || (b->tnokey[0] == 0 && b->tnokey[1] == 0));
    2279    40927900 :         if (!b->tkey && (b->tnokey[0] != 0 || b->tnokey[1] != 0)) {
    2280             :                 /* if tkey not set and tnokey indicates a proof of
    2281             :                  * non-key-ness, make sure the tnokey values are in
    2282             :                  * range and indeed provide a proof */
    2283    20638500 :                 assert(b->tnokey[0] != b->tnokey[1]);
    2284    20638500 :                 assert(b->tnokey[0] < b->batCount);
    2285    20638500 :                 assert(b->tnokey[1] < b->batCount);
    2286    20638500 :                 assert(cmpf(BUNtail(bi, b->tnokey[0]),
    2287             :                             BUNtail(bi, b->tnokey[1])) == 0);
    2288             :         }
    2289             :         /* var heaps must have sane sizes */
    2290    40927900 :         assert(b->tvheap == NULL || b->tvheap->free <= b->tvheap->size);
    2291             : 
    2292    40927900 :         if (!b->tkey && !b->tsorted && !b->trevsorted &&
    2293    12615700 :             !b->tnonil && !b->tnil) {
    2294             :                 /* nothing more to prove */
    2295             :                 return;
    2296             :         }
    2297             : 
    2298    40344700 :         PROPDEBUG { /* only do a scan if property checking is requested */
    2299     7645930 :                 PROPrec *prop;
    2300     7645930 :                 const void *maxval = NULL;
    2301     7645930 :                 const void *minval = NULL;
    2302     7645930 :                 bool seenmax = false, seenmin = false;
    2303     7645930 :                 bool seennil = false;
    2304             : 
    2305     7645930 :                 if ((prop = BATgetprop(b, GDK_MAX_VALUE)) != NULL)
    2306      590962 :                         maxval = VALptr(&prop->v);
    2307     7651030 :                 if ((prop = BATgetprop(b, GDK_MIN_VALUE)) != NULL)
    2308      544489 :                         minval = VALptr(&prop->v);
    2309     7650870 :                 if (b->tsorted || b->trevsorted || !b->tkey) {
    2310             :                         /* if sorted (either way), or we don't have to
    2311             :                          * prove uniqueness, we can do a simple
    2312             :                          * scan */
    2313             :                         /* only call compare function if we have to */
    2314     7587600 :                         bool cmpprv = b->tsorted | b->trevsorted | b->tkey;
    2315     7587600 :                         bool cmpnil = b->tnonil | b->tnil;
    2316             : 
    2317  9533540000 :                         BATloop(b, p, q) {
    2318  9526200000 :                                 valp = BUNtail(bi, p);
    2319  9526200000 :                                 bool isnil = cmpf(valp, nilp) == 0;
    2320  9448820000 :                                 assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
    2321  9448820000 :                                 assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
    2322  9448820000 :                                 if (maxval && !isnil) {
    2323   603115000 :                                         cmp = cmpf(maxval, valp);
    2324   603107000 :                                         assert(cmp >= 0); /* no non-nil value may exceed the recorded maximum */
    2325   603107000 :                                         seenmax |= cmp == 0;
    2326             :                                 }
    2327  9448820000 :                                 if (minval && !isnil) {
    2328    51614600 :                                         cmp = cmpf(minval, valp);
    2329    51614200 :                                         assert(cmp <= 0); /* no non-nil value may be below the recorded minimum */
    2330    51614200 :                                         seenmin |= cmp == 0;
    2331             :                                 }
    2332  9448820000 :                                 if (prev && cmpprv) {
    2333  3508280000 :                                         cmp = cmpf(prev, valp);
    2334  3585640000 :                                         assert(!b->tsorted || cmp <= 0);
    2335  3585640000 :                                         assert(!b->trevsorted || cmp >= 0);
    2336  3585640000 :                                         assert(!b->tkey || cmp != 0);
    2337             :                                 }
    2338  9526180000 :                                 if (cmpnil) {
    2339  9271760000 :                                         assert(!b->tnonil || !isnil);
    2340  9271760000 :                                         if (isnil) {
    2341             :                                                 /* we found a nil:
    2342             :                                                  * we're done checking
    2343             :                                                  * for them */
    2344      279659 :                                                 seennil = true;
    2345      279659 :                                                 cmpnil = 0;
    2346      279659 :                                                 if (!cmpprv && maxval == NULL && minval == NULL) {
    2347             :                                                         /* we were
    2348             :                                                          * only
    2349             :                                                          * checking
    2350             :                                                          * for nils,
    2351             :                                                          * so nothing
    2352             :                                                          * more to
    2353             :                                                          * do */
    2354             :                                                         break;
    2355             :                                                 }
    2356             :                                         }
    2357             :                                 }
    2358  9525960000 :                                 prev = valp;
    2359             :                         }
    2360             :                 } else {        /* b->tkey && !b->tsorted && !b->trevsorted */
    2361             :                         /* we need to check for uniqueness the hard
    2362             :                          * way (i.e. using a hash table) */
    2363       63264 :                         const char *nme = BBP_physical(b->batCacheid);
    2364       63264 :                         Hash *hs = NULL;
    2365       63264 :                         BUN mask;
    2366             : 
    2367       63264 :                         if ((hs = GDKzalloc(sizeof(Hash))) == NULL) {
    2368           0 :                                 TRC_WARNING(BAT_, "Cannot allocate hash table\n");
    2369           0 :                                 goto abort_check;
    2370             :                         }
    2371       63264 :                         if (snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshprpl%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs->heaplink.filename) ||
    2372       63263 :                             snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshprpb%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs->heapbckt.filename)) {
    2373             :                                 /* cannot happen, see comment in gdk.h
    2374             :                                  * about sizes near definition of
    2375             :                                  * BBPINIT */
    2376           0 :                                 GDKfree(hs);
    2377           0 :                                 TRC_CRITICAL(BAT_, "Heap filename is too large\n");
    2378           0 :                                 goto abort_check;
    2379             :                         }
    2380       63265 :                         if (ATOMsize(b->ttype) == 1) /* pick the mask handed to HASHnew: fixed for 1- and 2-byte atoms, otherwise derived from the count */
    2381             :                                 mask = (BUN) 1 << 8;
    2382       63233 :                         else if (ATOMsize(b->ttype) == 2)
    2383             :                                 mask = (BUN) 1 << 16;
    2384             :                         else
    2385       63153 :                                 mask = HASHmask(b->batCount);
    2386       63265 :                         if ((hs->heaplink.farmid = BBPselectfarm(
    2387       63264 :                                      TRANSIENT, b->ttype, hashheap)) < 0 ||
    2388       63264 :                             (hs->heapbckt.farmid = BBPselectfarm(
    2389      126530 :                                     TRANSIENT, b->ttype, hashheap)) < 0 ||
    2390       63264 :                             HASHnew(hs, b->ttype, BUNlast(b),
    2391             :                                     mask, BUN_NONE, false) != GDK_SUCCEED) {
    2392           0 :                                 GDKfree(hs);
    2393           0 :                                 TRC_WARNING(BAT_, "Cannot allocate hash table\n");
    2394           0 :                                 goto abort_check;
    2395             :                         }
    2396   169674000 :                         BATloop(b, p, q) {
    2397   169611000 :                                 BUN hb;
    2398   169611000 :                                 BUN prb;
    2399   169611000 :                                 valp = BUNtail(bi, p);
    2400   169611000 :                                 bool isnil = cmpf(valp, nilp) == 0;
    2401   169609000 :                                 assert(b->ttype != TYPE_flt || !isinf(*(flt*)valp));
    2402   169609000 :                                 assert(b->ttype != TYPE_dbl || !isinf(*(dbl*)valp));
    2403   169609000 :                                 if (maxval && !isnil) {
    2404        2963 :                                         cmp = cmpf(maxval, valp);
    2405        2963 :                                         assert(cmp >= 0);
    2406        2963 :                                         seenmax |= cmp == 0;
    2407             :                                 }
    2408   169609000 :                                 if (minval && !isnil) {
    2409        2963 :                                         cmp = cmpf(minval, valp);
    2410        2963 :                                         assert(cmp <= 0);
    2411        2963 :                                         seenmin |= cmp == 0;
    2412             :                                 }
    2413   169609000 :                                 prb = HASHprobe(hs, valp);
    2414   169611000 :                                 for (hb = HASHget(hs, prb);
    2415   196961000 :                                      hb != HASHnil(hs);
    2416   224311000 :                                      hb = HASHgetlink(hs, hb))
    2417    27350000 :                                         if (cmpf(valp, BUNtail(bi, hb)) == 0)
    2418           0 :                                                 assert(!b->tkey); /* an equal value was inserted earlier; tkey is set in this branch, so this assertion fails */
    2419   339222000 :                                 HASHputlink(hs, p, HASHget(hs, prb)); /* presumably links p to the entry currently stored for prb, after which p is stored for prb -- confirm against the HASH macros */
    2420   169611000 :                                 HASHput(hs, prb, p);
    2421   169611000 :                                 assert(!b->tnonil || !isnil);
    2422   169611000 :                                 seennil |= isnil;
    2423             :                         }
    2424       63250 :                         HEAPfree(&hs->heaplink, true); /* release the temporary hash allocated above */
    2425       63265 :                         HEAPfree(&hs->heapbckt, true);
    2426       63265 :                         GDKfree(hs);
    2427             :                 }
    2428     7632700 :           abort_check: /* NOTE(review): the three failure paths above jump here without scanning, so seenmax/seenmin/seennil are still false; the assertions below can then fail even though nothing was checked if a min/max property or tnil is set -- confirm whether that is intended */
    2429     7632700 :                 GDKclrerr();
    2430     7625470 :                 assert(maxval == NULL || seenmax); /* a recorded maximum must actually occur in the column */
    2431     7625470 :                 assert(minval == NULL || seenmin); /* a recorded minimum must actually occur in the column */
    2432     7625470 :                 assert(!b->tnil || seennil); /* if tnil is set, a nil must have been seen */
    2433             :         }
    2434             : }

Generated by: LCOV version 1.14