LCOV - code coverage report
Current view: top level - gdk - gdk_hash.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 549 712 77.1 %
Date: 2021-09-14 19:48:19 Functions: 21 26 80.8 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  * - Hash Table Creation
      11             :  * The hash indexing scheme for BATs reserves a block of memory to
      12             :  * maintain the hash table and a collision list. A one-to-one mapping
      13             :  * exists between the BAT and the collision list using the BUN
      14             :  * index. NOTE: we alloc the link list as a parallel array to the BUN
      15             :  * array; hence the hash link array has the same size as
      16             :  * BATcapacity(b) (not BATcount(b)). This allows us in the BUN insert
      17             :  * and delete to assume that there is hash space iff there is BUN
      18             :  * space.
      19             :  *
      20             :  * The hash mask size is a power of two, so we can do bitwise AND on
      21             :  * the hash (integer) number to quickly find the head of the bucket
      22             :  * chain.  Clearly, the hash mask size is a crucial parameter. If we
      23             :  * know that the column is unique (tkey), we use direct hashing (mask
      24             :  * size ~= BATcount). Otherwise we dynamically determine the mask size
      25             :  * by starting out with mask size = BATcount/64 (just 1.5% of memory
      26             :  * storage overhead). Then we start building the hash table on the
      27             :  * first 25% of the BAT. As we aim for max-collisions list length of
      28             :  * 4, the list on 25% should not exceed length 1. So, if a small
      29             :  * number of collisssions occurs (mask/2) then we abandon the attempt
      30             :  * and restart with a mask that is 4 times larger. This converges
      31             :  * after three cycles to direct hashing.
      32             :  */
      33             : 
      34             : #include "monetdb_config.h"
      35             : #include "gdk.h"
      36             : #include "gdk_private.h"
      37             : 
      38             : static inline uint8_t __attribute__((__const__))
      39             : HASHwidth(BUN hashsize)
      40             : {
      41             :         (void) hashsize;
      42             : #ifdef BUN2
      43      191966 :         if (hashsize <= (BUN) BUN2_NONE)
      44             :                 return BUN2;
      45             : #endif
      46             : #ifdef BUN8
      47        1296 :         if (hashsize > (BUN) BUN4_NONE)
      48           0 :                 return BUN8;
      49             : #endif
      50             :         return BUN4;
      51             : }
      52             : 
      53             : static inline BUN __attribute__((__const__))
      54      115639 : hashmask(BUN m)
      55             : {
      56      115639 :         m |= m >> 1;
      57      115639 :         m |= m >> 2;
      58      115639 :         m |= m >> 4;
      59      115639 :         m |= m >> 8;
      60      115639 :         m |= m >> 16;
      61             : #if SIZEOF_BUN == 8
      62      115639 :         m |= m >> 32;
      63             : #endif
      64      115639 :         return m;
      65             : }
      66             : 
      67             : BUN
      68      187743 : HASHmask(BUN cnt)
      69             : {
      70             :         BUN m = cnt;
      71             : 
      72             : #if 0
      73             :         /* find largest power of 2 smaller than or equal to cnt */
      74             :         m = hashmask(m);
      75             :         m -= m >> 1;
      76             : 
      77             :         /* if cnt is more than 1/3 into the gap between m and 2*m,
      78             :            double m */
      79             :         if (m + m - cnt < 2 * (cnt - m))
      80             :                 m += m;
      81             : #else
      82      284528 :         m = m * 8 / 7;
      83             : #endif
      84      187743 :         if (m < BATTINY)
      85             :                 m = BATTINY;
      86      187743 :         return m;
      87             : }
      88             : 
      89             : static inline void
      90      319635 : HASHclear(Hash *h)
      91             : {
      92             :         /* since BUN2_NONE, BUN4_NONE, BUN8_NONE
      93             :          * are all equal to ~0, i.e., have all bits set,
      94             :          * we can use a simple memset() to clear the Hash,
      95             :          * rather than iteratively assigning individual
      96             :          * BUNi_NONE values in a for-loop
      97             :          */
      98      319635 :         memset(h->Bckt, 0xFF, h->nbucket * h->width);
      99      319635 : }
     100             : 
     101             : #define HASH_VERSION            4
     102             : /* this is only for the change of hash function of the UUID type; if
     103             :  * HASH_VERSION is increased again from 4, the code associated with
     104             :  * HASH_VERSION_NOUUID must be deleted */
     105             : #define HASH_VERSION_NOUUID     3
     106             : #define HASH_HEADER_SIZE        7       /* nr of size_t fields in header */
     107             : 
     108             : void
     109       92065 : doHASHdestroy(BAT *b, Hash *hs)
     110             : {
     111       92065 :         if (hs == (Hash *) 1) {
     112          66 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
     113             :                           BATDIR,
     114          66 :                           BBP_physical(b->batCacheid),
     115             :                           "thashl");
     116          67 :                 GDKunlink(BBPselectfarm(b->batRole, b->ttype, hashheap),
     117             :                           BATDIR,
     118          67 :                           BBP_physical(b->batCacheid),
     119             :                           "thashb");
     120       91999 :         } else if (hs) {
     121       91999 :                 bat p = VIEWtparent(b);
     122             :                 BAT *hp = NULL;
     123             : 
     124       80127 :                 if (p)
     125       80127 :                         hp = BBP_cache(p);
     126             : 
     127       91999 :                 if (!hp || hs != hp->thash) {
     128       91999 :                         TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": removing%s hash\n", ALGOBATPAR(b), *(size_t *) hs->heapbckt.base & (1 << 24) ? " persisted" : "");
     129       91999 :                         HEAPfree(&hs->heapbckt, true);
     130       92013 :                         HEAPfree(&hs->heaplink, true);
     131       92017 :                         GDKfree(hs);
     132             :                 }
     133             :         }
     134       92084 : }
     135             : 
     136             : gdk_return
     137      319279 : HASHnew(Hash *h, int tpe, BUN size, BUN mask, BUN count, bool bcktonly)
     138             : {
     139      319279 :         if (h->width == 0)
     140      191966 :                 h->width = HASHwidth(size);
     141             : 
     142      319279 :         if (!bcktonly) {
     143      191948 :                 if (HEAPalloc(&h->heaplink, size, h->width, 0) != GDK_SUCCEED)
     144             :                         return GDK_FAIL;
     145      192218 :                 h->heaplink.free = size * h->width;
     146      192218 :                 h->Link = h->heaplink.base;
     147             :         }
     148      319549 :         if (HEAPalloc(&h->heapbckt, mask + HASH_HEADER_SIZE * SIZEOF_SIZE_T / h->width, h->width, 0) != GDK_SUCCEED)
     149             :                 return GDK_FAIL;
     150      319619 :         h->heapbckt.free = mask * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     151      319619 :         h->nbucket = mask;
     152      319619 :         if (mask & (mask - 1)) {
     153      115262 :                 h->mask2 = hashmask(mask);
     154      115262 :                 h->mask1 = h->mask2 >> 1;
     155             :         } else {
     156             :                 /* mask is a power of two */
     157      204357 :                 h->mask1 = mask - 1;
     158      204357 :                 h->mask2 = h->mask1 << 1 | 1;
     159             :         }
     160      319619 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     161      319619 :         h->type = tpe;
     162      319619 :         HASHclear(h);           /* zero the mask */
     163      319663 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     164      319663 :         ((size_t *) h->heapbckt.base)[1] = (size_t) size;
     165      319663 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     166      319663 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     167      319663 :         ((size_t *) h->heapbckt.base)[4] = (size_t) count;
     168      319663 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     169      319663 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     170      319663 :         TRC_DEBUG(ACCELERATOR,
     171             :                   "create hash(size " BUNFMT ", mask " BUNFMT ", width %d, total " BUNFMT " bytes);\n", size, mask, h->width, (size + mask) * h->width);
     172             :         return GDK_SUCCEED;
     173             : }
     174             : 
     175             : /* collect HASH statistics for analysis */
     176             : static void
     177           0 : HASHcollisions(BAT *b, Hash *h, const char *func)
     178             : {
     179             :         lng cnt, entries = 0, max = 0;
     180             :         double total = 0;
     181             :         BUN p, i, j;
     182             : 
     183           0 :         if (b == 0 || h == 0)
     184             :                 return;
     185           0 :         for (i = 0, j = h->nbucket; i < j; i++)
     186           0 :                 if ((p = HASHget(h, i)) != BUN_NONE) {
     187           0 :                         entries++;
     188             :                         cnt = 0;
     189           0 :                         for (; p != BUN_NONE; p = HASHgetlink(h, p))
     190           0 :                                 cnt++;
     191             :                         if (cnt > max)
     192             :                                 max = cnt;
     193           0 :                         total += cnt;
     194             :                 }
     195           0 :         TRC_DEBUG_ENDIF(ACCELERATOR,
     196             :                         "%s(" ALGOBATFMT "): statistics " BUNFMT ", "
     197             :                         "entries " LLFMT ", nunique " BUNFMT ", "
     198             :                         "nbucket " BUNFMT ", max " LLFMT ", avg %2.6f;\n",
     199             :                         func, ALGOBATPAR(b), BATcount(b), entries,
     200             :                         h->nunique, h->nbucket, max,
     201             :                         entries == 0 ? 0 : total / entries);
     202             : }
     203             : 
     204             : static gdk_return
     205           0 : HASHupgradehashheap(BAT *b)
     206             : {
     207             : #if defined(BUN2) || defined(BUN8)
     208           0 :         Hash *h = b->thash;
     209           0 :         int nwidth = h->width << 1;
     210             :         BUN i;
     211             : 
     212           0 :         assert(nwidth <= SIZEOF_BUN);
     213           0 :         assert((nwidth & (nwidth - 1)) == 0);
     214             : 
     215           0 :         if (HEAPextend(&h->heaplink, h->heaplink.size * nwidth / h->width, true) != GDK_SUCCEED ||
     216           0 :             HEAPextend(&h->heapbckt, (h->heapbckt.size - HASH_HEADER_SIZE * SIZEOF_SIZE_T) * nwidth / h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T, true) != GDK_SUCCEED) {
     217           0 :                 b->thash = NULL;
     218           0 :                 doHASHdestroy(b, h);
     219           0 :                 return GDK_FAIL;
     220             :         }
     221           0 :         h->Link = h->heaplink.base;
     222           0 :         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     223           0 :         switch (nwidth) {
     224           0 :         case BUN4:
     225             : #ifdef BUN2
     226           0 :                 switch (h->width) {
     227           0 :                 case BUN2:
     228           0 :                         i = h->heaplink.free / h->width;
     229           0 :                         h->heaplink.free = i * nwidth;
     230           0 :                         while (i > 0) {
     231           0 :                                 i--;
     232           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     233           0 :                                 ((BUN4type *) h->Link)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     234             :                         }
     235           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     236           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     237           0 :                         while (i > 0) {
     238           0 :                                 i--;
     239           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     240           0 :                                 ((BUN4type *) h->Bckt)[i] = v == BUN2_NONE ? BUN4_NONE : v;
     241             :                         }
     242             :                         break;
     243             :                 }
     244             : #endif
     245             :                 break;
     246             : #ifdef BUN8
     247           0 :         case BUN8:
     248           0 :                 switch (h->width) {
     249             : #ifdef BUN2
     250           0 :                 case BUN2:
     251           0 :                         i = h->heaplink.free / h->width;
     252           0 :                         h->heaplink.free = i * nwidth;
     253           0 :                         while (i > 0) {
     254           0 :                                 i--;
     255           0 :                                 BUN2type v = ((BUN2type *) h->Link)[i];
     256           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     257             :                         }
     258           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     259           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     260           0 :                         while (i > 0) {
     261           0 :                                 i--;
     262           0 :                                 BUN2type v = ((BUN2type *) h->Bckt)[i];
     263           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN2_NONE ? BUN8_NONE : v;
     264             :                         }
     265             :                         break;
     266             : #endif
     267           0 :                 case BUN4:
     268           0 :                         i = h->heaplink.free / h->width;
     269           0 :                         h->heaplink.free = i * nwidth;
     270           0 :                         while (i > 0) {
     271           0 :                                 i--;
     272           0 :                                 BUN4type v = ((BUN4type *) h->Link)[i];
     273           0 :                                 ((BUN8type *) h->Link)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     274             :                         }
     275           0 :                         i = (h->heapbckt.free - HASH_HEADER_SIZE * SIZEOF_SIZE_T) / h->width;
     276           0 :                         h->heapbckt.free = HASH_HEADER_SIZE * SIZEOF_SIZE_T + i * nwidth;
     277           0 :                         while (i > 0) {
     278           0 :                                 i--;
     279           0 :                                 BUN4type v = ((BUN4type *) h->Bckt)[i];
     280           0 :                                 ((BUN8type *) h->Bckt)[i] = v == BUN4_NONE ? BUN8_NONE : v;
     281             :                         }
     282             :                         break;
     283             :                 }
     284             :                 break;
     285             : #endif
     286             :         }
     287           0 :         h->width = nwidth;
     288             : #else
     289             :         (void) b;
     290             : #endif
     291           0 :         return GDK_SUCCEED;
     292             : }
     293             : 
     294             : /* write/remove the bit into/from the hash file that indicates the hash
     295             :  * is good to go; the bit is the last part to be written and the first
     296             :  * to be removed */
     297             : static inline gdk_return
     298     1695082 : HASHfix(Hash *h, bool save, bool dosync)
     299             : {
     300     1695082 :         if (!h->heapbckt.dirty && !h->heaplink.dirty) {
     301             :                 const size_t mask = (size_t) 1 << 24;
     302       57708 :                 if (((size_t *) h->heapbckt.base)[0] & mask) {
     303       23321 :                         if (save)
     304             :                                 return GDK_SUCCEED;
     305       23319 :                         ((size_t *) h->heapbckt.base)[0] &= ~mask;
     306             :                 } else {
     307       34387 :                         if (!save)
     308             :                                 return GDK_SUCCEED;
     309       34387 :                         ((size_t *) h->heapbckt.base)[0] |= mask;
     310             :                 }
     311       57706 :                 if (h->heapbckt.storage == STORE_MEM) {
     312             :                         gdk_return rc = GDK_FAIL;
     313       57624 :                         int fd = GDKfdlocate(h->heapbckt.farmid, h->heapbckt.filename, "rb+", NULL);
     314       57621 :                         if (fd >= 0) {
     315       57623 :                                 if (write(fd, h->heapbckt.base, SIZEOF_SIZE_T) == SIZEOF_SIZE_T) {
     316       57619 :                                         if (dosync &&
     317       57619 :                                             !(GDKdebug & NOSYNCMASK)) {
     318             : #if defined(NATIVE_WIN32)
     319             :                                                 _commit(fd);
     320             : #elif defined(HAVE_FDATASYNC)
     321         136 :                                                 fdatasync(fd);
     322             : #elif defined(HAVE_FSYNC)
     323             :                                                 fsync(fd);
     324             : #endif
     325             :                                         }
     326             :                                         rc = GDK_SUCCEED;
     327             :                                 }
     328       57617 :                                 close(fd);
     329             :                         }
     330       57624 :                         if (rc != GDK_SUCCEED)
     331           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     332       57622 :                         return rc;
     333             :                 } else {
     334          82 :                         if (dosync &&
     335         157 :                             !(GDKdebug & NOSYNCMASK) &&
     336          74 :                             MT_msync(h->heapbckt.base, SIZEOF_SIZE_T) < 0) {
     337           0 :                                 ((size_t *) h->heapbckt.base)[0] &= ~mask;
     338           0 :                                 return GDK_FAIL;
     339             :                         }
     340             :                 }
     341             :         }
     342             :         return GDK_SUCCEED;
     343             : }
     344             : 
     345             : static gdk_return
     346      867602 : HASHgrowbucket(BAT *b)
     347             : {
     348      867602 :         Hash *h = b->thash;
     349             :         BUN nbucket;
     350      867602 :         BUN onbucket = h->nbucket;
     351             :         lng t0 = 0;
     352             : 
     353      867602 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     354             : 
     355             :         /* only needed to fix hash tables built before this fix was
     356             :          * introduced */
     357      867602 :         if (h->width < SIZEOF_BUN &&
     358      867553 :             ((BUN) 1 << (h->width * 8)) - 1 <= h->mask2 &&
     359           0 :             HASHupgradehashheap(b) != GDK_SUCCEED)
     360             :                 return GDK_FAIL;
     361             : 
     362      867602 :         h->heapbckt.dirty = true;
     363      867602 :         h->heaplink.dirty = true;
     364     1151977 :         while (h->nunique >= (nbucket = h->nbucket) * 7 / 8) {
     365             :                 BUN new = h->nbucket;
     366      284367 :                 BUN old = new & h->mask1;
     367      284367 :                 BUN mask = h->mask1 + 1; /* == h->mask2 - h->mask1 */
     368             : 
     369      284367 :                 assert(h->heapbckt.free == nbucket * h->width + HASH_HEADER_SIZE * SIZEOF_SIZE_T);
     370      284367 :                 if (h->heapbckt.free + h->width > h->heapbckt.size) {
     371        3079 :                         if (HEAPextend(&h->heapbckt,
     372             :                                        h->heapbckt.size + GDK_mmap_pagesize,
     373             :                                        true) != GDK_SUCCEED) {
     374           0 :                                 return GDK_FAIL;
     375             :                         }
     376        3079 :                         h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     377             :                 }
     378      284367 :                 assert(h->heapbckt.free + h->width <= h->heapbckt.size);
     379      284367 :                 if (h->nbucket == h->mask2) {
     380         266 :                         h->mask1 = h->mask2;
     381         266 :                         h->mask2 |= h->mask2 << 1;
     382         266 :                         if (h->width < SIZEOF_BUN &&
     383         266 :                             h->mask2 == ((BUN) 1 << (h->width * 8)) - 1) {
     384             :                                 /* time to widen the hash table */
     385           0 :                                 if (HASHupgradehashheap(b) != GDK_SUCCEED)
     386             :                                         return GDK_FAIL;
     387             :                         }
     388             :                 }
     389      284367 :                 h->nbucket++;
     390      284367 :                 h->heapbckt.free += h->width;
     391             :                 BUN lold, lnew, hb;
     392             :                 lold = lnew = BUN_NONE;
     393      284367 :                 BATiter bi = bat_iterator(b);
     394      284367 :                 if ((hb = HASHget(h, old)) != BUN_NONE) {
     395      229657 :                         h->nheads--;
     396             :                         do {
     397      481627 :                                 const void *v = BUNtail(bi, hb);
     398      481627 :                                 BUN hsh = ATOMhash(h->type, v);
     399      481627 :                                 assert((hsh & (mask - 1)) == old);
     400      481627 :                                 if (hsh & mask) {
     401             :                                         /* move to new list */
     402      194739 :                                         if (lnew == BUN_NONE) {
     403      143069 :                                                 HASHput(h, new, hb);
     404      143069 :                                                 h->nheads++;
     405             :                                         } else
     406       51670 :                                                 HASHputlink(h, lnew, hb);
     407             :                                         lnew = hb;
     408             :                                 } else {
     409             :                                         /* move to old list */
     410      286888 :                                         if (lold == BUN_NONE) {
     411      156160 :                                                 h->nheads++;
     412      156160 :                                                 HASHput(h, old, hb);
     413             :                                         } else
     414      130728 :                                                 HASHputlink(h, lold, hb);
     415             :                                         lold = hb;
     416             :                                 }
     417      481627 :                                 hb = HASHgetlink(h, hb);
     418      481627 :                         } while (hb != BUN_NONE);
     419             :                 }
     420      284367 :                 bat_iterator_end(&bi);
     421      284367 :                 if (lnew == BUN_NONE)
     422      141298 :                         HASHput(h, new, BUN_NONE);
     423             :                 else
     424      143069 :                         HASHputlink(h, lnew, BUN_NONE);
     425      284367 :                 if (lold == BUN_NONE)
     426      128207 :                         HASHput(h, old, BUN_NONE);
     427             :                 else
     428      156160 :                         HASHputlink(h, lold, BUN_NONE);
     429             :         }
     430      867610 :         TRC_DEBUG_IF(ACCELERATOR) if (h->nbucket > onbucket) {
     431           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR, ALGOBATFMT " " BUNFMT
     432             :                         " -> " BUNFMT " buckets (" LLFMT " usec)\n",
     433             :                         ALGOBATPAR(b),
     434             :                         onbucket, h->nbucket, GDKusec() - t0);
     435           0 :                 HASHcollisions(b, h, __func__);
     436             :         }
     437             :         return GDK_SUCCEED;
     438             : }
     439             : 
     440             : /* Return TRUE if we have a hash on the tail, even if we need to read
     441             :  * one from disk.
     442             :  *
     443             :  * Note that the b->thash pointer can be NULL, meaning there is no
     444             :  * hash; (Hash *) 1, meaning there is no hash loaded, but it may exist
     445             :  * on disk; or a valid pointer to a loaded hash.  These values are
     446             :  * maintained here, in the HASHdestroy and HASHfree functions, and in
     447             :  * BBPdiskscan during initialization. */
     448             : bool
     449     4559121 : BATcheckhash(BAT *b)
     450             : {
     451             :         bool ret;
     452             :         lng t = 0;
     453             : 
     454             :         /* we don't need the lock just to read the value b->thash */
     455     4559121 :         if (b->thash == (Hash *) 1) {
     456             :                 /* but when we want to change it, we need the lock */
     457        1074 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec();
     458        1074 :                 MT_rwlock_wrlock(&b->thashlock);
     459        1075 :                 TRC_DEBUG_IF(ACCELERATOR) t = GDKusec() - t;
     460             :                 /* if still 1 now that we have the lock, we can update */
     461        1075 :                 if (b->thash == (Hash *) 1) {
     462             :                         Hash *h;
     463             :                         int fd;
     464             : 
     465        1074 :                         assert(!GDKinmemory(b->theap->farmid));
     466        1074 :                         b->thash = NULL;
     467        1074 :                         if ((h = GDKzalloc(sizeof(*h))) != NULL &&
     468        1074 :                             (h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0 &&
     469        1074 :                             (h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype, hashheap)) >= 0) {
     470        1074 :                                 const char *nme = BBP_physical(b->batCacheid);
     471        1074 :                                 strconcat_len(h->heaplink.filename,
     472             :                                               sizeof(h->heaplink.filename),
     473             :                                               nme, ".thashl", NULL);
     474        1074 :                                 strconcat_len(h->heapbckt.filename,
     475             :                                               sizeof(h->heapbckt.filename),
     476             :                                               nme, ".thashb", NULL);
     477        1074 :                                 h->heaplink.storage = STORE_INVALID;
     478        1074 :                                 h->heaplink.newstorage = STORE_INVALID;
     479        1074 :                                 h->heapbckt.storage = STORE_INVALID;
     480        1074 :                                 h->heapbckt.newstorage = STORE_INVALID;
     481             : 
     482             :                                 /* check whether a persisted hash can be found */
     483        1074 :                                 if ((fd = GDKfdlocate(h->heapbckt.farmid, nme, "rb+", "thashb")) >= 0) {
     484             :                                         size_t hdata[HASH_HEADER_SIZE];
     485             :                                         struct stat st;
     486             : 
     487        1074 :                                         if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
     488        1074 :                                             (hdata[0] == (
     489             : #ifdef PERSISTENTHASH
     490             :                                                     ((size_t) 1 << 24) |
     491             : #endif
     492             :                                                     HASH_VERSION)
     493             : #ifdef HASH_VERSION_NOUUID
     494             :                                              /* if not uuid, also allow previous version */
     495           0 :                                              || (hdata[0] == (
     496             : #ifdef PERSISTENTHASH
     497             :                                                          ((size_t) 1 << 24) |
     498             : #endif
     499           0 :                                                          HASH_VERSION_NOUUID) &&
     500           0 :                                                  strcmp(ATOMname(b->ttype), "uuid") != 0)
     501             : #endif
     502        1074 :                                                     ) &&
     503        1074 :                                             hdata[1] > 0 &&
     504             :                                             (
     505             : #ifdef BUN2
     506        1074 :                                                     hdata[3] == BUN2 ||
     507             : #endif
     508             :                                                     hdata[3] == BUN4
     509             : #ifdef BUN8
     510           0 :                                                     || hdata[3] == BUN8
     511             : #endif
     512        1074 :                                                     ) &&
     513        2147 :                                             hdata[4] == (size_t) BATcount(b) &&
     514        1073 :                                             fstat(fd, &st) == 0 &&
     515        2147 :                                             st.st_size >= (off_t) (h->heapbckt.size = h->heapbckt.free = (h->nbucket = (BUN) hdata[2]) * (BUN) (h->width = (uint8_t) hdata[3]) + HASH_HEADER_SIZE * SIZEOF_SIZE_T) &&
     516        2147 :                                             close(fd) == 0 &&
     517        2148 :                                             (fd = GDKfdlocate(h->heaplink.farmid, nme, "rb+", "thashl")) >= 0 &&
     518        1074 :                                             fstat(fd, &st) == 0 &&
     519        1074 :                                             st.st_size > 0 &&
     520        2148 :                                             st.st_size >= (off_t) (h->heaplink.size = h->heaplink.free = hdata[1] * h->width) &&
     521        1074 :                                             HEAPload(&h->heaplink, nme, "thashl", false) == GDK_SUCCEED) {
     522        1074 :                                                 if (HEAPload(&h->heapbckt, nme, "thashb", false) == GDK_SUCCEED) {
     523        1074 :                                                         if (h->nbucket & (h->nbucket - 1)) {
     524         376 :                                                                 h->mask2 = hashmask(h->nbucket);
     525         376 :                                                                 h->mask1 = h->mask2 >> 1;
     526             :                                                         } else {
     527         698 :                                                                 h->mask1 = h->nbucket - 1;
     528         698 :                                                                 h->mask2 = h->mask1 << 1 | 1;
     529             :                                                         }
     530        1074 :                                                         h->nunique = hdata[5];
     531        1074 :                                                         h->nheads = hdata[6];
     532        1074 :                                                         h->type = ATOMtype(b->ttype);
     533        1074 :                                                         if (h->width < SIZEOF_BUN &&
     534        1074 :                                                             ((BUN) 1 << (8 * h->width)) - 1 > h->nbucket) {
     535        1069 :                                                                 close(fd);
     536        1068 :                                                                 h->Link = h->heaplink.base;
     537        1068 :                                                                 h->Bckt = h->heapbckt.base + HASH_HEADER_SIZE * SIZEOF_SIZE_T;
     538        1068 :                                                                 h->heaplink.parentid = b->batCacheid;
     539        1068 :                                                                 h->heapbckt.parentid = b->batCacheid;
     540        1068 :                                                                 h->heaplink.dirty = false;
     541        1068 :                                                                 h->heapbckt.dirty = false;
     542        1068 :                                                                 b->thash = h;
     543        1068 :                                                                 TRC_DEBUG(ACCELERATOR,
     544             :                                                                           ALGOBATFMT ": reusing persisted hash\n", ALGOBATPAR(b));
     545        1068 :                                                                 MT_rwlock_wrunlock(&b->thashlock);
     546        1068 :                                                                 return true;
     547             :                                                         }
     548             :                                                         /* if h->nbucket
     549             :                                                          * equals the
     550             :                                                          * BUN_NONE
     551             :                                                          * representation
     552             :                                                          * for the
     553             :                                                          * current hash
     554             :                                                          * width (was
     555             :                                                          * possible in
     556             :                                                          * previous
     557             :                                                          * iterations of
     558             :                                                          * the code),
     559             :                                                          * then we can't
     560             :                                                          * use the hash
     561             :                                                          * since we
     562             :                                                          * can't
     563             :                                                          * distinguish
     564             :                                                          * between
     565             :                                                          * end-of-list
     566             :                                                          * and a valid
     567             :                                                          * link */
     568           5 :                                                         HEAPfree(&h->heapbckt, false);
     569             :                                                 }
     570           5 :                                                 HEAPfree(&h->heaplink, false);
     571             :                                         }
     572           5 :                                         close(fd);
     573             :                                         /* unlink unusable file */
     574           5 :                                         GDKunlink(h->heaplink.farmid, BATDIR, nme, "thashl");
     575           5 :                                         GDKunlink(h->heapbckt.farmid, BATDIR, nme, "thashb");
     576             :                                 }
     577             :                         }
     578           5 :                         GDKfree(h);
     579           5 :                         GDKclrerr();    /* we're not currently interested in errors */
     580             :                 }
     581           6 :                 MT_rwlock_wrunlock(&b->thashlock);
     582             :         }
     583     4558053 :         ret = b->thash != NULL;
     584     4558053 :         if (ret) {
     585     2691204 :                 TRC_DEBUG(ACCELERATOR, ALGOBATFMT ": already has hash, waited " LLFMT " usec\n", ALGOBATPAR(b), t);
     586             :         }
     587             :         return ret;
     588             : }
     589             : 
     590             : static void
     591       77074 : BAThashsave_intern(BAT *b, bool dosync)
     592             : {
     593             :         Hash *h;
     594             :         lng t0 = 0;
     595             : 
     596       77074 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     597             : 
     598       77074 :         if ((h = b->thash) != NULL) {
     599       77074 :                 Heap *hp = &h->heapbckt;
     600             : 
     601             : #ifndef PERSISTENTHASH
     602             :                 /* no need to sync if not persistent */
     603             :                 dosync = false;
     604             : #endif
     605             : 
     606             :                 /* only persist if parent BAT hasn't changed in the
     607             :                  * mean time */
     608       77074 :                 if (!b->theap->dirty &&
     609       68776 :                     ((size_t *) h->heapbckt.base)[4] == BATcount(b) &&
     610       68774 :                     HEAPsave(&h->heaplink, h->heaplink.filename, NULL, dosync, h->heaplink.free) == GDK_SUCCEED &&
     611       34386 :                     HEAPsave(hp, hp->filename, NULL, dosync, hp->free) == GDK_SUCCEED) {
     612       34387 :                         h->heaplink.dirty = false;
     613       34387 :                         hp->dirty = false;
     614       34387 :                         gdk_return rc = HASHfix(h, true, dosync);
     615       34389 :                         TRC_DEBUG(ACCELERATOR,
     616             :                                   ALGOBATFMT ": persisting hash %s%s (" LLFMT " usec)%s\n", ALGOBATPAR(b), hp->filename, dosync ? "" : " no sync", GDKusec() - t0, rc == GDK_SUCCEED ? "" : " failed");
     617             :                 }
     618       77070 :                 GDKclrerr();
     619             :         }
     620       77074 : }
     621             : 
     622             : void
     623       68055 : BAThashsave(BAT *b, bool dosync)
     624             : {
     625       68055 :         Hash *h = b->thash;
     626       68055 :         if (h == NULL)
     627             :                 return;
     628       68055 :         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
     629       68055 :         ((size_t *) h->heapbckt.base)[1] = (size_t) (h->heaplink.free / h->width);
     630       68055 :         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
     631       68055 :         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
     632       68055 :         ((size_t *) h->heapbckt.base)[4] = (size_t) BATcount(b);
     633       68055 :         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
     634       68055 :         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
     635       68055 :         BAThashsave_intern(b, dosync);
     636             : }
     637             : 
     638             : #ifdef PERSISTENTHASH
     639             : static void
     640        9019 : BAThashsync(void *arg)
     641             : {
     642             :         BAT *b = arg;
     643             : 
     644             :         /* we could check whether b->thash == NULL before getting the
     645             :          * lock, and only lock if it isn't; however, it's very
     646             :          * unlikely that that is the case, so we don't */
     647        9019 :         MT_rwlock_rdlock(&b->thashlock);
     648        9019 :         BAThashsave_intern(b, true);
     649        9018 :         MT_rwlock_rdunlock(&b->thashlock);
     650        9018 :         BBPunfix(b->batCacheid);
     651        9019 : }
     652             : #endif
     653             : 
     654             : #define EQbte(a, b)     ((a) == (b))
     655             : #define EQsht(a, b)     ((a) == (b))
     656             : #define EQint(a, b)     ((a) == (b))
     657             : #define EQlng(a, b)     ((a) == (b))
     658             : #ifdef HAVE_HGE
     659             : #define EQhge(a, b)     ((a) == (b))
     660             : #endif
     661             : #define EQflt(a, b)     (is_flt_nil(a) ? is_flt_nil(b) : (a) == (b))
     662             : #define EQdbl(a, b)     (is_dbl_nil(a) ? is_dbl_nil(b) : (a) == (b))
     663             : #ifdef HAVE_HGE
     664             : #define EQuuid(a, b)    ((a).h == (b).h)
     665             : #else
     666             : #ifdef HAVE_UUID
     667             : #define EQuuid(a, b)    (uuid_compare((a).u, (b).u) == 0)
     668             : #else
     669             : #define EQuuid(a, b)    (memcmp((a).u, (b).u, UUID_SIZE) == 0)
     670             : #endif
     671             : #endif
     672             : 
     673             : #define starthash(TYPE)                                                 \
     674             :         do {                                                            \
     675             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     676             :                 for (; p < cnt1; p++) {                                      \
     677             :                         c = hash_##TYPE(h, v + o - b->hseqbase);     \
     678             :                         hget = HASHget(h, c);                           \
     679             :                         if (hget == BUN_NONE) {                         \
     680             :                                 if (h->nheads == maxslots)           \
     681             :                                         break; /* mask too full */      \
     682             :                                 h->nheads++;                         \
     683             :                                 h->nunique++;                                \
     684             :                         } else {                                        \
     685             :                                 for (hb = hget;                         \
     686             :                                      hb != BUN_NONE;                    \
     687             :                                      hb = HASHgetlink(h, hb)) {         \
     688             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     689             :                                                 break;                  \
     690             :                                 }                                       \
     691             :                                 h->nunique += hb == BUN_NONE;                \
     692             :                         }                                               \
     693             :                         HASHputlink(h, p, hget);                        \
     694             :                         HASHput(h, c, p);                               \
     695             :                         o = canditer_next(ci);                          \
     696             :                 }                                                       \
     697             :         } while (0)
     698             : #define finishhash(TYPE)                                                \
     699             :         do {                                                            \
     700             :                 const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
     701             :                 for (; p < ci->ncand; p++) {                              \
     702             :                         c = hash_##TYPE(h, v + o - b->hseqbase);     \
     703             :                         hget = HASHget(h, c);                           \
     704             :                         h->nheads += hget == BUN_NONE;                       \
     705             :                         if (!hascand) {                                 \
     706             :                                 for (hb = hget;                         \
     707             :                                      hb != BUN_NONE;                    \
     708             :                                      hb = HASHgetlink(h, hb)) {         \
     709             :                                         if (EQ##TYPE(v[o - b->hseqbase], v[hb])) \
     710             :                                                 break;                  \
     711             :                                 }                                       \
     712             :                                 h->nunique += hb == BUN_NONE;                \
     713             :                                 o = canditer_next_dense(ci);            \
     714             :                         } else {                                        \
     715             :                                 o = canditer_next(ci);                  \
     716             :                         }                                               \
     717             :                         HASHputlink(h, p, hget);                        \
     718             :                         HASHput(h, c, p);                               \
     719             :                 }                                                       \
     720             :         } while (0)
     721             : 
     722             : /* Internal function to create a hash table for the given BAT b.
     723             :  * If a candidate list s is also given, the hash table is specific for
     724             :  * the combination of the two: only values from b that are referred to
     725             :  * by s are included in the hash table, so if a result is found when
     726             :  * searching the hash table, the result is a candidate. */
     727             : Hash *
     728       97671 : BAThash_impl(BAT *restrict b, struct canditer *restrict ci, const char *restrict ext)
     729             : {
     730             :         lng t0 = 0;
     731       97671 :         unsigned int tpe = ATOMbasetype(b->ttype);
     732             :         BUN cnt1;
     733             :         BUN mask, maxmask = 0;
     734             :         BUN p, c;
     735             :         oid o;
     736             :         BUN hget, hb;
     737             :         Hash *h = NULL;
     738       97671 :         const char *nme = GDKinmemory(b->theap->farmid) ? ":memory:" : BBP_physical(b->batCacheid);
     739       97671 :         BATiter bi = bat_iterator(b);
     740             :         const ValRecord *prop;
     741       97703 :         bool hascand = ci->tpe != cand_dense || ci->ncand != bi.count;
     742             : 
     743       97703 :         assert(strcmp(ext, "thash") != 0 || !hascand);
     744             : 
     745      195338 :         MT_thread_setalgorithm(hascand ? "create hash with candidates" : "create hash");
     746       97696 :         TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
     747       97696 :         TRC_DEBUG(ACCELERATOR,
     748             :                   ALGOBATFMT ": create hash;\n", ALGOBATPAR(b));
     749       97669 :         if (b->ttype == TYPE_void) {
     750           0 :                 if (is_oid_nil(b->tseqbase)) {
     751           0 :                         TRC_DEBUG(ACCELERATOR,
     752             :                                   "cannot create hash-table on void-NIL column.\n");
     753           0 :                         GDKerror("no hash on void/nil column\n");
     754           0 :                         bat_iterator_end(&bi);
     755           0 :                         return NULL;
     756             :                 }
     757           0 :                 TRC_DEBUG(ACCELERATOR,
     758             :                           "creating hash-table on void column..\n");
     759           0 :                 assert(0);
     760             :                 tpe = TYPE_void;
     761             :         }
     762             : 
     763       97669 :         if ((h = GDKzalloc(sizeof(*h))) == NULL ||
     764       97686 :             (h->heaplink.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, b->ttype, hashheap)) < 0 ||
     765       97690 :             (h->heapbckt.farmid = BBPselectfarm(hascand ? TRANSIENT : b->batRole, b->ttype, hashheap)) < 0) {
     766           0 :                 GDKfree(h);
     767           0 :                 bat_iterator_end(&bi);
     768           0 :                 return NULL;
     769             :         }
     770       97688 :         h->width = HASHwidth(BATcapacity(b));
     771       97688 :         h->heaplink.dirty = true;
     772       97688 :         h->heapbckt.dirty = true;
     773       97688 :         strconcat_len(h->heaplink.filename, sizeof(h->heaplink.filename),
     774             :                       nme, ".", ext, "l", NULL);
     775       97683 :         strconcat_len(h->heapbckt.filename, sizeof(h->heapbckt.filename),
     776             :                       nme, ".", ext, "b", NULL);
     777       97690 :         if (HEAPalloc(&h->heaplink, hascand ? ci->ncand : BATcapacity(b),
     778       97690 :                       h->width, 0) != GDK_SUCCEED) {
     779           0 :                 GDKfree(h);
     780           0 :                 bat_iterator_end(&bi);
     781           0 :                 return NULL;
     782             :         }
     783       97698 :         h->heaplink.free = ci->ncand * h->width;
     784       97698 :         h->Link = h->heaplink.base;
     785             : #ifndef NDEBUG
     786             :         /* clear unused part of Link array */
     787       97698 :         if (h->heaplink.size > h->heaplink.free)
     788       17904 :                 memset(h->heaplink.base + h->heaplink.free, 0,
     789             :                        h->heaplink.size - h->heaplink.free);
     790             : #endif
     791             : 
     792             :         /* determine hash mask size */
     793             :         cnt1 = 0;
     794       97698 :         if (ATOMsize(tpe) == 1) {
     795             :                 /* perfect hash for one-byte sized atoms */
     796             :                 mask = (1 << 8);
     797       97364 :         } else if (ATOMsize(tpe) == 2) {
     798             :                 /* perfect hash for two-byte sized atoms */
     799             :                 mask = (1 << 16);
     800       96958 :         } else if (b->tkey || ci->ncand <= 4096) {
     801             :                 /* if key, or if small, don't bother dynamically
     802             :                  * adjusting the hash mask */
     803       86306 :                 mask = HASHmask(ci->ncand);
     804       10652 :         } else if (!hascand && (prop = BATgetprop_try(b, GDK_NUNIQUE)) != NULL) {
     805           0 :                 assert(prop->vtype == TYPE_oid);
     806           0 :                 mask = prop->val.oval * 8 / 7;
     807       10652 :         } else if (!hascand && (prop = BATgetprop_try(b, GDK_HASH_BUCKETS)) != NULL) {
     808           0 :                 assert(prop->vtype == TYPE_oid);
     809           0 :                 mask = prop->val.oval;
     810           0 :                 maxmask = HASHmask(ci->ncand);
     811             :                 if (mask > maxmask)
     812             :                         mask = maxmask;
     813       10652 :         } else if (!hascand && (prop = BATgetprop_try(b, GDK_UNIQUE_ESTIMATE)) != NULL) {
     814         173 :                 assert(prop->vtype == TYPE_dbl);
     815         173 :                 mask = (BUN) (prop->val.dval * 8 / 7);
     816             :         } else {
     817             :                 /* dynamic hash: we start with HASHmask(ci->ncand)/64, or,
     818             :                  * if ci->ncand large enough, HASHmask(ci->ncand)/256; if there
     819             :                  * are too many collisions we try HASHmask(ci->ncand)/64,
     820             :                  * HASHmask(ci->ncand)/16, HASHmask(ci->ncand)/4, and finally
     821             :                  * HASHmask(ci->ncand), but we might skip some of these if
     822             :                  * there are many distinct values.  */
     823       10479 :                 maxmask = HASHmask(ci->ncand);
     824       10479 :                 mask = maxmask >> 6;
     825       10566 :                 while (mask > 4096)
     826          87 :                         mask >>= 2;
     827             :                 /* try out on first 25% of b */
     828       10479 :                 cnt1 = ci->ncand >> 2;
     829             :         }
     830             : 
     831       97698 :         o = canditer_next(ci);  /* always one ahead */
     832       29735 :         for (;;) {
     833             :                 lng t1 = 0;
     834      127409 :                 TRC_DEBUG_IF(ACCELERATOR) t1 = GDKusec();
     835      127409 :                 BUN maxslots = (mask >> 3) - 1;   /* 1/8 full is too full */
     836             : 
     837      127409 :                 h->nheads = 0;
     838      127409 :                 h->nunique = 0;
     839             :                 p = 0;
     840      127409 :                 HEAPfree(&h->heapbckt, true);
     841             :                 /* create the hash structures */
     842      254877 :                 if (HASHnew(h, ATOMtype(b->ttype), BATcapacity(b),
     843             :                             mask, ci->ncand, true) != GDK_SUCCEED) {
     844           0 :                         HEAPfree(&h->heaplink, true);
     845           0 :                         GDKfree(h);
     846           0 :                         bat_iterator_end(&bi);
     847           0 :                         return NULL;
     848             :                 }
     849             : 
     850      127419 :                 switch (tpe) {
     851         334 :                 case TYPE_bte:
     852         334 :                         starthash(bte);
     853             :                         break;
     854         406 :                 case TYPE_sht:
     855         406 :                         starthash(sht);
     856             :                         break;
     857          19 :                 case TYPE_flt:
     858          19 :                         starthash(flt);
     859             :                         break;
     860      123202 :                 case TYPE_int:
     861    54507827 :                         starthash(int);
     862             :                         break;
     863          27 :                 case TYPE_dbl:
     864          27 :                         starthash(dbl);
     865             :                         break;
     866        1217 :                 case TYPE_lng:
     867        1628 :                         starthash(lng);
     868             :                         break;
     869             : #ifdef HAVE_HGE
     870          16 :                 case TYPE_hge:
     871          16 :                         starthash(hge);
     872             :                         break;
     873             : #endif
     874           0 :                 case TYPE_uuid:
     875           0 :                         starthash(uuid);
     876             :                         break;
     877             :                 default:
     878     1700314 :                         for (; p < cnt1; p++) {
     879     1698145 :                                 const void *restrict v = BUNtail(bi, o - b->hseqbase);
     880     1698145 :                                 c = hash_any(h, v);
     881     1698145 :                                 hget = HASHget(h, c);
     882     1698145 :                                 if (hget == BUN_NONE) {
     883       28123 :                                         if (h->nheads == maxslots)
     884             :                                                 break; /* mask too full */
     885       28094 :                                         h->nheads++;
     886       28094 :                                         h->nunique++;
     887             :                                 } else {
     888     1680460 :                                         for (hb = hget;
     889             :                                              hb != BUN_NONE;
     890       10438 :                                              hb = HASHgetlink(h, hb)) {
     891     1678915 :                                                 if (ATOMcmp(h->type,
     892             :                                                             v,
     893             :                                                             BUNtail(bi, hb)) == 0)
     894             :                                                         break;
     895             :                                         }
     896     1670022 :                                         h->nunique += hb == BUN_NONE;
     897             :                                 }
     898     1698116 :                                 HASHputlink(h, p, hget);
     899     1698116 :                                 HASHput(h, c, p);
     900     1698116 :                                 o = canditer_next(ci);
     901             :                         }
     902             :                         break;
     903             :                 }
     904      127420 :                 TRC_DEBUG_IF(ACCELERATOR) if (p < cnt1)
     905           0 :                         TRC_DEBUG_ENDIF(ACCELERATOR,
     906             :                                         "%s: abort starthash with "
     907             :                                 "mask " BUNFMT " at " BUNFMT " after " LLFMT " usec\n", BATgetId(b), mask, p, GDKusec() - t1);
     908      127420 :                 if (p == cnt1 || mask == maxmask)
     909             :                         break;
     910       29739 :                 mask <<= 2;
     911             :                 /* if we fill up the slots fast (p <= maxslots * 1.2)
     912             :                  * increase mask size a bit more quickly */
     913       29739 :                 if (p == h->nunique) {
     914             :                         /* only unique values so far: grow really fast */
     915             :                         mask = maxmask;
     916             :                         cnt1 = 0;
     917       29167 :                 } else if (mask < maxmask && p <= maxslots * 1.2)
     918          38 :                         mask <<= 2;
     919       29739 :                 canditer_reset(ci);
     920       29735 :                 o = canditer_next(ci);
     921             :         }
     922             : 
     923             :         /* finish the hashtable with the current mask */
     924       97681 :         switch (tpe) {
     925         334 :         case TYPE_bte:
     926       87201 :                 finishhash(bte);
     927             :                 break;
     928         406 :         case TYPE_sht:
     929      199536 :                 finishhash(sht);
     930             :                 break;
     931       93498 :         case TYPE_int:
     932   206674140 :                 finishhash(int);
     933             :                 break;
     934          18 :         case TYPE_flt:
     935         528 :                 finishhash(flt);
     936             :                 break;
     937          27 :         case TYPE_dbl:
     938          76 :                 finishhash(dbl);
     939             :                 break;
     940        1214 :         case TYPE_lng:
     941    36118618 :                 finishhash(lng);
     942             :                 break;
     943             : #ifdef HAVE_HGE
     944          16 :         case TYPE_hge:
     945         276 :                 finishhash(hge);
     946             :                 break;
     947             : #endif
     948           0 :         case TYPE_uuid:
     949           0 :                 finishhash(uuid);
     950             :                 break;
     951             :         default:
     952     5353120 :                 for (; p < ci->ncand; p++) {
     953     5350950 :                         const void *restrict v = BUNtail(bi, o - b->hseqbase);
     954     5350950 :                         c = hash_any(h, v);
     955     5350950 :                         hget = HASHget(h, c);
     956     5350950 :                         h->nheads += hget == BUN_NONE;
     957     5350950 :                         if (!hascand) {
     958     5522155 :                                 for (hb = hget;
     959             :                                      hb != BUN_NONE;
     960      171199 :                                      hb = HASHgetlink(h, hb)) {
     961     5324867 :                                         if (ATOMcmp(h->type, v, BUNtail(bi, hb)) == 0)
     962             :                                                 break;
     963             :                                 }
     964     5350945 :                                 h->nunique += hb == BUN_NONE;
     965             :                         }
     966     5350939 :                         HASHputlink(h, p, hget);
     967     5350938 :                         HASHput(h, c, p);
     968     5350949 :                         o = canditer_next(ci);
     969             :                 }
     970             :                 break;
     971             :         }
     972       97702 :         bat_iterator_end(&bi);
     973       97702 :         if (!hascand) {
     974             :                 /* don't keep these properties while we have a hash
     975             :                  * structure: they get added again when the hash is
     976             :                  * freed */
     977       97639 :                 MT_lock_set(&b->theaplock);
     978       97643 :                 BATrmprop_nolock(b, GDK_HASH_BUCKETS);
     979       97638 :                 BATrmprop_nolock(b, GDK_NUNIQUE);
     980       97645 :                 MT_lock_unset(&b->theaplock);
     981             :         }
     982       97707 :         h->heapbckt.parentid = b->batCacheid;
     983       97707 :         h->heaplink.parentid = b->batCacheid;
     984             :         /* if the number of unique values is equal to the bat count,
     985             :          * all values are necessarily distinct */
     986       97707 :         if (h->nunique == BATcount(b) && !b->tkey) {
     987       45127 :                 b->tkey = true;
     988       45127 :                 b->batDirtydesc = true;
     989             :         }
     990       97707 :         TRC_DEBUG_IF(ACCELERATOR) {
     991           0 :                 TRC_DEBUG_ENDIF(ACCELERATOR,
     992             :                                 "hash construction " LLFMT " usec\n", GDKusec() - t0);
     993           0 :                 HASHcollisions(b, h, __func__);
     994             :         }
     995             :         return h;
     996             : }
     997             : 
     998             : gdk_return
     999     1514432 : BAThash(BAT *b)
    1000             : {
    1001     1514432 :         if (ATOMstorage(b->ttype) == TYPE_msk) {
    1002           0 :                 GDKerror("No hash on msk type bats\n");
    1003           0 :                 return GDK_FAIL;
    1004             :         }
    1005     1514432 :         if (BATcheckhash(b)) {
    1006             :                 return GDK_SUCCEED;
    1007             :         }
    1008             :         for (;;) {
    1009             :                 /* If multiple threads simultaneously try to build a
    1010             :                  * hash on a bat, e.g. in order to perform a join, it
    1011             :                  * may happen that one thread succeeds in obtaining the
    1012             :                  * write lock, then builds the hash, releases the lock,
    1013             :                  * acquires the read lock, and performs the join.  The
    1014             :                  * other threads may then still be attempting to acquire
    1015             :                  * the write lock.  But now they have to wait until the
    1016             :                  * read lock is released, which can be quite a long
    1017             :                  * time.  Especially if a second thread goes through the
    1018             :                  * same process as the first. */
    1019      109441 :                 if (MT_rwlock_wrtry(&b->thashlock))
    1020             :                         break;
    1021       11882 :                 MT_sleep_ms(1);
    1022       11790 :                 if (MT_rwlock_rdtry(&b->thashlock)) {
    1023         314 :                         if (b->thash != NULL && b->thash != (Hash *) 1) {
    1024         314 :                                 MT_rwlock_rdunlock(&b->thashlock);
    1025         317 :                                 return GDK_SUCCEED;
    1026             :                         }
    1027           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    1028             :                 }
    1029             :         }
    1030             :         /* we have the write lock */
    1031       97638 :         if (b->thash == NULL) {
    1032             :                 struct canditer ci;
    1033       97624 :                 canditer_init(&ci, b, NULL);
    1034       97618 :                 if ((b->thash = BAThash_impl(b, &ci, "thash")) == NULL) {
    1035           0 :                         MT_rwlock_wrunlock(&b->thashlock);
    1036        9019 :                         return GDK_FAIL;
    1037             :                 }
    1038             : #ifdef PERSISTENTHASH
    1039       97638 :                 if (BBP_status(b->batCacheid) & BBPEXISTING && !b->theap->dirty && !GDKinmemory(b->theap->farmid)) {
    1040        9019 :                         Hash *h = b->thash;
    1041        9019 :                         ((size_t *) h->heapbckt.base)[0] = (size_t) HASH_VERSION;
    1042        9019 :                         ((size_t *) h->heapbckt.base)[1] = (size_t) (h->heaplink.free / h->width);
    1043        9019 :                         ((size_t *) h->heapbckt.base)[2] = (size_t) h->nbucket;
    1044        9019 :                         ((size_t *) h->heapbckt.base)[3] = (size_t) h->width;
    1045        9019 :                         ((size_t *) h->heapbckt.base)[4] = (size_t) BATcount(b);
    1046        9019 :                         ((size_t *) h->heapbckt.base)[5] = (size_t) h->nunique;
    1047        9019 :                         ((size_t *) h->heapbckt.base)[6] = (size_t) h->nheads;
    1048             :                         MT_Id tid;
    1049        9019 :                         BBPfix(b->batCacheid);
    1050             :                         char name[MT_NAME_LEN];
    1051        9019 :                         snprintf(name, sizeof(name), "hashsync%d", b->batCacheid);
    1052        9019 :                         MT_rwlock_wrunlock(&b->thashlock);
    1053        9019 :                         if (MT_create_thread(&tid, BAThashsync, b,
    1054             :                                              MT_THR_DETACHED,
    1055             :                                              name) < 0) {
    1056             :                                 /* couldn't start thread: clean up */
    1057           0 :                                 BBPunfix(b->batCacheid);
    1058             :                         }
    1059             :                         return GDK_SUCCEED;
    1060             :                 } else
    1061       88619 :                         TRC_DEBUG(ACCELERATOR,
    1062             :                                         "NOT persisting hash %d\n", b->batCacheid);
    1063             : #endif
    1064             :         }
    1065       88633 :         MT_rwlock_wrunlock(&b->thashlock);
    1066       88627 :         return GDK_SUCCEED;
    1067             : }
    1068             : 
    1069             : /*
    1070             :  * The entry on which a value hashes can be calculated with the
    1071             :  * routine HASHprobe.
    1072             :  */
    1073             : BUN
    1074    56688706 : HASHprobe(const Hash *h, const void *v)
    1075             : {
    1076    86358382 :         switch (ATOMbasetype(h->type)) {
    1077        9751 :         case TYPE_bte:
    1078        9751 :                 return hash_bte(h, v);
    1079      351300 :         case TYPE_sht:
    1080      351300 :                 return hash_sht(h, v);
    1081    16242537 :         case TYPE_int:
    1082             :         case TYPE_flt:
    1083    16242537 :                 return hash_int(h, v);
    1084    27706057 :         case TYPE_dbl:
    1085             :         case TYPE_lng:
    1086    27706057 :                 return hash_lng(h, v);
    1087             : #ifdef HAVE_HGE
    1088       17922 :         case TYPE_hge:
    1089       17922 :                 return hash_hge(h, v);
    1090             : #endif
    1091           9 :         case TYPE_uuid:
    1092           9 :                 return hash_uuid(h, v);
    1093    12361130 :         default:
    1094    12361130 :                 return hash_any(h, v);
    1095             :         }
    1096             : }
    1097             : 
    1098             : void
    1099     1093296 : HASHappend_locked(BAT *b, BUN i, const void *v)
    1100             : {
    1101     1093296 :         Hash *h = b->thash;
    1102     1093296 :         if (h == NULL) {
    1103          30 :                 return;
    1104             :         }
    1105     1093296 :         if (h == (Hash *) 1) {
    1106          28 :                 b->thash = NULL;
    1107          28 :                 doHASHdestroy(b, h);
    1108          29 :                 GDKclrerr();
    1109          29 :                 return;
    1110             :         }
    1111     1093268 :         assert(i * h->width == h->heaplink.free);
    1112     1093268 :         if (h->nunique < b->batCount / HASH_DESTROY_UNIQUES_FRACTION) {
    1113           1 :                 b->thash = NULL;
    1114           1 :                 doHASHdestroy(b, h);
    1115           1 :                 GDKclrerr();
    1116           1 :                 return;
    1117             :         }
    1118     1093267 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1119           0 :                 b->thash = NULL;
    1120           0 :                 doHASHdestroy(b, h);
    1121           0 :                 GDKclrerr();
    1122           0 :                 return;
    1123             :         }
    1124     1093208 :         if (HASHwidth(i + 1) > h->width &&
    1125           0 :              HASHupgradehashheap(b) != GDK_SUCCEED) {
    1126           0 :                 GDKclrerr();
    1127           0 :                 return;
    1128             :         }
    1129     1960817 :         if ((ATOMsize(b->ttype) > 2 &&
    1130      867573 :              HASHgrowbucket(b) != GDK_SUCCEED) ||
    1131     1094196 :             ((i + 1) * h->width > h->heaplink.size &&
    1132         943 :              HEAPextend(&h->heaplink,
    1133         943 :                         i * h->width + GDK_mmap_pagesize,
    1134             :                         true) != GDK_SUCCEED)) {
    1135           0 :                 b->thash = NULL;
    1136           0 :                 HEAPfree(&h->heapbckt, true);
    1137           0 :                 HEAPfree(&h->heaplink, true);
    1138           0 :                 GDKfree(h);
    1139           0 :                 GDKclrerr();
    1140           0 :                 return;
    1141             :         }
    1142     1093253 :         h->Link = h->heaplink.base;
    1143     1093253 :         BUN c = HASHprobe(h, v);
    1144     1093243 :         h->heaplink.free += h->width;
    1145     1093243 :         BUN hb = HASHget(h, c);
    1146             :         BUN hb2;
    1147     1093243 :         BATiter bi = bat_iterator_nolock(b);
    1148     1093349 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1149     1496394 :         for (hb2 = hb;
    1150             :              hb2 != BUN_NONE;
    1151      403045 :              hb2 = HASHgetlink(h, hb2)) {
    1152     1007201 :                 if (atomcmp(v, BUNtail(bi, hb2)) == 0)
    1153             :                         break;
    1154             :         }
    1155     1093275 :         h->nheads += hb == BUN_NONE;
    1156     1093275 :         h->nunique += hb2 == BUN_NONE;
    1157     1093275 :         HASHputlink(h, i, hb);
    1158     1093331 :         HASHput(h, c, i);
    1159     1093339 :         h->heapbckt.dirty = true;
    1160     1093339 :         h->heaplink.dirty = true;
    1161             : }
    1162             : 
    1163             : void
    1164           0 : HASHappend(BAT *b, BUN i, const void *v)
    1165             : {
    1166           0 :         MT_rwlock_wrlock(&b->thashlock);
    1167           0 :         HASHappend_locked(b, i, v);
    1168           0 :         MT_rwlock_wrunlock(&b->thashlock);
    1169           0 : }
    1170             : 
    1171             : /* insert value v at position p into the hash table of b */
    1172             : void
    1173   123326038 : HASHinsert_locked(BAT *b, BUN p, const void *v)
    1174             : {
    1175   123326038 :         Hash *h = b->thash;
    1176   123326038 :         if (h == NULL) {
    1177             :                 return;
    1178             :         }
    1179      283731 :         if (h == (Hash *) 1) {
    1180           0 :                 b->thash = NULL;
    1181           0 :                 doHASHdestroy(b, h);
    1182           0 :                 GDKclrerr();
    1183           0 :                 return;
    1184             :         }
    1185      283731 :         assert(p * h->width < h->heaplink.free);
    1186      283731 :         if (h->nunique < b->batCount / HASH_DESTROY_UNIQUES_FRACTION) {
    1187           1 :                 b->thash = NULL;
    1188           1 :                 doHASHdestroy(b, h);
    1189           1 :                 GDKclrerr();
    1190           1 :                 return;
    1191             :         }
    1192      283730 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1193           0 :                 b->thash = NULL;
    1194           0 :                 doHASHdestroy(b, h);
    1195           0 :                 GDKclrerr();
    1196           0 :                 return;
    1197             :         }
    1198      283730 :         BUN c = HASHprobe(h, v);
    1199      283730 :         BUN hb = HASHget(h, c);
    1200      283730 :         BATiter bi = bat_iterator_nolock(b);
    1201      283731 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1202      283731 :         if (hb == BUN_NONE || hb < p) {
    1203             :                 /* bucket is empty, or bucket is used by lower numbered
    1204             :                  * position */
    1205       50861 :                 h->heaplink.dirty = true;
    1206       50861 :                 h->heapbckt.dirty = true;
    1207       50861 :                 HASHputlink(h, p, hb);
    1208       50861 :                 HASHput(h, c, p);
    1209       50861 :                 if (hb == BUN_NONE) {
    1210       23024 :                         h->nheads++;
    1211             :                 } else {
    1212             :                         do {
    1213       53133 :                                 if (atomcmp(v, BUNtail(bi, hb)) == 0) {
    1214             :                                         /* found another row with the
    1215             :                                          * same value, so don't
    1216             :                                          * increment nunique */
    1217             :                                         return;
    1218             :                                 }
    1219       39189 :                                 hb = HASHgetlink(h, hb);
    1220       39189 :                         } while (hb != BUN_NONE);
    1221             :                 }
    1222             :                 /* this is a new value */
    1223       36917 :                 h->nunique++;
    1224       36917 :                 return;
    1225             :         }
    1226             :         bool seen = false;
    1227             :         for (;;) {
    1228    87097037 :                 if (!seen)
    1229      449906 :                         seen = atomcmp(v, BUNtail(bi, hb)) == 0;
    1230    87097037 :                 BUN hb2 = HASHgetlink(h, hb);
    1231    87097037 :                 if (hb2 == BUN_NONE || hb2 < p) {
    1232      232870 :                         h->heaplink.dirty = true;
    1233      232870 :                         HASHputlink(h, p, hb2);
    1234      232870 :                         HASHputlink(h, hb, p);
    1235      240769 :                         while (!seen && hb2 != BUN_NONE) {
    1236        7899 :                                 seen = atomcmp(v, BUNtail(bi, hb2)) == 0;
    1237        7899 :                                 hb2 = HASHgetlink(h, hb2);
    1238             :                         }
    1239      232870 :                         if (!seen)
    1240        3011 :                                 h->nunique++;
    1241      232870 :                         return;
    1242             :                 }
    1243             :                 hb = hb2;
    1244             :         }
    1245             : }
    1246             : 
    1247             : void
    1248           2 : HASHinsert(BAT *b, BUN p, const void *v)
    1249             : {
    1250           2 :         MT_rwlock_wrlock(&b->thashlock);
    1251           2 :         HASHinsert_locked(b, p, v);
    1252           2 :         MT_rwlock_wrunlock(&b->thashlock);
    1253           2 : }
    1254             : 
    1255             : /* delete value v at position p from the hash table of b */
    1256             : void
    1257   120644110 : HASHdelete_locked(BAT *b, BUN p, const void *v)
    1258             : {
    1259   120644110 :         Hash *h = b->thash;
    1260   120644110 :         if (h == NULL) {
    1261   120399182 :                 return;
    1262             :         }
    1263      283753 :         if (h == (Hash *) 1) {
    1264          19 :                 b->thash = NULL;
    1265          19 :                 doHASHdestroy(b, h);
    1266          19 :                 GDKclrerr();
    1267          19 :                 return;
    1268             :         }
    1269      283734 :         assert(p * h->width < h->heaplink.free);
    1270      283734 :         if (h->nunique < b->batCount / HASH_DESTROY_UNIQUES_FRACTION) {
    1271           3 :                 b->thash = NULL;
    1272           3 :                 doHASHdestroy(b, h);
    1273           3 :                 GDKclrerr();
    1274           3 :                 return;
    1275             :         }
    1276      283731 :         if (HASHfix(h, false, true) != GDK_SUCCEED) {
    1277           0 :                 b->thash = NULL;
    1278           0 :                 doHASHdestroy(b, h);
    1279           0 :                 GDKclrerr();
    1280           0 :                 return;
    1281             :         }
    1282      283732 :         BUN c = HASHprobe(h, v);
    1283      283732 :         BUN hb = HASHget(h, c);
    1284      283732 :         BATiter bi = bat_iterator_nolock(b);
    1285      283732 :         int (*atomcmp)(const void *, const void *) = ATOMcompare(h->type);
    1286      283732 :         if (hb == p) {
    1287       38803 :                 BUN hb2 = HASHgetlink(h, p);
    1288       38803 :                 h->heaplink.dirty = true;
    1289       38803 :                 h->heapbckt.dirty = true;
    1290       38803 :                 HASHput(h, c, hb2);
    1291       38803 :                 HASHputlink(h, p, BUN_NONE);
    1292       38803 :                 if (hb2 == BUN_NONE) {
    1293       23732 :                         h->nheads--;
    1294             :                 } else {
    1295             :                         do {
    1296       40900 :                                 if (atomcmp(v, BUNtail(bi, hb2)) == 0) {
    1297             :                                         /* found another row with the
    1298             :                                          * same value, so don't
    1299             :                                          * decrement nunique below */
    1300             :                                         return;
    1301             :                                 }
    1302       40251 :                                 hb2 = HASHgetlink(h, hb2);
    1303       40251 :                         } while (hb2 != BUN_NONE);
    1304             :                 }
    1305             :                 /* no rows found with the same value, so number of
    1306             :                  * unique values is one lower */
    1307       38154 :                 h->nunique--;
    1308       38154 :                 return;
    1309             :         }
    1310             :         bool seen = false;
    1311             :         for (;;) {
    1312   106606620 :                 if (!seen)
    1313      251335 :                         seen = atomcmp(v, BUNtail(bi, hb)) == 0;
    1314   106606620 :                 BUN hb2 = HASHgetlink(h, hb);
    1315   106606620 :                 assert(hb2 != BUN_NONE );
    1316   106606620 :                 assert(hb2 < hb);
    1317   106606620 :                 if (hb2 == p) {
    1318      244929 :                         for (hb2 = HASHgetlink(h, hb2);
    1319      246487 :                              !seen && hb2 != BUN_NONE;
    1320        1558 :                              hb2 = HASHgetlink(h, hb2))
    1321        1558 :                                 seen = atomcmp(v, BUNtail(bi, hb2)) == 0;
    1322             :                         break;
    1323             :                 }
    1324             :                 hb = hb2;
    1325             :         }
    1326      244929 :         h->heaplink.dirty = true;
    1327      244929 :         HASHputlink(h, hb, HASHgetlink(h, p));
    1328      244929 :         HASHputlink(h, p, BUN_NONE);
    1329      244929 :         if (!seen)
    1330        1326 :                 h->nunique--;
    1331             : }
    1332             : 
    1333             : void
    1334           6 : HASHdelete(BAT *b, BUN p, const void *v)
    1335             : {
    1336           6 :         MT_rwlock_wrlock(&b->thashlock);
    1337           6 :         HASHdelete_locked(b, p, v);
    1338           6 :         MT_rwlock_wrunlock(&b->thashlock);
    1339           6 : }
    1340             : 
    1341             : BUN
    1342           0 : HASHlist(Hash *h, BUN i)
    1343             : {
    1344             :         BUN c = 1;
    1345           0 :         BUN j = HASHget(h, i);
    1346             : 
    1347           0 :         if (j == BUN_NONE)
    1348             :                 return 1;
    1349           0 :         while ((j = HASHgetlink(h, i)) != BUN_NONE) {
    1350           0 :                 c++;
    1351             :                 i = j;
    1352             :         }
    1353             :         return c;
    1354             : }
    1355             : 
    1356             : void
    1357    16552869 : HASHdestroy(BAT *b)
    1358             : {
    1359    16552869 :         if (b && b->thash) {
    1360             :                 Hash *hs;
    1361       92016 :                 MT_rwlock_wrlock(&b->thashlock);
    1362       92030 :                 hs = b->thash;
    1363       92030 :                 b->thash = NULL;
    1364       92030 :                 MT_rwlock_wrunlock(&b->thashlock);
    1365       92025 :                 doHASHdestroy(b, hs);
    1366             :         }
    1367    16552876 : }
    1368             : 
    1369             : void
    1370     5872266 : HASHfree(BAT *b)
    1371             : {
    1372     5872266 :         if (b && b->thash) {
    1373             :                 Hash *h;
    1374        7313 :                 MT_rwlock_wrlock(&b->thashlock);
    1375        7313 :                 if ((h = b->thash) != NULL && h != (Hash *) 1) {
    1376        6682 :                         bool rmheap = h->heaplink.dirty || h->heapbckt.dirty;
    1377        6682 :                         TRC_DEBUG(ACCELERATOR, ALGOBATFMT " free hash %s\n",
    1378             :                                   ALGOBATPAR(b),
    1379             :                                   rmheap ? "removing" : "keeping");
    1380             : 
    1381        6682 :                         b->thash = rmheap ? NULL : (Hash *) 1;
    1382        6682 :                         HEAPfree(&h->heapbckt, rmheap);
    1383        6682 :                         HEAPfree(&h->heaplink, rmheap);
    1384        6682 :                         GDKfree(h);
    1385             :                 }
    1386        7313 :                 MT_rwlock_wrunlock(&b->thashlock);
    1387             :         }
    1388     5872266 : }
    1389             : 
    1390             : bool
    1391           0 : HASHgonebad(BAT *b, const void *v)
    1392             : {
    1393           0 :         Hash *h = b->thash;
    1394             :         BUN cnt, hit;
    1395             : 
    1396           0 :         if (h == NULL)
    1397             :                 return true;    /* no hash is bad hash? */
    1398             : 
    1399           0 :         BATiter bi = bat_iterator(b);
    1400           0 :         if (h->nbucket * 2 < BATcount(b)) {
    1401           0 :                 int (*cmp) (const void *, const void *) = ATOMcompare(b->ttype);
    1402           0 :                 BUN i = HASHget(h, (BUN) HASHprobe(h, v));
    1403           0 :                 for (cnt = hit = 1; i != BUN_NONE; i = HASHgetlink(h, i), cnt++)
    1404           0 :                         hit += ((*cmp) (v, BUNtail(bi, (BUN) i)) == 0);
    1405             : 
    1406           0 :                 if (cnt / hit > 4) {
    1407           0 :                         bat_iterator_end(&bi);
    1408           0 :                         return true;    /* linked list too long */
    1409             :                 }
    1410             : 
    1411             :                 /* in this case, linked lists are long but contain the
    1412             :                  * desired values such hash tables may be useful for
    1413             :                  * locating all duplicates */
    1414             :         }
    1415           0 :         bat_iterator_end(&bi);
    1416           0 :         return false;           /* a-ok */
    1417             : }

Generated by: LCOV version 1.14