LCOV - code coverage report
Current view: top level - gdk - gdk_imprints.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 284 358 79.3 %
Date: 2020-06-29 20:00:14 Functions: 9 10 90.0 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2020 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  * Implementation for the column imprints index.
      11             :  * See paper:
      12             :  * Column Imprints: A Secondary Index Structure,
      13             :  * L.Sidirourgos and M.Kersten.
      14             :  */
      15             : 
      16             : #include "monetdb_config.h"
      17             : #include "gdk.h"
      18             : #include "gdk_private.h"
      19             : #include "gdk_imprints.h"
      20             : 
      21             : #define IMPRINTS_VERSION        2 /* format version; kept in bits 8-15 of the first header word */
      22             : #define IMPRINTS_HEADER_SIZE    4 /* nr of size_t fields in header */
      23             : 
      24             : #define BINSIZE(B, FUNC, T) do { /* expand FUNC(T,B) for imprint width B */ \
      25             :         switch (B) {                            \
      26             :                 case 8: FUNC(T,8); break;       \
      27             :                 case 16: FUNC(T,16); break;     \
      28             :                 case 32: FUNC(T,32); break;     \
      29             :                 case 64: FUNC(T,64); break;     \
      30             :                 default: assert(0); break; /* only 8, 16, 32 and 64 are handled */ \
      31             :         }                                       \
      32             : } while (0)
      33             : 
      34             : 
      35             : #define GETBIN(Z,X,B) /* Z = bin number of X; needs an array `bins` in scope at the expansion site */ \
      36             : do {                                            \
      37             :         int _i;                                 \
      38             :         Z = 0;                                  \
      39             :         for (_i = 1; _i < B; _i++) /* bins[0] is never compared */ \
      40             :                 Z += ((X) >= bins[_i]); /* count boundaries that X reaches */ \
      41             : } while (0)
      42             : 
      43             : 
      44             : #define IMPS_CREATE(TYPE,B) /* build B-bit imprint masks and dictionary headers for a column of TYPE; uses b, imps, inbins, dict, i, bin, dcnt, icnt and {min,max,cnt}_bins from the expansion site */ \
      45             : do {                                                                    \
      46             :         uint##B##_t mask, prvmask; /* mask of the current page; mask most recently stored in im[] */ \
      47             :         uint##B##_t *restrict im = (uint##B##_t *) imps;                \
      48             :         const TYPE *restrict col = (TYPE *) Tloc(b, 0);                 \
      49             :         const TYPE *restrict bins = (TYPE *) inbins;                    \
      50             :         const BUN page = IMPS_PAGE / sizeof(TYPE); /* number of values per page */ \
      51             :         prvmask = 0;                                                    \
      52             :         for (i = 0; i < b->batCount; ) {                          \
      53             :                 const BUN lim = MIN(i + page, b->batCount); /* end of this page, clipped to the column */ \
      54             :                 /* new mask */                                          \
      55             :                 mask = 0;                                               \
      56             :                 /* build mask for all BUNs in one PAGE */               \
      57             :                 for ( ; i < lim; i++) {                                      \
      58             :                         register const TYPE val = col[i];               \
      59             :                         GETBIN(bin,val,B);                              \
      60             :                         mask = IMPSsetBit(B,mask,bin); /* presumably sets bit `bin` in mask -- confirm in gdk_imprints.h */ \
      61             :                         if (!is_##TYPE##_nil(val)) { /* do not count nils */ \
      62             :                                 if (!cnt_bins[bin]++) { /* first non-nil value in this bin */ \
      63             :                                         min_bins[bin] = max_bins[bin] = i; \
      64             :                                 } else { /* keep positions of the smallest and largest value */ \
      65             :                                         if (val < col[min_bins[bin]])        \
      66             :                                                 min_bins[bin] = i;      \
      67             :                                         if (val > col[max_bins[bin]])        \
      68             :                                                 max_bins[bin] = i;      \
      69             :                                 }                                       \
      70             :                         }                                               \
      71             :                 }                                                       \
      72             :                 /* same mask as previous and enough count to add */     \
      73             :                 if ((prvmask == mask) && (dcnt > 0) &&                       \
      74             :                     (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) {         \
      75             :                         /* not a repeat header */                       \
      76             :                         if (!dict[dcnt-1].repeat) {                     \
      77             :                                 /* if compressed */                     \
      78             :                                 if (dict[dcnt-1].cnt > 1) {          \
      79             :                                         /* uncompress last */           \
      80             :                                         dict[dcnt-1].cnt--;             \
      81             :                                         /* new header */                \
      82             :                                         dict[dcnt].cnt = 1;             \
      83             :                                         dict[dcnt].flags = 0;           \
      84             :                                         dcnt++;                         \
      85             :                                 }                                       \
      86             :                                 /* set repeat */                        \
      87             :                                 dict[dcnt-1].repeat = 1;                \
      88             :                         }                                               \
      89             :                         /* increase cnt */                              \
      90             :                         dict[dcnt-1].cnt++;                             \
      91             :                 } else { /* new mask (or run out of header count) */    \
      92             :                         prvmask=mask;                                   \
      93             :                         im[icnt] = mask; /* store the mask as the next imprint */ \
      94             :                         icnt++;                                         \
      95             :                         if ((dcnt > 0) && !(dict[dcnt-1].repeat) && /* extend the last non-repeat header if it has room */ \
      96             :                             (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) { \
      97             :                                 dict[dcnt-1].cnt++;                     \
      98             :                         } else { /* otherwise start a fresh non-repeat header */ \
      99             :                                 dict[dcnt].cnt = 1;                     \
     100             :                                 dict[dcnt].repeat = 0;                  \
     101             :                                 dict[dcnt].flags = 0;                   \
     102             :                                 dcnt++;                                 \
     103             :                         }                                               \
     104             :                 }                                                       \
     105             :         }                                                               \
     106             : } while (0)
     107             : 
     108             : static void
     109         605 : imprints_create(BAT *b, void *inbins, BUN *stats, bte bits,
     110             :                 void *imps, BUN *impcnt, cchdc_t *dict, BUN *dictcnt)
     111             : { /* Scan b page by page using the bin boundaries in inbins and fill imps (one mask of `bits` bits per stored imprint) and dict (headers); per-bin statistics go to stats; the numbers of imprints and headers written are returned through impcnt and dictcnt. */
     112         605 :         BUN i;
     113         605 :         BUN dcnt, icnt;
     114         605 :         BUN *restrict min_bins = stats; /* stats holds three consecutive arrays of 64 BUNs: min positions, ... */
     115         605 :         BUN *restrict max_bins = min_bins + 64; /* ... max positions, ... */
     116         605 :         BUN *restrict cnt_bins = max_bins + 64; /* ... and non-nil counts per bin */
     117         605 :         int bin = 0;
     118         605 :         dcnt = icnt = 0;
     119             : #ifndef NDEBUG
     120         605 :         memset(min_bins, 0, 64 * SIZEOF_BUN); /* debug builds only: IMPS_CREATE sets min/max only for bins that receive a value */
     121         605 :         memset(max_bins, 0, 64 * SIZEOF_BUN);
     122             : #endif
     123         605 :         memset(cnt_bins, 0, 64 * SIZEOF_BUN); /* counts must start at zero: IMPS_CREATE tests them to detect the first value per bin */
     124             : 
     125        1172 :         switch (ATOMbasetype(b->ttype)) { /* instantiate IMPS_CREATE for the column's base type and the requested mask width */
     126           3 :         case TYPE_bte:
     127        1152 :                 BINSIZE(bits, IMPS_CREATE, bte);
     128             :                 break;
     129         155 :         case TYPE_sht:
     130      155957 :                 BINSIZE(bits, IMPS_CREATE, sht);
     131             :                 break;
     132         374 :         case TYPE_int:
     133  5129750000 :                 BINSIZE(bits, IMPS_CREATE, int);
     134             :                 break;
     135          60 :         case TYPE_lng:
     136  1921970000 :                 BINSIZE(bits, IMPS_CREATE, lng);
     137             :                 break;
     138             : #ifdef HAVE_HGE
     139           5 :         case TYPE_hge:
     140         750 :                 BINSIZE(bits, IMPS_CREATE, hge);
     141             :                 break;
     142             : #endif
     143           5 :         case TYPE_flt:
     144       34845 :                 BINSIZE(bits, IMPS_CREATE, flt);
     145             :                 break;
     146           3 :         case TYPE_dbl:
     147         231 :                 BINSIZE(bits, IMPS_CREATE, dbl);
     148             :                 break;
     149             :         default:
     150             :                 /* should never reach here: BATimprints rejects other base types before any imprints are built */
     151           0 :                 assert(0);
     152             :         }
     153             : 
     154         605 :         *dictcnt = dcnt; /* number of dictionary headers written */
     155         605 :         *impcnt = icnt; /* number of imprint masks written */
     156         605 : }
     157             : 
     158             : #ifdef NDEBUG
     159             : #define CLRMEM()        ((void) 0) /* release builds: remaining bin entries are left as they are */
     160             : #else
     161             : #define CLRMEM()        while (k < 64) h[k++] = 0 /* debug builds: zero entries k..63; uses k and h of the expansion site */
     162             : #endif
     163             : 
     164             : #define FILL_HISTOGRAM(TYPE) /* fill imprints->bins with bin boundaries taken from the values of s4; uses s4, cnt and imprints of the expansion site */ \
     165             : do {                                                                    \
     166             :         BUN k;                                                          \
     167             :         TYPE *restrict s = (TYPE *) Tloc(s4, 0);                        \
     168             :         TYPE *restrict h = imprints->bins;                           \
     169             :         if (cnt < 64-1) { /* fewer than 63 values: every value becomes a boundary */ \
     170             :                 TYPE max = GDK_##TYPE##_max;                            \
     171             :                 for (k = 0; k < cnt; k++)                            \
     172             :                         h[k] = s[k];                                    \
     173             :                 while (k < (BUN) imprints->bits) /* pad the bins in use with the type's maximum */ \
     174             :                         h[k++] = max;                                   \
     175             :                 CLRMEM();                                               \
     176             :         } else { /* otherwise pick boundaries at steps of cnt/63 through s */ \
     177             :                 double y, ystep = (double) cnt / (64 - 1);              \
     178             :                 for (k = 0, y = 0; (BUN) y < cnt; y += ystep, k++)   \
     179             :                         h[k] = s[(BUN) y];                              \
     180             :                 if (k == 64 - 1) /* there is one left */                \
     181             :                         h[k] = s[cnt - 1];                              \
     182             :         }                                                               \
     183             : } while (0)
     184             : 
     185             : /* Check whether imprints are available for b and return true if so.
     186             :  * Imprints may have been made persistent without being loaded yet,
     187             :  * so the file system is checked; a file that fails validation is
     188             :  * unlinked.  For a view, the check is performed on b's parent.
     189             :  *
     190             :  * Note that the b->timprints pointer can be NULL, meaning there are
     191             :  * no imprints; (Imprints *) 1, meaning there are no imprints loaded,
     192             :  * but they may exist on disk; or a valid pointer to loaded imprints.
     193             :  * These values are maintained here, in the IMPSdestroy and IMPSfree
     194             :  * functions, and in BBPdiskscan during initialization. */
     195             : bool
     196       24568 : BATcheckimprints(BAT *b)
     197             : {
     198       24568 :         bool ret;
     199             : 
     200       24568 :         if (VIEWtparent(b)) { /* a view itself carries no imprints: continue with the parent */
     201        3052 :                 assert(b->timprints == NULL);
     202        3052 :                 b = BBPdescriptor(VIEWtparent(b));
     203             :         }
     204             : 
     205       24568 :         if (b->timprints == (Imprints *) 1) { /* not loaded, but may exist on disk */
     206         140 :                 MT_lock_set(&b->batIdxLock);
     207         140 :                 if (b->timprints == (Imprints *) 1) { /* test again now that the lock is held */
     208         140 :                         Imprints *imprints;
     209         140 :                         const char *nme = BBP_physical(b->batCacheid);
     210             : 
     211         140 :                         assert(!GDKinmemory());
     212         140 :                         b->timprints = NULL; /* assume there are none until the file proves usable */
     213         140 :                         if ((imprints = GDKzalloc(sizeof(Imprints))) != NULL &&
     214         140 :                             (imprints->imprints.farmid = BBPselectfarm(b->batRole, b->ttype, imprintsheap)) >= 0) {
     215         140 :                                 int fd;
     216             : 
     217         140 :                                 strconcat_len(imprints->imprints.filename,
     218             :                                               sizeof(imprints->imprints.filename),
     219             :                                               nme, ".timprints", NULL);
     220             :                                 /* check whether a persisted imprints index
     221             :                                  * can be found */
     222         140 :                                 if ((fd = GDKfdlocate(imprints->imprints.farmid, nme, "rb", "timprints")) >= 0) {
     223         130 :                                         size_t hdata[4]; /* the four size_t header fields at the start of the file */
     224         130 :                                         struct stat st;
     225         130 :                                         size_t pages;
     226             : 
     227         130 :                                         pages = (((size_t) BATcount(b) * b->twidth) + IMPS_PAGE - 1) / IMPS_PAGE; /* column size in IMPS_PAGE units, rounded up */
     228         130 :                                         if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) && /* complete header read */
     229         130 :                                             hdata[0] & ((size_t) 1 << 16) && /* bit 16 set (BATimpsync sets it when persisting) */
     230         130 :                                             ((hdata[0] & 0xFF00) >> 8) == IMPRINTS_VERSION && /* format version matches */
     231         130 :                                             hdata[3] == (size_t) BATcount(b) && /* built for the current number of values */
     232         130 :                                             fstat(fd, &st) == 0 &&
     233         130 :                                             st.st_size >= (off_t) (imprints->imprints.size = /* file is at least as large as the layout requires */
     234         130 :                                                                    imprints->imprints.free =
     235         130 :                                                                    64 * b->twidth + /* bins */
     236         130 :                                                                    64 * 2 * SIZEOF_OID + /* min and max per bin */
     237             :                                                                    64 * SIZEOF_BUN + /* count per bin */
     238         130 :                                                                    pages * ((bte) hdata[0] / 8) + /* imps: low byte of hdata[0] is the mask width in bits */
     239         130 :                                                                    hdata[2] * sizeof(cchdc_t) + /* dict: hdata[2] entries */
     240             :                                                                    sizeof(uint64_t) /* padding for alignment */
     241         160 :                                                                    + 4 * SIZEOF_SIZE_T) && /* the header itself */
     242          30 :                                             HEAPload(&imprints->imprints, nme, "timprints", false) == GDK_SUCCEED) {
     243             :                                                 /* usable: point the Imprints fields into the loaded heap */
     244          30 :                                                 imprints->bits = (bte) (hdata[0] & 0xFF);
     245          30 :                                                 imprints->impcnt = (BUN) hdata[1];
     246          30 :                                                 imprints->dictcnt = (BUN) hdata[2];
     247          30 :                                                 imprints->bins = imprints->imprints.base + 4 * SIZEOF_SIZE_T;
     248          30 :                                                 imprints->stats = (BUN *) ((char *) imprints->bins + 64 * b->twidth);
     249          30 :                                                 imprints->imps = (void *) (imprints->stats + 64 * 3);
     250          30 :                                                 imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1)); /* dict address is rounded to a multiple of sizeof(uint64_t) */
     251          30 :                                                 close(fd);
     252          30 :                                                 imprints->imprints.parentid = b->batCacheid;
     253          30 :                                                 b->timprints = imprints;
     254          30 :                                                 TRC_DEBUG(ACCELERATOR, "BATcheckimprints(" ALGOBATFMT "): reusing persisted imprints\n", ALGOBATPAR(b));
     255          30 :                                                 MT_lock_unset(&b->batIdxLock);
     256             : 
     257          30 :                                                 return true;
     258             :                                         }
     259         100 :                                         close(fd);
     260             :                                         /* unlink unusable file */
     261         100 :                                         GDKunlink(imprints->imprints.farmid, BATDIR, nme, "timprints");
     262             :                                 }
     263             :                         }
     264         110 :                         GDKfree(imprints); /* also reached with imprints == NULL when the allocation failed */
     265         110 :                         GDKclrerr();    /* we're not currently interested in errors */
     266             :                 }
     267         110 :                 MT_lock_unset(&b->batIdxLock);
     268             :         }
     269       24538 :         ret = b->timprints != NULL; /* NULL here means no imprints, loaded or on disk */
     270       24538 :         if( ret)
     271       22755 :                 TRC_DEBUG(ACCELERATOR, "BATcheckimprints(" ALGOBATFMT "): already has imprints\n", ALGOBATPAR(b));
     272             :         return ret;
     273             : }
     274             : 
     275             : static void
     276         462 : BATimpsync(void *arg)
     277             : { /* Persist the imprints heap of the BAT passed in arg: save the heap, then set the version and bit 16 in the first header word and push that word to disk; finally drop one reference to the BAT (presumably the one taken by whoever scheduled this call -- confirm at the call site). */
     278         462 :         BAT *b = arg;
     279         462 :         Imprints *imprints;
     280         462 :         int fd;
     281         462 :         lng t0 = GDKusec();
     282         462 :         const char *failed = " failed"; /* suffix for the debug message; replaced on success or specific failures */
     283             : 
     284         462 :         MT_lock_set(&b->batIdxLock);
     285         462 :         if ((imprints = b->timprints) != NULL) {
     286         462 :                 Heap *hp = &imprints->imprints;
     287         462 :                 if (HEAPsave(hp, hp->filename, NULL, true) == GDK_SUCCEED) {
     288         462 :                         if (hp->storage == STORE_MEM) { /* heap not stored via the file itself: patch the first word in the saved file */
     289         397 :                                 if ((fd = GDKfdlocate(hp->farmid, hp->filename, "rb+", NULL)) >= 0) {
     290             :                                         /* add version number */
     291         397 :                                         ((size_t *) hp->base)[0] |= (size_t) IMPRINTS_VERSION << 8;
     292             :                                         /* sync-on-disk checked bit */
     293         397 :                                         ((size_t *) hp->base)[0] |= (size_t) 1 << 16;
     294         397 :                                         if (write(fd, hp->base, SIZEOF_SIZE_T) == SIZEOF_SIZE_T) { /* a short write must not count as success: the whole word is needed */
     295         397 :                                                 failed = ""; /* not failed */
     296         397 :                                                 if (!(GDKdebug & NOSYNCMASK)) {
     297             : #if defined(NATIVE_WIN32)
     298             :                                                         _commit(fd);
     299             : #elif defined(HAVE_FDATASYNC)
     300           0 :                                                         fdatasync(fd);
     301             : #elif defined(HAVE_FSYNC)
     302             :                                                         fsync(fd);
     303             : #endif
     304             :                                                 }
     305         397 :                                                 hp->dirty = false;
     306             :                                         } else {
     307           0 :                                                 failed = " write failed";
     308           0 :                                                 perror("write imprints"); /* note: errno is only meaningful when write returned -1 */
     309             :                                         }
     310         397 :                                         close(fd);
     311             :                                 }
     312             :                         } else { /* other storage modes: update the word in place and msync it (presumably a memory-mapped heap) */
     313             :                                 /* add version number */
     314          65 :                                 ((size_t *) hp->base)[0] |= (size_t) IMPRINTS_VERSION << 8;
     315             :                                 /* sync-on-disk checked bit */
     316          65 :                                 ((size_t *) hp->base)[0] |= (size_t) 1 << 16;
     317          65 :                                 if (!(GDKdebug & NOSYNCMASK) &&
     318           0 :                                     MT_msync(hp->base, SIZEOF_SIZE_T) < 0) {
     319           0 :                                         failed = " sync failed";
     320           0 :                                         ((size_t *) hp->base)[0] &= ~((size_t) IMPRINTS_VERSION << 8); /* take the version back out so the header does not validate */
     321             :                                 } else {
     322          65 :                                         hp->dirty = false;
     323          65 :                                         failed = ""; /* not failed */
     324             :                                 }
     325             :                         }
     326         462 :                         TRC_DEBUG(ACCELERATOR, "BATimpsync(" ALGOBATFMT "): "
     327             :                                   "imprints persisted "
     328             :                                   "(" LLFMT " usec)%s\n", ALGOBATPAR(b),
     329             :                                   GDKusec() - t0, failed);
     330             :                 }
     331             :         }
     332         462 :         MT_lock_unset(&b->batIdxLock);
     333         462 :         BBPunfix(b->batCacheid); /* release the BAT reference only after the lock is dropped */
     334         462 : }
     335             : 
     336             : gdk_return
     337       23505 : BATimprints(BAT *b)
     338             : {
     339       23505 :         BAT *s1 = NULL, *s2 = NULL, *s3 = NULL, *s4 = NULL;
     340       23505 :         Imprints *imprints;
     341       23505 :         lng t0 = GDKusec();
     342             : 
     343             :         /* we only create imprints for types that look like types we know */
     344       46687 :         switch (ATOMbasetype(b->ttype)) {
     345             :         case TYPE_bte:
     346             :         case TYPE_sht:
     347             :         case TYPE_int:
     348             :         case TYPE_lng:
     349             : #ifdef HAVE_HGE
     350             :         case TYPE_hge:
     351             : #endif
     352             :         case TYPE_flt:
     353             :         case TYPE_dbl:
     354       23491 :                 break;
     355          14 :         default:                /* type not supported */
     356             :                 /* doesn't look enough like base type: do nothing */
     357          14 :                 GDKerror("unsupported type\n");
     358          14 :                 return GDK_FAIL;
     359             :         }
     360             : 
     361       23491 :         BATcheck(b, GDK_FAIL);
     362             : 
     363       23491 :         if (BATcheckimprints(b))
     364             :                 return GDK_SUCCEED;
     365             : 
     366         706 :         if (VIEWtparent(b)) {
     367             :                 /* views always keep null pointer and need to obtain
     368             :                  * the latest imprint from the parent at query time */
     369         334 :                 s2 = b;         /* remember for ACCELDEBUG print */
     370         334 :                 b = BBPdescriptor(VIEWtparent(b));
     371         334 :                 assert(b);
     372         334 :                 if (BATcheckimprints(b))
     373             :                         return GDK_SUCCEED;
     374             :         }
     375         706 :         MT_lock_set(&b->batIdxLock);
     376             : 
     377             : 
     378         706 :         if (b->timprints == NULL) {
     379         675 :                 BUN cnt;
     380         675 :                 const char *nme = GDKinmemory() ? ":inmemory" : BBP_physical(b->batCacheid);
     381         675 :                 size_t pages;
     382             : 
     383         675 :                 MT_lock_unset(&b->batIdxLock);
     384             : 
     385         675 :                 if (s2)
     386         303 :                         TRC_DEBUG(ACCELERATOR, "BATimprints(b=" ALGOBATFMT
     387             :                                   "): creating imprints on parent "
     388             :                                   ALGOBATFMT "\n",
     389             :                                   ALGOBATPAR(s2), ALGOBATPAR(b));
     390             :                 else
     391         372 :                         TRC_DEBUG(ACCELERATOR, "BATimprints(b=" ALGOBATFMT
     392             :                                   "): creating imprints\n",
     393             :                                   ALGOBATPAR(b));
     394             : 
     395         675 :                 s2 = NULL;
     396             : 
     397         675 :                 imprints = GDKzalloc(sizeof(Imprints));
     398         675 :                 if (imprints == NULL) {
     399             :                         return GDK_FAIL;
     400             :                 }
     401         675 :                 strconcat_len(imprints->imprints.filename,
     402             :                               sizeof(imprints->imprints.filename),
     403             :                               nme, ".timprints", NULL);
     404         675 :                 pages = (((size_t) BATcount(b) * b->twidth) + IMPS_PAGE - 1) / IMPS_PAGE;
     405         675 :                 imprints->imprints.farmid = BBPselectfarm(b->batRole, b->ttype,
     406             :                                                            imprintsheap);
     407             : 
     408             : #define SMP_SIZE 2048
     409         675 :                 s1 = BATsample(b, SMP_SIZE);
     410         675 :                 if (s1 == NULL) {
     411           0 :                         GDKfree(imprints);
     412           0 :                         return GDK_FAIL;
     413             :                 }
     414         675 :                 s2 = BATunique(b, s1);
     415         675 :                 if (s2 == NULL) {
     416           0 :                         BBPunfix(s1->batCacheid);
     417           0 :                         GDKfree(imprints);
     418           0 :                         return GDK_FAIL;
     419             :                 }
     420         675 :                 s3 = BATproject(s2, b);
     421         675 :                 if (s3 == NULL) {
     422           0 :                         BBPunfix(s1->batCacheid);
     423           0 :                         BBPunfix(s2->batCacheid);
     424           0 :                         GDKfree(imprints);
     425           0 :                         return GDK_FAIL;
     426             :                 }
     427         675 :                 s3->tkey = true;     /* we know is unique on tail now */
     428         675 :                 if (BATsort(&s4, NULL, NULL, s3, NULL, NULL, false, false, false) != GDK_SUCCEED) {
     429           0 :                         BBPunfix(s1->batCacheid);
     430           0 :                         BBPunfix(s2->batCacheid);
     431           0 :                         BBPunfix(s3->batCacheid);
     432           0 :                         GDKfree(imprints);
     433           0 :                         return GDK_FAIL;
     434             :                 }
     435             :                 /* s4 now is ordered and unique on tail */
     436         675 :                 assert(s4->tkey && s4->tsorted);
     437         675 :                 cnt = BATcount(s4);
     438         675 :                 imprints->bits = 64;
     439         675 :                 if (cnt <= 32)
     440         429 :                         imprints->bits = 32;
     441         675 :                 if (cnt <= 16)
     442         420 :                         imprints->bits = 16;
     443         675 :                 if (cnt <= 8)
     444         407 :                         imprints->bits = 8;
     445             : 
     446             :                 /* The heap we create here consists of four parts:
     447             :                  * bins, max 64 entries with bin boundaries, domain of b;
     448             :                  * stats, min/max/count for each bin, min/max are oid, and count BUN;
     449             :                  * imps, max one entry per "page", entry is "bits" wide;
     450             :                  * dict, max two entries per three "pages".
     451             :                  * In addition, we add some housekeeping entries at
     452             :                  * the start so that we can determine whether we can
     453             :                  * trust the imprints when encountered on startup (including
     454             :                  * a version number -- CURRENT VERSION is 2). */
     455         675 :                 MT_lock_set(&b->batIdxLock);
     456        1280 :                 if (b->timprints != NULL ||
     457         605 :                     HEAPalloc(&imprints->imprints,
     458             :                               IMPRINTS_HEADER_SIZE * SIZEOF_SIZE_T + /* extra info */
     459         605 :                               64 * b->twidth + /* bins */
     460         605 :                               64 * 2 * SIZEOF_OID + /* {min,max}_bins */
     461             :                               64 * SIZEOF_BUN +     /* cnt_bins */
     462         605 :                               pages * (imprints->bits / 8) + /* imps */
     463         605 :                               sizeof(uint64_t) + /* padding for alignment */
     464             :                               pages * sizeof(cchdc_t), /* dict */
     465             :                               1) != GDK_SUCCEED) {
     466          70 :                         MT_lock_unset(&b->batIdxLock);
     467          70 :                         GDKfree(imprints);
     468          70 :                         BBPunfix(s1->batCacheid);
     469          70 :                         BBPunfix(s2->batCacheid);
     470          70 :                         BBPunfix(s3->batCacheid);
     471          70 :                         BBPunfix(s4->batCacheid);
     472          70 :                         if (b->timprints != NULL)
     473             :                                 return GDK_SUCCEED; /* we were beaten to it */
     474           0 :                         GDKerror("memory allocation error");
     475           0 :                         return GDK_FAIL;
     476             :                 }
     477         605 :                 imprints->bins = imprints->imprints.base + IMPRINTS_HEADER_SIZE * SIZEOF_SIZE_T;
     478         605 :                 imprints->stats = (BUN *) ((char *) imprints->bins + 64 * b->twidth);
     479         605 :                 imprints->imps = (void *) (imprints->stats + 64 * 3);
     480         605 :                 imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
     481             : 
     482        1172 :                 switch (ATOMbasetype(b->ttype)) {
     483           3 :                 case TYPE_bte:
     484         195 :                         FILL_HISTOGRAM(bte);
     485             :                         break;
     486         155 :                 case TYPE_sht:
     487       10074 :                         FILL_HISTOGRAM(sht);
     488             :                         break;
     489         374 :                 case TYPE_int:
     490       24223 :                         FILL_HISTOGRAM(int);
     491             :                         break;
     492          60 :                 case TYPE_lng:
     493        3887 :                         FILL_HISTOGRAM(lng);
     494             :                         break;
     495             : #ifdef HAVE_HGE
     496           5 :                 case TYPE_hge:
     497         325 :                         FILL_HISTOGRAM(hge);
     498             :                         break;
     499             : #endif
     500           5 :                 case TYPE_flt:
     501         324 :                         FILL_HISTOGRAM(flt);
     502             :                         break;
     503           3 :                 case TYPE_dbl:
     504         195 :                         FILL_HISTOGRAM(dbl);
     505             :                         break;
     506             :                 default:
     507             :                         /* should never reach here */
     508           0 :                         assert(0);
     509             :                 }
     510             : 
     511         605 :                 imprints_create(b,
     512             :                                 imprints->bins,
     513             :                                 imprints->stats,
     514         605 :                                 imprints->bits,
     515             :                                 imprints->imps,
     516             :                                 &imprints->impcnt,
     517         605 :                                 imprints->dict,
     518             :                                 &imprints->dictcnt);
     519         605 :                 assert(imprints->impcnt <= pages);
     520         605 :                 assert(imprints->dictcnt <= pages);
     521             : #ifndef NDEBUG
     522         605 :                 memset((char *) imprints->imps + imprints->impcnt * (imprints->bits / 8), 0, (char *) imprints->dict - ((char *) imprints->imps + imprints->impcnt * (imprints->bits / 8)));
     523             : #endif
     524         605 :                 imprints->imprints.free = (size_t) ((char *) ((cchdc_t *) imprints->dict + imprints->dictcnt) - imprints->imprints.base);
     525             :                 /* add info to heap for when they become persistent */
     526         605 :                 ((size_t *) imprints->imprints.base)[0] = (size_t) (imprints->bits);
     527         605 :                 ((size_t *) imprints->imprints.base)[1] = (size_t) imprints->impcnt;
     528         605 :                 ((size_t *) imprints->imprints.base)[2] = (size_t) imprints->dictcnt;
     529         605 :                 ((size_t *) imprints->imprints.base)[3] = (size_t) BATcount(b);
     530         605 :                 imprints->imprints.parentid = b->batCacheid;
     531         605 :                 b->timprints = imprints;
     532         605 :                 if (BBP_status(b->batCacheid) & BBPEXISTING &&
     533        1016 :                     !b->theap.dirty &&
     534         462 :                     !GDKinmemory()) {
     535         462 :                         MT_Id tid;
     536         462 :                         BBPfix(b->batCacheid);
     537         462 :                         char name[MT_NAME_LEN];
     538         462 :                         snprintf(name, sizeof(name), "impssync%d", b->batCacheid);
     539         462 :                         if (MT_create_thread(&tid, BATimpsync, b,
     540             :                                              MT_THR_DETACHED, name) < 0)
     541           0 :                                 BBPunfix(b->batCacheid);
     542             :                 }
     543             :         }
     544             : 
     545         636 :         TRC_DEBUG(ACCELERATOR, "BATimprints(%s): imprints construction " LLFMT " usec\n", BATgetId(b), GDKusec() - t0);
     546         636 :         MT_lock_unset(&b->batIdxLock);
     547             : 
     548             :         /* BBPUnfix tries to get the imprints lock which might lead to
     549             :          * a deadlock if those were unfixed earlier */
     550         636 :         if (s1) {
     551         605 :                 BBPunfix(s1->batCacheid);
     552         605 :                 BBPunfix(s2->batCacheid);
     553         605 :                 BBPunfix(s3->batCacheid);
     554         605 :                 BBPunfix(s4->batCacheid);
     555             :         }
     556             :         return GDK_SUCCEED;
     557             : }
     558             : 
     559             : #define getbin(TYPE,B)                          \
     560             : do {                                            \
     561             :         register const TYPE val = * (TYPE *) v; \
     562             :         GETBIN(ret,val,B);                      \
     563             : } while (0) /* getbin(TYPE,B): read the value at v as TYPE and hand it, together with ret and B, to GETBIN (defined elsewhere in this file); v and ret are taken from the expansion site, i.e. IMPSgetbin */
     564             : 
     565             : int /* IMPSgetbin: for a value *v of type tpe, run GETBIN over the bin boundaries in inbins (bits = number of bins: 8/16/32/64) and return ret; presumably ret is the bin index -- GETBIN is not visible here, TODO confirm */
     566       47753 : IMPSgetbin(int tpe, bte bits, const char *restrict inbins, const void *restrict v)
     567             : {
     568       47753 :         int ret = -1; /* returned unchanged if no getbin expansion below assigns it */
     569             : 
     570       47753 :         switch (tpe) { /* one case per supported fixed-width type; each case only differs in the element type */
     571           2 :         case TYPE_bte:
     572             :         {
     573           2 :                 const bte *restrict bins = (bte *) inbins; /* typed view of inbins; not referenced by name in this function, presumably picked up by GETBIN -- TODO confirm */
     574          16 :                 BINSIZE(bits, getbin, bte); /* expands getbin(bte, 8|16|32|64) depending on bits; any other width hits assert(0) in BINSIZE */
     575             :         }
     576             :                 break;
     577        1850 :         case TYPE_sht:
     578             :         {
     579        1850 :                 const sht *restrict bins = (sht *) inbins;
     580       15248 :                 BINSIZE(bits, getbin, sht);
     581             :         }
     582             :                 break;
     583       45302 :         case TYPE_int:
     584             :         {
     585       45302 :                 const int *restrict bins = (int *) inbins;
     586     2610170 :                 BINSIZE(bits, getbin, int);
     587             :         }
     588             :                 break;
     589         571 :         case TYPE_lng:
     590             :         {
     591         571 :                 const lng *restrict bins = (lng *) inbins;
     592       32997 :                 BINSIZE(bits, getbin, lng);
     593             :         }
     594             :                 break;
     595             : #ifdef HAVE_HGE
     596           0 :         case TYPE_hge:
     597             :         {
     598           0 :                 const hge *restrict bins = (hge *) inbins;
     599           0 :                 BINSIZE(bits, getbin, hge);
     600             :         }
     601             :                 break;
     602             : #endif
     603          12 :         case TYPE_flt:
     604             :         {
     605          12 :                 const flt *restrict bins = (flt *) inbins;
     606         384 :                 BINSIZE(bits, getbin, flt);
     607             :         }
     608             :                 break;
     609          16 :         case TYPE_dbl:
     610             :         {
     611          16 :                 const dbl *restrict bins = (dbl *) inbins;
     612         256 :                 BINSIZE(bits, getbin, dbl);
     613             :         }
     614             :                 break;
     615             :         default: /* any other type is a caller error */
     616           0 :                 assert(0); /* with NDEBUG this is a no-op and the function returns the initial -1 */
     617             :                 (void) inbins; /* explicit use of inbins; presumably silences an unused-parameter warning on this path */
     618             :                 break;
     619             :         }
     620       47753 :         return ret;
     621             : }
     622             : 
     623             : lng /* IMPSimprintsize: size in bytes of the imps and dict parts of b's imprints; 0 when b has no imprints loaded in memory */
     624     5669540 : IMPSimprintsize(BAT *b)
     625             : {
     626     5669540 :         lng sz = 0;
     627     5669540 :         if (b->timprints && b->timprints != (Imprints *) 1) { /* (Imprints *) 1 is the "available on disk only" marker set by IMPSfree; it must not be dereferenced. Note: b->timprints is read here without taking batIdxLock */
     628       83835 :                 sz = b->timprints->impcnt * b->timprints->bits / 8; /* imps part: impcnt entries, each "bits" bits wide */
     629       83835 :                 sz += b->timprints->dictcnt * sizeof(cchdc_t); /* dict part: dictcnt entries of type cchdc_t */
     630             :         }
     631     5669540 :         return sz;
     632             : }
     633             : 
     634             : static void /* IMPSremove: detach the loaded imprints from b, delete their heap via HEAPdelete and free the Imprints struct; the only caller in view (IMPSdestroy) holds batIdxLock and has already handled the (Imprints *) 1 marker */
     635         232 : IMPSremove(BAT *b)
     636             : {
     637         232 :         Imprints *imprints;
     638             : 
     639         232 :         assert(b->timprints != NULL);
     640         232 :         assert(!VIEWtparent(b)); /* must not be called on a view; presumably a view's imprints belong to its parent -- TODO confirm */
     641             : 
     642         232 :         if ((imprints = b->timprints) != NULL) { /* repeats the assert above as a run-time check for NDEBUG builds */
     643         232 :                 b->timprints = NULL; /* detach first, then release */
     644             : 
     645         232 :                 TRC_DEBUG_IF(ACCELERATOR) {
     646           0 :                         if (* (size_t *) imprints->imprints.base & (1 << 16)) /* bit 16 of the first header word; judging by the message below it marks imprints that were persisted -- TODO confirm where it is set */
     647           0 :                                 TRC_DEBUG_ENDIF(ACCELERATOR, "Removing persisted imprints\n");
     648             :                 }
     649         232 :                 if (HEAPdelete(&imprints->imprints, BBP_physical(b->batCacheid),
     650             :                                "timprints") != GDK_SUCCEED)
     651           0 :                         TRC_DEBUG(IO_, "IMPSremove(%s): imprints heap\n", BATgetId(b)); /* a failed delete is only traced, not reported to the caller */
     652             : 
     653         232 :                 GDKfree(imprints);
     654             :         }
     655         232 : }
     656             : 
     657             : void /* IMPSdestroy: drop the imprints of b -- IMPSremove for imprints loaded in memory, GDKunlink of the "timprints" file for the on-disk marker; no-op when b is NULL or has no imprints */
     658    45593200 : IMPSdestroy(BAT *b)
     659             : {
     660    45593200 :         if (b && b->timprints) { /* unlocked pre-check; the pointer is examined again under batIdxLock below */
     661         264 :                 MT_lock_set(&b->batIdxLock);
     662         264 :                 if (b->timprints == (Imprints *) 1) { /* marker value: nothing in memory, imprints exist on disk only (see IMPSfree) */
     663          32 :                         b->timprints = NULL;
     664          32 :                         GDKunlink(BBPselectfarm(b->batRole, b->ttype, imprintsheap),
     665             :                                   BATDIR,
     666          32 :                                   BBP_physical(b->batCacheid),
     667             :                                   "timprints"); /* return value of GDKunlink is not checked */
     668         232 :                 } else if (b->timprints != NULL && !VIEWtparent(b)) /* loaded imprints on a non-view; for a view nothing is removed and the pointer is left as is */
     669         232 :                         IMPSremove(b);
     670         264 :                 MT_lock_unset(&b->batIdxLock);
     671             :         }
     672    45593200 : }
     673             : 
     674             : /* free the memory associated with the imprints, do not remove the
     675             :  * heap files; indicate that imprints are available on disk by setting
     676             :  * the imprints pointer to 1 (in-memory mode sets it to NULL instead) */
     677             : void
     678     1172720 : IMPSfree(BAT *b)
     679             : {
     680     1172720 :         Imprints *imprints;
     681             : 
     682     1172720 :         if (b && b->timprints) { /* unlocked pre-check; the pointer is re-read under batIdxLock below */
     683         656 :                 assert(b->batCacheid > 0);
     684         656 :                 MT_lock_set(&b->batIdxLock);
     685         656 :                 imprints = b->timprints;
     686         656 :                 if (imprints != NULL && imprints != (Imprints *) 1) { /* only a real pointer is released; NULL and the on-disk marker are left untouched */
     687         403 :                         if (GDKinmemory()) { /* in-memory mode: no on-disk copy to refer to, so the pointer becomes NULL */
     688           0 :                                 b->timprints = NULL;
     689           0 :                                 if (!VIEWtparent(b)) { /* a view only has its pointer reset; presumably the Imprints belong to the parent -- TODO confirm */
     690           0 :                                         HEAPfree(&imprints->imprints, true); /* NOTE(review): second argument is true here and false below; presumably it controls removal of the backing file -- confirm against HEAPfree */
     691           0 :                                         GDKfree(imprints);
     692             :                                 }
     693             :                         } else {
     694         403 :                                 b->timprints = (Imprints *) 1; /* marker: imprints not in memory but available on disk */
     695         403 :                                 if (!VIEWtparent(b)) {
     696         403 :                                         HEAPfree(&imprints->imprints, false);
     697         403 :                                         GDKfree(imprints);
     698             :                                 }
     699             :                         }
     700             :                 }
     701         656 :                 MT_lock_unset(&b->batIdxLock);
     702             :         }
     703     1172720 : }
     704             : 
     705             : #ifndef NDEBUG
     706             : /* never called, useful for debugging */
     707             : 
     708             : #define IMPSPRNTMASK(T, B)                                              \
     709             :         do {                                                            \
     710             :                 uint##B##_t *restrict im = (uint##B##_t *) imprints->imps; \
     711             :                 for (j = 0; j < imprints->bits; j++)                      \
     712             :                         s[j] = IMPSisSet(B, im[icnt], j) ? 'x' : '.';   \
     713             :                 s[j] = '\0';                                            \
     714             :         } while (0) /* IMPSPRNTMASK(T,B): render imprint entry icnt (B bits wide) into s as a NUL-terminated string, 'x' for a set bit and '.' for a clear one; T is unused (only there to fit the BINSIZE calling convention); imprints, icnt, j and s come from the expansion site */
     715             : 
     716             : /* debugging aid: print the imprints of b (per-bin stats, then one bit-mask line per imprint entry) to stderr */
     717             : void
     718           0 : IMPSprint(BAT *b)
     719             : {
     720           0 :         Imprints *imprints;
     721           0 :         cchdc_t *restrict d;
     722           0 :         char s[65];             /* max number of bits + 1 */
     723           0 :         BUN icnt, dcnt, l, pages;
     724           0 :         BUN *restrict min_bins, *restrict max_bins;
     725           0 :         BUN *restrict cnt_bins;
     726           0 :         bte j;
     727           0 :         int i;
     728             : 
     729           0 :         if (!BATcheckimprints(b)) { /* nothing to print when b has no usable imprints */
     730           0 :                 fprintf(stderr, "No imprint\n");
     731           0 :                 return;
     732             :         }
     733           0 :         imprints = b->timprints;
     734           0 :         d = (cchdc_t *) imprints->dict;
     735           0 :         min_bins = imprints->stats; /* stats is treated as three consecutive 64-entry arrays: min, max, count */
     736           0 :         max_bins = min_bins + 64;
     737           0 :         cnt_bins = max_bins + 64;
     738             : 
     739           0 :         fprintf(stderr,
     740             :                 "bits = %d, impcnt = " BUNFMT ", dictcnt = " BUNFMT "\n",
     741           0 :                 imprints->bits, imprints->impcnt, imprints->dictcnt);
     742           0 :         fprintf(stderr, "MIN\n");
     743           0 :         for (i = 0; i < imprints->bits; i++) { /* only the first "bits" of the 64 slots are printed */
     744           0 :                 fprintf(stderr, "[ " BUNFMT " ]\n", min_bins[i]);
     745             :         }
     746             : 
     747           0 :         fprintf(stderr, "MAX\n");
     748           0 :         for (i = 0; i < imprints->bits; i++) {
     749           0 :                 fprintf(stderr, "[ " BUNFMT " ]\n", max_bins[i]);
     750             :         }
     751           0 :         fprintf(stderr, "COUNT\n");
     752           0 :         for (i = 0; i < imprints->bits; i++) {
     753           0 :                 fprintf(stderr, "[ " BUNFMT " ]\n", cnt_bins[i]);
     754             :         }
     755           0 :         for (dcnt = 0, icnt = 0, pages = 1; dcnt < imprints->dictcnt; dcnt++) { /* walk the dictionary; icnt indexes imprint entries, pages is the running page number printed in brackets */
     756           0 :                 if (d[dcnt].repeat) { /* repeat entry: a single imprint entry is printed once while pages advances by cnt */
     757           0 :                         BINSIZE(imprints->bits, IMPSPRNTMASK, " "); /* the " " argument is a dummy: IMPSPRNTMASK ignores its T parameter */
     758           0 :                         pages += d[dcnt].cnt;
     759           0 :                         fprintf(stderr, "[ " BUNFMT " ]r %s\n", pages, s); /* printed after advancing pages, unlike the branch below */
     760           0 :                         icnt++;
     761             :                 } else { /* non-repeat entry: cnt consecutive imprint entries, one output line and one page each */
     762           0 :                         l = icnt + d[dcnt].cnt;
     763           0 :                         for (; icnt < l; icnt++) {
     764           0 :                                 BINSIZE(imprints->bits, IMPSPRNTMASK, " ");
     765           0 :                                 fprintf(stderr, "[ " BUNFMT " ]  %s\n", pages++, s);
     766             :                         }
     767             :                 }
     768             :         }
     769             : }
     770             : #endif

Generated by: LCOV version 1.14