LCOV - code coverage report
Current view: top level - gdk - gdk_align.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 135 180 75.0 %
Date: 2021-09-14 19:48:19 Functions: 6 6 100.0 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  * @a Peter Boncz, Niels Nes
      11             :  * @* BAT Alignment
      12             :  * For BATs that result from a n-ary relational scheme it may help to
      13             :  * align the BATs on their head value. In particular, it permits
      14             :  * replacing a hash-join by a merge-join, which is significantly
      15             :  * faster on large tables. Especially if the BATs involved cause page
      16             :  * activity or when you can not afford the large hash structures to
      17             :  * speed-up processing.
      18             :  *
      19             :  * For orthogonality, we support alignment between arbitrary columns
      20             :  * (head or tail).
      21             :  *
      22             :  * All standard GDK set-calls update the alignment info in their
      23             :  * respective ways. For example, the routine @emph{BUNclustercopy}
      24             :  * shuffles the first argument, such that the BUNs are in the same
      25             :  * order as those in the second argument.  This operation will mark
      26             :  * both columns of the first @emph{BAT} as synced with the second
      27             :  * (likewise, @emph{Colcopy()}, which makes a copy, instead of
      28             :  * in-place shuffling, has the same alignment effect.
      29             :  *
      30             :  * Each alignment sequence is given a unique identifier, so as to
      31             :  * easily detect this situation. It is retained in the @emph{BAT
      32             :  * descriptor}.
      33             :  * @+ Alignment Design Considerations
      34             :  * Alignment primitives require the right hooks to be inserted in
      35             :  * several places in the GDK, apart form this file:
      36             :  * @itemize
      37             :  * @item @emph{ BUN update operations}.
      38             :  * The updated BATs have to be marked as un-aligned.
      39             :  * @item @emph{ set operations}.
      40             :  * For most relational operations some statements can be made about
      41             :  * the size and order of the BATs they produce. This information can
      42             :  * be formalized by indicating alignment information automatically.
      43             :  * @item @emph{ transaction operations}.
      44             :  * Alignment statuses must be kept consistent under database commits
      45             :  * and aborts.
      46             :  * @end itemize
      47             :  */
      48             : #include "monetdb_config.h"
      49             : #include "gdk.h"
      50             : #include "gdk_private.h"
      51             : 
      52             : /* Return TRUE if the two BATs are aligned (same size, same
      53             :  * hseqbase). */
      54             : int
      55       23409 : ALIGNsynced(BAT *b1, BAT *b2)
      56             : {
      57       23409 :         if (b1 == NULL || b2 == NULL)
      58             :                 return 0;
      59             : 
      60       23409 :         assert(!is_oid_nil(b1->hseqbase));
      61       23409 :         assert(!is_oid_nil(b2->hseqbase));
      62             : 
      63       23409 :         return BATcount(b1) == BATcount(b2) && b1->hseqbase == b2->hseqbase;
      64             : }
      65             : 
      66             : /*
      67             :  * @+ View BATS
      68             :  * The general routine for getting a 'view' BAT upon another BAT is
      69             :  * @emph{VIEWcreate}. On this @emph{#read-only} BAT (there is kernel
      70             :  * support for this), you can then make vertical slices.
      71             :  *
      72             :  * It is possible to create a view on a writable BAT. Updates in the
      73             :  * parent are then automatically reflected in the VIEW.  Note that the
      74             :  * VIEW bat itself can never be modified.
      75             :  *
      76             :  * Horizontal views should only be given out on a view BAT, but only
      77             :  * if it is dead sure the parent BAT is read-only.  This because they
      78             :  * cannot physically share the batBuns heap with the parent, as they
      79             :  * need a modified version.
      80             :  */
      81             : BAT *
      82     5624198 : VIEWcreate(oid seq, BAT *b)
      83             : {
      84             :         BAT *bn;
      85             :         bat tp = 0;
      86             : 
      87     5624198 :         BATcheck(b, NULL);
      88             : 
      89     5624198 :         if (b->ttype == TYPE_void) {
      90             :                 /* we don't do views on void bats */
      91        2226 :                 return BATdense(seq, b->tseqbase, b->batCount);
      92             :         }
      93             : 
      94     5621972 :         bn = BATcreatedesc(seq, b->ttype, false, TRANSIENT, 0);
      95     5618594 :         if (bn == NULL)
      96             :                 return NULL;
      97     5618594 :         assert(bn->theap == NULL);
      98             : 
      99             :         /* the T column descriptor is fully copied. We need copies
     100             :          * because in case of a mark, we are going to override a
     101             :          * column with a void. Take care to zero the accelerator data,
     102             :          * though. */
     103     5618594 :         bn->batInserted = b->batInserted;
     104     5618594 :         bn->batCount = b->batCount;
     105     5618594 :         bn->batCapacity = b->batCapacity;
     106     5618594 :         MT_lock_set(&b->theaplock);
     107     5636725 :         bn->T = b->T;
     108     5636725 :         tp = VIEWtparent(b);
     109     5636725 :         if (tp == 0 && b->ttype != TYPE_void)
     110     3788984 :                 tp = b->batCacheid;
     111     5636725 :         assert(b->ttype != TYPE_void || !tp);
     112             :         /* copy again now we have the correct lock */
     113     5636725 :         bn->theap = b->theap;
     114     5636725 :         HEAPincref(b->theap);
     115     5636408 :         if (b->tvheap)
     116     1280603 :                 HEAPincref(b->tvheap);
     117     5636206 :         MT_lock_unset(&b->theaplock);
     118             : 
     119     5637849 :         if (tp)
     120     5636622 :                 BBPshare(tp);
     121     5634335 :         if (bn->tvheap) {
     122     1280365 :                 assert(bn->tvheap->parentid > 0);
     123     1280365 :                 BBPshare(bn->tvheap->parentid);
     124             :         }
     125             : 
     126     5634369 :         bn->tprops = NULL;
     127             : 
     128             :         /* correct values after copy of column info */
     129     5634369 :         BATinit_idents(bn);
     130     5631353 :         bn->batRestricted = BAT_READ;
     131     5631353 :         bn->thash = NULL;
     132             :         /* imprints are shared, but the check is dynamic */
     133     5631353 :         bn->timprints = NULL;
     134             :         /* Order OID index */
     135     5631353 :         bn->torderidx = NULL;
     136     5631353 :         if (BBPcacheit(bn, true) != GDK_SUCCEED) {      /* enter in BBP */
     137           0 :                 if (tp) {
     138           0 :                         BBPunshare(tp);
     139           0 :                         BBPunfix(tp);
     140             :                 }
     141           0 :                 if (bn->tvheap) {
     142           0 :                         BBPunshare(bn->tvheap->parentid);
     143           0 :                         HEAPdecref(bn->tvheap, false);
     144             :                 }
     145           0 :                 HEAPdecref(bn->theap, false);
     146           0 :                 MT_lock_destroy(&bn->theaplock);
     147           0 :                 MT_lock_destroy(&bn->batIdxLock);
     148           0 :                 MT_rwlock_destroy(&bn->thashlock);
     149           0 :                 GDKfree(bn);
     150           0 :                 return NULL;
     151             :         }
     152     5637459 :         TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n",
     153             :                   ALGOBATPAR(b), ALGOBATPAR(bn));
     154             :         return bn;
     155             : }
     156             : 
     157             : /*
     158             :  * The BATmaterialize routine produces in-place materialized version
     159             :  * of a void bat (which should not be a VIEW) (later we should add the
     160             :  * code for VIEWs).
     161             :  */
     162             : 
     163             : gdk_return
     164       14274 : BATmaterialize(BAT *b)
     165             : {
     166             :         BUN cnt;
     167             :         Heap *tail;
     168             :         BUN p, q;
     169             :         oid t, *x;
     170             : 
     171       14274 :         BATcheck(b, GDK_FAIL);
     172       14274 :         assert(!isVIEW(b));
     173       14274 :         if (b->ttype != TYPE_void) {
     174             :                 /* no voids */
     175             :                 return GDK_SUCCEED;
     176             :         }
     177             : 
     178         407 :         cnt = BATcapacity(b);
     179         407 :         if ((tail = GDKmalloc(sizeof(Heap))) == NULL)
     180             :                 return GDK_FAIL;
     181             :         p = 0;
     182         407 :         q = BUNlast(b);
     183         407 :         assert(cnt >= q - p);
     184         407 :         TRC_DEBUG(ALGO, "BATmaterialize(" ALGOBATFMT ")\n", ALGOBATPAR(b));
     185             : 
     186             :         /* cleanup possible ACC's */
     187         407 :         HASHdestroy(b);
     188         407 :         IMPSdestroy(b);
     189         407 :         OIDXdestroy(b);
     190             : 
     191         407 :         *tail = (Heap) {
     192         407 :                 .farmid = BBPselectfarm(b->batRole, TYPE_oid, offheap),
     193         407 :                 .parentid = b->batCacheid,
     194             :                 .dirty = true,
     195             :         };
     196         407 :         settailname(tail, BBP_physical(b->batCacheid), TYPE_oid, 0);
     197         407 :         if (HEAPalloc(tail, cnt, sizeof(oid), 0) != GDK_SUCCEED) {
     198           0 :                 GDKfree(tail);
     199           0 :                 return GDK_FAIL;
     200             :         }
     201         407 :         x = (oid *) tail->base;
     202         407 :         t = b->tseqbase;
     203         407 :         if (is_oid_nil(t)) {
     204          12 :                 for (p = 0; p < q; p++)
     205           0 :                         x[p] = oid_nil;
     206             :         } else {
     207         774 :                 for (p = 0; p < q; p++)
     208         379 :                         x[p] = t++;
     209             :         }
     210         407 :         ATOMIC_INIT(&tail->refs, 1);
     211             :         /* point of no return */
     212         407 :         MT_lock_set(&b->theaplock);
     213         407 :         assert(ATOMIC_GET(&b->theap->refs) > 0);
     214             :         /* can only look at tvheap when lock is held */
     215         407 :         if (complex_cand(b)) {
     216           0 :                 assert(b->batRole == TRANSIENT);
     217           0 :                 if (negoid_cand(b)) {
     218           0 :                         assert(ccand_free(b) % SIZEOF_OID == 0);
     219           0 :                         BUN nexc = (BUN) (ccand_free(b) / SIZEOF_OID);
     220             :                         const oid *exc = (const oid *) ccand_first(b);
     221             :                         BUN i;
     222           0 :                         for (p = 0, i = 0; p < q; p++) {
     223           0 :                                 while (i < nexc && t == exc[i]) {
     224             :                                         i++;
     225           0 :                                         t++;
     226             :                                 }
     227           0 :                                 x[p] = t++;
     228             :                         }
     229             :                 } else {
     230           0 :                         assert(mask_cand(b));
     231           0 :                         BUN nmsk = (BUN) (ccand_free(b) / sizeof(uint32_t));
     232             :                         const uint32_t *src = (const uint32_t *) ccand_first(b);
     233             :                         BUN n = 0;
     234           0 :                         t -= (oid) CCAND(b)->firstbit;
     235           0 :                         for (p = 0; p < nmsk; p++) {
     236           0 :                                 uint32_t val = src[p];
     237           0 :                                 if (val == 0)
     238           0 :                                         continue;
     239           0 :                                 for (uint32_t i = 0; i < 32; i++) {
     240           0 :                                         if (val & (1U << i)) {
     241           0 :                                                 assert(n < q);
     242           0 :                                                 x[n++] = t + p * 32 + i;
     243             :                                         }
     244             :                                 }
     245             :                         }
     246           0 :                         assert(n == q);
     247             :                 }
     248           0 :                 HEAPdecref(b->tvheap, true);
     249           0 :                 b->tvheap = NULL;
     250             :         }
     251         407 :         HEAPdecref(b->theap, false);
     252         407 :         b->theap = tail;
     253         407 :         b->tbaseoff = 0;
     254         407 :         BATsetprop_nolock(b, GDK_NUNIQUE, TYPE_oid, &(oid){is_oid_nil(t) ? 1 : b->batCount});
     255         407 :         BATsetprop_nolock(b, GDK_UNIQUE_ESTIMATE, TYPE_dbl, &(dbl){is_oid_nil(t) ? 1.0 : (dbl)b->batCount});
     256         407 :         MT_lock_unset(&b->theaplock);
     257         407 :         b->ttype = TYPE_oid;
     258         407 :         BATsetdims(b);
     259         407 :         b->batDirtydesc = true;
     260         407 :         BATsetcount(b, b->batCount);
     261             : 
     262         407 :         return GDK_SUCCEED;
     263             : }
     264             : 
     265             : /*
     266             :  * The @#VIEWunlink@ routine cuts a reference to the parent. Part of the view
     267             :  * destroy sequence.
     268             :  */
     269             : static void
     270     5794886 : VIEWunlink(BAT *b)
     271             : {
     272     5794886 :         if (b) {
     273     5794886 :                 MT_lock_set(&b->theaplock);
     274             : 
     275     5814729 :                 bat tp = VIEWtparent(b);
     276     5814729 :                 bat vtp = VIEWvtparent(b);
     277             :                 BAT *tpb = NULL;
     278             :                 BAT *vtpb = NULL;
     279             : 
     280     5814729 :                 if (tp)
     281     5639038 :                         tpb = BBP_cache(tp);
     282     5814729 :                 if (tp && !vtp)
     283             :                         vtp = tp;
     284     5814729 :                 if (vtp)
     285     5814694 :                         vtpb = BBP_cache(vtp);
     286             : 
     287     5814729 :                 if (tpb == NULL && vtpb == NULL) {
     288           0 :                         MT_lock_unset(&b->theaplock);
     289           0 :                         return;
     290             :                 }
     291             : 
     292             :                 /* unlink heaps shared with parent */
     293     5814729 :                 if (b->theap && b->theap->parentid != b->batCacheid) {
     294     5639161 :                         HEAPdecref(b->theap, false);
     295     5638177 :                         b->theap = NULL;
     296             :                 }
     297     5813745 :                 assert(b->tvheap == NULL || b->tvheap->parentid > 0);
     298     5813745 :                 if (b->tvheap && b->tvheap->parentid != b->batCacheid) {
     299     1456609 :                         HEAPdecref(b->tvheap, false);
     300     1456485 :                         b->tvheap = NULL;
     301             :                 }
     302             : 
     303     5813621 :                 MT_lock_unset(&b->theaplock);
     304             : 
     305     5814485 :                 MT_lock_set(&b->batIdxLock);
     306             :                 /* unlink imprints shared with parent */
     307     5812668 :                 if (b->timprints &&
     308           0 :                     b->timprints != (Imprints *) 1 &&
     309           0 :                     b->timprints->imprints.parentid != b->batCacheid) {
     310           0 :                         IMPSdecref(b->timprints, false);
     311           0 :                         b->timprints = NULL;
     312             :                 }
     313     5812668 :                 MT_lock_unset(&b->batIdxLock);
     314             :         }
     315             : }
     316             : 
     317             : /*
     318             :  * The remainder are utilities to manipulate the BAT view and not to
     319             :  * forget some details in the process.  It expects a position range in
     320             :  * the underlying BAT and compensates for outliers.
     321             :  */
     322             : void
     323     5597283 : VIEWbounds(BAT *b, BAT *view, BUN l, BUN h)
     324             : {
     325             :         BUN cnt;
     326             :         BUN baseoff;
     327             : 
     328     5597283 :         if (b == NULL || view == NULL)
     329             :                 return;
     330     5597283 :         if (h > BATcount(b))
     331             :                 h = BATcount(b);
     332     5597283 :         baseoff = b->tbaseoff;
     333             :         if (h < l)
     334             :                 h = l;
     335     5597283 :         cnt = h - l;
     336     5597283 :         view->batInserted = 0;
     337     5597283 :         if (view->ttype != TYPE_void) {
     338     5601100 :                 view->tbaseoff = baseoff + l;
     339             :         }
     340     5597283 :         BATsetcount(view, cnt);
     341     5616784 :         BATsetcapacity(view, cnt);
     342     5613189 :         if (view->tnosorted > l && view->tnosorted < l + cnt)
     343      490906 :                 view->tnosorted -= l;
     344             :         else
     345     5122283 :                 view->tnosorted = 0;
     346     5613189 :         if (view->tnorevsorted > l && view->tnorevsorted < l + cnt)
     347      817672 :                 view->tnorevsorted -= l;
     348             :         else
     349     4795517 :                 view->tnorevsorted = 0;
     350     5613189 :         if (view->tnokey[0] >= l && view->tnokey[0] < l + cnt &&
     351     4361508 :             view->tnokey[1] >= l && view->tnokey[1] < l + cnt &&
     352             :             view->tnokey[0] != view->tnokey[1]) {
     353      288304 :                 view->tnokey[0] -= l;
     354      288304 :                 view->tnokey[1] -= l;
     355             :         } else {
     356     5324885 :                 view->tnokey[0] = view->tnokey[1] = 0;
     357             :         }
     358             : }
     359             : 
     360             : /*
     361             :  * Destroy a view.
     362             :  */
     363             : void
     364     5803306 : VIEWdestroy(BAT *b)
     365             : {
     366     5803306 :         assert(isVIEW(b));
     367             : 
     368             :         /* remove any leftover private hash structures */
     369     5803306 :         HASHdestroy(b);
     370     5797386 :         IMPSdestroy(b);
     371     5794235 :         OIDXdestroy(b);
     372     5795329 :         PROPdestroy(b);
     373     5794558 :         VIEWunlink(b);
     374             : 
     375     5813982 :         MT_lock_set(&b->theaplock);
     376     5807180 :         if (b->theap) {
     377      175949 :                 HEAPdecref(b->theap, false);
     378      175946 :                 b->theap = NULL;
     379             :         }
     380     5807177 :         if (b->tvheap) {
     381           0 :                 HEAPdecref(b->tvheap, false);
     382           0 :                 b->tvheap = NULL;
     383             :         }
     384     5807177 :         MT_lock_unset(&b->theaplock);
     385     5813149 :         BATfree(b);
     386     5807860 : }

Generated by: LCOV version 1.14