LCOV - code coverage report
Current view: top level - monetdb5/modules/mal - mkey.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 143 286 50.0 %
Date: 2021-10-27 03:06:47 Functions: 5 8 62.5 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             : - * @- The Problem
      11             : - * When creating a join, we want to make a unique key of the attributes on both
      12             : - * sides and then join these keys. Consider the following BATs.
      13             : - *
      14             : - * @verbatim
      15             : - * orders                  customer                link
      16             : - * ====================    =====================   ===========
      17             : - *         zipcode h_nr            zipcode hnr    oid     cid
      18             : - * o1      13      9       c1      11      10      o1      c5
      19             : - * o2      11      10      c2      11      11      o2      c1
      20             : - * o3      11      11      c3      12      2       o3      c2
      21             : - * o4      12      5       c4      12      1       o4      nil
      22             : - * o5      11      10      c5      13      9       o5      c1
      23             : - * o6      12      2       c6      14      7       o6      c3
      24             : - * o7      13      9                               o7      c5
      25             : - * o8      12      1                               o8      c4
      26             : - * o9      13      9                               o9      c5
      27             : - * @end verbatim
      28             : - *
      29             : - * The current approach is designed to take minimal memory, as our previous
      30             : - * solutions to the problem did not scale well. In case of singular keys,
      31             : - * the link is executed by a simple join. Before going into the join, we
      32             : - * make sure the end result size is not too large, which is done by looking
      33             : - * at relation sizes (if the other key is unique) or, if that is not possible,
      34             : - * by computing the exact join size.
      35             : - *
      36             : - * The join algorithm was also improved to do dynamic sampling to determine
      37             : - * with high accuracy the join size, so that we can alloc in one go a memory
      38             : - * region of sufficient size. This also reduces the ds\_link memory requirements.
      39             : - *
      40             : - * For compound keys, those that consist of multiple attributes, we now compute
      41             : - * a derived column that contains an integer hash value derived from all
      42             : - * key columns.
      43             : - * This is done by computing a hash value for each individual key column
      44             : - * and combining those by bitwise XOR and left-rotation. That is, for each
      45             : - * column,we rotate the working hash value by N bits and XOR the hash value
      46             : - * of the column over it. The working hash value is initialized with zero,
      47             : - * and after all columns are processed, this working value is used as output.
      48             : - * Computing the hash value for all columns in the key for one table is done
      49             : - * by the command hash(). Hence, we do hash on both sides, and join
      50             : - * that together with a simple join:
      51             : - *
      52             : - * @code{join(hash(keys), hash(keys.reverse);}
      53             : - *
      54             : - * One complication of this procedure are nil values:
      55             : - * @table
      56             : - * @itemize
      57             : - * @item
      58             : - * it may happen that the final hash-value (an int formed by a
      59             : - * random bit pattern) accidentally has the value of int(nil).
      60             : - * Notice that join never matches nil values.
      61             : - * Hence these accidental nils must be replaced by a begin value (currently: 0).
      62             : - * @item
      63             : - * in case any of the compound key values is nil, our nil semantics
      64             : - * require us that those tuples may never match on a join. Consequently,
      65             : - * during the hash() processing of all compound key columns for computing
      66             : - * the hash value, we also maintain a bit-bat that records which tuples had
      67             : - * a nil value. The bit-bat is initialized to false, and the results of the
      68             : - * nil-check on each column is OR-ed to it.
      69             : - * Afterwards, the hash-value of all tuples that have this nil-bit set to
      70             : - * TRUE are forced to int(nil), which will exclude them from matching.
      71             : - * @end itemize
      72             : - *
      73             : - * Joining on hash values produces a @emph{superset} of the join result:
      74             : - * it may happen that  two different key combinations hash on the same value,
      75             : - * which will make them match on the join (false hits). The final part
      76             : - * of the ds\_link therefore consists of filtering out the false hits.
      77             : - * This is done incrementally by joining back the join result to the original
      78             : - * columns, incrementally one by one for each pair of corresponding
      79             : - * columns. These values are compared with each other and we AND the
      80             : - * result of this comparison together for each pair of columns.
      81             : - * The bat containing these bits is initialized to all TRUE and serves as
      82             : - * final result after all column pairs have been compared.
      83             : - * The initial join result is finally filtered with this bit-bat.
      84             : - *
      85             : - * Joining back from the initial join-result to the original columns on
      86             : - * both sides takes quite a lot of memory. For this reason, the false
      87             : - * hit-filtering is done in slices (not all tuples at one time).
      88             : - * In this way the memory requirements of this phase are kept low.
      89             : - * In fact, the most memory demanding part of the join is the int-join
      90             : - * on hash number, which takes N*24 bytes (where N= |L| = |R|).
      91             : - * In comparison, the previous CTmultigroup/CTmultiderive approach
      92             : - * took N*48 bytes. Additionally, by making it possible to use merge-sort,
      93             : - * it avoids severe performance degradation (memory thrashing) as produced
      94             : - * by the old ds\_link when the inner join relation would be larger than memory.
      95             : - *
      96             : - * If ds\_link performance is still an issue, the sort-merge join used here
      97             : - * could be replaced by partitioned hash-join with radix-cluster/decluster.
      98             : - *
      99             : - * @+ Implementation
     100             : - */
     101             : 
     102             : /*
     103             :  * (c) Peter Boncz, Stefan Manegold, Niels Nes
     104             :  *
     105             :  * new functionality for the low-resource-consumption. It will
     106             :  * first one by one create a hash value out of the multiple attributes.
     107             :  * This hash value is computed by xoring and rotating individual hash
     108             :  * values together. We create a hash and rotate command to do this.
     109             :  */
     110             : 
     111             : #include "monetdb_config.h"
     112             : #include "mal.h"
     113             : #include "mal_interpreter.h"
     114             : #include "mal_exception.h"
     115             : 
     116             : #define MKEYHASH_bte(valp)      ((ulng) (lng) *(const bte*)(valp))
     117             : #define MKEYHASH_sht(valp)      ((ulng) (lng) *(const sht*)(valp))
     118             : #define MKEYHASH_int(valp)      ((ulng) (lng) *(const int*)(valp))
     119             : #define MKEYHASH_lng(valp)      ((ulng) (lng) *(const lng*)(valp))
     120             : #ifdef HAVE_HGE
     121             : #define MKEYHASH_hge(valp)      ((ulng) (*(const uhge *)(valp) >> 64) ^ \
     122             :                                                          (ulng) *(const uhge *)(valp))
     123             : #endif
     124             : 
     125             : static inline ulng
     126             : GDK_ROTATE(ulng x, int y, int z)
     127             : {
     128    53189955 :         return (x << y) | (x >> z);
     129             : }
     130             : 
     131             : /* TODO: nil handling. however; we do not want to lose time in bulk_rotate_xor_hash with that */
     132             : static str
     133           0 : MKEYrotate(lng *res, const lng *val, const int *n)
     134             : {
     135           0 :         *res = (lng) GDK_ROTATE((ulng) *val, *n, (sizeof(lng)*8) - *n);
     136           0 :         return MAL_SUCCEED;
     137             : }
     138             : 
     139             : static str
     140        1020 : MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
     141             : {
     142        1020 :         lng *res = getArgReference_lng(stk,p,0);
     143        1020 :         ptr val = getArgReference(stk,p,1);
     144        1020 :         int tpe = getArgType(mb,p,1);
     145             : 
     146             :         (void) cntxt;
     147        1020 :         switch (ATOMstorage(tpe)) {
     148           1 :         case TYPE_void:
     149           1 :                 *res = lng_nil; /* It can be called from SQL */
     150           1 :                 break;
     151             :         case TYPE_bat:
     152             :         case TYPE_ptr:
     153             :                 // illegal types, avoid falling into the default case.
     154           0 :                 assert(0);
     155             :         case TYPE_bte:
     156           0 :                 *res = (lng) MKEYHASH_bte(val);
     157           0 :                 break;
     158           0 :         case TYPE_sht:
     159           0 :                 *res = (lng) MKEYHASH_sht(val);
     160           0 :                 break;
     161         946 :         case TYPE_int:
     162             :         case TYPE_flt:
     163         946 :                 *res = (lng) MKEYHASH_int(val);
     164         946 :                 break;
     165           4 :         case TYPE_lng:
     166             :         case TYPE_dbl:
     167           4 :                 *res = (lng) MKEYHASH_lng(val);
     168           4 :                 break;
     169             : #ifdef HAVE_HGE
     170           0 :         case TYPE_hge:
     171           0 :                 *res = (lng) MKEYHASH_hge(val);
     172           0 :                 break;
     173             : #endif
     174          69 :         default:
     175          69 :                 if (ATOMextern(tpe))
     176          69 :                         *res = (lng) ATOMhash(tpe, *(ptr*)val);
     177             :                 else
     178           0 :                         *res = (lng) ATOMhash(tpe, val);
     179             :                 break;
     180             :         }
     181        1020 :         return MAL_SUCCEED;
     182             : }
     183             : 
     184             : static str
     185       12418 : MKEYbathash(bat *res, const bat *bid)
     186             : {
     187             :         BAT *b, *dst;
     188             :         ulng *restrict r;
     189             :         BUN n;
     190             : 
     191       12418 :         if ((b = BATdescriptor(*bid)) == NULL)
     192           0 :                 throw(SQL, "mkey.bathash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     193             : 
     194       12417 :         n = BATcount(b);
     195       12417 :         dst = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT);
     196       12417 :         if (dst == NULL) {
     197           0 :                 BBPunfix(b->batCacheid);
     198           0 :                 throw(SQL, "mkey.bathash", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     199             :         }
     200       12417 :         BATsetcount(dst, n);
     201             : 
     202       12418 :         r = (ulng *) Tloc(dst, 0);
     203             : 
     204       12418 :         BATiter bi = bat_iterator(b);
     205       12417 :         switch (ATOMstorage(b->ttype)) {
     206           0 :         case TYPE_void: {
     207           0 :                 oid o = b->tseqbase;
     208           0 :                 if (is_oid_nil(o))
     209           0 :                         for (BUN i = 0; i < n; i++)
     210           0 :                                 r[i] = (ulng) lng_nil;
     211             :                 else
     212           0 :                         for (BUN i = 0; i < n; i++)
     213           0 :                                 r[i] = o + i;
     214             :                 break;
     215             :         }
     216          10 :         case TYPE_bte: {
     217          10 :                 const bte *restrict v = (const bte *) bi.base;
     218          45 :                 for (BUN i = 0; i < n; i++)
     219          35 :                         r[i] = MKEYHASH_bte(v + i);
     220             :                 break;
     221             :         }
     222           7 :         case TYPE_sht: {
     223           7 :                 const sht *restrict v = (const sht *) bi.base;
     224       31492 :                 for (BUN i = 0; i < n; i++)
     225       31485 :                         r[i] = MKEYHASH_sht(v + i);
     226             :                 break;
     227             :         }
     228       11574 :         case TYPE_int:
     229             :         case TYPE_flt: {
     230       11574 :                 const int *restrict v = (const int *) bi.base;
     231    35295444 :                 for (BUN i = 0; i < n; i++)
     232    35283870 :                         r[i] = MKEYHASH_int(v + i);
     233             :                 break;
     234             :         }
     235         138 :         case TYPE_lng:
     236             :         case TYPE_dbl: {
     237         138 :                 const lng *restrict v = (const lng *) bi.base;
     238       16013 :                 for (BUN i = 0; i < n; i++)
     239       15875 :                         r[i] = MKEYHASH_lng(v + i);
     240             :                 break;
     241             :         }
     242             : #ifdef HAVE_HGE
     243          38 :         case TYPE_hge: {
     244          38 :                 const hge *restrict v = (const hge *) bi.base;
     245         221 :                 for (BUN i = 0; i < n; i++)
     246         183 :                         r[i] = MKEYHASH_hge(v + i);
     247             :                 break;
     248             :         }
     249             : #endif
     250         650 :         default: {
     251         650 :                 BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
     252         650 :                 int (*cmp)(const void *, const void *) = ATOMcompare(b->ttype);
     253         650 :                 const void *nil = ATOMnilptr(b->ttype);
     254             : 
     255      461761 :                 for (BUN i = 0; i < n; i++) {
     256      461111 :                         const void *restrict v = BUNtail(bi, i);
     257      461111 :                         if ((*cmp)(v, nil) == 0)
     258        9007 :                                 r[i] = (ulng) lng_nil;
     259             :                         else
     260      453297 :                                 r[i] = (ulng) (*hash)(v);
     261             :                 }
     262             :                 break;
     263             :         }
     264             :         }
     265       12417 :         bat_iterator_end(&bi);
     266             : 
     267       12416 :         if (dst->batCount <= 1) {
     268        5356 :                 BATkey(dst, true);
     269        5356 :                 dst->tsorted = dst->trevsorted = true;
     270             :         } else {
     271        7060 :                 BATkey(dst, false);
     272        7062 :                 dst->tsorted = dst->trevsorted = false;
     273             :         }
     274       12418 :         dst->tnonil = false;
     275       12418 :         dst->tnil = false;
     276             : 
     277       12418 :         BBPkeepref(*res = dst->batCacheid);
     278       12416 :         BBPunfix(b->batCacheid);
     279       12417 :         return MAL_SUCCEED;
     280             : }
     281             : 
     282             : static str
     283        1779 : MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
     284             : {
     285        1779 :         lng *dst = getArgReference_lng(stk, p, 0);
     286        1779 :         ulng h = (ulng) *getArgReference_lng(stk, p, 1);
     287        1779 :         int lbit = *getArgReference_int(stk, p, 2);
     288             :         int rbit = (int) sizeof(lng) * 8 - lbit;
     289        1779 :         int tpe = getArgType(mb, p, 3);
     290        1779 :         ptr pval = getArgReference(stk, p, 3);
     291             :         ulng val;
     292             : 
     293             :         (void) cntxt;
     294        1777 :         switch (ATOMstorage(tpe)) {
     295           2 :         case TYPE_bte:
     296           2 :                 val = MKEYHASH_bte(pval);
     297           2 :                 break;
     298           0 :         case TYPE_sht:
     299           0 :                 val = MKEYHASH_sht(pval);
     300           0 :                 break;
     301        1525 :         case TYPE_int:
     302             :         case TYPE_flt:
     303        1525 :                 val = MKEYHASH_int(pval);
     304        1525 :                 break;
     305          37 :         case TYPE_lng:
     306             :         case TYPE_dbl:
     307          37 :                 val = MKEYHASH_lng(pval);
     308          37 :                 break;
     309             : #ifdef HAVE_HGE
     310           0 :         case TYPE_hge:
     311           0 :                 val = MKEYHASH_hge(pval);
     312           0 :                 break;
     313             : #endif
     314         213 :         default:
     315         213 :                 if (ATOMextern(tpe))
     316         213 :                         val = ATOMhash(tpe, *(ptr*)pval);
     317             :                 else
     318           0 :                         val = ATOMhash(tpe, pval);
     319             :                 break;
     320             :         }
     321        1777 :         *dst = (lng) (GDK_ROTATE(h, lbit, rbit) ^ val);
     322        1777 :         return MAL_SUCCEED;
     323             : }
     324             : 
     325             : static str
     326       14169 : MKEYbulk_rotate_xor_hash(bat *res, const bat *hid, const int *nbits, const bat *bid)
     327             : {
     328             :         BAT *hb, *b, *bn;
     329       14169 :         int lbit = *nbits;
     330             :         int rbit = (int) sizeof(lng) * 8 - lbit;
     331             :         ulng *restrict r;
     332             :         const ulng *restrict h;
     333             :         BUN n;
     334             : 
     335       14169 :         if ((hb = BATdescriptor(*hid)) == NULL)
     336           0 :                 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     337             : 
     338       14166 :         if ((b = BATdescriptor(*bid)) == NULL) {
     339           0 :                 BBPunfix(hb->batCacheid);
     340           0 :                 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     341             :         }
     342             : 
     343       14167 :         if (!ALIGNsynced(hb, b) && (BATcount(b) || BATcount(hb))) {
     344           0 :                 BBPunfix(hb->batCacheid);
     345           0 :                 BBPunfix(b->batCacheid);
     346           0 :                 throw(MAL, "mkey.rotate_xor_hash",
     347             :                           OPERATION_FAILED ": input bats are not aligned");
     348             :         }
     349       14169 :         if (b->ttype == TYPE_msk || mask_cand(b)) {
     350             :                 BAT *ob = b;
     351           0 :                 b = BATunmask(b);
     352           0 :                 BBPunfix(ob->batCacheid);
     353           0 :                 if (!b) {
     354           0 :                         BBPunfix(hb->batCacheid);
     355           0 :                         throw(MAL, "mkey.rotate_xor_hash", GDK_EXCEPTION);
     356             :                 }
     357             :         }
     358             : 
     359       14169 :         n = BATcount(b);
     360             : 
     361       14169 :         bn = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT);
     362       14167 :         if (bn == NULL) {
     363           0 :                 BBPunfix(hb->batCacheid);
     364           0 :                 BBPunfix(b->batCacheid);
     365           0 :                 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     366             :         }
     367       14167 :         BATsetcount(bn, n);
     368             : 
     369       14169 :         r = (ulng *) Tloc(bn, 0);
     370             : 
     371       14169 :         BATiter bi = bat_iterator(b);
     372       14167 :         BATiter hbi = bat_iterator(hb);
     373       14166 :         h = (const ulng *) hbi.base;
     374       14166 :         switch (ATOMstorage(b->ttype)) {
     375         300 :         case TYPE_bte: {
     376         300 :                 const bte *restrict v = (const bte *) bi.base;
     377       23717 :                 for (BUN i = 0; i < n; i++) {
     378       23417 :                         r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_bte(v + i);
     379             :                 }
     380             :                 break;
     381             :         }
     382         492 :         case TYPE_sht: {
     383         492 :                 const sht *restrict v = (const sht *) bi.base;
     384       89377 :                 for (BUN i = 0; i < n; i++) {
     385       88885 :                         r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_sht(v + i);
     386             :                 }
     387             :                 break;
     388             :         }
     389        8718 :         case TYPE_int:
     390             :         case TYPE_flt: {
     391        8718 :                 const int *restrict v = (const int *) bi.base;
     392    51340320 :                 for (BUN i = 0; i < n; i++) {
     393    51331602 :                         r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_int(v + i);
     394             :                 }
     395             :                 break;
     396             :         }
     397         577 :         case TYPE_lng:
     398             :         case TYPE_dbl: {
     399         577 :                 const lng *restrict v = (const lng *) bi.base;
     400      405412 :                 for (BUN i = 0; i < n; i++) {
     401      404835 :                         r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_lng(v + i);
     402             :                 }
     403             :                 break;
     404             :         }
     405             : #ifdef HAVE_HGE
     406          28 :         case TYPE_hge: {
     407          28 :                 const hge *restrict v = (const hge *) bi.base;
     408         198 :                 for (BUN i = 0; i < n; i++) {
     409         170 :                         r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_hge(v + i);
     410             :                 }
     411             :                 break;
     412             :         }
     413             : #endif
     414        4051 :         case TYPE_str:
     415             :         default: {
     416        4051 :                 BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
     417             : 
     418     1343309 :                 for (BUN i = 0; i < n; i++)
     419     1339269 :                         r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ (ulng) (*hash)(BUNtail(bi, i));
     420             :                 break;
     421             :         }
     422             :         }
     423       14155 :         bat_iterator_end(&bi);
     424       14169 :         bat_iterator_end(&hbi);
     425       14168 :         if (bn->batCount <= 1) {
     426        5952 :                 BATkey(bn, true);
     427        5952 :                 bn->tsorted = bn->trevsorted = true;
     428             :         } else {
     429        8216 :                 BATkey(bn, false);
     430        8217 :                 bn->tsorted = bn->trevsorted = false;
     431             :         }
     432       14169 :         bn->tnonil = false;
     433       14169 :         bn->tnil = false;
     434             : 
     435       14169 :         BBPkeepref(*res = bn->batCacheid);
     436       14169 :         BBPunfix(b->batCacheid);
     437       14168 :         BBPunfix(hb->batCacheid);
     438       14169 :         return MAL_SUCCEED;
     439             : }
     440             : 
     441             : static str
     442           0 : MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
     443             : {
     444           0 :         bat *res = getArgReference_bat(stk, p, 0);
     445           0 :         bat *hid = getArgReference_bat(stk, p, 1);
     446           0 :         int lbit = *getArgReference_int(stk, p, 2);
     447           0 :         int tpe = getArgType(mb, p, 3);
     448           0 :         ptr pval = getArgReference(stk, p, 3);
     449             :         BAT *hb, *bn;
     450             :         int rbit = (int) sizeof(lng) * 8 - lbit;
     451             :         ulng *r;
     452             :         const ulng *h;
     453             :         ulng val;
     454             :         BUN n;
     455             : 
     456             :         (void) cntxt;
     457             : 
     458           0 :         if ((hb = BATdescriptor(*hid)) == NULL)
     459           0 :                 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     460             : 
     461           0 :         n = BATcount(hb);
     462             : 
     463           0 :         bn = COLnew(hb->hseqbase, TYPE_lng, n, TRANSIENT);
     464           0 :         if (bn == NULL) {
     465           0 :                 BBPunfix(hb->batCacheid);
     466           0 :                 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     467             :         }
     468           0 :         BATsetcount(bn, n);
     469             : 
     470           0 :         switch (ATOMstorage(tpe)) {
     471           0 :         case TYPE_bte:
     472           0 :                 val = MKEYHASH_bte(pval);
     473           0 :                 break;
     474           0 :         case TYPE_sht:
     475           0 :                 val = MKEYHASH_sht(pval);
     476           0 :                 break;
     477           0 :         case TYPE_int:
     478             :         case TYPE_flt:
     479           0 :                 val = MKEYHASH_int(pval);
     480           0 :                 break;
     481           0 :         case TYPE_lng:
     482             :         case TYPE_dbl:
     483           0 :                 val = MKEYHASH_lng(pval);
     484           0 :                 break;
     485             : #ifdef HAVE_HGE
     486           0 :         case TYPE_hge:
     487           0 :                 val = MKEYHASH_hge(pval);
     488           0 :                 break;
     489             : #endif
     490           0 :         default:
     491           0 :                 if (ATOMextern(tpe))
     492           0 :                         val = (ulng) ATOMhash(tpe, *(ptr*)pval);
     493             :                 else
     494           0 :                         val = (ulng) ATOMhash(tpe, pval);
     495             :                 break;
     496             :         }
     497             : 
     498           0 :         r = (ulng *) Tloc(bn, 0);
     499           0 :         BATiter hbi = bat_iterator(hb);
     500           0 :         h = (const ulng *) hbi.base;
     501             : 
     502           0 :         while (n-- > 0) {
     503           0 :                 *r++ = GDK_ROTATE(*h, lbit, rbit) ^ val;
     504           0 :                 h++;
     505             :         }
     506           0 :         bat_iterator_end(&hbi);
     507             : 
     508           0 :         if (bn->batCount <= 1) {
     509           0 :                 BATkey(bn, true);
     510           0 :                 bn->tsorted = bn->trevsorted = true;
     511             :         } else {
     512           0 :                 BATkey(bn, false);
     513           0 :                 bn->tsorted = bn->trevsorted = false;
     514             :         }
     515           0 :         bn->tnonil = false;
     516           0 :         bn->tnil = false;
     517             : 
     518           0 :         BBPkeepref(*res = bn->batCacheid);
     519           0 :         BBPunfix(hb->batCacheid);
     520           0 :         return MAL_SUCCEED;
     521             : }
     522             : 
     523             : static str
     524           0 : MKEYconstbulk_rotate_xor_hash(bat *res, const lng *h, const int *nbits, const bat *bid)
     525             : {
     526             :         BAT *b, *bn;
     527           0 :         int lbit = *nbits;
     528             :         int rbit = (int) sizeof(lng) * 8 - lbit;
     529             :         ulng *restrict r;
     530             :         BUN n;
     531             : 
     532           0 :         if ((b = BATdescriptor(*bid)) == NULL)
     533           0 :                 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     534             : 
     535           0 :         n = BATcount(b);
     536             : 
     537           0 :         bn = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT);
     538           0 :         if (bn == NULL) {
     539           0 :                 BBPunfix(b->batCacheid);
     540           0 :                 throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     541             :         }
     542           0 :         BATsetcount(bn, n);
     543             : 
     544           0 :         r = (ulng *) Tloc(bn, 0);
     545             : 
     546           0 :         BATiter bi = bat_iterator(b);
     547           0 :         switch (ATOMstorage(b->ttype)) {
     548           0 :         case TYPE_bte: {
     549           0 :                 const bte *restrict v = (const bte *) bi.base;
     550           0 :                 for (BUN i = 0; i < n; i++)
     551           0 :                         r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ MKEYHASH_bte(v + i);
     552             :                 break;
     553             :         }
     554           0 :         case TYPE_sht: {
     555           0 :                 const sht *restrict v = (const sht *) bi.base;
     556           0 :                 for (BUN i = 0; i < n; i++)
     557           0 :                         r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ MKEYHASH_sht(v + i);
     558             :                 break;
     559             :         }
     560           0 :         case TYPE_int:
     561             :         case TYPE_flt: {
     562           0 :                 const int *restrict v = (const int *) bi.base;
     563           0 :                 for (BUN i = 0; i < n; i++)
     564           0 :                         r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ MKEYHASH_int(v + i);
     565             :                 break;
     566             :         }
     567           0 :         case TYPE_lng:
     568             :         case TYPE_dbl: {
     569           0 :                 const lng *restrict v = (const lng *) bi.base;
     570           0 :                 for (BUN i = 0; i < n; i++)
     571           0 :                         r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ MKEYHASH_lng(v + i);
     572             :                 break;
     573             :         }
     574             : #ifdef HAVE_HGE
     575           0 :         case TYPE_hge: {
     576           0 :                 const hge *restrict v = (const hge *) bi.base;
     577           0 :                 for (BUN i = 0; i < n; i++)
     578           0 :                         r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ MKEYHASH_hge(v + i);
     579             :                 break;
     580             :         }
     581             : #endif
     582           0 :         case TYPE_str:
     583             :         default: {
     584           0 :                 BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
     585             : 
     586           0 :                 for (BUN i = 0; i < n; i++)
     587           0 :                         r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ (ulng) (*hash)(BUNtail(bi, i));
     588             :                 break;
     589             :         }
     590             :         }
     591           0 :         bat_iterator_end(&bi);
     592           0 :         if (bn->batCount <= 1) {
     593           0 :                 BATkey(bn, true);
     594           0 :                 bn->tsorted = bn->trevsorted = true;
     595             :         } else {
     596           0 :                 BATkey(bn, false);
     597           0 :                 bn->tsorted = bn->trevsorted = false;
     598             :         }
     599           0 :         bn->tnonil = false;
     600           0 :         bn->tnil = false;
     601             : 
     602           0 :         BBPkeepref(*res = bn->batCacheid);
     603           0 :         BBPunfix(b->batCacheid);
     604           0 :         return MAL_SUCCEED;
     605             : }
     606             : 
     607             : #include "mel.h"
     608             : mel_func mkey_init_funcs[] = {
     609             :  command("mkey", "rotate", MKEYrotate, false, "left-rotate an int by nbits", args(1,3, arg("",lng),arg("v",lng),arg("nbits",int))),
     610             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),argany("v",0))),
     611             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",bit))),
     612             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",bte))),
     613             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",sht))),
     614             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",int))),
     615             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",flt))),
     616             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",dbl))),
     617             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",lng))),
     618             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",str))),
     619             :  pattern("mkey", "bulk_rotate_xor_hash", MKEYrotate_xor_hash, false, "post: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, arg("",lng),arg("h",lng),arg("nbits",int),argany("v",0))),
     620             :  command("mkey", "bulk_rotate_xor_hash", MKEYconstbulk_rotate_xor_hash, false, "pre:  h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),arg("h",lng),arg("nbits",int),batargany("b",1))),
     621             :  pattern("mkey", "bulk_rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre:  h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0))),
     622             :  command("mkey", "bulk_rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre:  h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",1))),
     623             :  command("batmkey", "hash", MKEYbathash, false, "calculate a hash value", args(1,2, batarg("",lng),batargany("b",1))),
     624             :  pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",bte))),
     625             :  command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",bte))),
     626             :  pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",sht))),
     627             :  command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",sht))),
     628             :  pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",int))),
     629             :  command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",int))),
     630             :  pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",lng))),
     631             :  command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",lng))),
     632             :  pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",oid))),
     633             :  command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",oid))),
     634             :  pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",lng))),
     635             :  command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",lng))),
     636             :  pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",flt))),
     637             :  command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",flt))),
     638             :  pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",dbl))),
     639             :  command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",dbl))),
     640             :  pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),argany("v",0))),
     641             :  command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batargany("b",1))),
     642             :  pattern("calc", "rotate_xor_hash", MKEYrotate_xor_hash, false, "", args(1,4, arg("",lng),arg("h",lng),arg("nbits",int),argany("v",1))),
     643             :  command("batcalc", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "", args(1,4, batarg("",int),batarg("h",lng),arg("nbits",int),batargany("b",1))),
     644             : #ifdef HAVE_HGE
     645             :  pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",hge))),
     646             :  pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",hge))),
     647             :  command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",hge))),
     648             : #endif
     649             :  { .imp=NULL }
     650             : };
     651             : #include "mal_import.h"
     652             : #ifdef _MSC_VER
     653             : #undef read
     654             : #pragma section(".CRT$XCU",read)
     655             : #endif
     656         257 : LIB_STARTUP_FUNC(init_mkey_mal)
     657         257 : { mal_module("mkey", NULL, mkey_init_funcs); }

Generated by: LCOV version 1.14