Line data Source code
1 : /*
2 : * This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 : *
6 : * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
7 : */
8 :
9 : /*
10 : - * @- The Problem
11 : - * When creating a join, we want to make a unique key of the attributes on both
12 : - * sides and then join these keys. Consider the following BATs.
13 : - *
14 : - * @verbatim
15 : - * orders customer link
16 : - * ==================== ===================== ===========
17 : - * zipcode h_nr zipcode hnr oid cid
18 : - * o1 13 9 c1 11 10 o1 c5
19 : - * o2 11 10 c2 11 11 o2 c1
20 : - * o3 11 11 c3 12 2 o3 c2
21 : - * o4 12 5 c4 12 1 o4 nil
22 : - * o5 11 10 c5 13 9 o5 c1
23 : - * o6 12 2 c6 14 7 o6 c3
24 : - * o7 13 9 o7 c5
25 : - * o8 12 1 o8 c4
26 : - * o9 13 9 o9 c5
27 : - * @end verbatim
28 : - *
29 : - * The current approach is designed to take minimal memory, as our previous
30 : - * solutions to the problem did not scale well. In case of singular keys,
31 : - * the link is executed by a simple join. Before going into the join, we
32 : - * make sure the end result size is not too large, which is done by looking
33 : - * at relation sizes (if the other key is unique) or, if that is not possible,
34 : - * by computing the exact join size.
35 : - *
36 : - * The join algorithm was also improved to do dynamic sampling to determine
37 : - * with high accuracy the join size, so that we can alloc in one go a memory
38 : - * region of sufficient size. This also reduces the ds\_link memory requirements.
39 : - *
40 : - * For compound keys, those that consist of multiple attributes, we now compute
41 : - * a derived column that contains an integer hash value derived from all
42 : - * key columns.
43 : - * This is done by computing a hash value for each individual key column
44 : - * and combining those by bitwise XOR and left-rotation. That is, for each
45 : - * column,we rotate the working hash value by N bits and XOR the hash value
46 : - * of the column over it. The working hash value is initialized with zero,
47 : - * and after all columns are processed, this working value is used as output.
48 : - * Computing the hash value for all columns in the key for one table is done
49 : - * by the command hash(). Hence, we do hash on both sides, and join
50 : - * that together with a simple join:
51 : - *
52 : - * @code{join(hash(keys), hash(keys.reverse);}
53 : - *
54 : - * One complication of this procedure are nil values:
55 : - * @table
56 : - * @itemize
57 : - * @item
58 : - * it may happen that the final hash-value (an int formed by a
59 : - * random bit pattern) accidentally has the value of int(nil).
60 : - * Notice that join never matches nil values.
61 : - * Hence these accidental nils must be replaced by a begin value (currently: 0).
62 : - * @item
63 : - * in case any of the compound key values is nil, our nil semantics
64 : - * require us that those tuples may never match on a join. Consequently,
65 : - * during the hash() processing of all compound key columns for computing
66 : - * the hash value, we also maintain a bit-bat that records which tuples had
67 : - * a nil value. The bit-bat is initialized to false, and the results of the
68 : - * nil-check on each column is OR-ed to it.
69 : - * Afterwards, the hash-value of all tuples that have this nil-bit set to
70 : - * TRUE are forced to int(nil), which will exclude them from matching.
71 : - * @end itemize
72 : - *
73 : - * Joining on hash values produces a @emph{superset} of the join result:
74 : - * it may happen that two different key combinations hash on the same value,
75 : - * which will make them match on the join (false hits). The final part
76 : - * of the ds\_link therefore consists of filtering out the false hits.
77 : - * This is done incrementally by joining back the join result to the original
78 : - * columns, incrementally one by one for each pair of corresponding
79 : - * columns. These values are compared with each other and we AND the
80 : - * result of this comparison together for each pair of columns.
81 : - * The bat containing these bits is initialized to all TRUE and serves as
82 : - * final result after all column pairs have been compared.
83 : - * The initial join result is finally filtered with this bit-bat.
84 : - *
85 : - * Joining back from the initial join-result to the original columns on
86 : - * both sides takes quite a lot of memory. For this reason, the false
87 : - * hit-filtering is done in slices (not all tuples at one time).
88 : - * In this way the memory requirements of this phase are kept low.
89 : - * In fact, the most memory demanding part of the join is the int-join
90 : - * on hash number, which takes N*24 bytes (where N= |L| = |R|).
91 : - * In comparison, the previous CTmultigroup/CTmultiderive approach
92 : - * took N*48 bytes. Additionally, by making it possible to use merge-sort,
93 : - * it avoids severe performance degradation (memory thrashing) as produced
94 : - * by the old ds\_link when the inner join relation would be larger than memory.
95 : - *
96 : - * If ds\_link performance is still an issue, the sort-merge join used here
97 : - * could be replaced by partitioned hash-join with radix-cluster/decluster.
98 : - *
99 : - * @+ Implementation
100 : - */
101 :
102 : /*
103 : * (c) Peter Boncz, Stefan Manegold, Niels Nes
104 : *
105 : * new functionality for the low-resource-consumption. It will
106 : * first one by one create a hash value out of the multiple attributes.
107 : * This hash value is computed by xoring and rotating individual hash
108 : * values together. We create a hash and rotate command to do this.
109 : */
110 :
111 : #include "monetdb_config.h"
112 : #include "mal.h"
113 : #include "mal_interpreter.h"
114 : #include "mal_exception.h"
115 :
116 : #define MKEYHASH_bte(valp) ((ulng) (lng) *(const bte*)(valp))
117 : #define MKEYHASH_sht(valp) ((ulng) (lng) *(const sht*)(valp))
118 : #define MKEYHASH_int(valp) ((ulng) (lng) *(const int*)(valp))
119 : #define MKEYHASH_lng(valp) ((ulng) (lng) *(const lng*)(valp))
120 : #ifdef HAVE_HGE
121 : #define MKEYHASH_hge(valp) ((ulng) (*(const uhge *)(valp) >> 64) ^ \
122 : (ulng) *(const uhge *)(valp))
123 : #endif
124 :
125 : static inline ulng
126 : GDK_ROTATE(ulng x, int y, int z)
127 : {
128 53189955 : return (x << y) | (x >> z);
129 : }
130 :
131 : /* TODO: nil handling. however; we do not want to lose time in bulk_rotate_xor_hash with that */
132 : static str
133 0 : MKEYrotate(lng *res, const lng *val, const int *n)
134 : {
135 0 : *res = (lng) GDK_ROTATE((ulng) *val, *n, (sizeof(lng)*8) - *n);
136 0 : return MAL_SUCCEED;
137 : }
138 :
139 : static str
140 1020 : MKEYhash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
141 : {
142 1020 : lng *res = getArgReference_lng(stk,p,0);
143 1020 : ptr val = getArgReference(stk,p,1);
144 1020 : int tpe = getArgType(mb,p,1);
145 :
146 : (void) cntxt;
147 1020 : switch (ATOMstorage(tpe)) {
148 1 : case TYPE_void:
149 1 : *res = lng_nil; /* It can be called from SQL */
150 1 : break;
151 : case TYPE_bat:
152 : case TYPE_ptr:
153 : // illegal types, avoid falling into the default case.
154 0 : assert(0);
155 : case TYPE_bte:
156 0 : *res = (lng) MKEYHASH_bte(val);
157 0 : break;
158 0 : case TYPE_sht:
159 0 : *res = (lng) MKEYHASH_sht(val);
160 0 : break;
161 946 : case TYPE_int:
162 : case TYPE_flt:
163 946 : *res = (lng) MKEYHASH_int(val);
164 946 : break;
165 4 : case TYPE_lng:
166 : case TYPE_dbl:
167 4 : *res = (lng) MKEYHASH_lng(val);
168 4 : break;
169 : #ifdef HAVE_HGE
170 0 : case TYPE_hge:
171 0 : *res = (lng) MKEYHASH_hge(val);
172 0 : break;
173 : #endif
174 69 : default:
175 69 : if (ATOMextern(tpe))
176 69 : *res = (lng) ATOMhash(tpe, *(ptr*)val);
177 : else
178 0 : *res = (lng) ATOMhash(tpe, val);
179 : break;
180 : }
181 1020 : return MAL_SUCCEED;
182 : }
183 :
184 : static str
185 12418 : MKEYbathash(bat *res, const bat *bid)
186 : {
187 : BAT *b, *dst;
188 : ulng *restrict r;
189 : BUN n;
190 :
191 12418 : if ((b = BATdescriptor(*bid)) == NULL)
192 0 : throw(SQL, "mkey.bathash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
193 :
194 12417 : n = BATcount(b);
195 12417 : dst = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT);
196 12417 : if (dst == NULL) {
197 0 : BBPunfix(b->batCacheid);
198 0 : throw(SQL, "mkey.bathash", SQLSTATE(HY013) MAL_MALLOC_FAIL);
199 : }
200 12417 : BATsetcount(dst, n);
201 :
202 12418 : r = (ulng *) Tloc(dst, 0);
203 :
204 12418 : BATiter bi = bat_iterator(b);
205 12417 : switch (ATOMstorage(b->ttype)) {
206 0 : case TYPE_void: {
207 0 : oid o = b->tseqbase;
208 0 : if (is_oid_nil(o))
209 0 : for (BUN i = 0; i < n; i++)
210 0 : r[i] = (ulng) lng_nil;
211 : else
212 0 : for (BUN i = 0; i < n; i++)
213 0 : r[i] = o + i;
214 : break;
215 : }
216 10 : case TYPE_bte: {
217 10 : const bte *restrict v = (const bte *) bi.base;
218 45 : for (BUN i = 0; i < n; i++)
219 35 : r[i] = MKEYHASH_bte(v + i);
220 : break;
221 : }
222 7 : case TYPE_sht: {
223 7 : const sht *restrict v = (const sht *) bi.base;
224 31492 : for (BUN i = 0; i < n; i++)
225 31485 : r[i] = MKEYHASH_sht(v + i);
226 : break;
227 : }
228 11574 : case TYPE_int:
229 : case TYPE_flt: {
230 11574 : const int *restrict v = (const int *) bi.base;
231 35295444 : for (BUN i = 0; i < n; i++)
232 35283870 : r[i] = MKEYHASH_int(v + i);
233 : break;
234 : }
235 138 : case TYPE_lng:
236 : case TYPE_dbl: {
237 138 : const lng *restrict v = (const lng *) bi.base;
238 16013 : for (BUN i = 0; i < n; i++)
239 15875 : r[i] = MKEYHASH_lng(v + i);
240 : break;
241 : }
242 : #ifdef HAVE_HGE
243 38 : case TYPE_hge: {
244 38 : const hge *restrict v = (const hge *) bi.base;
245 221 : for (BUN i = 0; i < n; i++)
246 183 : r[i] = MKEYHASH_hge(v + i);
247 : break;
248 : }
249 : #endif
250 650 : default: {
251 650 : BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
252 650 : int (*cmp)(const void *, const void *) = ATOMcompare(b->ttype);
253 650 : const void *nil = ATOMnilptr(b->ttype);
254 :
255 461761 : for (BUN i = 0; i < n; i++) {
256 461111 : const void *restrict v = BUNtail(bi, i);
257 461111 : if ((*cmp)(v, nil) == 0)
258 9007 : r[i] = (ulng) lng_nil;
259 : else
260 453297 : r[i] = (ulng) (*hash)(v);
261 : }
262 : break;
263 : }
264 : }
265 12417 : bat_iterator_end(&bi);
266 :
267 12416 : if (dst->batCount <= 1) {
268 5356 : BATkey(dst, true);
269 5356 : dst->tsorted = dst->trevsorted = true;
270 : } else {
271 7060 : BATkey(dst, false);
272 7062 : dst->tsorted = dst->trevsorted = false;
273 : }
274 12418 : dst->tnonil = false;
275 12418 : dst->tnil = false;
276 :
277 12418 : BBPkeepref(*res = dst->batCacheid);
278 12416 : BBPunfix(b->batCacheid);
279 12417 : return MAL_SUCCEED;
280 : }
281 :
282 : static str
283 1779 : MKEYrotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
284 : {
285 1779 : lng *dst = getArgReference_lng(stk, p, 0);
286 1779 : ulng h = (ulng) *getArgReference_lng(stk, p, 1);
287 1779 : int lbit = *getArgReference_int(stk, p, 2);
288 : int rbit = (int) sizeof(lng) * 8 - lbit;
289 1779 : int tpe = getArgType(mb, p, 3);
290 1779 : ptr pval = getArgReference(stk, p, 3);
291 : ulng val;
292 :
293 : (void) cntxt;
294 1777 : switch (ATOMstorage(tpe)) {
295 2 : case TYPE_bte:
296 2 : val = MKEYHASH_bte(pval);
297 2 : break;
298 0 : case TYPE_sht:
299 0 : val = MKEYHASH_sht(pval);
300 0 : break;
301 1525 : case TYPE_int:
302 : case TYPE_flt:
303 1525 : val = MKEYHASH_int(pval);
304 1525 : break;
305 37 : case TYPE_lng:
306 : case TYPE_dbl:
307 37 : val = MKEYHASH_lng(pval);
308 37 : break;
309 : #ifdef HAVE_HGE
310 0 : case TYPE_hge:
311 0 : val = MKEYHASH_hge(pval);
312 0 : break;
313 : #endif
314 213 : default:
315 213 : if (ATOMextern(tpe))
316 213 : val = ATOMhash(tpe, *(ptr*)pval);
317 : else
318 0 : val = ATOMhash(tpe, pval);
319 : break;
320 : }
321 1777 : *dst = (lng) (GDK_ROTATE(h, lbit, rbit) ^ val);
322 1777 : return MAL_SUCCEED;
323 : }
324 :
325 : static str
326 14169 : MKEYbulk_rotate_xor_hash(bat *res, const bat *hid, const int *nbits, const bat *bid)
327 : {
328 : BAT *hb, *b, *bn;
329 14169 : int lbit = *nbits;
330 : int rbit = (int) sizeof(lng) * 8 - lbit;
331 : ulng *restrict r;
332 : const ulng *restrict h;
333 : BUN n;
334 :
335 14169 : if ((hb = BATdescriptor(*hid)) == NULL)
336 0 : throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
337 :
338 14166 : if ((b = BATdescriptor(*bid)) == NULL) {
339 0 : BBPunfix(hb->batCacheid);
340 0 : throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
341 : }
342 :
343 14167 : if (!ALIGNsynced(hb, b) && (BATcount(b) || BATcount(hb))) {
344 0 : BBPunfix(hb->batCacheid);
345 0 : BBPunfix(b->batCacheid);
346 0 : throw(MAL, "mkey.rotate_xor_hash",
347 : OPERATION_FAILED ": input bats are not aligned");
348 : }
349 14169 : if (b->ttype == TYPE_msk || mask_cand(b)) {
350 : BAT *ob = b;
351 0 : b = BATunmask(b);
352 0 : BBPunfix(ob->batCacheid);
353 0 : if (!b) {
354 0 : BBPunfix(hb->batCacheid);
355 0 : throw(MAL, "mkey.rotate_xor_hash", GDK_EXCEPTION);
356 : }
357 : }
358 :
359 14169 : n = BATcount(b);
360 :
361 14169 : bn = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT);
362 14167 : if (bn == NULL) {
363 0 : BBPunfix(hb->batCacheid);
364 0 : BBPunfix(b->batCacheid);
365 0 : throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY013) MAL_MALLOC_FAIL);
366 : }
367 14167 : BATsetcount(bn, n);
368 :
369 14169 : r = (ulng *) Tloc(bn, 0);
370 :
371 14169 : BATiter bi = bat_iterator(b);
372 14167 : BATiter hbi = bat_iterator(hb);
373 14166 : h = (const ulng *) hbi.base;
374 14166 : switch (ATOMstorage(b->ttype)) {
375 300 : case TYPE_bte: {
376 300 : const bte *restrict v = (const bte *) bi.base;
377 23717 : for (BUN i = 0; i < n; i++) {
378 23417 : r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_bte(v + i);
379 : }
380 : break;
381 : }
382 492 : case TYPE_sht: {
383 492 : const sht *restrict v = (const sht *) bi.base;
384 89377 : for (BUN i = 0; i < n; i++) {
385 88885 : r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_sht(v + i);
386 : }
387 : break;
388 : }
389 8718 : case TYPE_int:
390 : case TYPE_flt: {
391 8718 : const int *restrict v = (const int *) bi.base;
392 51340320 : for (BUN i = 0; i < n; i++) {
393 51331602 : r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_int(v + i);
394 : }
395 : break;
396 : }
397 577 : case TYPE_lng:
398 : case TYPE_dbl: {
399 577 : const lng *restrict v = (const lng *) bi.base;
400 405412 : for (BUN i = 0; i < n; i++) {
401 404835 : r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_lng(v + i);
402 : }
403 : break;
404 : }
405 : #ifdef HAVE_HGE
406 28 : case TYPE_hge: {
407 28 : const hge *restrict v = (const hge *) bi.base;
408 198 : for (BUN i = 0; i < n; i++) {
409 170 : r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ MKEYHASH_hge(v + i);
410 : }
411 : break;
412 : }
413 : #endif
414 4051 : case TYPE_str:
415 : default: {
416 4051 : BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
417 :
418 1343309 : for (BUN i = 0; i < n; i++)
419 1339269 : r[i] = GDK_ROTATE(h[i], lbit, rbit) ^ (ulng) (*hash)(BUNtail(bi, i));
420 : break;
421 : }
422 : }
423 14155 : bat_iterator_end(&bi);
424 14169 : bat_iterator_end(&hbi);
425 14168 : if (bn->batCount <= 1) {
426 5952 : BATkey(bn, true);
427 5952 : bn->tsorted = bn->trevsorted = true;
428 : } else {
429 8216 : BATkey(bn, false);
430 8217 : bn->tsorted = bn->trevsorted = false;
431 : }
432 14169 : bn->tnonil = false;
433 14169 : bn->tnil = false;
434 :
435 14169 : BBPkeepref(*res = bn->batCacheid);
436 14169 : BBPunfix(b->batCacheid);
437 14168 : BBPunfix(hb->batCacheid);
438 14169 : return MAL_SUCCEED;
439 : }
440 :
441 : static str
442 0 : MKEYbulkconst_rotate_xor_hash(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
443 : {
444 0 : bat *res = getArgReference_bat(stk, p, 0);
445 0 : bat *hid = getArgReference_bat(stk, p, 1);
446 0 : int lbit = *getArgReference_int(stk, p, 2);
447 0 : int tpe = getArgType(mb, p, 3);
448 0 : ptr pval = getArgReference(stk, p, 3);
449 : BAT *hb, *bn;
450 : int rbit = (int) sizeof(lng) * 8 - lbit;
451 : ulng *r;
452 : const ulng *h;
453 : ulng val;
454 : BUN n;
455 :
456 : (void) cntxt;
457 :
458 0 : if ((hb = BATdescriptor(*hid)) == NULL)
459 0 : throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
460 :
461 0 : n = BATcount(hb);
462 :
463 0 : bn = COLnew(hb->hseqbase, TYPE_lng, n, TRANSIENT);
464 0 : if (bn == NULL) {
465 0 : BBPunfix(hb->batCacheid);
466 0 : throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY013) MAL_MALLOC_FAIL);
467 : }
468 0 : BATsetcount(bn, n);
469 :
470 0 : switch (ATOMstorage(tpe)) {
471 0 : case TYPE_bte:
472 0 : val = MKEYHASH_bte(pval);
473 0 : break;
474 0 : case TYPE_sht:
475 0 : val = MKEYHASH_sht(pval);
476 0 : break;
477 0 : case TYPE_int:
478 : case TYPE_flt:
479 0 : val = MKEYHASH_int(pval);
480 0 : break;
481 0 : case TYPE_lng:
482 : case TYPE_dbl:
483 0 : val = MKEYHASH_lng(pval);
484 0 : break;
485 : #ifdef HAVE_HGE
486 0 : case TYPE_hge:
487 0 : val = MKEYHASH_hge(pval);
488 0 : break;
489 : #endif
490 0 : default:
491 0 : if (ATOMextern(tpe))
492 0 : val = (ulng) ATOMhash(tpe, *(ptr*)pval);
493 : else
494 0 : val = (ulng) ATOMhash(tpe, pval);
495 : break;
496 : }
497 :
498 0 : r = (ulng *) Tloc(bn, 0);
499 0 : BATiter hbi = bat_iterator(hb);
500 0 : h = (const ulng *) hbi.base;
501 :
502 0 : while (n-- > 0) {
503 0 : *r++ = GDK_ROTATE(*h, lbit, rbit) ^ val;
504 0 : h++;
505 : }
506 0 : bat_iterator_end(&hbi);
507 :
508 0 : if (bn->batCount <= 1) {
509 0 : BATkey(bn, true);
510 0 : bn->tsorted = bn->trevsorted = true;
511 : } else {
512 0 : BATkey(bn, false);
513 0 : bn->tsorted = bn->trevsorted = false;
514 : }
515 0 : bn->tnonil = false;
516 0 : bn->tnil = false;
517 :
518 0 : BBPkeepref(*res = bn->batCacheid);
519 0 : BBPunfix(hb->batCacheid);
520 0 : return MAL_SUCCEED;
521 : }
522 :
523 : static str
524 0 : MKEYconstbulk_rotate_xor_hash(bat *res, const lng *h, const int *nbits, const bat *bid)
525 : {
526 : BAT *b, *bn;
527 0 : int lbit = *nbits;
528 : int rbit = (int) sizeof(lng) * 8 - lbit;
529 : ulng *restrict r;
530 : BUN n;
531 :
532 0 : if ((b = BATdescriptor(*bid)) == NULL)
533 0 : throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
534 :
535 0 : n = BATcount(b);
536 :
537 0 : bn = COLnew(b->hseqbase, TYPE_lng, n, TRANSIENT);
538 0 : if (bn == NULL) {
539 0 : BBPunfix(b->batCacheid);
540 0 : throw(MAL, "mkey.rotate_xor_hash", SQLSTATE(HY013) MAL_MALLOC_FAIL);
541 : }
542 0 : BATsetcount(bn, n);
543 :
544 0 : r = (ulng *) Tloc(bn, 0);
545 :
546 0 : BATiter bi = bat_iterator(b);
547 0 : switch (ATOMstorage(b->ttype)) {
548 0 : case TYPE_bte: {
549 0 : const bte *restrict v = (const bte *) bi.base;
550 0 : for (BUN i = 0; i < n; i++)
551 0 : r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ MKEYHASH_bte(v + i);
552 : break;
553 : }
554 0 : case TYPE_sht: {
555 0 : const sht *restrict v = (const sht *) bi.base;
556 0 : for (BUN i = 0; i < n; i++)
557 0 : r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ MKEYHASH_sht(v + i);
558 : break;
559 : }
560 0 : case TYPE_int:
561 : case TYPE_flt: {
562 0 : const int *restrict v = (const int *) bi.base;
563 0 : for (BUN i = 0; i < n; i++)
564 0 : r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ MKEYHASH_int(v + i);
565 : break;
566 : }
567 0 : case TYPE_lng:
568 : case TYPE_dbl: {
569 0 : const lng *restrict v = (const lng *) bi.base;
570 0 : for (BUN i = 0; i < n; i++)
571 0 : r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ MKEYHASH_lng(v + i);
572 : break;
573 : }
574 : #ifdef HAVE_HGE
575 0 : case TYPE_hge: {
576 0 : const hge *restrict v = (const hge *) bi.base;
577 0 : for (BUN i = 0; i < n; i++)
578 0 : r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ MKEYHASH_hge(v + i);
579 : break;
580 : }
581 : #endif
582 0 : case TYPE_str:
583 : default: {
584 0 : BUN (*hash)(const void *) = BATatoms[b->ttype].atomHash;
585 :
586 0 : for (BUN i = 0; i < n; i++)
587 0 : r[i] = GDK_ROTATE((ulng) *h, lbit, rbit) ^ (ulng) (*hash)(BUNtail(bi, i));
588 : break;
589 : }
590 : }
591 0 : bat_iterator_end(&bi);
592 0 : if (bn->batCount <= 1) {
593 0 : BATkey(bn, true);
594 0 : bn->tsorted = bn->trevsorted = true;
595 : } else {
596 0 : BATkey(bn, false);
597 0 : bn->tsorted = bn->trevsorted = false;
598 : }
599 0 : bn->tnonil = false;
600 0 : bn->tnil = false;
601 :
602 0 : BBPkeepref(*res = bn->batCacheid);
603 0 : BBPunfix(b->batCacheid);
604 0 : return MAL_SUCCEED;
605 : }
606 :
607 : #include "mel.h"
608 : mel_func mkey_init_funcs[] = {
609 : command("mkey", "rotate", MKEYrotate, false, "left-rotate an int by nbits", args(1,3, arg("",lng),arg("v",lng),arg("nbits",int))),
610 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),argany("v",0))),
611 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",bit))),
612 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",bte))),
613 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",sht))),
614 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",int))),
615 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",flt))),
616 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",dbl))),
617 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",lng))),
618 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",str))),
619 : pattern("mkey", "bulk_rotate_xor_hash", MKEYrotate_xor_hash, false, "post: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, arg("",lng),arg("h",lng),arg("nbits",int),argany("v",0))),
620 : command("mkey", "bulk_rotate_xor_hash", MKEYconstbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),arg("h",lng),arg("nbits",int),batargany("b",1))),
621 : pattern("mkey", "bulk_rotate_xor_hash", MKEYbulkconst_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),argany("v",0))),
622 : command("mkey", "bulk_rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "pre: h and b should be synced on head\npost: [:xor=]([:rotate=](h, nbits), [hash](b))", args(1,4, batarg("",lng),batarg("h",lng),arg("nbits",int),batargany("b",1))),
623 : command("batmkey", "hash", MKEYbathash, false, "calculate a hash value", args(1,2, batarg("",lng),batargany("b",1))),
624 : pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",bte))),
625 : command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",bte))),
626 : pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",sht))),
627 : command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",sht))),
628 : pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",int))),
629 : command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",int))),
630 : pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",lng))),
631 : command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",lng))),
632 : pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",oid))),
633 : command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",oid))),
634 : pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",lng))),
635 : command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",lng))),
636 : pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",flt))),
637 : command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",flt))),
638 : pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",dbl))),
639 : command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",dbl))),
640 : pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),argany("v",0))),
641 : command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batargany("b",1))),
642 : pattern("calc", "rotate_xor_hash", MKEYrotate_xor_hash, false, "", args(1,4, arg("",lng),arg("h",lng),arg("nbits",int),argany("v",1))),
643 : command("batcalc", "rotate_xor_hash", MKEYbulk_rotate_xor_hash, false, "", args(1,4, batarg("",int),batarg("h",lng),arg("nbits",int),batargany("b",1))),
644 : #ifdef HAVE_HGE
645 : pattern("mkey", "hash", MKEYhash, false, "calculate a hash value", args(1,2, arg("",lng),arg("v",hge))),
646 : pattern("calc", "hash", MKEYhash, false, "", args(1,2, arg("",lng),arg("v",hge))),
647 : command("batcalc", "hash", MKEYbathash, false, "", args(1,2, batarg("",lng),batarg("b",hge))),
648 : #endif
649 : { .imp=NULL }
650 : };
651 : #include "mal_import.h"
652 : #ifdef _MSC_VER
653 : #undef read
654 : #pragma section(".CRT$XCU",read)
655 : #endif
656 257 : LIB_STARTUP_FUNC(init_mkey_mal)
657 257 : { mal_module("mkey", NULL, mkey_init_funcs); }
|