Line data Source code
1 : /*
2 : * This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 : *
6 : * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
7 : */
8 :
9 : /*
10 : * author M.L.Kersten
11 : * BAT Iterators
12 : * Many low level algorithms rely on an iterator to break a
13 : * collection into smaller pieces. Each piece is subsequently processed
14 : * by a block.
15 : *
16 : * For very large BATs it may make sense to break it into chunks
17 : * and process them separately to solve a query. An iterator pair is
18 : * provided to chop a BAT into fixed size elements.
19 : * Each chunk is made available as a BATview.
20 : * It provides read-only access to an underlying BAT. Adjusting the bounds
21 : * is cheap, once the BATview descriptor has been constructed.
22 : *
23 : * The smallest granularity is a single BUN, which can be used
24 : * to realize an iterator over the individual BAT elements.
25 : * For larger sized chunks, the operators return a BATview.
26 : *
27 : * All iterators require storage space to administer the
28 : * location of the next element. The BAT iterator module uses a simple
29 : * lng variable, which also acts as a cursor for barrier statements.
30 : *
31 : * The larger chunks produced are currently static, i.e.
32 : * their size is a parameter of the call. Dynamic chunk sizes
33 : * are interesting for time-series query processing. (See another module)
34 : *
35 : */
36 :
37 : #include "monetdb_config.h"
38 : #include "mal.h"
39 : #include "mal_interpreter.h"
40 : #include "mal_exception.h"
41 :
42 : /*
43 : * We start with the large chunk iterator.
44 : * The definition of the control statements require the same
45 : * control variables, which means that the BATview is accessible
46 : * to determine how far to advance when the next chunk is retrieved.
47 : * The number of elements in the chunk is limited by the granule
48 : * size.
49 : */
50 : static str
51 1 : ITRnewChunk(lng *res, bat *vid, bat *bid, lng *granule)
52 : {
53 : BAT *b, *view;
54 : BUN cnt;
55 :
56 1 : if ((b = BATdescriptor(*bid)) == NULL) {
57 0 : throw(MAL, "chop.newChunk", INTERNAL_BAT_ACCESS);
58 : }
59 1 : cnt = BATcount(b);
60 1 : view = VIEWcreate(b->hseqbase, b);
61 1 : if (view == NULL) {
62 0 : BBPunfix(b->batCacheid);
63 0 : throw(MAL, "chop.newChunk", GDK_EXCEPTION);
64 : }
65 :
66 : /* printf("set bat chunk bound to " LLFMT " 0 - " BUNFMT "\n",
67 : *granule, MIN(cnt,(BUN) *granule)); */
68 1 : VIEWbounds(b, view, 0, MIN(cnt, (BUN) * granule));
69 1 : *vid = view->batCacheid;
70 1 : BBPkeepref(view->batCacheid);
71 1 : BBPunfix(b->batCacheid);
72 1 : *res = 0;
73 1 : return MAL_SUCCEED;
74 : }
75 :
76 : /*
77 : * The nextChunk version advances the reader,
78 : * which also means that the view descriptor is already available.
79 : * The granule size may differ in each call.
80 : */
81 : static str
82 3 : ITRnextChunk(lng *res, bat *vid, bat *bid, lng *granule)
83 : {
84 : BAT *b, *view;
85 : BUN i;
86 :
87 3 : if ((b = BATdescriptor(*bid)) == NULL) {
88 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
89 : }
90 3 : if ((view = BATdescriptor(*vid)) == NULL) {
91 0 : BBPunfix(b->batCacheid);
92 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
93 : }
94 3 : i = (BUN) (*res + BATcount(view));
95 3 : if (i >= BUNlast(b)) {
96 1 : *res = lng_nil;
97 1 : *vid = 0;
98 1 : BBPunfix(view->batCacheid);
99 1 : BBPunfix(b->batCacheid);
100 1 : return MAL_SUCCEED;
101 : }
102 : /* printf("set bat chunk bound to " BUNFMT " - " BUNFMT " \n",
103 : i, i+(BUN) *granule-1); */
104 2 : VIEWbounds(b, view, i, i + (BUN) * granule);
105 2 : BAThseqbase(view, is_oid_nil(b->hseqbase) ? oid_nil : b->hseqbase + i);
106 2 : BBPkeepref(*vid = view->batCacheid);
107 2 : BBPunfix(b->batCacheid);
108 2 : *res = i;
109 2 : return MAL_SUCCEED;
110 : }
111 :
112 : /*
113 : * @-
114 : * The BUN- and BAT-stream manipulate a long handle, i.e.
115 : * the destination variable. It assumes it has been set to
116 : * zero as part of runtime stack initialization. Subsequently,
117 : * it fetches a bun and returns the increment to the control
118 : * variable. If it returns zero the control variable has been reset
119 : * to zero and end of stream has been reached.
120 : */
121 : static str
122 422 : ITRbunIterator(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
123 : {
124 : BATiter bi;
125 : BAT *b;
126 : oid *head;
127 : bat *bid;
128 : ValPtr tail;
129 :
130 : (void) cntxt;
131 : (void) mb;
132 422 : head = getArgReference_oid(stk, pci, 0);
133 422 : tail = &stk->stk[pci->argv[1]];
134 422 : bid = getArgReference_bat(stk, pci, 2);
135 :
136 422 : if ((b = BATdescriptor(*bid)) == NULL) {
137 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
138 : }
139 :
140 422 : if (BATcount(b) == 0) {
141 307 : *head = oid_nil;
142 307 : BBPunfix(b->batCacheid);
143 307 : return MAL_SUCCEED;
144 : }
145 115 : *head = 0;
146 :
147 115 : bi = bat_iterator(b);
148 115 : if (VALinit(tail, b->ttype, BUNtail(bi, *head)) == NULL) {
149 0 : bat_iterator_end(&bi);
150 0 : BBPunfix(b->batCacheid);
151 0 : throw(MAL, "iterator.nextChunk", SQLSTATE(HY013) MAL_MALLOC_FAIL);
152 : }
153 115 : bat_iterator_end(&bi);
154 115 : BBPunfix(b->batCacheid);
155 115 : return MAL_SUCCEED;
156 : }
157 :
158 : static str
159 25962 : ITRbunNext(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
160 : {
161 : BATiter bi;
162 : BAT *b;
163 : oid *head;
164 : bat *bid;
165 : ValPtr tail;
166 :
167 : (void) cntxt;
168 : (void) mb;
169 25962 : head = getArgReference_oid(stk, pci, 0);
170 25962 : tail = &stk->stk[pci->argv[1]];
171 25962 : bid = getArgReference_bat(stk, pci, 2);
172 :
173 25962 : if ((b = BATdescriptor(*bid)) == NULL) {
174 0 : throw(MAL, "iterator.nextChunk", INTERNAL_BAT_ACCESS);
175 : }
176 :
177 25962 : *head = *head + 1;
178 25962 : if (*head >= BUNlast(b)) {
179 110 : *head = oid_nil;
180 110 : BBPunfix(b->batCacheid);
181 110 : return MAL_SUCCEED;
182 : }
183 25852 : bi = bat_iterator(b);
184 25852 : if (VALinit(tail, b->ttype, BUNtail(bi, *head)) == NULL) {
185 0 : bat_iterator_end(&bi);
186 0 : BBPunfix(b->batCacheid);
187 0 : throw(MAL, "iterator.nextChunk", SQLSTATE(HY013) MAL_MALLOC_FAIL);
188 : }
189 25852 : bat_iterator_end(&bi);
190 25852 : BBPunfix(b->batCacheid);
191 25852 : return MAL_SUCCEED;
192 : }
193 :
194 2 : static str ITRnext_oid(oid *i, oid *step, oid *last){
195 2 : oid v = *i;
196 2 : v = v + *step;
197 2 : *i = v;
198 2 : if ( *last <= v )
199 1 : *i = oid_nil;
200 2 : return MAL_SUCCEED;
201 : }
202 14210250 : static str ITRnext_lng(lng *i, lng *step, lng *last){
203 14210250 : lng v = *i;
204 14210250 : v = v + *step;
205 14210250 : *i = v;
206 14210250 : if ( *last <= v )
207 14 : *i = lng_nil;
208 14210250 : return MAL_SUCCEED;
209 : }
210 : #ifdef HAVE_HGE
211 0 : static str ITRnext_hge(hge *i, hge *step, hge *last){
212 0 : hge v = *i;
213 0 : v = v + *step;
214 0 : *i = v;
215 0 : if ( *last <= v )
216 0 : *i = hge_nil;
217 0 : return MAL_SUCCEED;
218 : }
219 : #endif
220 1981129 : static str ITRnext_int(int *i, int *step, int *last){
221 1981129 : int v = *i;
222 1981129 : v = v + *step;
223 1981129 : *i = v;
224 1981129 : if ( *last <= v )
225 6 : *i = int_nil;
226 1981129 : return MAL_SUCCEED;
227 : }
228 0 : static str ITRnext_sht(sht *i, sht *step, sht *last){
229 0 : sht v = *i;
230 0 : v = v + *step;
231 0 : *i = v;
232 0 : if ( *last <= v )
233 0 : *i = int_nil;
234 0 : return MAL_SUCCEED;
235 : }
236 2 : static str ITRnext_flt(flt *i, flt *step, flt *last){
237 2 : flt v = *i;
238 2 : v = v + *step;
239 2 : *i = v;
240 2 : if ( *last <= v )
241 1 : *i = flt_nil;
242 2 : return MAL_SUCCEED;
243 : }
244 0 : static str ITRnext_dbl(dbl *i, dbl *step, dbl *last){
245 0 : dbl v = *i;
246 0 : v = v + *step;
247 0 : *i = v;
248 0 : if ( *last <= v )
249 0 : *i = dbl_nil;
250 0 : return MAL_SUCCEED;
251 : }
252 :
253 : #include "mel.h"
254 : mel_func iterator_init_funcs[] = {
255 : command("iterator", "new", ITRnewChunk, false, "Create an iterator with fixed granule size.\nThe result is a view.", args(2,4, arg("",lng),batargany("",2),batargany("b",2),arg("size",lng))),
256 : command("iterator", "next", ITRnextChunk, false, "Produce the next chunk for processing.", args(2,4, arg("",lng),batargany("",2),batargany("b",2),arg("size",lng))),
257 : pattern("iterator", "new", ITRbunIterator, false, "Process the buns one by one extracted from a void table.", args(2,3, arg("h",oid),argany("t",2),batargany("b",2))),
258 : pattern("iterator", "next", ITRbunNext, false, "Produce the next bun for processing.", args(2,3, arg("h",oid),argany("t",2),batargany("b",2))),
259 : command("iterator", "next", ITRnext_oid, false, "", args(1,3, arg("",oid),arg("step",oid),arg("last",oid))),
260 : command("iterator", "next", ITRnext_sht, false, "", args(1,3, arg("",sht),arg("step",sht),arg("last",sht))),
261 : command("iterator", "next", ITRnext_int, false, "", args(1,3, arg("",int),arg("step",int),arg("last",int))),
262 : command("iterator", "next", ITRnext_lng, false, "", args(1,3, arg("",lng),arg("step",lng),arg("last",lng))),
263 : command("iterator", "next", ITRnext_flt, false, "", args(1,3, arg("",flt),arg("step",flt),arg("last",flt))),
264 : command("iterator", "next", ITRnext_dbl, false, "Advances the iterator with a fixed value", args(1,3, arg("",dbl),arg("step",dbl),arg("last",dbl))),
265 : #ifdef HAVE_HGE
266 : command("iterator", "next", ITRnext_hge, false, "", args(1,3, arg("",hge),arg("step",hge),arg("last",hge))),
267 : #endif
268 : { .imp=NULL }
269 : };
270 : #include "mal_import.h"
271 : #ifdef _MSC_VER
272 : #undef read
273 : #pragma section(".CRT$XCU",read)
274 : #endif
275 257 : LIB_STARTUP_FUNC(init_iterator_mal)
276 257 : { mal_module("iterator", NULL, iterator_init_funcs); }
|