LCOV - code coverage report
Current view: top level - monetdb5/modules/mal - orderidx.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 40 224 17.9 %
Date: 2021-01-13 20:07:21 Functions: 5 8 62.5 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  * (c) Martin Kersten, Lefteris Sidirourgos
      11             :  * Implement a parallel sort-merge MAL program generator
      12             :  */
      13             : #include "monetdb_config.h"
      14             : #include "orderidx.h"
      15             : #include "gdk.h"
      16             : 
      17             : #define MIN_PIECE       ((BUN) 1000)    /* TODO use realistic size in production */
      18             : 
      19             : str
      20          67 : OIDXdropImplementation(Client cntxt, BAT *b)
      21             : {
      22             :         (void) cntxt;
      23          67 :         OIDXdestroy(b);
      24          67 :         return MAL_SUCCEED;
      25             : }
      26             : 
      27             : str
      28          80 : OIDXcreateImplementation(Client cntxt, int tpe, BAT *b, int pieces)
      29             : {
      30             :         int i, loopvar, arg;
      31             :         BUN cnt, step=0,o;
      32             :         MalBlkPtr smb;
      33             :         MalStkPtr newstk;
      34             :         Symbol snew = NULL;
      35             :         InstrPtr q, pack;
      36             :         char name[IDLENGTH];
      37             :         str msg= MAL_SUCCEED;
      38             : 
      39          80 :         if (BATcount(b) <= 1)
      40             :                 return MAL_SUCCEED;
      41             : 
      42             :         /* check if b is sorted, then index does not make sense */
      43          73 :         if (b->tsorted || b->trevsorted)
      44             :                 return MAL_SUCCEED;
      45             : 
      46             :         /* check if b already has index */
      47          71 :         if (BATcheckorderidx(b))
      48             :                 return MAL_SUCCEED;
      49             : 
      50         116 :         switch (ATOMbasetype(b->ttype)) {
      51             :         case TYPE_void:
      52             :                 /* trivially supported */
      53             :                 return MAL_SUCCEED;
      54          51 :         case TYPE_bte:
      55             :         case TYPE_sht:
      56             :         case TYPE_int:
      57             :         case TYPE_lng:
      58             : #ifdef HAVE_HGE
      59             :         case TYPE_hge:
      60             : #endif
      61             :         case TYPE_flt:
      62             :         case TYPE_dbl:
      63          51 :                 if (GDKnr_threads > 1 && BATcount(b) >= 2 * MIN_PIECE && (GDKdebug & FORCEMITOMASK) == 0)
      64             :                         break;
      65             :                 /* fall through */
      66             :         default:
      67          65 :                 if (BATorderidx(b, true) != GDK_SUCCEED)
      68           0 :                         throw(MAL, "bat.orderidx", TYPE_NOT_SUPPORTED);
      69             :                 return MAL_SUCCEED;
      70             :         }
      71             : 
      72           0 :         if( pieces <= 0 ){
      73             :                 if (GDKnr_threads <= 1) {
      74             :                         pieces = 1;
      75             :                 } else if (GDKdebug & FORCEMITOMASK) {
      76             :                         /* we want many pieces, even tiny ones */
      77             :                         if (BATcount(b) < 4)
      78             :                                 pieces = 1;
      79             :                         else if (BATcount(b) / 2 < (BUN) GDKnr_threads)
      80             :                                 pieces = (int) (BATcount(b) / 2);
      81             :                         else
      82             :                                 pieces = GDKnr_threads;
      83             :                 } else {
      84             :                         if (BATcount(b) < 2 * MIN_PIECE)
      85             :                                 pieces = 1;
      86           0 :                         else if (BATcount(b) / MIN_PIECE < (BUN) GDKnr_threads)
      87           0 :                                 pieces = (int) (BATcount(b) / MIN_PIECE);
      88             :                         else
      89             :                                 pieces = GDKnr_threads;
      90             :                 }
      91           0 :         } else if (BATcount(b) < (BUN) pieces || BATcount(b) < MIN_PIECE) {
      92             :                 pieces = 1;
      93             :         }
      94             : 
      95             :         /* create a temporary MAL function to sort the BAT in parallel */
      96           0 :         snprintf(name, IDLENGTH, "sort%d", rand()%1000);
      97           0 :         snew = newFunction(putName("user"), putName(name),
      98             :                                            FUNCTIONsymbol);
      99           0 :         if(snew == NULL) {
     100           0 :                 msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     101           0 :                 goto bailout;
     102             :         }
     103           0 :         smb = snew->def;
     104           0 :         q = getInstrPtr(smb, 0);
     105           0 :         if ((arg = newTmpVariable(smb, tpe)) < 0) {
     106           0 :                 msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     107           0 :                 goto bailout;
     108             :         }
     109           0 :         q= addArgument(smb, q, arg);
     110           0 :         if (q == NULL || (getArg(q,0) = newTmpVariable(smb, TYPE_void)) < 0) {
     111           0 :                 msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     112           0 :                 goto bailout;
     113             :         }
     114             : 
     115           0 :         if( resizeMalBlk(smb, 2*pieces+10) < 0)
     116           0 :                 goto bailout; // large enough
     117             :         /* create the pack instruction first, as it will hold
     118             :          * intermediate variables */
     119           0 :         pack = newInstruction(0, putName("bat"), putName("orderidx"));
     120           0 :         if (pack == NULL || (pack->argv[0] = newTmpVariable(smb, TYPE_void)) < 0) {
     121           0 :                 msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     122           0 :                 goto bailout;
     123             :         }
     124           0 :         pack = addArgument(smb, pack, arg);
     125           0 :         if (pack == NULL) {
     126           0 :                 msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     127           0 :                 goto bailout;
     128             :         }
     129           0 :         setVarFixed(smb, getArg(pack, 0));
     130             : 
     131             :         /* the costly part executed as a parallel block */
     132           0 :         if ((loopvar = newTmpVariable(smb, TYPE_bit)) < 0) {
     133           0 :                 msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     134           0 :                 goto bailout;
     135             :         }
     136           0 :         q = newStmt(smb, putName("language"), putName("dataflow"));
     137           0 :         if (q == NULL) {
     138           0 :                 freeInstruction(pack);
     139           0 :                 msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     140           0 :                 goto bailout;
     141             :         }
     142           0 :         q->barrier = BARRIERsymbol;
     143           0 :         q->argv[0] = loopvar;
     144             : 
     145           0 :         cnt = BATcount(b);
     146           0 :         step = cnt/pieces;
     147             :         o = 0;
     148           0 :         for (i = 0; i < pieces; i++) {
     149             :                 /* add slice instruction */
     150           0 :                 q = newInstruction(smb, putName("algebra"),putName("slice"));
     151           0 :                 if (q == NULL || (setDestVar(q, newTmpVariable(smb, TYPE_any))) < 0) {
     152           0 :                         freeInstruction(q);
     153           0 :                         freeInstruction(pack);
     154           0 :                         msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     155           0 :                         goto bailout;
     156             :                 }
     157           0 :                 setVarType(smb, getArg(q,0), tpe);
     158           0 :                 setVarFixed(smb, getArg(q,0));
     159           0 :                 q = addArgument(smb, q, arg);
     160           0 :                 if (q == NULL) {
     161           0 :                         freeInstruction(pack);
     162           0 :                         msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     163           0 :                         goto bailout;
     164             :                 }
     165           0 :                 pack = addArgument(smb, pack, getArg(q,0));
     166           0 :                 q = pushOid(smb, q, o);
     167           0 :                 if (i == pieces-1) {
     168             :                         o = cnt;
     169             :                 } else {
     170           0 :                         o += step;
     171             :                 }
     172           0 :                 q = pushOid(smb, q, o - 1);
     173           0 :                 if (q == NULL) {
     174           0 :                         freeInstruction(pack);
     175           0 :                         msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     176           0 :                         goto bailout;
     177             :                 }
     178           0 :                 pushInstruction(smb, q);
     179             :         }
     180           0 :         for (i = 0; i < pieces; i++) {
     181             :                 /* add sort instruction */
     182           0 :                 q = newInstruction(smb, putName("algebra"), putName("orderidx"));
     183           0 :                 if (q == NULL || (setDestVar(q, newTmpVariable(smb, TYPE_any))) < 0) {
     184           0 :                         freeInstruction(q);
     185           0 :                         freeInstruction(pack);
     186           0 :                         msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     187           0 :                         goto bailout;
     188             :                 }
     189           0 :                 setVarType(smb, getArg(q, 0), tpe);
     190           0 :                 setVarFixed(smb, getArg(q, 0));
     191           0 :                 q = addArgument(smb, q, pack->argv[2+i]);
     192           0 :                 q = pushBit(smb, q, 1);
     193           0 :                 if (q == NULL) {
     194           0 :                         freeInstruction(pack);
     195           0 :                         msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     196           0 :                         goto bailout;
     197             :                 }
     198           0 :                 pack->argv[2+i] = getArg(q, 0);
     199           0 :                 pushInstruction(smb, q);
     200             :         }
     201             :         /* finalize OID packing, check, and evaluate */
     202           0 :         pushInstruction(smb,pack);
     203           0 :         q = newAssignment(smb);
     204           0 :         if(q == NULL) {
     205           0 :                 msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     206           0 :                 goto bailout;
     207             :         }
     208           0 :         q->barrier = EXITsymbol;
     209           0 :         q->argv[0] = loopvar;
     210           0 :         pushEndInstruction(smb);
     211           0 :         msg = chkProgram(cntxt->usermodule, smb);
     212           0 :         if( msg )
     213           0 :                 goto bailout;
     214             :         /* evaluate MAL block and keep the ordered OID bat */
     215           0 :         newstk = prepareMALstack(smb, smb->vsize);
     216           0 :         if (newstk == NULL) {
     217           0 :                 msg = createException(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     218           0 :                 goto bailout;
     219             :         }
     220           0 :         newstk->up = 0;
     221           0 :         newstk->stk[arg].vtype= TYPE_bat;
     222           0 :         newstk->stk[arg].val.bval= b->batCacheid;
     223           0 :         BBPretain(newstk->stk[arg].val.bval);
     224           0 :         msg = runMALsequence(cntxt, smb, 1, 0, newstk, 0, 0);
     225           0 :         freeStack(newstk);
     226             :         /* get rid of temporary MAL block */
     227           0 :   bailout:
     228           0 :         freeSymbol(snew);
     229           0 :         return msg;
     230             : }
     231             : 
     232             : str
     233           6 : OIDXcreate(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     234             : {
     235             :         BAT *b;
     236             :         str msg= MAL_SUCCEED;
     237             :         int pieces = -1;
     238             : 
     239           6 :         if (pci->argc == 3) {
     240           6 :                 pieces = stk->stk[pci->argv[2]].val.ival;
     241           6 :                 if (pieces < 0)
     242           0 :                         throw(MAL,"bat.orderidx","Positive number expected");
     243             :         }
     244             : 
     245           6 :         b = BATdescriptor( *getArgReference_bat(stk, pci, 1));
     246           6 :         if (b == NULL)
     247           0 :                 throw(MAL, "bat.orderidx", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     248           6 :         msg = OIDXcreateImplementation(cntxt, getArgType(mb,pci,1), b, pieces);
     249           6 :         BBPunfix(b->batCacheid);
     250           6 :         return msg;
     251             : }
     252             : 
     253             : str
     254           0 : OIDXhasorderidx(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     255             : {
     256             :         BAT *b;
     257           0 :         bit *ret = getArgReference_bit(stk,pci,0);
     258           0 :         bat bid = *getArgReference_bat(stk, pci, 1);
     259             : 
     260             :         (void) cntxt;
     261             :         (void) mb;
     262             : 
     263           0 :         b = BATdescriptor(bid);
     264           0 :         if (b == NULL)
     265           0 :                 throw(MAL, "bat.hasorderidx", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     266             : 
     267           0 :         *ret = b->torderidx != NULL;
     268             : 
     269           0 :         BBPunfix(b->batCacheid);
     270           0 :         return MAL_SUCCEED;
     271             : }
     272             : 
     273             : str
     274           6 : OIDXgetorderidx(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     275             : {
     276             :         BAT *b;
     277             :         BAT *bn;
     278           6 :         bat *ret = getArgReference_bat(stk,pci,0);
     279           6 :         bat bid = *getArgReference_bat(stk, pci, 1);
     280             : 
     281             :         (void) cntxt;
     282             :         (void) mb;
     283             : 
     284           6 :         b = BATdescriptor(bid);
     285           6 :         if (b == NULL)
     286           0 :                 throw(MAL, "bat.getorderidx", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     287             : 
     288           6 :         if (!BATcheckorderidx(b)) {
     289           0 :                 BBPunfix(b->batCacheid);
     290           0 :                 throw(MAL, "bat.getorderidx", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     291             :         }
     292             : 
     293           6 :         if ((bn = COLnew(0, TYPE_oid, BATcount(b), TRANSIENT)) == NULL) {
     294           0 :                 BBPunfix(b->batCacheid);
     295           0 :                 throw(MAL, "bat.getorderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     296             :         }
     297           6 :         memcpy(Tloc(bn, 0), (const oid *) b->torderidx->base + ORDERIDXOFF,
     298           6 :                    BATcount(b) * SIZEOF_OID);
     299           6 :         BATsetcount(bn, BATcount(b));
     300           6 :         bn->tkey = true;
     301           6 :         bn->tsorted = bn->trevsorted = BATcount(b) <= 1;
     302           6 :         bn->tnil = false;
     303           6 :         bn->tnonil = true;
     304           6 :         *ret = bn->batCacheid;
     305           6 :         BBPkeepref(*ret);
     306           6 :         BBPunfix(b->batCacheid);
     307           6 :         return MAL_SUCCEED;
     308             : }
     309             : 
     310             : str
     311           0 : OIDXorderidx(bat *ret, const bat *bid, const bit *stable)
     312             : {
     313             :         BAT *b;
     314             :         gdk_return r;
     315             : 
     316             :         (void) ret;
     317           0 :         b = BATdescriptor(*bid);
     318           0 :         if (b == NULL)
     319           0 :                 throw(MAL, "algebra.orderidx", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     320             : 
     321           0 :         r = BATorderidx(b, *stable);
     322           0 :         if (r != GDK_SUCCEED) {
     323           0 :                 BBPunfix(*bid);
     324           0 :                 throw(MAL, "algebra.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     325             :         }
     326           0 :         *ret = *bid;
     327           0 :         BBPkeepref(*ret);
     328           0 :         return MAL_SUCCEED;
     329             : }
     330             : 
     331             : /*
     332             :  * Merge the collection of sorted OID BATs into one
     333             :  */
     334             : 
     335             : str
     336           0 : OIDXmerge(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
     337             : {
     338             :         bat bid;
     339             :         BAT *b;
     340             :         BAT **a;
     341             :         int i, j, n_ar;
     342             :         BUN m_sz;
     343             :         gdk_return rc;
     344             : 
     345             :         (void) cntxt;
     346             :         (void) mb;
     347             : 
     348           0 :         if( pci->retc != 1 )
     349           0 :                 throw(MAL, "bat.orderidx", SQLSTATE(HY002) "INTERNAL ERROR, retc != 1 ");
     350           0 :         if( pci->argc < 2 )
     351           0 :                 throw(MAL, "bat.orderidx", SQLSTATE(HY002) "INTERNAL ERROR, argc != 2");
     352             : 
     353           0 :         n_ar = pci->argc - 2;
     354             : 
     355           0 :         bid = *getArgReference_bat(stk, pci, 1);
     356           0 :         b = BATdescriptor(bid);
     357           0 :         if (b == NULL)
     358           0 :                 throw(MAL, "bat.orderidx", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     359             : 
     360           0 :         if (b->torderidx )
     361           0 :                 throw(MAL, "bat.orderidx", SQLSTATE(HY002) "INTERNAL ERROR, torderidx already set");
     362             : 
     363           0 :         switch (ATOMbasetype(b->ttype)) {
     364             :         case TYPE_bte:
     365             :         case TYPE_sht:
     366             :         case TYPE_int:
     367             :         case TYPE_lng:
     368             : #ifdef HAVE_HGE
     369             :         case TYPE_hge:
     370             : #endif
     371             :         case TYPE_flt:
     372             :         case TYPE_dbl:
     373             :                 break;
     374           0 :         case TYPE_str:
     375             :                 /* TODO: support strings etc. */
     376             :         case TYPE_void:
     377             :         case TYPE_ptr:
     378             :         default:
     379           0 :                 BBPunfix(bid);
     380           0 :                 throw(MAL, "bat.orderidx", TYPE_NOT_SUPPORTED);
     381             :         }
     382             : 
     383           0 :         if ((a = (BAT **) GDKmalloc(n_ar*sizeof(BAT *))) == NULL) {
     384           0 :                 BBPunfix(bid);
     385           0 :                 throw(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     386             :         }
     387             :         m_sz = 0;
     388           0 :         for (i = 0; i < n_ar; i++) {
     389           0 :                 a[i] = BATdescriptor(*getArgReference_bat(stk, pci, i+2));
     390           0 :                 if (a[i] == NULL) {
     391           0 :                         for (j = i-1; j >= 0; j--) {
     392           0 :                                 BBPunfix(a[j]->batCacheid);
     393             :                         }
     394           0 :                         GDKfree(a);
     395           0 :                         BBPunfix(bid);
     396           0 :                         throw(MAL, "bat.orderidx", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     397             :                 }
     398           0 :                 m_sz += BATcount(a[i]);
     399           0 :                 if (BATcount(a[i]) == 0) {
     400           0 :                         BBPunfix(a[i]->batCacheid);
     401           0 :                         a[i] = NULL;
     402             :                 }
     403             :         }
     404             :         assert(m_sz == BATcount(b));
     405           0 :         for (i = 0; i < n_ar; i++) {
     406           0 :                 if (a[i] == NULL) {
     407           0 :                         n_ar--;
     408           0 :                         if (i < n_ar)
     409           0 :                                 a[i] = a[n_ar];
     410           0 :                         i--;
     411             :                 }
     412             :         }
     413           0 :         if (m_sz != BATcount(b)) {
     414           0 :                 BBPunfix(bid);
     415           0 :                 for (i = 0; i < n_ar; i++)
     416           0 :                         BBPunfix(a[i]->batCacheid);
     417           0 :                 GDKfree(a);
     418           0 :                 throw(MAL, "bat.orderidx", "count mismatch");
     419             :         }
     420             : 
     421           0 :         rc = GDKmergeidx(b, a, n_ar);
     422             : 
     423           0 :         for (i = 0; i < n_ar; i++)
     424           0 :                 BBPunfix(a[i]->batCacheid);
     425           0 :         GDKfree(a);
     426           0 :         BBPunfix(bid);
     427             : 
     428           0 :         if (rc != GDK_SUCCEED)
     429           0 :                 throw(MAL, "bat.orderidx", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     430             : 
     431             :         return MAL_SUCCEED;
     432             : }
     433             : 
     434             : #include "mel.h"
     435             : mel_func orderidx_init_funcs[] = {
     436             :  pattern("bat", "orderidx", OIDXcreate, false, "Introduces the OID index arrangement of ordered values", args(1,2, arg("",void),batargany("bv",1))),
     437             :  pattern("bat", "orderidx", OIDXcreate, false, "Introduces the OID index arrangement of ordered values", args(1,3, arg("",void),batargany("bv",1),arg("pieces",int))),
     438             :  pattern("bat", "orderidx", OIDXmerge, false, "Consolidates the OID index arrangement", args(1,3, arg("",void),batargany("bv",1),batvarargany("l",1))),
     439             :  pattern("bat", "hasorderidx", OIDXhasorderidx, false, "Return true if order index exists", args(1,2, arg("",bit),batargany("bv",1))),
     440             :  pattern("bat", "getorderidx", OIDXgetorderidx, false, "Return the order index if it exists", args(1,2, batarg("",oid),batargany("bv",1))),
     441             :  command("algebra", "orderidx", OIDXorderidx, false, "Create an order index", args(1,3, batargany("",1),batargany("bv",1),arg("stable",bit))),
     442             :  { .imp=NULL }
     443             : };
     444             : #include "mal_import.h"
     445             : #ifdef _MSC_VER
     446             : #undef read
     447             : #pragma section(".CRT$XCU",read)
     448             : #endif
     449         255 : LIB_STARTUP_FUNC(init_orderidx_mal)
     450         255 : { mal_module("orderidx", NULL, orderidx_init_funcs); }

Generated by: LCOV version 1.14