LCOV - code coverage report
Current view: top level - monetdb5/scheduler - run_memo.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 16 65 24.6 %
Date: 2021-10-13 02:24:04 Functions: 2 5 40.0 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  * @f run_memo
      11             :  * @a M. Kersten
      12             :  * @+ Memo-based Query Execution
      13             :  * Modern cost-based query optimizers use a memo structure
      14             :  * to organize the search space for an efficient query execution plan.
      15             :  * For example, consider an oid join path 'A.B.C.D'.
      16             :  * We can start the evaluation at any point in this path.
      17             :  *
      18             :  * Its memo structure can be represented by a (large) MAL program.
      19             :  * The memo levels are encapsulated with a @code{choice} operator.
      20             :  * The arguments of the second dictate which instructions to consider
      21             :  * for cost evaluation.
      22             :  * @example
      23             :  *  ...
      24             :  *  run_memo.choice("getVolume");
      25             :  *  T1:= algebra.join(A,B);
      26             :  *  T2:= algebra.join(B,C);
      27             :  *  T3:= algebra.join(C,D);
      28             :  *  run_memo.choice("getVolume",T1,T2,T3);
      29             :  *  T4:= algebra.join(T1,C);
      30             :  *  T5:= algebra.join(A,T2);
      31             :  *  T6:= algebra.join(T2,D);
      32             :  *  T7:= algebra.join(B,T3);
      33             :  *  T8:= algebra.join(C,D);
      34             :  *  run_memo.choice("getVolume",T4,T5,T6,T7,T8);
      35             :  *  T9:= algebra.join(T4,D);
      36             :  *  T10:= algebra.join(T5,D);
      37             :  *  T11:= algebra.join(A,T6);
      38             :  *  T12:= algebra.join(A,T7);
      39             :  *  T13:= algebra.join(T1,T8);
      40             :  *  run_memo.choice("getVolume",T9,T10,T11,T12,T13);
      41             :  *  answer:= run_memo.pick(T9, T10, T11, T12, T13);
      42             :  * @end example
      43             :  *
      44             :  * The @code{run_memo.choice()} operator calls a builtin @code{getVolume}
      45             :  * for each target variable and expects an integer-valued cost.
      46             :  * In this case it returns the total number of bytes uses as arguments.
      47             :  *
      48             :  * The target variable with the lowest
      49             :  * cost is chosen for execution and remaining variables are turned into
      50             :  * a temporary NOOP operation.(You may want to re-use the memo)
      51             :  * They are skipped by the interpreter, but also in subsequent
      52             :  * calls to the scheduler. It reduces the alternatives as we proceed
      53             :  * in the plan.
      54             :  *
      55             :  * A built-in naive cost function is used.
      56             :  * It would be nice if the user could provide a
      57             :  * private cost function defined as a @code{pattern}
      58             :  * with a polymorphic argument for the target and a @code{:lng} result.
      59             :  * Its implementation can use the complete context information to
      60             :  * make a decision. For example, it can trace the potential use
      61             :  * of the target variable in subsequent statements to determine
      62             :  * a total cost when this step is taken towards the final result.
      63             :  *
      64             :  * A complete plan likely includes other expressions to
      65             :  * prepare or use the target variables before reaching the next
      66             :  * choice point. It is the task of the choice operator
      67             :  * to avoid any superfluous operation.
      68             :  *
      69             :  * The MAL block should be privately owned by the caller,
      70             :  * which can be assured with @code{run_isolate.isolation()}.
      71             :  *
      72             :  * A refinement of the scheme is to make cost analysis
      73             :  * part of the plan as well. Then you don't have to
      74             :  * include a hardwired cost function.
      75             :  * @example
      76             :  *  Acost:= aggr.count(A);
      77             :  *  Bcost:= aggr.count(B);
      78             :  *  Ccost:= aggr.count(C);
      79             :  *  T1cost:= Acost+Bcost;
      80             :  *  T2cost:= Bcost+Ccost;
      81             :  *  T3cost:= Ccost+Dcost;
      82             :  *  run_memo.choice(T1cost,T1, T2cost,T2, T3cost,T3);
      83             :  *  T1:= algebra.join(A,B);
      84             :  *  T2:= algebra.join(B,C);
      85             :  *  T3:= algebra.join(C,D);
      86             :  *  ...
      87             :  * @end example
      88             :  * The current implementation assumes a regular plan
      89             :  * and unique use of variables.
      90             :  */
      91             : /*
      92             :  * @+ Memorun implementation
      93             :  * The code below is a mixture of generic routines and
      94             :  * sample implementations to run the tests.
      95             :  */
      96             : #include "monetdb_config.h"
      97             : #include "mal.h"
      98             : #include "mal_interpreter.h"
      99             : #include "mal_linker.h"
     100             : #include "mal_client.h"
     101             : #include "mal_runtime.h"
     102             : #include "mal_exception.h"
     103             : #include "mal_function.h"
     104             : 
     105             : #ifndef WIN32
     106             : #define __declspec(x)
     107             : #endif
     108             : 
     109             : extern __declspec(dllexport) str RUNchoice(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p);
     110             : extern __declspec(dllexport) str RUNvolumeCost(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p);
     111             : extern __declspec(dllexport) str RUNcostPrediction(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p);
     112             : extern __declspec(dllexport) str RUNpickResult(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p);
     113             : 
     114             : static void
     115           0 : propagateNonTarget(MalBlkPtr mb, int pc)
     116             : {
     117             :         int i;
     118             :         InstrPtr p;
     119           0 :         const char *scheduler = putName("run_memo");
     120             : 
     121           0 :         for (; pc < mb->stop; pc++) {
     122           0 :                 p = getInstrPtr(mb, pc);
     123           0 :                 if (getModuleId(p) == scheduler)
     124           0 :                         continue;
     125           0 :                 for (i = 0; i < p->argc; i++)
     126           0 :                         if (isVarDisabled(mb, getArg(p, i)) && p->token >= 0)
     127           0 :                                 p->token = -p->token;  /* temporary NOOP */
     128           0 :                 if (p->token < 0)
     129           0 :                         for (i = 0; i < p->retc; i++)
     130           0 :                                 setVarDisabled(mb, getArg(p, i));
     131             :         }
     132           0 : }
     133             : /*
     134             :  * THe choice operator first searches the next one to identify
     135             :  * the fragment to be optimized and to gain access to the variables
     136             :  * without the need to declare them upfront.
     137             :  */
     138             : str
     139           8 : RUNchoice(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
     140             : {
     141             :         int target;
     142             :         lng cost, mincost;
     143             :         int i, j, pc;
     144             :         char *nme;
     145             :         InstrPtr q;
     146             : 
     147             :         mincost = 0;
     148           8 :         pc = getPC(mb, p);
     149          16 :         for (i = pc + 1; i < mb->stop; i++) {
     150           8 :                 q = getInstrPtr(mb, i);
     151           8 :                 if (getModuleId(p) == getModuleId(q) &&
     152           0 :                         getFunctionId(p) == getFunctionId(q)) {
     153             :                         p = q;
     154             :                         break;
     155             :                 }
     156             :         }
     157           8 :         if (i == mb->stop)
     158             :                 return MAL_SUCCEED;
     159           0 :         target = getArg(p, 2);
     160           0 :         if (getArgType(mb, p, 1) == TYPE_int && p->argc >= 3 && (p->argc - 1) % 2 == 0) {
     161             :                 /* choice pairs */
     162           0 :                 mincost = *getArgReference_int(stk, p, 1);
     163           0 :                 for (i = 3; i < p->argc; i += 2) {
     164           0 :                         cost = *getArgReference_int(stk, p, i);
     165           0 :                         if (cost < mincost && !isVarDisabled(mb, getArg(p, i + 1))) {
     166             :                                 mincost = cost;
     167             :                                 target = getArg(p, i + 1);
     168             :                         }
     169             :                 }
     170           0 :         } else if (getArgType(mb, p, 1) == TYPE_str) {
     171           0 :                 nme = *getArgReference_str(stk, p, 1);
     172             :                 /* should be generalized to allow an arbitrary user defined function */
     173           0 :                 if (strcmp(nme, "getVolume") != 0)
     174           0 :                         throw(MAL, "run_memo.choice", ILLEGAL_ARGUMENT "Illegal cost function");
     175             : 
     176             :                 mincost = -1;
     177           0 :                 for (j = 2; j < p->argc; j++) {
     178           0 :                         if (!isVarDisabled(mb, getArg(p, j)))
     179           0 :                                 for (i = pc + 1; i < mb->stop; i++) {
     180           0 :                                         InstrPtr q = getInstrPtr(mb, i);
     181           0 :                                         if (p->token >= 0 && getArg(q, 0) == getArg(p, j)) {
     182           0 :                                                 cost = getVolume(stk, q, 1);
     183           0 :                                                 if (cost > 0 && (cost < mincost || mincost == -1)) {
     184             :                                                         mincost = cost;
     185           0 :                                                         target = getArg(p, j);
     186             :                                                 }
     187             :                                                 break;
     188             :                                         }
     189             :                                 }
     190             : 
     191             :                 }
     192             :         }
     193             : 
     194             :         (void) cntxt;
     195             : 
     196             :         /* remove non-qualifying variables */
     197           0 :         for (i = 2; i < p->argc; i += 2)
     198           0 :                 if (getArg(p, i) != target) {
     199           0 :                         setVarDisabled(mb, getArg(p, i - 1));
     200           0 :                         setVarDisabled(mb, getArg(p, i));
     201             :                 }
     202             : 
     203           0 :         propagateNonTarget(mb, pc + 1);
     204           0 :         return MAL_SUCCEED;
     205             : }
     206             : /*
     207             :  * At the end of the query plan we save the result in
     208             :  * a separate variable.
     209             :  */
     210             : str
     211           2 : RUNpickResult(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
     212             : {
     213             :         ValPtr lhs, rhs;
     214             :         int i;
     215             : 
     216             :         (void) cntxt;
     217           2 :         lhs = &stk->stk[getArg(p, 0)];
     218           2 :         for (i = p->retc; i < p->argc; i++)
     219           2 :                 if (!isVarDisabled(mb, getArg(p, i))) {
     220           2 :                         rhs = &stk->stk[getArg(p, i)];
     221           2 :                         if ((rhs)->vtype < TYPE_str)
     222           2 :                                 *lhs = *rhs;
     223           0 :                         else if (VALcopy(lhs, rhs) == NULL)
     224           0 :                                 throw(MAL, "run_memo.pick", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     225           2 :                         if (lhs->vtype == TYPE_bat)
     226           2 :                                 BBPretain(lhs->val.bval);
     227           2 :                         return MAL_SUCCEED;
     228             :                 }
     229             : 
     230           0 :         throw(MAL, "run_memo.pick", OPERATION_FAILED "No result available");
     231             : }
     232             : /*
     233             :  * The routine below calculates a cost based on the BAT volume in bytes.
     234             :  * The MAL compiler ensures that all arguments have been
     235             :  * assigned a value.
     236             :  */
     237             : str
     238           0 : RUNvolumeCost(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
     239             : {
     240           0 :         lng *cost = getArgReference_lng(stk, p, 0);
     241             :         (void) mb;
     242             : 
     243             :         (void) cntxt;
     244           0 :         *cost = getVolume(stk, p, 0); /* calculate total input size */
     245           0 :         return MAL_SUCCEED;
     246             : }
     247             : /*
     248             :  * The second example shows how you can look into the remaining
     249             :  * instructions to assess the total cost if you follow the path
     250             :  * starting at the argument given.
     251             :  */
     252             : str
     253           0 : RUNcostPrediction(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
     254             : {
     255           0 :         lng *cost = getArgReference_lng(stk, p, 0);
     256             :         (void) mb;
     257             :         (void) cntxt;
     258             : 
     259           0 :         *cost = 0;
     260           0 :         return MAL_SUCCEED;
     261             : }

Generated by: LCOV version 1.14