LCOV - code coverage report
Current view: top level - sql/backends/monet5/UDF/udf - udf.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 112 168 66.7 %
Date: 2021-10-13 02:24:04 Functions: 7 7 100.0 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /* monetdb_config.h must be the first include in each .c file */
      10             : #include "monetdb_config.h"
      11             : #include "udf.h"
      12             : #include "str.h"
      13             : 
      14             : /* Reverse a string */
      15             : 
      16             : /* actual implementation */
      17             : /* all non-exported functions must be declared static */
      18             : static str
      19          15 : UDFreverse_(str *buf, size_t *buflen, const char *src)
      20             : {
      21          15 :         size_t len = strlen(src);
      22             :         char *dst = NULL;
      23             : 
      24             :         /* assert calling sanity */
      25          15 :         assert(buf);
      26             :         /* test if input buffer is large enough for result string, otherwise re-allocate it */
      27          15 :         CHECK_STR_BUFFER_LENGTH(buf, buflen, (len + 1), "udf.reverse");
      28          15 :         dst = *buf;
      29             : 
      30          15 :         dst[len] = 0;
      31             :         /* all strings in MonetDB are encoded using UTF-8; we must
      32             :          * make sure that the reversed string is also encoded in valid
      33             :          * UTF-8, so we treat multibyte characters as single units */
      34         257 :         while (*src) {
      35         242 :                 if ((*src & 0xF8) == 0xF0) {
      36             :                         /* 4 byte UTF-8 sequence */
      37           0 :                         assert(len >= 4);
      38           0 :                         dst[len - 4] = *src++;
      39           0 :                         assert((*src & 0xC0) == 0x80);
      40           0 :                         dst[len - 3] = *src++;
      41           0 :                         assert((*src & 0xC0) == 0x80);
      42           0 :                         dst[len - 2] = *src++;
      43           0 :                         assert((*src & 0xC0) == 0x80);
      44           0 :                         dst[len - 1] = *src++;
      45             :                         len -= 4;
      46         242 :                 } else if ((*src & 0xF0) == 0xE0) {
      47             :                         /* 3 byte UTF-8 sequence */
      48           6 :                         assert(len >= 3);
      49           6 :                         dst[len - 3] = *src++;
      50           6 :                         assert((*src & 0xC0) == 0x80);
      51           6 :                         dst[len - 2] = *src++;
      52           6 :                         assert((*src & 0xC0) == 0x80);
      53           6 :                         dst[len - 1] = *src++;
      54             :                         len -= 3;
      55         236 :                 } else if ((*src & 0xE0) == 0xC0) {
      56             :                         /* 2 byte UTF-8 sequence */
      57           0 :                         assert(len >= 2);
      58           0 :                         dst[len - 2] = *src++;
      59           0 :                         assert((*src & 0xC0) == 0x80);
      60           0 :                         dst[len - 1] = *src++;
      61             :                         len -= 2;
      62             :                 } else {
      63             :                         /* 1 byte UTF-8 "sequence" */
      64         236 :                         assert(len >= 1);
      65         236 :                         assert((*src & 0x80) == 0);
      66         236 :                         dst[--len] = *src++;
      67             :                 }
      68             :         }
      69          15 :         assert(len == 0);
      70             : 
      71             :         return MAL_SUCCEED;
      72             : }
      73             : 
      74             : /* MAL wrapper */
      75             : str
      76           7 : UDFreverse(str *res, const str *arg)
      77             : {
      78             :         str msg = MAL_SUCCEED, s;
      79             : 
      80             :         /* assert calling sanity */
      81           7 :         assert(res && arg);
      82           7 :         s = *arg;
      83           7 :         if (strNil(s)) {
      84           0 :                 if (!(*res = GDKstrdup(str_nil)))
      85           0 :                         throw(MAL, "udf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
      86             :         } else {
      87           7 :                 size_t buflen = strlen(s) + 1;
      88             : 
      89           7 :                 if (!(*res = GDKmalloc(buflen)))
      90           0 :                         throw(MAL, "udf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
      91           7 :                 if ((msg = UDFreverse_(res, &buflen, s)) != MAL_SUCCEED) {
      92           0 :                         GDKfree(*res);
      93           0 :                         *res = NULL;
      94           0 :                         return msg;
      95             :                 }
      96             :         }
      97             :         return msg;
      98             : }
      99             : 
     100             : /* Reverse a BAT of strings */
     101             : /*
     102             :  * Generic "type-oblivious" version,
     103             :  * using generic "type-oblivious" BAT access interface.
     104             :  */
     105             : 
     106             : /* actual implementation */
     107             : static char *
     108           2 : UDFBATreverse_(BAT **ret, BAT *src)
     109             : {
     110             :         BATiter li;
     111             :         BAT *bn = NULL;
     112             :         BUN p = 0, q = 0;
     113           2 :         size_t buflen = INITIAL_STR_BUFFER_LENGTH;
     114             :         str msg = MAL_SUCCEED, buf;
     115             :         bool nils = false;
     116             : 
     117             :         /* assert calling sanity */
     118           2 :         assert(ret);
     119             : 
     120             :         /* handle NULL pointer */
     121           2 :         if (src == NULL)
     122           0 :                 throw(MAL, "batudf.reverse", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     123             :         /* check tail type */
     124           2 :         if (src->ttype != TYPE_str)
     125           0 :                 throw(MAL, "batudf.reverse", SQLSTATE(42000) "tail-type of input BAT must be TYPE_str");
     126             : 
     127             :         /* to avoid many allocations, we allocate a single buffer, which will reallocate if a
     128             :            larger string is found and freed at the end */
     129           2 :         if (!(buf = GDKmalloc(buflen))) {
     130           0 :                 msg = createException(MAL, "batudf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     131           0 :                 goto bailout;
     132             :         }
     133           2 :         q = BATcount(src);
     134             :         /* allocate void-headed result BAT */
     135           2 :         if (!(bn = COLnew(src->hseqbase, TYPE_str, q, TRANSIENT))) {
     136           0 :                 msg = createException(MAL, "batudf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     137           0 :                 goto bailout;
     138             :         }
     139             : 
     140             :         /* create BAT iterator */
     141           2 :         li = bat_iterator(src);
     142             :         /* the core of the algorithm */
     143          10 :         for (p = 0; p < q ; p++) {
     144           8 :                 str x = (str) BUNtvar(li, p);
     145             : 
     146           8 :                 if (strNil(x)) {
     147             :                         /* if the input string is null, then append directly */
     148           0 :                         if (tfastins_nocheckVAR(bn, p, str_nil) != GDK_SUCCEED) {
     149           0 :                                 msg = createException(MAL, "batudf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     150           0 :                                 goto bailout1;
     151             :                         }
     152             :                         nils = true;
     153             :                 } else {
     154             :                         /* revert tail value */
     155           8 :                         if ((msg = UDFreverse_(&buf, &buflen, x)) != MAL_SUCCEED)
     156           0 :                                 goto bailout1;
     157             :                         /* assert logical sanity */
     158           8 :                         assert(buf && x);
     159             :                         /* append to the output BAT. We are using a faster route, because we know what we are doing */
     160           8 :                         if (tfastins_nocheckVAR(bn, p, buf) != GDK_SUCCEED) {
     161           0 :                                 msg = createException(MAL, "batudf.reverse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     162           0 :                                 goto bailout1;
     163             :                         }
     164             :                 }
     165             :         }
     166           2 : bailout1:
     167           2 :         bat_iterator_end(&li);
     168             : 
     169           2 : bailout:
     170           2 :         GDKfree(buf);
     171           2 :         if (bn && !msg) {
     172           2 :                 BATsetcount(bn, q);
     173           2 :                 bn->tnil = nils;
     174           2 :                 bn->tnonil = !nils;
     175           2 :                 bn->tkey = BATcount(bn) <= 1;
     176           2 :                 bn->tsorted = BATcount(bn) <= 1;
     177           2 :                 bn->trevsorted = BATcount(bn) <= 1;
     178           0 :         } else if (bn) {
     179           0 :                 BBPreclaim(bn);
     180             :                 bn = NULL;
     181             :         }
     182           2 :         *ret = bn;
     183           2 :         return msg;
     184             : }
     185             : 
     186             : /* MAL wrapper */
     187             : char *
     188           2 : UDFBATreverse(bat *ret, const bat *arg)
     189             : {
     190           2 :         BAT *res = NULL, *src = NULL;
     191             :         char *msg = NULL;
     192             : 
     193             :         /* assert calling sanity */
     194           2 :         assert(ret != NULL && arg != NULL);
     195             : 
     196             :         /* bat-id -> BAT-descriptor */
     197           2 :         if ((src = BATdescriptor(*arg)) == NULL)
     198           0 :                 throw(MAL, "batudf.reverse", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     199             : 
     200             :         /* do the work */
     201           2 :         msg = UDFBATreverse_( &res, src );
     202             : 
     203             :         /* release input BAT-descriptor */
     204           2 :         BBPunfix(src->batCacheid);
     205             : 
     206           2 :         if (msg == MAL_SUCCEED) {
     207             :                 /* register result BAT in buffer pool */
     208           2 :                 BBPkeepref((*ret = res->batCacheid));
     209             :         }
     210             : 
     211             :         return msg;
     212             : }
     213             : 
     214             : /* fuse */
     215             : 
     216             : /* instantiate type-specific functions */
     217             : 
/*
 * "udf_impl.h" is included once per type combination; before each inclusion
 * the three macros UI, UU and UO are defined, and afterwards undefined again
 * so the next round can redefine them.
 * NOTE(review): udf_impl.h is not visible here.  Judging from the names used
 * elsewhere in this file (UDFfuse_bte_sht, UDFBATfuse_bte_sht, ...), UI is
 * presumably the input type, UU an unsigned type of the same width as UI, and
 * UO the (twice as wide) output type -- confirm against udf_impl.h.
 */

/* bte x bte -> sht */
#define UI bte
#define UU unsigned char
#define UO sht
#include "udf_impl.h"
#undef UI
#undef UU
#undef UO

/* sht x sht -> int */
#define UI sht
#define UU unsigned short
#define UO int
#include "udf_impl.h"
#undef UI
#undef UU
#undef UO

/* int x int -> lng */
#define UI int
#define UU unsigned int
#define UO lng
#include "udf_impl.h"
#undef UI
#undef UU
#undef UO

/* lng x lng -> hge, only when the build defines HAVE_HGE */
#ifdef HAVE_HGE
#define UI lng
#define UU ulng
#define UO hge
#include "udf_impl.h"
#undef UI
#undef UU
#undef UO
#endif
     251             : 
     252             : /* BAT fuse */
     253             : 
     254             : /* actual implementation */
     255             : static char *
     256           8 : UDFBATfuse_(BAT **ret, BAT *bone, BAT *btwo)
     257             : {
     258             :         BAT *bres = NULL;
     259           8 :         bit two_tail_sorted_unsigned = FALSE;
     260           8 :         bit two_tail_revsorted_unsigned = FALSE;
     261             :         BUN n;
     262             :         char *msg = NULL;
     263             : 
     264             :         /* assert calling sanity */
     265           8 :         assert(ret != NULL);
     266             : 
     267             :         /* handle NULL pointer */
     268           8 :         if (bone == NULL || btwo == NULL)
     269           0 :                 throw(MAL, "batudf.fuse",  SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     270             : 
     271             :         /* check for aligned heads */
     272           8 :         if (BATcount(bone) != BATcount(btwo) ||
     273           8 :             bone->hseqbase != btwo->hseqbase) {
     274           0 :                 throw(MAL, "batudf.fuse",
     275             :                       "heads of input BATs must be aligned");
     276             :         }
     277             :         n = BATcount(bone);
     278             : 
     279             :         /* check tail types */
     280           8 :         if (bone->ttype != btwo->ttype) {
     281           0 :                 throw(MAL, "batudf.fuse",
     282             :                       "tails of input BATs must be identical");
     283             :         }
     284             : 
     285             :         /* allocate result BAT */
     286           8 :         switch (bone->ttype) {
     287           2 :         case TYPE_bte:
     288           2 :                 bres = COLnew(bone->hseqbase, TYPE_sht, n, TRANSIENT);
     289           2 :                 break;
     290           2 :         case TYPE_sht:
     291           2 :                 bres = COLnew(bone->hseqbase, TYPE_int, n, TRANSIENT);
     292           2 :                 break;
     293           4 :         case TYPE_int:
     294           4 :                 bres = COLnew(bone->hseqbase, TYPE_lng, n, TRANSIENT);
     295           4 :                 break;
     296             : #ifdef HAVE_HGE
     297           0 :         case TYPE_lng:
     298           0 :                 bres = COLnew(bone->hseqbase, TYPE_hge, n, TRANSIENT);
     299           0 :                 break;
     300             : #endif
     301           0 :         default:
     302           0 :                 throw(MAL, "batudf.fuse",
     303             :                       "tails of input BATs must be one of {bte, sht, int"
     304             : #ifdef HAVE_HGE
     305             :                       ", lng"
     306             : #endif
     307             :                       "}");
     308             :         }
     309           8 :         if (bres == NULL)
     310           0 :                 throw(MAL, "batudf.fuse", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     311             : 
     312             :         /* call type-specific core algorithm */
     313           8 :         switch (bone->ttype) {
     314           2 :         case TYPE_bte:
     315           2 :                 msg = UDFBATfuse_bte_sht ( bres, bone, btwo, n,
     316             :                         &two_tail_sorted_unsigned, &two_tail_revsorted_unsigned );
     317           2 :                 break;
     318           2 :         case TYPE_sht:
     319           2 :                 msg = UDFBATfuse_sht_int ( bres, bone, btwo, n,
     320             :                         &two_tail_sorted_unsigned, &two_tail_revsorted_unsigned );
     321           2 :                 break;
     322           4 :         case TYPE_int:
     323           4 :                 msg = UDFBATfuse_int_lng ( bres, bone, btwo, n,
     324             :                         &two_tail_sorted_unsigned, &two_tail_revsorted_unsigned );
     325           4 :                 break;
     326             : #ifdef HAVE_HGE
     327           0 :         case TYPE_lng:
     328           0 :                 msg = UDFBATfuse_lng_hge ( bres, bone, btwo, n,
     329             :                         &two_tail_sorted_unsigned, &two_tail_revsorted_unsigned );
     330           0 :                 break;
     331             : #endif
     332           0 :         default:
     333           0 :                 BBPunfix(bres->batCacheid);
     334           0 :                 throw(MAL, "batudf.fuse",
     335             :                       "tails of input BATs must be one of {bte, sht, int"
     336             : #ifdef HAVE_HGE
     337             :                       ", lng"
     338             : #endif
     339             :                       "}");
     340             :         }
     341             : 
     342           8 :         if (msg != MAL_SUCCEED) {
     343           0 :                 BBPunfix(bres->batCacheid);
     344             :         } else {
     345             :                 /* set number of tuples in result BAT */
     346           8 :                 BATsetcount(bres, n);
     347             : 
     348             :                 /* Result tail is sorted, if the left/first input tail is
     349             :                  * sorted and key (unique), or if the left/first input tail is
     350             :                  * sorted and the second/right input tail is sorted and the
     351             :                  * second/right tail values are either all >= 0 or all < 0;
     352             :                  * otherwise, we cannot tell.
     353             :                  */
     354           8 :                 if (BATtordered(bone)
     355           0 :                     && (BATtkey(bone) || two_tail_sorted_unsigned))
     356           0 :                         bres->tsorted = true;
     357             :                 else
     358           8 :                         bres->tsorted = (BATcount(bres) <= 1);
     359           8 :                 if (BATtrevordered(bone)
     360           0 :                     && (BATtkey(bone) || two_tail_revsorted_unsigned))
     361           0 :                         bres->trevsorted = true;
     362             :                 else
     363           8 :                         bres->trevsorted = (BATcount(bres) <= 1);
     364             :                 /* result tail is key (unique), iff both input tails are */
     365          16 :                 BATkey(bres, BATtkey(bone) || BATtkey(btwo));
     366             : 
     367           8 :                 *ret = bres;
     368             :         }
     369             : 
     370             :         return msg;
     371             : }
     372             : 
     373             : /* MAL wrapper */
     374             : char *
     375           8 : UDFBATfuse(bat *ires, const bat *ione, const bat *itwo)
     376             : {
     377           8 :         BAT *bres = NULL, *bone = NULL, *btwo = NULL;
     378             :         char *msg = NULL;
     379             : 
     380             :         /* assert calling sanity */
     381           8 :         assert(ires != NULL && ione != NULL && itwo != NULL);
     382             : 
     383             :         /* bat-id -> BAT-descriptor */
     384           8 :         if ((bone = BATdescriptor(*ione)) == NULL)
     385           0 :                 throw(MAL, "batudf.fuse",  SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     386             : 
     387             :         /* bat-id -> BAT-descriptor */
     388           8 :         if ((btwo = BATdescriptor(*itwo)) == NULL) {
     389           0 :                 BBPunfix(bone->batCacheid);
     390           0 :                 throw(MAL, "batudf.fuse",  SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
     391             :         }
     392             : 
     393             :         /* do the work */
     394           8 :         msg = UDFBATfuse_ ( &bres, bone, btwo );
     395             : 
     396             :         /* release input BAT-descriptors */
     397           8 :         BBPunfix(bone->batCacheid);
     398           8 :         BBPunfix(btwo->batCacheid);
     399             : 
     400           8 :         if (msg == MAL_SUCCEED) {
     401             :                 /* register result BAT in buffer pool */
     402           8 :                 BBPkeepref((*ires = bres->batCacheid));
     403             :         }
     404             : 
     405             :         return msg;
     406             : }
     407             : 
     408             : #include "mel.h"
/*
 * Function table of this module: each command(...) entry names a MAL module
 * ("udf" or "batudf"), a function name, the C function implementing it, a
 * description and the argument list.
 * The scalar "udf.fuse" entries refer to the type-specific UDFfuse_<in>_<out>
 * functions, while all "batudf.fuse" entries refer to the one UDFBATfuse
 * wrapper (UDFBATfuse_ picks the type-specific code from the tail type).
 * The lng/hge entries exist only when HAVE_HGE is defined.
 * NOTE(review): the meaning of the `false` argument and of the two numbers
 * in args(...) is defined in mel.h, which is not visible here -- presumably
 * an "unsafe" flag and the return/total argument counts; confirm.
 * NOTE(review): the final entry with .imp == NULL presumably terminates the
 * table for mal_module -- confirm.
 */
static mel_func udf_init_funcs[] = {
 command("udf", "reverse", UDFreverse, false, "Reverse a string", args(1,2, arg("",str),arg("ra1",str))),
 command("batudf", "reverse", UDFBATreverse, false, "Reverse a BAT of strings", args(1,2, batarg("",str),batarg("b",str))),
 command("udf", "fuse", UDFfuse_bte_sht, false, "fuse two (1-byte) bte values into one (2-byte) sht value", args(1,3, arg("",sht),arg("one",bte),arg("two",bte))),
 command("udf", "fuse", UDFfuse_sht_int, false, "fuse two (2-byte) sht values into one (4-byte) int value", args(1,3, arg("",int),arg("one",sht),arg("two",sht))),
 command("udf", "fuse", UDFfuse_int_lng, false, "fuse two (4-byte) int values into one (8-byte) lng value", args(1,3, arg("",lng),arg("one",int),arg("two",int))),
 command("batudf", "fuse", UDFBATfuse, false, "fuse two (1-byte) bte values into one (2-byte) sht value", args(1,3, batarg("",sht),batarg("one",bte),batarg("two",bte))),
 command("batudf", "fuse", UDFBATfuse, false, "fuse two (2-byte) sht values into one (4-byte) int value", args(1,3, batarg("",int),batarg("one",sht),batarg("two",sht))),
 command("batudf", "fuse", UDFBATfuse, false, "fuse two (4-byte) int values into one (8-byte) lng value", args(1,3, batarg("",lng),batarg("one",int),batarg("two",int))),
#ifdef HAVE_HGE
 command("udf", "fuse", UDFfuse_lng_hge, false, "fuse two (8-byte) lng values into one (16-byte) hge value", args(1,3, arg("",hge),arg("one",lng),arg("two",lng))),
 command("batudf", "fuse", UDFBATfuse, false, "fuse two (8-byte) lng values into one (16-byte) hge value", args(1,3, batarg("",hge),batarg("one",lng),batarg("two",lng))),
#endif
 { .imp=NULL }
};
     424             : #include "mal_import.h"
#ifdef _MSC_VER
/* NOTE(review): presumably `read` is defined as a macro on this platform and
 * is undefined here so that it appears literally in the pragma below --
 * confirm */
#undef read
#pragma section(".CRT$XCU",read)
#endif
/* Start-up hook: passes the table udf_init_funcs to mal_module under the
 * module name "udf".
 * NOTE(review): LIB_STARTUP_FUNC comes from a project header; presumably it
 * arranges for this body to run when the library is loaded -- confirm. */
LIB_STARTUP_FUNC(init_udf_mal)
{ mal_module("udf", NULL, udf_init_funcs); }

Generated by: LCOV version 1.14