LCOV - code coverage report
Current view: top level - monetdb5/modules/mal - pcre.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 673 966 69.7 %
Date: 2021-01-13 20:07:21 Functions: 56 70 80.0 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  * N. Nes
      11             :  * PCRE library interface
      12             :  * The  PCRE library is a set of functions that implement regular
      13             :  * expression pattern matching using the same syntax  and  semantics  as  Perl,
      14             :  * with  just  a  few  differences.  The  current  implementation of PCRE
      15             :  * (release 4.x) corresponds approximately with Perl 5.8, including  support
      16             :  * for  UTF-8  encoded  strings.   However,  this support has to be
      17             :  * explicitly enabled; it is not the default.
      18             :  *
      19             :  * ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
      20             :  */
      21             : #include "monetdb_config.h"
      22             : #include <string.h>
      23             : 
      24             : #include "mal.h"
      25             : #include "mal_client.h"
      26             : #include "mal_interpreter.h"
      27             : #include "mal_exception.h"
      28             : 
      29             : #include <wchar.h>
      30             : #include <wctype.h>
      31             : 
      32             : #ifdef HAVE_LIBPCRE
      33             : #include <pcre.h>
      34             : #ifndef PCRE_STUDY_JIT_COMPILE
      35             : /* old library version on e.g. EPEL 6 */
      36             : #define pcre_free_study(x)              pcre_free(x)
      37             : #define PCRE_STUDY_JIT_COMPILE  0
      38             : #endif
      39             : #define JIT_COMPILE_MIN 1024    /* when to try JIT compilation of patterns */
      40             : 
      41             : #else
      42             : 
      43             : #include <regex.h>
      44             : 
      45             : typedef regex_t pcre;
      46             : #endif
      47             : 
      48             : /* current implementation assumes simple %keyword% [keyw%]* */
      49             : struct RE {
      50             :         char *k;
      51             :         uint32_t *w;
      52             :         bool search:1,
      53             :                 atend:1;
      54             :         size_t len;
      55             :         struct RE *n;
      56             : };
      57             : 
      58             : /* We cannot use strcasecmp and strncasecmp since they work byte for
      59             :  * byte and don't deal with multibyte encodings (such as UTF-8).
      60             :  *
      61             :  * We implement our own conversion from UTF-8 encoding to Unicode code
      62             :  * points which we store in uint32_t.  The reason for this is,
      63             :  * functions like mbsrtowcs are locale-dependent (so we need a UTF-8
      64             :  * locale to use them), and on Windows, wchar_t is only 2 bytes and
      65             :  * therefore cannot hold all Unicode code points.  We do use functions
      66             :  * such as towlower to convert a Unicode code point to its lower-case
      67             :  * equivalent, but again on Windows, if the code point doesn't fit in
      68             :  * 2 bytes, we skip this conversion and compare the unconverted code
      69             :  * points.
      70             :  *
      71             :  * Note, towlower is also locale-dependent, but we don't need a UTF-8
      72             :  * locale in order to use it. */
      73             : 
      74             : /* helper function to convert a UTF-8 multibyte character to a wide
      75             :  * character */
      76             : static size_t
      77      256337 : utfc8touc(uint32_t *restrict dest, const char *restrict src)
      78             : {
      79      256337 :         if ((src[0] & 0x80) == 0) {
      80      256280 :                 *dest = src[0];
      81      256280 :                 return src[0] != 0;
      82          57 :         } else if ((src[0] & 0xE0) == 0xC0
      83          40 :                    && (src[1] & 0xC0) == 0x80
      84          40 :                    && (src[0] & 0x1E) != 0) {
      85          40 :                 *dest = (src[0] & 0x1F) << 6
      86          40 :                         | (src[1] & 0x3F);
      87          40 :                 return 2;
      88          17 :         } else if ((src[0] & 0xF0) == 0xE0
      89          17 :                    && (src[1] & 0xC0) == 0x80
      90          17 :                    && (src[2] & 0xC0) == 0x80
      91          17 :                    && ((src[0] & 0x0F) != 0
      92           0 :                        || (src[1] & 0x20) != 0)) {
      93          17 :                 *dest = (src[0] & 0x0F) << 12
      94          17 :                         | (src[1] & 0x3F) << 6
      95          17 :                         | (src[2] & 0x3F);
      96          17 :                 return 3;
      97           0 :         } else if ((src[0] & 0xF8) == 0xF0
      98           0 :                    && (src[1] & 0xC0) == 0x80
      99           0 :                    && (src[2] & 0xC0) == 0x80
     100           0 :                    && (src[3] & 0xC0) == 0x80) {
     101           0 :                 uint32_t c = (src[0] & 0x07) << 18
     102           0 :                         | (src[1] & 0x3F) << 12
     103           0 :                         | (src[2] & 0x3F) << 6
     104           0 :                         | (src[3] & 0x3F);
     105           0 :                 if (c < 0x10000
     106           0 :                     || c > 0x10FFFF
     107             :                     || (c & 0x1FF800) == 0x00D800)
     108             :                         return (size_t) -1;
     109           0 :                 *dest = c;
     110           0 :                 return 4;
     111             :         }
     112             :         return (size_t) -1;
     113             : }
     114             : 
     115             : /* helper function to convert a UTF-8 string to a wide character
     116             :  * string, the wide character string is allocated */
     117             : static uint32_t *
     118         202 : utf8stoucs(const char *src)
     119             : {
     120             :         uint32_t *dest;
     121             :         size_t i = 0;
     122             :         size_t j = 0;
     123             : 
     124             :         /* count how many uint32_t's we need, while also checking for
     125             :          * correctness of the input */
     126        1513 :         while (src[j]) {
     127        1311 :                 i++;
     128        1311 :                 if ((src[j+0] & 0x80) == 0) {
     129        1262 :                         j += 1;
     130          49 :                 } else if ((src[j+0] & 0xE0) == 0xC0
     131          24 :                            && (src[j+1] & 0xC0) == 0x80
     132          24 :                            && (src[j+0] & 0x1E) != 0) {
     133          24 :                         j += 2;
     134          25 :                 } else if ((src[j+0] & 0xF0) == 0xE0
     135          25 :                            && (src[j+1] & 0xC0) == 0x80
     136          25 :                            && (src[j+2] & 0xC0) == 0x80
     137          25 :                            && ((src[j+0] & 0x0F) != 0
     138           0 :                                || (src[j+1] & 0x20) != 0)) {
     139          25 :                         j += 3;
     140           0 :                 } else if ((src[j+0] & 0xF8) == 0xF0
     141           0 :                            && (src[j+1] & 0xC0) == 0x80
     142           0 :                            && (src[j+2] & 0xC0) == 0x80
     143           0 :                            && (src[j+3] & 0xC0) == 0x80) {
     144           0 :                         uint32_t c = (src[j+0] & 0x07) << 18
     145           0 :                                 | (src[j+1] & 0x3F) << 12
     146           0 :                                 | (src[j+2] & 0x3F) << 6
     147           0 :                                 | (src[j+3] & 0x3F);
     148           0 :                         if (c < 0x10000
     149           0 :                             || c > 0x10FFFF
     150             :                             || (c & 0x1FF800) == 0x00D800)
     151             :                                 return NULL;
     152           0 :                         j += 4;
     153             :                 } else {
     154             :                         return NULL;
     155             :                 }
     156             :         }
     157         202 :         dest = GDKmalloc((i + 1) * sizeof(uint32_t));
     158         202 :         if (dest == NULL)
     159             :                 return NULL;
     160             :         /* go through the source string again, this time we can skip
     161             :          * the correctness tests */
     162             :         i = j = 0;
     163        1513 :         while (src[j]) {
     164        1311 :                 if ((src[j+0] & 0x80) == 0) {
     165        1262 :                         dest[i++] = src[j+0];
     166        1262 :                         j += 1;
     167          49 :                 } else if ((src[j+0] & 0xE0) == 0xC0) {
     168          24 :                         dest[i++] = (src[j+0] & 0x1F) << 6
     169          24 :                                 | (src[j+1] & 0x3F);
     170          24 :                         j += 2;
     171          25 :                 } else if ((src[j+0] & 0xF0) == 0xE0) {
     172          25 :                         dest[i++] = (src[j+0] & 0x0F) << 12
     173          25 :                                 | (src[j+1] & 0x3F) << 6
     174          25 :                                 | (src[j+2] & 0x3F);
     175          25 :                         j += 3;
     176           0 :                 } else if ((src[j+0] & 0xF8) == 0xF0) {
     177           0 :                         dest[i++] = (src[j+0] & 0x07) << 18
     178           0 :                                 | (src[j+1] & 0x3F) << 12
     179           0 :                                 | (src[j+2] & 0x3F) << 6
     180           0 :                                 | (src[j+3] & 0x3F);
     181           0 :                         j += 4;
     182             :                 }
     183             :         }
     184         202 :         dest[i] = 0;
     185         202 :         return dest;
     186             : }
     187             : 
     188             : static size_t
     189             : myucslen(const uint32_t *ucs)
     190             : {
     191             :         size_t i = 0;
     192             : 
     193      124082 :         while (ucs[i])
     194      116267 :                 i++;
     195             :         return i;
     196             : }
     197             : 
     198             : static inline bool
     199          51 : mywstrncaseeq(const char *restrict s1, const uint32_t *restrict s2, size_t n2, bool atend)
     200             : {
     201             :         uint32_t c1;
     202             : 
     203         138 :         while (n2 > 0) {
     204         112 :                 size_t nn1 = utfc8touc(&c1, s1);
     205         112 :                 if (nn1 == 0 || nn1 == (size_t) -1)
     206           0 :                         return (*s2 == 0);
     207         112 :                 if (*s2 == 0)
     208             :                         return false;
     209         112 :                 if (nn1 == (size_t) -1 || nn1 == (size_t) -2)
     210             :                         return true;     /* actually an error that shouldn't happen */
     211             : #if SIZEOF_WCHAR_T == 2
     212             :                 if (c1 > 0xFFFF || *s2 > 0xFFFF) {
     213             :                         if (c1 != *s2)
     214             :                                 return false;
     215             :                 } else
     216             : #endif
     217         112 :                 if (towlower((wint_t) c1) != towlower((wint_t) *s2))
     218             :                         return false;
     219          87 :                 s1 += nn1;
     220          87 :                 n2--;
     221          87 :                 s2++;
     222             :         }
     223          26 :         return !atend || *s1 == 0;
     224             : }
     225             : 
     226             : static inline int
     227           4 : mystrcasecmp(const char *s1, const char *s2)
     228             : {
     229             :         uint32_t c1, c2;
     230             : 
     231           0 :         for (;;) {
     232           4 :                 size_t nn1 = utfc8touc(&c1, s1);
     233           4 :                 size_t nn2 = utfc8touc(&c2, s2);
     234           4 :                 if (nn1 == 0 || nn1 == (size_t) -1)
     235           1 :                         return -(nn2 != 0 && nn2 != (size_t) -1);
     236           3 :                 if (nn2 == 0 || nn2 == (size_t) -1)
     237             :                         return 1;
     238           2 :                 if (nn1 == (size_t) -1 || nn1 == (size_t) -2 ||
     239           2 :                         nn2 == (size_t) -1 || nn2 == (size_t) -2)
     240             :                         return 0;        /* actually an error that shouldn't happen */
     241             : #if SIZEOF_WCHAR_T == 2
     242             :                 if (c1 > 0xFFFF || c2 > 0xFFFF) {
     243             :                         if (c1 != c2)
     244             :                                 return c1 - c2;
     245             :                 } else
     246             : #endif
     247           2 :                 if (towlower((wint_t) c1) != towlower((wint_t) c2))
     248           2 :                         return towlower((wint_t) c1) - towlower((wint_t) c2);
     249           0 :                 s1 += nn1;
     250           0 :                 s2 += nn2;
     251             :         }
     252             : }
     253             : 
     254             : static inline int
     255         137 : mywstrcasecmp(const char *restrict s1, const uint32_t *restrict s2)
     256             : {
     257             :         uint32_t c1;
     258             : 
     259         529 :         for (;;) {
     260         666 :                 size_t nn1 = utfc8touc(&c1, s1);
     261         666 :                 if (nn1 == 0 || nn1 == (size_t) -1)
     262          97 :                         return -(*s2 != 0);
     263         569 :                 if (*s2 == 0)
     264             :                         return 1;
     265         567 :                 if (nn1 == (size_t) -1 || nn1 == (size_t) -2)
     266             :                         return 0;        /* actually an error that shouldn't happen */
     267             : #if SIZEOF_WCHAR_T == 2
     268             :                 if (c1 > 0xFFFF || *s2 > 0xFFFF) {
     269             :                         if (c1 != *s2)
     270             :                                 return c1 - *s2;
     271             :                 } else
     272             : #endif
     273         567 :                 if (towlower((wint_t) c1) != towlower((wint_t) *s2))
     274          38 :                         return towlower((wint_t) c1) - towlower((wint_t) *s2);
     275         529 :                 s1 += nn1;
     276         529 :                 s2++;
     277             :         }
     278             : }
     279             : 
     280             : static inline const char *
     281        7815 : mywstrcasestr(const char *restrict haystack, const uint32_t *restrict wneedle, bool atend)
     282             : {
     283             :         size_t nlen = myucslen(wneedle);
     284             : 
     285        7815 :         if (nlen == 0)
     286           0 :                 return atend ? haystack + strlen(haystack) : haystack;
     287             : 
     288             :         size_t hlen = strlen(haystack);
     289             : 
     290      231090 :         while (*haystack) {
     291             :                 size_t i;
     292             :                 size_t h;
     293             :                 size_t step = 0;
     294      261236 :                 for (i = h = 0; i < nlen; i++) {
     295             :                         uint32_t c;
     296      260538 :                         size_t j = utfc8touc(&c, haystack + h);
     297      260549 :                         if (j == 0 || j == (size_t) -1)
     298           0 :                                 return NULL;
     299      260549 :                         if (i == 0) {
     300             :                                 step = j;
     301             :                         }
     302             : #if SIZEOF_WCHAR_T == 2
     303             :                         if (c > 0xFFFF || wneedle[i] > 0xFFFF) {
     304             :                                 if (c != wneedle[i])
     305             :                                         break;
     306             :                         } else
     307             : #endif
     308      260549 :                         if (towlower((wint_t) c) != towlower((wint_t) wneedle[i]))
     309             :                                 break;
     310       36778 :                         h += j;
     311             :                 }
     312      224469 :                 if (i == nlen && (!atend || haystack[h] == 0))
     313        1194 :                         return haystack;
     314      223275 :                 haystack += step;
     315             :                 hlen -= step;
     316             :         }
     317             :         return NULL;
     318             : }
     319             : 
     320             : /* returns true if the pattern does not contain unescaped `_' (single
     321             :  * character match) and ends with unescaped `%' (any sequence
     322             :  * match) */
     323             : static inline bool
     324             : re_simple(const char *pat, unsigned char esc)
     325             : {
     326             :         bool escaped = false;
     327             : 
     328             :         if (pat == 0)
     329             :                 return false;
     330             :         if (*pat == '%') {
     331             :                 pat++;
     332             :         }
     333             :         while (*pat) {
     334             :                 if (escaped) {
     335             :                         escaped = false;
     336             :                 } else if ((unsigned char) *pat == esc) {
     337             :                         escaped = true;
     338             :                 } else if (*pat == '_') {
     339             :                         return false;
     340             :                 }
     341             :                 pat++;
     342             :         }
     343             :         return true;
     344             : }
     345             : 
     346             : static inline bool
     347             : re_is_pattern_properly_escaped(const char *pat, unsigned char esc)
     348             : {
     349             :         bool escaped = false;
     350             : 
     351             :         if (pat == 0)
     352             :                 return true;
     353             :         while (*pat) {
     354             :                 if (escaped) {
     355             :                         escaped = false;
     356             :                 } else if ((unsigned char) *pat == esc) {
     357             :                         escaped = true;
     358             :                 }
     359             :                 pat++;
     360             :         }
     361             :         return escaped ? false : true;
     362             : }
     363             : 
     364             : static inline bool
     365        2120 : is_strcmpable(const char *pat, const char *esc)
     366             : {
     367        2120 :         if (pat[strcspn(pat, "%_")])
     368             :                 return false;
     369         510 :         return strlen(esc) == 0 || strNil(esc) || strstr(pat, esc) == NULL;
     370             : }
     371             : 
     372             : static inline bool
     373        7891 : re_match_ignore(const char *restrict s, const struct RE *restrict pattern)
     374             : {
     375             :         const struct RE *r;
     376             : 
     377        9111 :         for (r = pattern; r; r = r->n) {
     378        7916 :                 if (*r->w == 0 && (r->search || *s == 0))
     379             :                         return true;
     380       15784 :                 if (!*s ||
     381             :                         (r->search
     382        7867 :                          ? (s = mywstrcasestr(s, r->w, r->atend)) == NULL
     383          51 :                          : !mywstrncaseeq(s, r->w, r->len, r->atend)))
     384        6697 :                         return false;
     385        1220 :                 s += r->len;
     386             :         }
     387             :         return true;
     388             : }
     389             : 
     390             : static inline bool
     391       59106 : re_match_no_ignore(const char *restrict s, const struct RE *restrict pattern)
     392             : {
     393             :         const struct RE *r;
     394             :         size_t l;
     395             : 
     396       64359 :         for (r = pattern; r; r = r->n) {
     397       61913 :                 if (*r->k == 0 && (r->search || *s == 0))
     398             :                         return true;
     399       62040 :                 if (!*s ||
     400             :                         (r->search
     401       61597 :                          ? (r->atend
     402       41517 :                                 ? (l = strlen(s)) < r->len || strcmp(s + l - r->len, r->k) != 0
     403       41164 :                                 : (s = strstr(s, r->k)) == NULL)
     404             :                          : (r->atend
     405       20080 :                                 ? strcmp(s, r->k) != 0
     406       20007 :                                 : strncmp(s, r->k, r->len) != 0)))
     407             :                         return false;
     408        5253 :                 s += r->len;
     409             :         }
     410             :         return true;
     411             : }
     412             : 
     413             : static void
     414        1432 : re_destroy(struct RE *p)
     415             : {
     416        1432 :         if (p) {
     417        1432 :                 GDKfree(p->k);
     418        1434 :                 GDKfree(p->w);
     419             :                 do {
     420        1521 :                         struct RE *n = p->n;
     421             : 
     422        1521 :                         GDKfree(p);
     423             :                         p = n;
     424        1521 :                 } while (p);
     425             :         }
     426        1434 : }
     427             : 
     428             : /* Create a linked list of RE structures.  Depending on the caseignore
     429             :  * flag, the w (if true) or the k (if false) field is used.  These
     430             :  * fields in the first structure are allocated, whereas in all
     431             :  * subsequent structures the fields point into the allocated buffer of
     432             :  * the first. */
     433             : static struct RE *
     434        1434 : re_create(const char *pat, bool caseignore, uint32_t esc)
     435             : {
     436        1434 :         struct RE *r = GDKmalloc(sizeof(struct RE)), *n = r;
     437             :         bool escaped = false;
     438             : 
     439        1434 :         if (r == NULL)
     440             :                 return NULL;
     441        1434 :         *r = (struct RE) {.atend = true};
     442             : 
     443        2380 :         while (esc != '%' && *pat == '%') {
     444         946 :                 pat++; /* skip % */
     445         946 :                 r->search = true;
     446             :         }
     447        1434 :         if (caseignore) {
     448             :                 uint32_t *wp;
     449             :                 uint32_t *wq;
     450          79 :                 wp = utf8stoucs(pat);
     451          79 :                 if (wp == NULL) {
     452           0 :                         GDKfree(r);
     453           0 :                         return NULL;
     454             :                 }
     455          79 :                 r->w = wp;
     456             :                 wq = wp;
     457         670 :                 while (*wp) {
     458         591 :                         if (escaped) {
     459           0 :                                 *wq++ = *wp;
     460           0 :                                 n->len++;
     461             :                                 escaped = false;
     462         591 :                         } else if (*wp == esc) {
     463             :                                 escaped = true;
     464         591 :                         } else if (*wp == '%') {
     465          83 :                                 n->atend = false;
     466          83 :                                 while (wp[1] == '%')
     467           0 :                                         wp++;
     468          83 :                                 if (wp[1]) {
     469          20 :                                         n = n->n = GDKmalloc(sizeof(struct RE));
     470          20 :                                         if (n == NULL)
     471           0 :                                                 goto bailout;
     472          20 :                                         *n = (struct RE) {.search = true, .atend = true, .w = wp + 1};
     473             :                                 }
     474          83 :                                 *wq++ = 0;
     475             :                         } else {
     476         508 :                                 *wq++ = *wp;
     477         508 :                                 n->len++;
     478             :                         }
     479         591 :                         wp++;
     480             :                 }
     481          79 :                 *wq = 0;
     482             :         } else {
     483             :                 char *p, *q;
     484        1355 :                 if ((p = GDKstrdup(pat)) == NULL) {
     485           0 :                         GDKfree(r);
     486           0 :                         return NULL;
     487             :                 }
     488        1355 :                 r->k = p;
     489             :                 q = p;
     490        9883 :                 while (*p) {
     491        8528 :                         if (escaped) {
     492          87 :                                 *q++ = *p;
     493          87 :                                 n->len++;
     494             :                                 escaped = false;
     495        8441 :                         } else if ((unsigned char) *p == esc) {
     496             :                                 escaped = true;
     497        8354 :                         } else if (*p == '%') {
     498        1258 :                                 n->atend = false;
     499        1258 :                                 while (p[1] == '%')
     500           0 :                                         p++;
     501        1258 :                                 if (p[1]) {
     502          67 :                                         n = n->n = GDKmalloc(sizeof(struct RE));
     503          67 :                                         if (n == NULL)
     504           0 :                                                 goto bailout;
     505          67 :                                         *n = (struct RE) {.search = true, .atend = true, .k = p + 1};
     506             :                                 }
     507        1258 :                                 *q++ = 0;
     508             :                         } else {
     509        7096 :                                 *q++ = *p;
     510        7096 :                                 n->len++;
     511             :                         }
     512        8528 :                         p++;
     513             :                 }
     514        1355 :                 *q = 0;
     515             :         }
     516             :         return r;
     517           0 :   bailout:
     518           0 :         re_destroy(r);
     519           0 :         return NULL;
     520             : }
     521             : 
     522             : #ifdef HAVE_LIBPCRE
     523             : static str
     524          24 : pcre_compile_wrap(pcre **res, const char *pattern, bit insensitive)
     525             : {
     526             :         pcre *r;
     527          24 :         const char *err_p = NULL;
     528          24 :         int errpos = 0;
     529             :         int options = PCRE_UTF8 | PCRE_MULTILINE;
     530          24 :         if (insensitive)
     531             :                 options |= PCRE_CASELESS;
     532             : 
     533          24 :         if ((r = pcre_compile(pattern, options, &err_p, &errpos, NULL)) == NULL) {
     534           0 :                 throw(MAL, "pcre.compile", OPERATION_FAILED
     535             :                           " with\n'%s'\nat %d in\n'%s'.\n",
     536             :                           err_p, errpos, pattern);
     537             :         }
     538          24 :         *res = r;
     539          24 :         return MAL_SUCCEED;
     540             : }
     541             : #endif
     542             : 
     543             : /* maximum number of back references and quoted \ or $ in replacement string */
     544             : #define MAX_NR_REFS             20
     545             : 
     546             : struct backref {
     547             :         int idx;
     548             :         int start;
     549             :         int end;
     550             : };
     551             : 
     552             : #ifdef HAVE_LIBPCRE
     553             : /* fill in parameter backrefs (length maxrefs) with information about
     554             :  * back references in the replacement string; a back reference is a
     555             :  * dollar or backslash followed by a number */
     556             : static int
     557          50 : parse_replacement(const char *replacement, int len_replacement,
     558             :                                   struct backref *backrefs, int maxrefs)
     559             : {
     560             :         int nbackrefs = 0;
     561             : 
     562          70 :         for (int i = 0; i < len_replacement && nbackrefs < maxrefs; i++) {
     563          20 :                 if (replacement[i] == '$' || replacement[i] == '\\') {
     564             :                         char *endptr;
     565           5 :                         backrefs[nbackrefs].idx = strtol(replacement + i + 1, &endptr, 10);
     566           5 :                         if (endptr > replacement + i + 1) {
     567           5 :                                 int k = (int) (endptr - (replacement + i + 1));
     568           5 :                                 backrefs[nbackrefs].start = i;
     569           5 :                                 backrefs[nbackrefs].end = i + k + 1;
     570           5 :                                 nbackrefs++;
     571           0 :                         } else if (replacement[i] == replacement[i + 1]) {
     572             :                                 /* doubled $ or \, we must copy just one to the output */
     573           0 :                                 backrefs[nbackrefs].idx = INT_MAX; /* impossible value > 0 */
     574           0 :                                 backrefs[nbackrefs].start = i;
     575           0 :                                 backrefs[nbackrefs].end = i + 1;
     576             :                                 i++;                    /* don't look at second $ or \ again */
     577           0 :                                 nbackrefs++;
     578             :                         }
     579             :                         /* else: $ or \ followed by something we don't recognize,
     580             :                          * so just leave it */
     581             :                 }
     582             :         }
     583          50 :         return nbackrefs;
     584             : }
     585             : 
     586             : static char *
     587       33265 : single_replace(pcre *pcre_code, pcre_extra *extra,
     588             :                            const char *origin_str, int len_origin_str,
     589             :                            int exec_options, int *ovector, int ovecsize,
     590             :                            const char *replacement, int len_replacement,
     591             :                            struct backref *backrefs, int nbackrefs,
     592             :                            bool global, char *result, int *max_result)
     593             : {
     594             :         int offset = 0;
     595             :         int len_result = 0;
     596             :         int addlen;
     597             :         char *tmp;
     598             : 
     599             :         do {
     600       72203 :                 int j = pcre_exec(pcre_code, extra, origin_str, len_origin_str, offset,
     601             :                                           exec_options, ovector, ovecsize);
     602       72228 :                 if (j <= 0)
     603             :                         break;
     604       40087 :                 addlen = ovector[0] - offset + (nbackrefs == 0 ? len_replacement : 0);
     605       40087 :                 if (len_result + addlen >= *max_result) {
     606        2868 :                         tmp = GDKrealloc(result, len_result + addlen + 1);
     607        2868 :                         if (tmp == NULL) {
     608           0 :                                 GDKfree(result);
     609           0 :                                 return NULL;
     610             :                         }
     611             :                         result = tmp;
     612        2868 :                         *max_result = len_result + addlen + 1;
     613             :                 }
     614       40087 :                 if (ovector[0] > offset) {
     615       38480 :                         strncpy(result + len_result, origin_str + offset,
     616       38480 :                                         ovector[0] - offset);
     617       38480 :                         len_result += ovector[0] - offset;
     618             :                 }
     619       40087 :                 if (nbackrefs == 0) {
     620       38938 :                         strncpy(result + len_result, replacement, len_replacement);
     621       38938 :                         len_result += len_replacement;
     622             :                 } else {
     623             :                         int prevend = 0;
     624        2298 :                         for (int i = 0; i < nbackrefs; i++) {
     625             :                                 int off, len;
     626        1149 :                                 if (backrefs[i].idx >= ovecsize / 3) {
     627             :                                         /* out of bounds, replace with empty string */
     628             :                                         off = 0;
     629             :                                         len = 0;
     630             :                                 } else {
     631        1149 :                                         off = ovector[backrefs[i].idx * 2];
     632        1149 :                                         len = ovector[backrefs[i].idx * 2 + 1] - off;
     633             :                                 }
     634        1149 :                                 addlen = backrefs[i].start - prevend + len;
     635        1149 :                                 if (len_result + addlen >= *max_result) {
     636          12 :                                         tmp = GDKrealloc(result, len_result + addlen + 1);
     637          12 :                                         if (tmp == NULL) {
     638           0 :                                                 GDKfree(result);
     639           0 :                                                 return NULL;
     640             :                                         }
     641             :                                         result = tmp;
     642          12 :                                         *max_result = len_result + addlen + 1;
     643             :                                 }
     644        1149 :                                 if (backrefs[i].start > prevend) {
     645           0 :                                         strncpy(result + len_result, replacement + prevend,
     646           0 :                                                         backrefs[i].start - prevend);
     647           0 :                                         len_result += backrefs[i].start - prevend;
     648             :                                 }
     649        1149 :                                 if (len > 0) {
     650        1149 :                                         strncpy(result + len_result, origin_str + off, len);
     651        1149 :                                         len_result += len;
     652             :                                 }
     653        1149 :                                 prevend = backrefs[i].end;
     654             :                         }
     655             :                         /* copy rest of replacement string (after last backref) */
     656        1149 :                         addlen = len_replacement - prevend;
     657        1149 :                         if (addlen > 0) {
     658           0 :                                 if (len_result + addlen >= *max_result) {
     659           0 :                                         tmp = GDKrealloc(result, len_result + addlen + 1);
     660           0 :                                         if (tmp == NULL) {
     661           0 :                                                 GDKfree(result);
     662           0 :                                                 return NULL;
     663             :                                         }
     664             :                                         result = tmp;
     665           0 :                                         *max_result = len_result + addlen + 1;
     666             :                                 }
     667           0 :                                 strncpy(result + len_result, replacement + prevend, addlen);
     668             :                                 len_result += addlen;
     669             :                         }
     670             :                 }
     671       40087 :                 offset = ovector[1];
     672       40087 :         } while (offset < len_origin_str && global);
     673       33290 :         if (offset < len_origin_str) {
     674       31982 :                 addlen = len_origin_str - offset;
     675       31982 :                 if (len_result + addlen >= *max_result) {
     676         299 :                         tmp = GDKrealloc(result, len_result + addlen + 1);
     677         299 :                         if (tmp == NULL) {
     678           0 :                                 GDKfree(result);
     679           0 :                                 return NULL;
     680             :                         }
     681             :                         result = tmp;
     682         299 :                         *max_result = len_result + addlen + 1;
     683             :                 }
     684       31982 :                 strncpy(result + len_result, origin_str + offset, addlen);
     685             :                 len_result += addlen;
     686             :         }
     687             :         /* null terminate string */
     688       33290 :         result[len_result] = '\0';
     689       33290 :         return result;
     690             : }
     691             : #endif
     692             : 
     693             : static str
     694           0 : pcre_replace(str *res, const char *origin_str, const char *pattern,
     695             :                          const char *replacement, const char *flags, bool global)
     696             : {
     697             : #ifdef HAVE_LIBPCRE
     698           0 :         const char *err_p = NULL;
     699             :         pcre *pcre_code = NULL;
     700             :         pcre_extra *extra;
     701             :         char *tmpres;
     702             :         int max_result;
     703           0 :         int i, errpos = 0;
     704             :         int compile_options = PCRE_UTF8, exec_options = PCRE_NOTEMPTY;
     705             :         int *ovector, ovecsize;
     706           0 :         int len_origin_str = (int) strlen(origin_str);
     707           0 :         int len_replacement = (int) strlen(replacement);
     708             :         struct backref backrefs[MAX_NR_REFS];
     709             :         int nbackrefs = 0;
     710             : 
     711           0 :         while (*flags) {
     712           0 :                 switch (*flags) {
     713             :                 case 'e':
     714             :                         exec_options &= ~PCRE_NOTEMPTY;
     715             :                         break;
     716           0 :                 case 'i':
     717           0 :                         compile_options |= PCRE_CASELESS;
     718           0 :                         break;
     719           0 :                 case 'm':
     720           0 :                         compile_options |= PCRE_MULTILINE;
     721           0 :                         break;
     722           0 :                 case 's':
     723           0 :                         compile_options |= PCRE_DOTALL;
     724           0 :                         break;
     725           0 :                 case 'x':
     726           0 :                         compile_options |= PCRE_EXTENDED;
     727           0 :                         break;
     728           0 :                 default:
     729           0 :                         throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     730             :                                   ILLEGAL_ARGUMENT ": unsupported flag character '%c'\n",
     731             :                                   *flags);
     732             :                 }
     733           0 :                 flags++;
     734             :         }
     735             : 
     736           0 :         if ((pcre_code = pcre_compile(pattern, compile_options, &err_p, &errpos, NULL)) == NULL) {
     737           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     738             :                           OPERATION_FAILED ": pcre compile of pattern (%s) failed at %d with\n'%s'.\n",
     739             :                           pattern, errpos, err_p);
     740             :         }
     741             : 
     742             :         /* Since the compiled pattern is going to be used several times, it is
     743             :          * worth spending more time analyzing it in order to speed up the time
     744             :          * taken for matching.
     745             :          */
     746           0 :         extra = pcre_study(pcre_code, 0, &err_p);
     747           0 :         if (err_p != NULL) {
     748           0 :                 pcre_free(pcre_code);
     749           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     750             :                           OPERATION_FAILED ": pcre study of pattern (%s) failed with '%s'.\n",
     751             :                           pattern, err_p);
     752             :         }
     753           0 :         pcre_fullinfo(pcre_code, extra, PCRE_INFO_CAPTURECOUNT, &i);
     754           0 :         ovecsize = (i + 1) * 3;
     755           0 :         if ((ovector = (int *) GDKmalloc(sizeof(int) * ovecsize)) == NULL) {
     756           0 :                 pcre_free_study(extra);
     757           0 :                 pcre_free(pcre_code);
     758           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     759             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     760             :         }
     761             : 
     762             :         /* identify back references in the replacement string */
     763           0 :         nbackrefs = parse_replacement(replacement, len_replacement,
     764             :                                                                   backrefs, MAX_NR_REFS);
     765             : 
     766           0 :         max_result = len_origin_str + 1;
     767           0 :         tmpres = GDKmalloc(max_result);
     768           0 :         if (tmpres == NULL) {
     769           0 :                 GDKfree(ovector);
     770           0 :                 pcre_free_study(extra);
     771           0 :                 pcre_free(pcre_code);
     772           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     773             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     774             :         }
     775             : 
     776           0 :         tmpres = single_replace(pcre_code, extra, origin_str, len_origin_str,
     777             :                                                         exec_options, ovector, ovecsize, replacement,
     778             :                                                         len_replacement, backrefs, nbackrefs, global,
     779             :                                                         tmpres, &max_result);
     780           0 :         GDKfree(ovector);
     781           0 :         pcre_free_study(extra);
     782           0 :         pcre_free(pcre_code);
     783           0 :         if (tmpres == NULL)
     784           0 :                 throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     785             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     786             : 
     787           0 :         *res = tmpres;
     788           0 :         return MAL_SUCCEED;
     789             : #else
     790             :         (void) res;
     791             :         (void) origin_str;
     792             :         (void) pattern;
     793             :         (void) replacement;
     794             :         (void) flags;
     795             :         (void) global;
     796             :         throw(MAL, global ? "pcre.replace" : "pcre.replace_first",
     797             :                   "Database was compiled without PCRE support.");
     798             : #endif
     799             : }
     800             : 
     801             : static str
     802          50 : pcre_replace_bat(BAT **res, BAT *origin_strs, const char *pattern,
     803             :                                  const char *replacement, const char *flags, bool global)
     804             : {
     805             : #ifdef HAVE_LIBPCRE
     806             :         BATiter origin_strsi = bat_iterator(origin_strs);
     807          50 :         const char *err_p = NULL;
     808             :         char *tmpres;
     809          50 :         int i, errpos = 0;
     810             :         int compile_options = PCRE_UTF8, exec_options = PCRE_NOTEMPTY;
     811             :         pcre *pcre_code = NULL;
     812             :         pcre_extra *extra;
     813             :         BAT *tmpbat;
     814             :         BUN p, q;
     815             :         int *ovector, ovecsize;
     816          50 :         int len_replacement = (int) strlen(replacement);
     817             :         struct backref backrefs[MAX_NR_REFS];
     818             :         int nbackrefs = 0;
     819             :         const char *origin_str;
     820          50 :         int max_dest_size = 0;
     821             : 
     822          70 :         while (*flags) {
     823          20 :                 switch (*flags) {
     824             :                 case 'e':
     825             :                         exec_options &= ~PCRE_NOTEMPTY;
     826             :                         break;
     827           5 :                 case 'i':
     828           5 :                         compile_options |= PCRE_CASELESS;
     829           5 :                         break;
     830          10 :                 case 'm':
     831          10 :                         compile_options |= PCRE_MULTILINE;
     832          10 :                         break;
     833           5 :                 case 's':
     834           5 :                         compile_options |= PCRE_DOTALL;
     835           5 :                         break;
     836           0 :                 case 'x':
     837           0 :                         compile_options |= PCRE_EXTENDED;
     838           0 :                         break;
     839           0 :                 default:
     840           0 :                         throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     841             :                                   ILLEGAL_ARGUMENT ": unsupported flag character '%c'\n",
     842             :                                   *flags);
     843             :                 }
     844          20 :                 flags++;
     845             :         }
     846             : 
     847          50 :         if ((pcre_code = pcre_compile(pattern, compile_options, &err_p, &errpos, NULL)) == NULL) {
     848           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     849             :                           OPERATION_FAILED
     850             :                           ": pcre compile of pattern (%s) failed at %d with\n'%s'.\n",
     851             :                           pattern, errpos, err_p);
     852             :         }
     853             : 
     854             :         /* Since the compiled pattern is going to be used several times,
     855             :          * it is worth spending more time analyzing it in order to speed
     856             :          * up the time taken for matching.
     857             :          */
     858          50 :         extra = pcre_study(pcre_code, BATcount(origin_strs) > JIT_COMPILE_MIN ? PCRE_STUDY_JIT_COMPILE : 0, &err_p);
     859          50 :         if (err_p != NULL) {
     860           0 :                 pcre_free(pcre_code);
     861           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     862             :                           OPERATION_FAILED);
     863             :         }
     864          50 :         pcre_fullinfo(pcre_code, extra, PCRE_INFO_CAPTURECOUNT, &i);
     865          50 :         ovecsize = (i + 1) * 3;
     866          50 :         if ((ovector = (int *) GDKzalloc(sizeof(int) * ovecsize)) == NULL) {
     867           0 :                 pcre_free_study(extra);
     868           0 :                 pcre_free(pcre_code);
     869           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     870             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     871             :         }
     872             : 
     873             :         /* identify back references in the replacement string */
     874          50 :         nbackrefs = parse_replacement(replacement, len_replacement,
     875             :                                                                   backrefs, MAX_NR_REFS);
     876             : 
     877          50 :         tmpbat = COLnew(origin_strs->hseqbase, TYPE_str, BATcount(origin_strs), TRANSIENT);
     878             : 
     879             :         /* the buffer for all destination strings is allocated only once,
     880             :          * and extended when needed */
     881          50 :         max_dest_size = len_replacement + 1;
     882          50 :         tmpres = GDKmalloc(max_dest_size);
     883          50 :         if (tmpbat == NULL || tmpres == NULL) {
     884           0 :                 pcre_free_study(extra);
     885           0 :                 pcre_free(pcre_code);
     886           0 :                 GDKfree(ovector);
     887           0 :                 BBPreclaim(tmpbat);
     888           0 :                 GDKfree(tmpres);
     889           0 :                 throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     890             :                           SQLSTATE(HY013) MAL_MALLOC_FAIL);
     891             :         }
     892       33310 :         BATloop(origin_strs, p, q) {
     893       33260 :                 origin_str = BUNtvar(origin_strsi, p);
     894       33260 :                 tmpres = single_replace(pcre_code, extra, origin_str,
     895       33260 :                                                                 (int) strlen(origin_str), exec_options,
     896             :                                                                 ovector, ovecsize, replacement,
     897             :                                                                 len_replacement, backrefs, nbackrefs, global,
     898             :                                                                 tmpres, &max_dest_size);
     899       33282 :                 if (tmpres == NULL || BUNappend(tmpbat, tmpres, false) != GDK_SUCCEED) {
     900           0 :                         pcre_free_study(extra);
     901           0 :                         pcre_free(pcre_code);
     902           0 :                         GDKfree(ovector);
     903           0 :                         GDKfree(tmpres);
     904           0 :                         BBPreclaim(tmpbat);
     905           0 :                         throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     906             :                                   SQLSTATE(HY013) MAL_MALLOC_FAIL);
     907             :                 }
     908             :         }
     909          50 :         pcre_free_study(extra);
     910          50 :         pcre_free(pcre_code);
     911          50 :         GDKfree(ovector);
     912          50 :         GDKfree(tmpres);
     913          50 :         *res = tmpbat;
     914          50 :         return MAL_SUCCEED;
     915             : #else
     916             :         (void) res;
     917             :         (void) origin_strs;
     918             :         (void) pattern;
     919             :         (void) replacement;
     920             :         (void) flags;
     921             :         (void) global;
     922             :         throw(MAL, global ? "batpcre.replace" : "batpcre.replace_first",
     923             :                   "Database was compiled without PCRE support.");
     924             : #endif
     925             : }
     926             : 
     927             : static str
     928         261 : pcre_init(void *ret)
     929             : {
     930             :         (void) ret;
     931         261 :         return NULL;
     932             : }
     933             : 
     934             : static str
     935          35 : pcre_match_with_flags(bit *ret, const char *val, const char *pat, const char *flags)
     936             : {
     937             :         int pos;
     938             : #ifdef HAVE_LIBPCRE
     939          35 :         const char *err_p = NULL;
     940          35 :         int errpos = 0;
     941             :         int options = PCRE_UTF8;
     942             :         pcre *re;
     943             : #else
     944             :         int options = REG_NOSUB;
     945             :         regex_t re;
     946             :         int errcode;
     947             :         int retval;
     948             : #endif
     949             : 
     950          70 :         while (*flags) {
     951          35 :                 switch (*flags) {
     952           0 :                 case 'i':
     953             : #ifdef HAVE_LIBPCRE
     954           0 :                         options |= PCRE_CASELESS;
     955             : #else
     956             :                         options |= REG_ICASE;
     957             : #endif
     958           0 :                         break;
     959           0 :                 case 'm':
     960             : #ifdef HAVE_LIBPCRE
     961           0 :                         options |= PCRE_MULTILINE;
     962             : #else
     963             :                         options |= REG_NEWLINE;
     964             : #endif
     965           0 :                         break;
     966             : #ifdef HAVE_LIBPCRE
     967          35 :                 case 's':
     968          35 :                         options |= PCRE_DOTALL;
     969          35 :                         break;
     970             : #endif
     971           0 :                 case 'x':
     972             : #ifdef HAVE_LIBPCRE
     973           0 :                         options |= PCRE_EXTENDED;
     974             : #else
     975             :                         options |= REG_EXTENDED;
     976             : #endif
     977           0 :                         break;
     978           0 :                 default:
     979           0 :                         throw(MAL, "pcre.match", ILLEGAL_ARGUMENT
     980             :                                   ": unsupported flag character '%c'\n", *flags);
     981             :                 }
     982          35 :                 flags++;
     983             :         }
     984          35 :         if (strNil(val)) {
     985           0 :                 *ret = FALSE;
     986           0 :                 return MAL_SUCCEED;
     987             :         }
     988             : 
     989             : #ifdef HAVE_LIBPCRE
     990          35 :         if ((re = pcre_compile(pat, options, &err_p, &errpos, NULL)) == NULL)
     991             : #else
     992             :                 if ((errcode = regcomp(&re, pat, options)) != 0)
     993             : #endif
     994             :                         {
     995           0 :                                 throw(MAL, "pcre.match", OPERATION_FAILED
     996             :                                           ": compilation of regular expression (%s) failed "
     997             : #ifdef HAVE_LIBPCRE
     998             :                                           "at %d with '%s'", pat, errpos, err_p
     999             : #else
    1000             :                                           , pat
    1001             : #endif
    1002             :                                         );
    1003             :                         }
    1004             : #ifdef HAVE_LIBPCRE
    1005          35 :         pos = pcre_exec(re, NULL, val, (int) strlen(val), 0, 0, NULL, 0);
    1006          35 :         pcre_free(re);
    1007             : #else
    1008             :         retval = regexec(&re, val, (size_t) 0, NULL, 0);
    1009             :         pos = retval == REG_NOMATCH ? -1 : (retval == REG_ENOSYS ? -2 : 0);
    1010             :         regfree(&re);
    1011             : #endif
    1012          35 :         if (pos >= 0)
    1013          24 :                 *ret = TRUE;
    1014          11 :         else if (pos == -1)
    1015          11 :                 *ret = FALSE;
    1016             :         else
    1017           0 :                 throw(MAL, "pcre.match", OPERATION_FAILED
    1018             :                           ": matching of regular expression (%s) failed with %d",
    1019             :                           pat, pos);
    1020             :         return MAL_SUCCEED;
    1021             : }
    1022             : 
#ifdef HAVE_LIBPCRE
/* special characters in PCRE that need to be escaped; sql2pcre puts a
 * backslash in front of any of these that occur in a LIKE pattern */
static const char *pcre_specials = ".+?*()[]{}|^$\\";
#else
/* special characters in POSIX basic regular expressions that need to
 * be escaped; used instead of the PCRE set when PCRE is not available */
static const char *pcre_specials = ".*[]^$\\";
#endif
    1031             : 
    1032             : /* Change SQL LIKE pattern `pat` into a PCRE pattern anchored with '^' and '$', returned in *r (GDKmalloc'ed).  esc_str supplies the escape character and may be at most one byte long.  If the pattern holds only plain literal characters, *r is a copy of str_nil instead of a regexp.  Returns MAL_SUCCEED or an exception; the failure paths after the allocation free the buffer. */
    1033             : static str
    1034         209 : sql2pcre(str *r, const char *pat, const char *esc_str)
    1035             : {
    1036             :         int escaped = 0; /* non-zero while the previous character was an unpaired escape */
    1037             :         int hasWildcard = 0; /* set as soon as an escape, a regex special, '%' or '_' is seen */
    1038             :         char *ppat;
    1039         209 :         int esc = esc_str[0] == '\200' ? 0 : esc_str[0]; /* a leading '\200' byte means "no escape character"; should change to utf8_convert() */
    1040             :         int specials; /* non-zero if the escape character is itself listed in pcre_specials */
    1041             :         int c;
    1042             : 
    1043         209 :         if (strlen(esc_str) > 1)
    1044           0 :                 throw(MAL, "pcre.sql2pcre", SQLSTATE(22019) ILLEGAL_ARGUMENT ": ESCAPE string must have length 1");
    1045         209 :         if (pat == NULL)
    1046           0 :                 throw(MAL, "pcre.sql2pcre", SQLSTATE(22019) ILLEGAL_ARGUMENT ": (I)LIKE pattern must not be NULL");
    1047         209 :         ppat = GDKmalloc(strlen(pat)*3+3 /* worst case is 3 output bytes per input byte ('%' expands to 3); 3 = "^'the translated regexp'$0" */);
    1048         209 :         if (ppat == NULL)
    1049           0 :                 throw(MAL, "pcre.sql2pcre", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1050             :         /* publish the buffer start now; ppat is used as the write cursor from here on */
    1051         209 :         *r = ppat;
    1052             :         /* The escape character can be a char which is special in a PCRE
    1053             :          * expression.  If the user used the "+" char as escape and has "++"
    1054             :          * in their pattern, then replacing this with "+" is not correct and
    1055             :          * should be "\+" instead. */
    1056         209 :         specials = (esc && strchr(pcre_specials, esc) != NULL);
    1057             :         /* translate the pattern one byte at a time */
    1058         209 :         *ppat++ = '^';
    1059        1445 :         while ((c = *pat++) != 0) {
    1060        1236 :                 if (c == esc) {
    1061          10 :                         if (escaped) {
    1062           1 :                                 if (specials) { /* change ++ into \+ */
    1063           1 :                                         *ppat++ = esc;
    1064             :                                 } else { /* do not escape simple escape symbols */
    1065           0 :                                         ppat[-1] = esc; /* overwrite backslash */
    1066             :                                 }
    1067             :                                 escaped = 0;
    1068             :                         } else {
    1069           9 :                                 *ppat++ = '\\'; /* tentatively emit a backslash; the next character decides whether it stays */
    1070             :                                 escaped = 1;
    1071             :                         }
    1072             :                         hasWildcard = 1;
    1073        1226 :                 } else if (strchr(pcre_specials, c) != NULL) {
    1074             :                         /* escape PCRE special chars, avoid double backslash if the
    1075             :                          * user uses an invalid escape sequence */
    1076          26 :                         if (!escaped)
    1077          26 :                                 *ppat++ = '\\';
    1078          26 :                         *ppat++ = c;
    1079             :                         hasWildcard = 1;
    1080             :                         escaped = 0;
    1081        1200 :                 } else if (c == '%' && !escaped) {
    1082         150 :                         *ppat++ = '.'; /* unescaped '%' becomes a non-greedy "any sequence" */
    1083         150 :                         *ppat++ = '*';
    1084         150 :                         *ppat++ = '?';
    1085             :                         hasWildcard = 1;
    1086             :                         /* collapse multiple %, but only if it isn't the escape */
    1087         150 :                         if (esc != '%')
    1088         150 :                                 while (*pat == '%')
    1089           0 :                                         pat++;
    1090        1050 :                 } else if (c == '_' && !escaped) {
    1091         220 :                         *ppat++ = '.'; /* unescaped '_' matches exactly one character */
    1092             :                         hasWildcard = 1;
    1093             :                 } else {
    1094         830 :                         if (escaped) {
    1095           8 :                                 ppat[-1] = c; /* overwrite backslash of invalid escape */
    1096             :                         } else {
    1097         822 :                                 *ppat++ = c;
    1098             :                         }
    1099             :                         escaped = 0;
    1100             :                 }
    1101             :         }
    1102             :         /* no wildcard or escape character at end of string */
    1103         209 :         if (!hasWildcard || escaped) {
    1104           1 :                 GDKfree(*r);
    1105           1 :                 *r = NULL;
    1106           1 :                 if (escaped)
    1107           0 :                         throw(MAL, "pcre.sql2pcre", SQLSTATE(22019) ILLEGAL_ARGUMENT ": (I)LIKE pattern must not end with escape character");
    1108           1 :                 *r = GDKstrdup(str_nil); /* plain literal pattern: hand back str_nil rather than a regexp */
    1109           1 :                 if (*r == NULL)
    1110           0 :                         throw(MAL, "pcre.sql2pcre", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1111             :         } else {
    1112         208 :                 *ppat++ = '$';
    1113         208 :                 *ppat = 0;
    1114             :         }
    1115             :         return MAL_SUCCEED;
    1116             : }
    1117             : 
    1118             : #ifdef HAVE_LIBPCRE
    1119             : /* Change SQL PATINDEX pattern `pat` into a PCRE pattern returned in *r (GDKmalloc'ed, not anchored).  Characters listed in pcre_specials are backslash-escaped, '_' becomes '.', and '%' becomes ".*" except for the first '%' seen and a '%' that ends the pattern, which are dropped.  Returns MAL_SUCCEED or an allocation-failure exception. */
    1120             : static str
    1121          24 : pat2pcre(str *r, const char *pat)
    1122             : {
    1123          24 :         size_t len = strlen(pat);
    1124          24 :         char *ppat = GDKmalloc(len*2+3 /* at most 2 output bytes per input byte plus the terminating 0; no anchors are added here */);
    1125             :         int start = 0; /* number of '%' characters seen so far */
    1126             : 
    1127          24 :         if (ppat == NULL)
    1128           0 :                 throw(MAL, "pcre.pat2pcre", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1129          24 :         *r = ppat;
    1130          72 :         while (*pat) {
    1131          48 :                 int c = *pat++;
    1132             :                 /* NOTE(review): the first '%' is dropped even when it is not at the start of the pattern ("a%b" yields "ab") -- confirm this is intended */
    1133          48 :                 if (strchr(pcre_specials, c) != NULL) {
    1134          17 :                         *ppat++ = '\\';
    1135          17 :                         *ppat++ = c;
    1136          31 :                 } else if (c == '%') {
    1137           1 :                         if (start && *pat) { /* neither the first '%' nor the last character of the pattern */
    1138           0 :                                 *ppat++ = '.';
    1139           0 :                                 *ppat++ = '*';
    1140             :                         }
    1141           1 :                         start++;
    1142          30 :                 } else if (c == '_') {
    1143           0 :                         *ppat++ = '.';
    1144             :                 } else {
    1145          30 :                         *ppat++ = c;
    1146             :                 }
    1147             :         }
    1148          24 :         *ppat = 0;
    1149          24 :         return MAL_SUCCEED;
    1150             : }
    1151             : #endif
    1152             : 
    1153             : /*
    1154             :  * @+ Wrapping
    1155             :  */
    1156             : #include "mal.h"
    1157             : static str
    1158           0 : PCREreplace_wrap(str *res, const str *or, const str *pat, const str *repl, const str *flags)
    1159             : { /* scalar wrapper: dereferences its arguments and forwards them to pcre_replace with the last argument true (presumably "replace all occurrences"; the replace_first BAT wrapper passes false) */
    1160           0 :         return pcre_replace(res, *or, *pat, *repl, *flags, true);
    1161             : }
    1162             : /* BAT wrapper: loads the BAT identified by *bid, runs pcre_replace_bat over it with the last argument true, and on success returns the id of the result BAT in *res.  Returns the message produced by pcre_replace_bat, or an exception if the input BAT cannot be loaded. */
    1163             : static str
    1164          50 : PCREreplace_bat_wrap(bat *res, const bat *bid, const str *pat, const str *repl, const str *flags)
    1165             : {
    1166          50 :         BAT *b, *bn = NULL;
    1167             :         str msg;
    1168          50 :         if ((b = BATdescriptor(*bid)) == NULL)
    1169           0 :                 throw(MAL, "batpcre.replace", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    1170             : 
    1171          50 :         msg = pcre_replace_bat(&bn, b, *pat, *repl, *flags, true);
    1172          50 :         if (msg == MAL_SUCCEED) {
    1173          50 :                 *res = bn->batCacheid;
    1174          50 :                 BBPkeepref(*res);
    1175             :         }
    1176          50 :         BBPunfix(b->batCacheid); /* the input BAT reference is dropped on success and on failure alike */
    1177          50 :         return msg;
    1178             : }
    1179             : /* BAT wrapper: same as PCREreplace_bat_wrap but calls pcre_replace_bat with the last argument false.  On success the id of the result BAT is returned in *res.  Returns the message produced by pcre_replace_bat, or an exception if the input BAT cannot be loaded. */
    1180             : static str
    1181           0 : PCREreplacefirst_bat_wrap(bat *res, const bat *bid, const str *pat, const str *repl, const str *flags)
    1182             : {
    1183           0 :         BAT *b,*bn = NULL;
    1184             :         str msg;
    1185           0 :         if ((b = BATdescriptor(*bid)) == NULL)
    1186           0 :                 throw(MAL, "batpcre.replace_first", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); /* SQLSTATE prefix added to match PCREreplace_bat_wrap */
    1187             : 
    1188           0 :         msg = pcre_replace_bat(&bn, b, *pat, *repl, *flags, false);
    1189           0 :         if (msg == MAL_SUCCEED) {
    1190           0 :                 *res = bn->batCacheid;
    1191           0 :                 BBPkeepref(*res);
    1192             :         }
    1193           0 :         BBPunfix(b->batCacheid); /* the input BAT reference is dropped on success and on failure alike */
    1194           0 :         return msg;
    1195             : }
    1196             : /* Match *val against the regular expression *pat via pcre_match_with_flags; the flag string is "s" when built with libpcre and "x" otherwise. */
    1197             : static str
    1198           4 : PCREmatch(bit *ret, const str *val, const str *pat)
    1199             : {
    1200          35 :         return pcre_match_with_flags(ret, *val, *pat,
    1201             : #ifdef HAVE_LIBPCRE
    1202             :                                                                  "s"
    1203             : #else
    1204             :                                                                  "x"
    1205             : #endif
    1206             :                 );
    1207             : }
    1208             : /* Like PCREmatch but with the "i" flag; without libpcre the adjacent string literals concatenate to the flag string "ix". */
    1209             : static str
    1210           0 : PCREimatch(bit *ret, const str *val, const str *pat)
    1211             : {
    1212           0 :         return pcre_match_with_flags(ret, *val, *pat, "i"
    1213             : #ifndef HAVE_LIBPCRE
    1214             :                                                                  "x"
    1215             : #endif
    1216             :                 );
    1217             : }
    1218             : /* Run the compiled `pattern` over *s and store in *res the second output-vector entry (v[1], which pcre_exec fills with the offset just past the match), or 0 when pcre_exec reports no match or an error.  Without libpcre this always raises an exception. */
    1219             : static str
    1220          24 : PCREindex(int *res, const pcre *pattern, const str *s)
    1221             : {
    1222             : #ifdef HAVE_LIBPCRE
    1223             :         int v[3];
    1224             : 
    1225          24 :         v[0] = v[1] = *res = 0;
    1226          24 :         if (pcre_exec(pattern, NULL, *s, (int) strlen(*s), 0, 0, v, 3) >= 0) {
    1227          22 :                 *res = v[1];
    1228             :         }
    1229          24 :         return MAL_SUCCEED;
    1230             : #else
    1231             :         (void) res;
    1232             :         (void) pattern;
    1233             :         (void) s;
    1234             :         throw(MAL, "pcre.index", "Database was compiled without PCRE support.");
    1235             : #endif
    1236             : }
    1237             : /* SQL PATINDEX: translate *pat with pat2pcre, compile it, and let PCREindex compute *ret for *val.  A nil pattern or value yields int_nil.  Without libpcre this always raises an exception. */
    1238             : static str
    1239          26 : PCREpatindex(int *ret, const str *pat, const str *val)
    1240             : {
    1241             : #ifdef HAVE_LIBPCRE
    1242          26 :         pcre *re = NULL;
    1243          26 :         char *ppat = NULL, *msg;
    1244             : 
    1245          77 :         if (strNil(*pat) || strNil(*val)) {
    1246           2 :                 *ret = int_nil;
    1247           2 :                 return MAL_SUCCEED;
    1248             :         }
    1249             : 
    1250          24 :         if ((msg = pat2pcre(&ppat, *pat)) != MAL_SUCCEED)
    1251             :                 return msg;
    1252          24 :         if ((msg = pcre_compile_wrap(&re, ppat, FALSE)) != MAL_SUCCEED) {
    1253           0 :                 GDKfree(ppat);
    1254           0 :                 return msg;
    1255             :         }
    1256          24 :         GDKfree(ppat); /* the translated pattern text is no longer needed once compiled */
    1257          24 :         msg = PCREindex(ret, re, val);
    1258          24 :         pcre_free(re);
    1259          24 :         return msg;
    1260             : #else
    1261             :         (void) ret;
    1262             :         (void) pat;
    1263             :         (void) val;
    1264             :         throw(MAL, "pcre.patindex", "Database was compiled without PCRE support.");
    1265             : #endif
    1266             : }
    1267             : /* Return in *ret a GDKmalloc'ed copy of *val in which every ASCII character that is not a letter or digit is preceded by a backslash.  Returns MAL_SUCCEED or an allocation-failure exception. */
    1268             : static str
    1269           0 : PCREquote(str *ret, const str *val)
    1270             : {
    1271             :         char *p;
    1272           0 :         const char *s = *val;
    1273             : 
    1274           0 :         *ret = p = GDKmalloc(strlen(s) * 2 + 1); /* certainly long enough */
    1275           0 :         if (p == NULL)
    1276           0 :                 throw(MAL, "pcre.quote", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1277             :         /* quote all non-alphanumeric ASCII characters (i.e. leave
    1278             :            non-ASCII and alphanumeric alone) */
    1279           0 :         while (*s) {
    1280           0 :                 if (!((*s & 0x80) != 0 ||
    1281           0 :                       ('a' <= *s && *s <= 'z') ||
    1282           0 :                       ('A' <= *s && *s <= 'Z') ||
    1283           0 :                       isdigit((unsigned char) *s)))
    1284           0 :                         *p++ = '\\';
    1285           0 :                 *p++ = *s++;
    1286             :         }
    1287           0 :         *p = 0;
    1288           0 :         return MAL_SUCCEED;
    1289             : }
    1290             : /* MAL-level wrapper: dereferences the pattern and escape arguments and forwards them to sql2pcre. */
    1291             : static str
    1292           6 : PCREsql2pcre(str *ret, const str *pat, const str *esc)
    1293             : {
    1294           6 :         return sql2pcre(ret, *pat, *esc);
    1295             : }
    1296             : /* Decide how a LIKE pattern is evaluated and report it through the out flags: *empty for a nil pattern or escape; *use_re with *use_strcmp for a plain string comparison; *use_re alone for the simple RE matcher; neither flag when *ppat holds a pattern translated by sql2pcre.  Raises an exception if the pattern is not properly escaped or sql2pcre fails. */
    1297             : static inline str
    1298             : choose_like_path(char **ppat, bool *use_re, bool *use_strcmp, bool *empty, const str *pat, const str *esc)
    1299             : {
    1300             :         str res = MAL_SUCCEED;
    1301             :         *use_re = false;
    1302             :         *use_strcmp = false;
    1303             :         *empty = false;
    1304             : 
    1305             :         if (strNil(*pat) || strNil(*esc)) {
    1306             :                 *empty = true;
    1307             :         } else {
    1308             :                 if (!re_is_pattern_properly_escaped(*pat, (unsigned char) **esc))
    1309             :                         throw(MAL, "pcre.sql2pcre", SQLSTATE(22019) ILLEGAL_ARGUMENT ": (I)LIKE pattern must not end with escape character");
    1310             :                 if (is_strcmpable(*pat, *esc)) {
    1311             :                         *use_re = true;
    1312             :                         *use_strcmp = true;
    1313             :                 } else if (re_simple(*pat, (unsigned char) **esc)) {
    1314             :                         *use_re = true;
    1315             :                 } else {
    1316             :                         if ((res = sql2pcre(ppat, *pat, *esc)) != MAL_SUCCEED)
    1317             :                                 return res;
    1318             :                         if (strNil(*ppat)) { /* sql2pcre returned str_nil: treat it as a string comparison after all */
    1319             :                                 GDKfree(*ppat);
    1320             :                                 *ppat = NULL;
    1321             :                                 *use_re = true;
    1322             :                                 *use_strcmp = true;
    1323             :                         }
    1324             :                 }
    1325             :         }
    1326             :         return res;
    1327             : }
    1328             : /* Scalar (I)LIKE: evaluate *s against pattern *pat with escape *esc and store the outcome in *ret (bit_nil when *s is nil or choose_like_path reports a nil pattern/escape).  A non-zero *isens selects the case-insensitive comparison functions.  Returns MAL_SUCCEED or an exception. */
    1329             : static str
    1330         127 : PCRElike4(bit *ret, const str *s, const str *pat, const str *esc, const bit *isens)
    1331             : {
    1332             :         str res = MAL_SUCCEED;
    1333         127 :         char *ppat = NULL;
    1334         127 :         bool use_re = false, use_strcmp = false, isnull = false;
    1335             :         struct RE *re = NULL;
    1336             : 
    1337         127 :         if ((res = choose_like_path(&ppat, &use_re, &use_strcmp, &isnull, pat, esc)) != MAL_SUCCEED)
    1338             :                 return res;
    1339             : 
    1340         252 :         if (strNil(*s) || isnull) {
    1341           0 :                 *ret = bit_nil;
    1342         126 :         } else if (use_re) {
    1343          95 :                 if (use_strcmp) {
    1344          18 :                         *ret = *isens ? mystrcasecmp(*s, *pat) == 0 : strcmp(*s, *pat) == 0;
    1345             :                 } else {
    1346          77 :                         if (!(re = re_create(*pat, *isens, (unsigned char) **esc)))
    1347           0 :                                 res = createException(MAL, "pcre.like4", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1348             :                         else
    1349          77 :                                 *ret = *isens ? re_match_ignore(*s, re) : re_match_no_ignore(*s, re);
    1350             :                 }
    1351             :         } else { /* neither shortcut applies: match against the pattern translated into ppat */
    1352          31 :                 res = *isens ? PCREimatch(ret, s, &ppat) : PCREmatch(ret, s, &ppat);
    1353             :         }
    1354             : 
    1355          77 :         if (re)
    1356          77 :                 re_destroy(re);
    1357         126 :         GDKfree(ppat);
    1358         126 :         return res;
    1359             : }
    1360             : /* LIKE with explicit escape: calls PCRElike4 with the isens flag FALSE. */
    1361             : static str
    1362          32 : PCRElike3(bit *ret, const str *s, const str *pat, const str *esc)
    1363             : {
    1364         107 :         bit no = FALSE;
    1365             : 
    1366          32 :         return PCRElike4(ret, s, pat, esc, &no);
    1367             : }
    1368             : /* LIKE without ESCAPE clause: calls PCRElike3 with an empty escape string. */
    1369             : static str
    1370          27 : PCRElike2(bit *ret, const str *s, const str *pat)
    1371             : {
    1372          27 :         char *esc = "";
    1373             : 
    1374          27 :         return PCRElike3(ret, s, pat, &esc);
    1375             : }
    1376             : /* NOT LIKE with explicit escape: negates the result of PCRElike3, keeping bit_nil as bit_nil; errors from PCRElike3 go through rethrow. */
    1377             : static str
    1378          30 : PCREnotlike3(bit *ret, const str *s, const str *pat, const str *esc)
    1379             : {
    1380             :         str tmp;
    1381             :         bit r;
    1382             : 
    1383          30 :         rethrow("str.not_like", tmp, PCRElike3(&r, s, pat, esc));
    1384          30 :         *ret = r==bit_nil?bit_nil:!r;
    1385          30 :         return MAL_SUCCEED;
    1386             : }
    1387             : /* NOT LIKE without ESCAPE clause: negates the result of PCRElike2, keeping bit_nil as bit_nil; errors from PCRElike2 go through rethrow. */
    1388             : static str
    1389          18 : PCREnotlike2(bit *ret, const str *s, const str *pat)
    1390             : {
    1391             :         str tmp;
    1392             :         bit r;
    1393             : 
    1394          18 :         rethrow("str.not_like", tmp, PCRElike2(&r, s, pat));
    1395          18 :         *ret = r==bit_nil?bit_nil:!r;
    1396          18 :         return MAL_SUCCEED;
    1397             : }
    1398             : /* ILIKE with explicit escape: calls PCRElike4 with the isens flag TRUE, which selects the case-insensitive comparisons there. */
    1399             : static str
    1400           0 : PCREilike3(bit *ret, const str *s, const str *pat, const str *esc)
    1401             : {
    1402          20 :         bit yes = TRUE;
    1403             : 
    1404           0 :         return PCRElike4(ret, s, pat, esc, &yes);
    1405             : }
    1406             : /* ILIKE without ESCAPE clause: calls PCREilike3 with an empty escape string. */
    1407             : static str
    1408           9 : PCREilike2(bit *ret, const str *s, const str *pat)
    1409             : {
    1410           9 :         char *esc = "";
    1411             : 
    1412           9 :         return PCREilike3(ret, s, pat, &esc);
    1413             : }
    1414             : /* NOT ILIKE with explicit escape: negates the result of PCREilike3, keeping bit_nil as bit_nil; errors from PCREilike3 go through rethrow. */
    1415             : static str
    1416           0 : PCREnotilike3(bit *ret, const str *s, const str *pat, const str *esc)
    1417             : {
    1418             :         str tmp;
    1419             :         bit r;
    1420             : 
    1421           0 :         rethrow("str.not_ilike", tmp, PCREilike3(&r, s, pat, esc));
    1422           0 :         *ret = r==bit_nil?bit_nil:!r;
    1423           0 :         return MAL_SUCCEED;
    1424             : }
    1425             : /* NOT ILIKE without ESCAPE clause: negates the result of PCREilike2, keeping bit_nil as bit_nil; errors from PCREilike2 go through rethrow. */
    1426             : static str
    1427          11 : PCREnotilike2(bit *ret, const str *s, const str *pat)
    1428             : {
    1429             :         str tmp;
    1430             :         bit r;
    1431             : 
    1432          11 :         rethrow("str.not_ilike", tmp, PCREilike2(&r, s, pat));
    1433          11 :         *ret = r==bit_nil?bit_nil:!r;
    1434          11 :         return MAL_SUCCEED;
    1435             : }
    1436             : /* Prepare the matcher state for the non-PCRE paths: without use_strcmp, create *re from pat via re_create; with use_strcmp and caseignore, convert pat with utf8stoucs into *wpat; otherwise nothing is built.  Returns MAL_SUCCEED or an allocation-failure exception. */
    1437             : static inline str
    1438        1822 : re_like_build(struct RE **re, uint32_t **wpat, const char *pat, bool caseignore, bool use_strcmp, uint32_t esc)
    1439             : {
    1440        1822 :         if (!use_strcmp) {
    1441        1356 :                 if (!(*re = re_create(pat, caseignore, esc)))
    1442           0 :                         return createException(MAL, "pcre.re_like_build", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1443         466 :         } else if (caseignore) {
    1444         123 :                 if (!(*wpat = utf8stoucs(pat)))
    1445           0 :                         return createException(MAL, "pcre.re_like_build", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1446             :         }
    1447             :         return MAL_SUCCEED;
    1448             : }
    1449             : /* Helper for re_like_proj_apply: makes the enclosing function return bit_nil when the first byte of the local string `s` is '\200', and the value of TEST otherwise. */
    1450             : #define proj_scanloop(TEST)     \
    1451             :         do {    \
    1452             :                 if (*s == '\200') \
    1453             :                         return bit_nil; \
    1454             :                 else \
    1455             :                         return TEST; \
    1456             :         } while (0)
    1457             : /* Evaluate one value `s` with the state prepared by re_like_build: string comparison against wpat/pat when use_strcmp, otherwise the RE matcher on `re`; caseignore picks the case-insensitive variant and anti inverts the outcome.  Returns bit_nil when s starts with '\200' (see proj_scanloop). */
    1458             : static inline bit
    1459        4981 : re_like_proj_apply(str s, struct RE *re, uint32_t *wpat, const char *pat, bool caseignore, bool anti, bool use_strcmp)
    1460             : {
    1461        4981 :         if (use_strcmp) {
    1462         278 :                 if (caseignore) {
    1463          93 :                         if (anti)
    1464          68 :                                 proj_scanloop(mywstrcasecmp(s, wpat) != 0);
    1465             :                         else
    1466          25 :                                 proj_scanloop(mywstrcasecmp(s, wpat) == 0);
    1467             :                 } else {
    1468         185 :                         if (anti)
    1469          54 :                                 proj_scanloop(strcmp(s, pat) != 0);
    1470             :                         else
    1471         131 :                                 proj_scanloop(strcmp(s, pat) == 0);
    1472             :                 }
    1473             :         } else {
    1474        4703 :                 if (caseignore) {
    1475          24 :                         if (anti)
    1476           3 :                                 proj_scanloop(!re_match_ignore(s, re));
    1477             :                         else
    1478          21 :                                 proj_scanloop(re_match_ignore(s, re));
    1479             :                 } else {
    1480        4679 :                         if (anti)
    1481           0 :                                 proj_scanloop(!re_match_no_ignore(s, re));
    1482             :                         else
    1483        4679 :                                 proj_scanloop(re_match_no_ignore(s, re));
    1484             :                 }
    1485             :         }
    1486             : }
    1487             : /* Release whatever re_like_build produced: destroy *re and GDKfree *wpat when set, and reset both pointers to NULL so the call can be repeated. */
    1488             : static inline void
    1489        1957 : re_like_clean(struct RE **re, uint32_t **wpat)
    1490             : {
    1491        1957 :         if (*re) {
    1492        1355 :                 re_destroy(*re);
    1493        1357 :                 *re = NULL;
    1494             :         }
    1495        1959 :         if (*wpat) {
    1496         123 :                 GDKfree(*wpat);
    1497         123 :                 *wpat = NULL;
    1498             :         }
    1499        1959 : }
    1500             : /* Compile the translated pattern ppat into *res (pcre_compile, or regcomp without libpcre), adding the case-insensitive option when caseignore is set.  With libpcre the pattern is also studied into *ex, asking for JIT compilation when count exceeds JIT_COMPILE_MIN.  Returns MAL_SUCCEED or an exception describing the failure. */
    1501             : static inline str
    1502         172 : pcre_like_build(
    1503             : #ifdef HAVE_LIBPCRE
    1504             :         pcre **res,
    1505             :         pcre_extra **ex
    1506             : #else
    1507             :         regex_t *res,
    1508             :         void *ex
    1509             : #endif
    1510             : , const char *ppat, bool caseignore, BUN count)
    1511             : {
    1512             : #ifdef HAVE_LIBPCRE
    1513         172 :         const char *err_p = NULL;
    1514         172 :         int errpos = 0;
    1515             :         int options = PCRE_UTF8 | PCRE_MULTILINE | PCRE_DOTALL;
    1516         172 :         int pcrestopt = count > JIT_COMPILE_MIN ? PCRE_STUDY_JIT_COMPILE : 0;
    1517             : 
    1518         172 :         *res = NULL;
    1519         172 :         *ex = NULL;
    1520             : #else
    1521             :         int options = REG_NEWLINE | REG_NOSUB | REG_EXTENDED;
    1522             :         int errcode;
    1523             : 
    1524             :         *res = (regex_t) {0};
    1525             :         (void) count;
    1526             : #endif
    1527             : 
    1528         172 :         if (caseignore) {
    1529             : #ifdef HAVE_LIBPCRE
    1530             :                 options |= PCRE_CASELESS;
    1531             : #else
    1532             :                 options |= REG_ICASE;
    1533             : #endif
    1534             :         }
    1535         172 :         if (
    1536             : #ifdef HAVE_LIBPCRE
    1537         172 :                 (*res = pcre_compile(ppat, options, &err_p, &errpos, NULL)) == NULL
    1538             : #else
    1539             :                 (errcode = regcomp(res, ppat, options)) != 0
    1540             : #endif
    1541             :                 )
    1542           0 :                 return createException(MAL, "pcre.pcre_like_build", OPERATION_FAILED
    1543             :                                                                 ": compilation of regular expression (%s) failed"
    1544             : #ifdef HAVE_LIBPCRE
    1545             :                                                                 " at %d with '%s'", ppat, errpos, err_p
    1546             : #else
    1547             :                                                                 , ppat
    1548             : #endif
    1549             :                         );
    1550             : #ifdef HAVE_LIBPCRE
    1551         172 :         *ex = pcre_study(*res, pcrestopt, &err_p);
    1552         172 :         if (err_p != NULL) /* *res stays allocated on this path; presumably the caller releases it via pcre_clean */
    1553           0 :                 return createException(MAL, "pcre.pcre_like_build", OPERATION_FAILED
    1554             :                                                                 ": pcre study of pattern (%s) "
    1555             :                                                                 "failed with '%s'", ppat, err_p);
    1556             : #else
    1557             :         (void) ex;
    1558             : #endif
    1559             :         return MAL_SUCCEED;
    1560             : }
    1561             : /* Body of pcre_like_apply: runs LOOP_BODY (which must set `pos`), then stores in *ret bit_nil if s starts with '\200', RES1 if pos >= 0 (match), RES2 if pos == -1 (no match); any other pos makes the enclosing function return an exception.  Uses the locals s, ret, pos and ppat of the expansion site. */
    1562             : #define PCRE_LIKE_BODY(LOOP_BODY, RES1, RES2) \
    1563             :         do { \
    1564             :                 LOOP_BODY  \
    1565             :                 if (*s == '\200') \
    1566             :                         *ret = bit_nil; \
    1567             :                 else if (pos >= 0) \
    1568             :                         *ret = RES1; \
    1569             :                 else if (pos == -1) \
    1570             :                         *ret = RES2; \
    1571             :                 else \
    1572             :                         return createException(MAL, "pcre.match", OPERATION_FAILED ": matching of regular expression (%s) failed with %d", ppat, pos); \
    1573             :         } while(0)
    1574             : /* Match one value `s` against the compiled expression and store TRUE/FALSE (inverted when anti is set) or bit_nil in *ret; ppat is only used in the error message.  Returns MAL_SUCCEED or an exception when the matcher reports an error. */
    1575             : static inline str
    1576        1096 : pcre_like_apply(bit *ret, str s,
    1577             : #ifdef HAVE_LIBPCRE
    1578             :         pcre *re, pcre_extra *ex
    1579             : #else
    1580             :         regex_t re, void *ex
    1581             : #endif
    1582             : , const char *ppat, bool anti)
    1583             : {
    1584             :         int pos;
    1585             :         /* LOOP_BODY runs the matcher and maps its outcome onto pos: >= 0 match, -1 no match, anything else error; the macro is not #undef'ed after this function */
    1586             : #ifdef HAVE_LIBPCRE
    1587             : #define LOOP_BODY       \
    1588             :         pos = pcre_exec(re, ex, s, (int) strlen(s), 0, 0, NULL, 0);
    1589             : #else
    1590             : #define LOOP_BODY       \
    1591             :         int retval = regexec(&re, s, (size_t) 0, NULL, 0); \
    1592             :         (void) ex; \
    1593             :         pos = retval == REG_NOMATCH ? -1 : (retval == REG_ENOSYS ? -2 : 0);
    1594             : #endif
    1595             : 
    1596        1096 :         if (anti)
    1597           6 :                 PCRE_LIKE_BODY(LOOP_BODY, FALSE, TRUE);
    1598             :         else
    1599        1090 :                 PCRE_LIKE_BODY(LOOP_BODY, TRUE, FALSE);
    1600             : 
    1601             :         return MAL_SUCCEED;
    1602             : }
    1603             : /* Release the compiled expression: with libpcre, free *re and the study data *ex when set and reset both to NULL; otherwise regfree the regex_t and zero it. */
    1604             : static inline void
    1605         593 : pcre_clean(
    1606             : #ifdef HAVE_LIBPCRE
    1607             :         pcre **re, pcre_extra **ex) {
    1608         593 :         if (*re)
    1609         172 :                 pcre_free(*re);
    1610         593 :         if (*ex)
    1611         172 :                 pcre_free_study(*ex);
    1612         592 :         *re = NULL;
    1613         592 :         *ex = NULL;
    1614             : #else
    1615             :         regex_t *re, void *ex) {
    1616             :         regfree(re);
    1617             :         *re = (regex_t) {0};
    1618             :         (void) ex;
    1619             : #endif
    1620         592 : }
    1621             : 
    1622             : static str
                       /*
                        * Shared worker behind the BATPCRE[not][i]like[2] entry points: matches
                        * MAL argument 1 (input) against MAL argument 2 (LIKE pattern) and
                        * stores a TYPE_bit result BAT in MAL argument 0.
                        *
                        * Either argument may be a BAT; this is derived from the argument types
                        * with isaBatType().  The result gets BATcount(b ? b : pbn) rows.
                        *
                        *   esc   - escape string, handed to choose_like_path()/re_like_build()
                        *   isens - cast to bool; true requests case-insensitive matching
                        *   not   - cast to bool; true inverts the match result ("anti")
                        *
                        * For every pattern choose_like_path() picks one of three paths:
                        * the simple matcher (use_re), an all-nil result (allnulls), or the
                        * PCRE/regex matcher built from the translated pattern in ppat.
                        *
                        * Returns MAL_SUCCEED or an exception string.  On error the result BAT
                        * is reclaimed; the input/pattern descriptors are always unfixed.
                        *
                        * NOTE(review): the row count is taken from pbn when b is NULL, so at
                        * least one of the two arguments is assumed to be a BAT - confirm
                        * against the MAL signatures.
                        * NOTE(review): when both arguments are BATs the row count comes from
                        * the input only; presumably both BATs are aligned - confirm.
                        */
    1623             : BATPCRElike3(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, const str *esc, const bit *isens, const bit *not)
    1624             : {
    1625             :         str msg = MAL_SUCCEED, input = NULL, pat = NULL;
    1626             :         BAT *b = NULL, *pbn = NULL, *bn = NULL;
    1627             :         char *ppat = NULL;
    1628             :         bool use_re = false, use_strcmp = false, allnulls = false, isensitive = (bool) *isens, anti = (bool) *not, has_nil = false,
    1629             :                  input_is_a_bat = isaBatType(getArgType(mb, pci, 1)), pattern_is_a_bat = isaBatType(getArgType(mb, pci, 2));
    1630             :         bat *r = getArgReference_bat(stk, pci, 0);
    1631             :         BUN q = 0;
    1632             :         bit *ret = NULL;
    1633             : #ifdef HAVE_LIBPCRE
    1634             :         pcre *re = NULL;
    1635             :         pcre_extra *ex = NULL;
    1636             : #else
    1637             :         regex_t re = (regex_t) {0};
    1638             :         void *ex = NULL;
    1639             : #endif
    1640             :         struct RE *re_simple = NULL;
    1641             :         uint32_t *wpat = NULL;
    1642             :         BATiter bi = (BATiter) {0}, pi;
    1643             : 
    1644             :         (void) cntxt;
    1645             :         if (input_is_a_bat) {
    1646             :                 bat *bid = getArgReference_bat(stk, pci, 1);
    1647             :                 if (!(b = BATdescriptor(*bid))) {
    1648             :                         msg = createException(MAL, "batalgebra.batpcrelike3", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
    1649             :                         goto bailout;
    1650             :                 }
    1651             :         }
    1652             :         if (pattern_is_a_bat) {
    1653             :                 bat *pb = getArgReference_bat(stk, pci, 2);
    1654             :                 if (!(pbn = BATdescriptor(*pb))) {
    1655             :                         msg = createException(MAL, "batalgebra.batpcrelike3", SQLSTATE(HY005) RUNTIME_OBJECT_MISSING);
    1656             :                         goto bailout;
    1657             :                 }
    1658             :         }
    1659             :         assert((!b || ATOMstorage(b->ttype) == TYPE_str) && (!pbn || ATOMstorage(pbn->ttype) == TYPE_str));
    1660             : 
    1661             :         q = BATcount(b ? b : pbn);
    1662             :         if (!(bn = COLnew(b ? b->hseqbase : pbn->hseqbase, TYPE_bit, q, TRANSIENT))) {
    1663             :                 msg = createException(MAL, "batalgebra.batpcrelike3", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1664             :                 goto bailout;
    1665             :         }
    1666             :         ret = (bit*) Tloc(bn, 0);
    1667             : 
                               /* Pattern BAT: the pattern can differ per row, so the path is chosen
                                * and the matcher is built, applied and cleaned for every row. */
    1668             :         if (pattern_is_a_bat) {
    1669             :                 pi = bat_iterator(pbn);
    1670             :                 if (b)
    1671             :                         bi = bat_iterator(b);
    1672             :                 else
    1673             :                         input = *getArgReference_str(stk, pci, 1);
    1674             : 
    1675             :                 for (BUN p = 0; p < q; p++) {
    1676             :                         const str next_input = b ? BUNtail(bi, p) : input, np = BUNtail(pi, p);
    1677             : 
    1678             :                         if ((msg = choose_like_path(&ppat, &use_re, &use_strcmp, &allnulls, &np, esc)) != MAL_SUCCEED)
    1679             :                                 goto bailout;
    1680             : 
    1681             :                         if (use_re) {
    1682             :                                 if ((msg = re_like_build(&re_simple, &wpat, np, isensitive, use_strcmp, (unsigned char) **esc)) != MAL_SUCCEED)
    1683             :                                         goto bailout;
    1684             :                                 ret[p] = re_like_proj_apply(next_input, re_simple, wpat, np, isensitive, anti, use_strcmp);
    1685             :                                 re_like_clean(&re_simple, &wpat);
    1686             :                         } else if (allnulls) {
    1687             :                                 ret[p] = bit_nil;
    1688             :                         } else {
    1689             :                                 if ((msg = pcre_like_build(&re, &ex, ppat, isensitive, 1)) != MAL_SUCCEED)
    1690             :                                         goto bailout;
    1691             :                                 if ((msg = pcre_like_apply(&(ret[p]), next_input, re, ex, ppat, anti)) != MAL_SUCCEED)
    1692             :                                         goto bailout;
    1693             :                                 pcre_clean(&re, &ex);
    1694             :                         }
    1695             :                         has_nil |= is_bit_nil(ret[p]);
                                               /* ppat is reset to NULL so the GDKfree() at bailout does not
                                                * free the same buffer a second time */
    1696             :                         GDKfree(ppat);
    1697             :                         ppat = NULL;
    1698             :                 }
    1699             :         } else {
                                       /* Scalar pattern: choose the path and build the matcher once,
                                        * then apply it to every row of the input BAT. */
    1700             :                 bi = bat_iterator(b);
    1701             :                 pat = *getArgReference_str(stk, pci, 2);
    1702             :                 if ((msg = choose_like_path(&ppat, &use_re, &use_strcmp, &allnulls, &pat, esc)) != MAL_SUCCEED)
    1703             :                         goto bailout;
    1704             : 
    1705             :                 if (use_re) {
    1706             :                         if ((msg = re_like_build(&re_simple, &wpat, pat, isensitive, use_strcmp, (unsigned char) **esc)) != MAL_SUCCEED)
    1707             :                                 goto bailout;
    1708             :                         for (BUN p = 0; p < q; p++) {
    1709             :                                 const str s = BUNtail(bi, p);
    1710             :                                 ret[p] = re_like_proj_apply(s, re_simple, wpat, pat, isensitive, anti, use_strcmp);
    1711             :                                 has_nil |= is_bit_nil(ret[p]);
    1712             :                         }
    1713             :                 } else if (allnulls) {
    1714             :                         for (BUN p = 0; p < q; p++)
    1715             :                                 ret[p] = bit_nil;
    1716             :                         has_nil = true;
    1717             :                 } else {
    1718             :                         if ((msg = pcre_like_build(&re, &ex, ppat, isensitive, q)) != MAL_SUCCEED)
    1719             :                                 goto bailout;
    1720             :                         for (BUN p = 0; p < q; p++) {
    1721             :                                 const str s = BUNtail(bi, p);
    1722             :                                 if ((msg = pcre_like_apply(&(ret[p]), s, re, ex, ppat, anti)) != MAL_SUCCEED)
    1723             :                                         goto bailout;
    1724             :                                 has_nil |= is_bit_nil(ret[p]);
    1725             :                         }
    1726             :                 }
    1727             :         }
    1728             : 
                               /* Reached on success and on every error path. */
    1729             : bailout:
    1730             :         GDKfree(ppat);
    1731             :         re_like_clean(&re_simple, &wpat);
    1732             :         pcre_clean(&re, &ex);
    1733             :         if (bn && !msg) {
    1734             :                 BATsetcount(bn, q);
    1735             :                 bn->tnil = has_nil;
    1736             :                 bn->tnonil = !has_nil;
                                       /* key/sorted flags are only set when the result has at most one row */
    1737             :                 bn->tkey = BATcount(bn) <= 1;
    1738             :                 bn->tsorted = BATcount(bn) <= 1;
    1739             :                 bn->trevsorted = BATcount(bn) <= 1;
    1740             :                 BBPkeepref(*r = bn->batCacheid);
    1741             :         } else if (bn)
    1742             :                 BBPreclaim(bn);
    1743             :         if (b)
    1744             :                 BBPunfix(b->batCacheid);
    1745             :         if (pbn)
    1746             :                 BBPunfix(pbn->batCacheid);
    1747             :         return msg;
    1748             : }
    1749             : 
    1750             : static str
                       /* LIKE with an explicit escape string read from MAL argument 3:
                        * calls BATPCRElike3 with isens = FALSE and not = FALSE. */
    1751          14 : BATPCRElike(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1752             : {
    1753          14 :         const str *esc = getArgReference_str(stk, pci, 3);
    1754             :         bit no = FALSE;
    1755             : 
    1756          14 :         return BATPCRElike3(cntxt, mb, stk, pci, esc, &no, &no);
    1757             : }
    1758             : 
    1759             : static str
                       /* LIKE with an empty escape string:
                        * calls BATPCRElike3 with isens = FALSE and not = FALSE. */
    1760         377 : BATPCRElike2(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1761             : {
    1762         377 :         char *esc = "";
    1763             :         bit no = FALSE;
    1764             : 
    1765         377 :         return BATPCRElike3(cntxt, mb, stk, pci, &esc, &no, &no);
    1766             : }
    1767             : 
    1768             : static str
                       /* NOT LIKE with an explicit escape string read from MAL argument 3:
                        * calls BATPCRElike3 with isens = FALSE and not = TRUE. */
    1769           0 : BATPCREnotlike(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1770             : {
    1771           0 :         const str *esc = getArgReference_str(stk, pci, 3);
    1772             :         bit no = FALSE, yes = TRUE;
    1773             : 
    1774           0 :         return BATPCRElike3(cntxt, mb, stk, pci, esc, &no, &yes);
    1775             : }
    1776             : 
    1777             : static str
                       /* NOT LIKE with an empty escape string:
                        * calls BATPCRElike3 with isens = FALSE and not = TRUE. */
    1778           5 : BATPCREnotlike2(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1779             : {
    1780           5 :         char *esc = "";
    1781             :         bit no = FALSE, yes = TRUE;
    1782             : 
    1783           5 :         return BATPCRElike3(cntxt, mb, stk, pci, &esc, &no, &yes);
    1784             : }
    1785             : 
    1786             : static str
                       /* Case-insensitive LIKE with an explicit escape string read from MAL
                        * argument 3: calls BATPCRElike3 with isens = TRUE and not = FALSE. */
    1787          17 : BATPCREilike(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1788             : {
    1789          17 :         const str *esc = getArgReference_str(stk, pci, 3);
    1790             :         bit yes = TRUE, no = FALSE;
    1791             : 
    1792          17 :         return BATPCRElike3(cntxt, mb, stk, pci, esc, &yes, &no);
    1793             : }
    1794             : 
    1795             : static str
                       /* Case-insensitive LIKE with an empty escape string:
                        * calls BATPCRElike3 with isens = TRUE and not = FALSE. */
    1796           8 : BATPCREilike2(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1797             : {
    1798           8 :         char *esc = "";
    1799             :         bit yes = TRUE, no = FALSE;
    1800             : 
    1801           8 :         return BATPCRElike3(cntxt, mb, stk, pci, &esc, &yes, &no);
    1802             : }
    1803             : 
    1804             : static str
                       /* Case-insensitive NOT LIKE with an explicit escape string read from MAL
                        * argument 3: calls BATPCRElike3 with isens = TRUE and not = TRUE. */
    1805           0 : BATPCREnotilike(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1806             : {
    1807           0 :         const str *esc = getArgReference_str(stk, pci, 3);
    1808             :         bit yes = TRUE;
    1809             : 
    1810           0 :         return BATPCRElike3(cntxt, mb, stk, pci, esc, &yes, &yes);
    1811             : }
    1812             : 
    1813             : static str
                       /* Case-insensitive NOT LIKE with an empty escape string:
                        * calls BATPCRElike3 with isens = TRUE and not = TRUE. */
    1814           5 : BATPCREnotilike2(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
    1815             : {
    1816           5 :         char *esc = "";
    1817             :         bit yes = TRUE;
    1818             : 
    1819           5 :         return BATPCRElike3(cntxt, mb, stk, pci, &esc, &yes, &yes);
    1820             : }
    1821             : 
    1822             : /* the following two defines (candscanloop and scanloop) were copied from gdk_select.c */
    1823             : 
    1824             : /* scan select loop with candidates
                        *
                        * Visits ci.ncand candidates: fetches the next oid with canditer_next(),
                        * loads the tail value at position (o - off) into v and, when TEST
                        * holds, appends the oid to bn.  If the append fails, msg is set to a
                        * "pcre.likeselect" exception and control jumps to the bailout label.
                        *
                        * The expansion site must provide: b, s, anti (used for tracing only),
                        * p, ci, o, r, off, v, bi, bn, msg and a bailout label. */
    1825             : #define candscanloop(TEST)                                                                                              \
    1826             :         do {                                                                                                                            \
    1827             :                 TRC_DEBUG(ALGO,                                                                                                 \
    1828             :                                   "BATselect(b=%s#"BUNFMT",s=%s,anti=%d): "                         \
    1829             :                                   "scanselect %s\n", BATgetId(b), BATcount(b),                        \
    1830             :                                   s ? BATgetId(s) : "NULL", anti, #TEST);                             \
    1831             :                 for (p = 0; p < ci.ncand; p++) {                                                             \
    1832             :                         o = canditer_next(&ci);                                                                             \
    1833             :                         r = (BUN) (o - off);                                                                            \
    1834             :                         v = BUNtvar(bi, r);                                                                                     \
    1835             :                         if (TEST)                                                                                                       \
    1836             :                                 if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED) {           \
    1837             :                                         msg = createException(MAL, "pcre.likeselect", OPERATION_FAILED); \
    1838             :                                         goto bailout;                                                                   \
    1839             :                                 }                                                                                                               \
    1840             :                 }                                                                                                                               \
    1841             :         } while (0)
    1842             : 
    1843             : /* scan select loop without candidates
                        *
                        * Walks the oid range [p, q): loads the tail value at position (p - off)
                        * into v and, when TEST holds, appends p (as an oid) to bn.  If the
                        * append fails, msg is set to a "pcre.likeselect" exception and control
                        * jumps to the bailout label.
                        *
                        * p and q must be initialised by the expansion site, which must also
                        * provide b, s, anti (used for tracing only), o, off, v, bi, bn, msg and
                        * a bailout label. */
    1844             : #define scanloop(TEST)                                                                                                  \
    1845             :         do {                                                                                                                            \
    1846             :                 TRC_DEBUG(ALGO,                                                                                                 \
    1847             :                                   "BATselect(b=%s#"BUNFMT",s=%s,anti=%d): "                         \
    1848             :                                   "scanselect %s\n", BATgetId(b), BATcount(b),                        \
    1849             :                                   s ? BATgetId(s) : "NULL", anti, #TEST);                             \
    1850             :                 while (p < q) {                                                                                                      \
    1851             :                         v = BUNtvar(bi, p-off);                                                                         \
    1852             :                         if (TEST) {                                                                                                     \
    1853             :                                 o = (oid) p;                                                                                    \
    1854             :                                 if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)     {               \
    1855             :                                         msg = createException(MAL, "pcre.likeselect", OPERATION_FAILED); \
    1856             :                                         goto bailout;                                                                   \
    1857             :                                 }                                                                                                               \
    1858             :                         }                                                                                                                       \
    1859             :                         p++;                                                                                                            \
    1860             :                 }                                                                                                                               \
    1861             :         } while (0)
    1862             : 
    1863             : #ifdef HAVE_LIBPCRE
                       /* Match expression used as (part of) the TEST argument of the scan loops
                        * in pcre_likeselect; it reads re, ex and v from the expansion site.
                        * With libpcre a match is any pcre_exec() result >= 0.  Without it, any
                        * regexec() result other than REG_NOMATCH counts as a match. */
    1864             : #define PCRE_LIKESELECT_BODY (pcre_exec(re, ex, v, (int) strlen(v), 0, 0, NULL, 0) >= 0)
    1865             : #else
    1866             : #define PCRE_LIKESELECT_BODY (regexec(&re, v, (size_t) 0, NULL, 0) != REG_NOMATCH)
    1867             : #endif
    1868             : 
    1869             : static str
                       /*
                        * Select from b (restricted to the candidates in s, if any) the oids
                        * whose string value matches the compiled pattern pat, or does not
                        * match it when anti is set.  The pattern is compiled once with
                        * pcre_like_build(); caseignore is passed on to it.
                        *
                        * The new TYPE_oid BAT is returned through *bnp, also when an error is
                        * returned, so the caller has to dispose of it in that case.
                        * Returns MAL_SUCCEED or an exception string.
                        *
                        * Values for which v is NULL or starts with '\200' are never selected
                        * (presumably '\200' marks the nil string - confirm).
                        */
    1870          79 : pcre_likeselect(BAT **bnp, BAT *b, BAT *s, const char *pat, bool caseignore, bool anti)
    1871             : {
    1872             : #ifdef HAVE_LIBPCRE
    1873          79 :         pcre *re = NULL;
    1874          79 :         pcre_extra *ex = NULL;
    1875             : #else
    1876             :         regex_t re = (regex_t) {0};
    1877             :         void *ex = NULL;
    1878             : #endif
    1879             :         BATiter bi = bat_iterator(b);
    1880             :         BAT *bn;
    1881             :         BUN p, q, r;
    1882          79 :         oid o, off = b->hseqbase;
    1883             :         const char *v;
    1884             :         str msg = MAL_SUCCEED;
    1885             :         struct canditer ci;
    1886             : 
    1887          79 :         canditer_init(&ci, b, s);
    1888             : 
    1889          79 :         if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
    1890           0 :                 msg = createException(MAL, "pcre.likeselect", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1891           0 :                 goto bailout;
    1892             :         }
    1893          79 :         if ((msg = pcre_like_build(&re, &ex, pat, caseignore, ci.ncand)) != MAL_SUCCEED)
    1894           0 :                 goto bailout;
    1895             : 
                               /* non-dense candidate list: walk the candidates one by one */
    1896          79 :         if (s && !BATtdense(s)) {
    1897           0 :                 if (anti)
    1898           0 :                         candscanloop(v && *v != '\200' && !PCRE_LIKESELECT_BODY);
    1899             :                 else
    1900           0 :                         candscanloop(v && *v != '\200' && PCRE_LIKESELECT_BODY);
    1901             :         } else {
                                       /* dense or absent candidate list: scan the oid range [p, q),
                                        * clamped to the oid range covered by b */
    1902          79 :                 if (s) {
    1903             :                         assert(BATtdense(s));
    1904          61 :                         p = (BUN) s->tseqbase;
    1905          61 :                         q = p + BATcount(s);
    1906          61 :                         if ((oid) p < b->hseqbase)
    1907             :                                 p = b->hseqbase;
    1908          61 :                         if ((oid) q > b->hseqbase + BATcount(b))
    1909             :                                 q = b->hseqbase + BATcount(b);
    1910             :                 } else {
    1911             :                         p = off;
    1912          18 :                         q = BUNlast(b) + off;
    1913             :                 }
    1914             : 
    1915          79 :                 if (anti)
    1916           0 :                         scanloop(v && *v != '\200' && !PCRE_LIKESELECT_BODY);
    1917             :                 else
    1918        9478 :                         scanloop(v && *v != '\200' && PCRE_LIKESELECT_BODY);
    1919             :         }
    1920             : 
    1921          79 : bailout:
    1922          79 :         pcre_clean(&re, &ex);
    1923          78 :         if (bn && !msg) {
    1924          78 :                 BATsetcount(bn, BATcount(bn)); /* set some properties */
                                       /* both loops append oids in increasing scan order, hence sorted and key */
    1925          79 :                 bn->tsorted = true;
    1926          79 :                 bn->trevsorted = bn->batCount <= 1;
    1927          79 :                 bn->tkey = true;
    1928          79 :                 bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil;
    1929             :         }
    1930          79 :         *bnp = bn;
    1931          79 :         return msg;
    1932             : }
    1933             : 
    1934             : static str
                       /*
                        * Select from b (restricted to the candidates in s, if any) the oids
                        * whose string value matches pat, or does not match it when anti is
                        * set, without using PCRE/regex.  re_like_build() prepares re and wpat
                        * from pat, caseignore, use_strcmp and esc.
                        *
                        * Matching test per combination of flags:
                        *   use_strcmp &&  caseignore : mywstrcasecmp(v, wpat)
                        *   use_strcmp && !caseignore : strcmp(v, pat)
                        *  !use_strcmp &&  caseignore : re_match_ignore(v, re)
                        *  !use_strcmp && !caseignore : re_match_no_ignore(v, re)
                        *
                        * The new TYPE_oid BAT is returned through *bnp, also when an error is
                        * returned, so the caller has to dispose of it in that case.
                        * Returns MAL_SUCCEED or an exception string.
                        *
                        * Values for which v is NULL or starts with '\200' are never selected
                        * (presumably '\200' marks the nil string - confirm).
                        */
    1935        1194 : re_likeselect(BAT **bnp, BAT *b, BAT *s, const char *pat, bool caseignore, bool anti, bool use_strcmp, uint32_t esc)
    1936             : {
    1937             :         BATiter bi = bat_iterator(b);
    1938             :         BAT *bn = NULL;
    1939             :         BUN p, q, r;
    1940        1194 :         oid o, off = b->hseqbase;
    1941             :         const char *v;
    1942        1194 :         struct RE *re = NULL;
    1943        1194 :         uint32_t *wpat = NULL;
    1944             :         str msg = MAL_SUCCEED;
    1945             :         struct canditer ci;
    1946             : 
    1947        1194 :         canditer_init(&ci, b, s);
    1948             : 
    1949        1194 :         if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
    1950           0 :                 msg = createException(MAL, "pcre.likeselect", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    1951           0 :                 goto bailout;
    1952             :         }
    1953        1194 :         if ((msg = re_like_build(&re, &wpat, pat, caseignore, use_strcmp, esc)) != MAL_SUCCEED)
    1954           0 :                 goto bailout;
    1955             : 
                               /* non-dense candidate list: walk the candidates one by one */
    1956        1194 :         if (s && !BATtdense(s)) {
    1957          16 :                 if (use_strcmp) {
    1958           4 :                         if (caseignore) {
    1959           0 :                                 if (anti)
    1960           0 :                                         candscanloop(v && *v != '\200' && mywstrcasecmp(v, wpat) != 0);
    1961             :                                 else
    1962           0 :                                         candscanloop(v && *v != '\200' && mywstrcasecmp(v, wpat) == 0);
    1963             :                         } else {
    1964           4 :                                 if (anti)
    1965           0 :                                         candscanloop(v && *v != '\200' && strcmp(v, pat) != 0);
    1966             :                                 else
    1967          58 :                                         candscanloop(v && *v != '\200' && strcmp(v, pat) == 0);
    1968             :                         }
    1969             :                 } else {
    1970          12 :                         if (caseignore) {
    1971           0 :                                 if (anti)
    1972           0 :                                         candscanloop(v && *v != '\200' && !re_match_ignore(v, re));
    1973             :                                 else
    1974           0 :                                         candscanloop(v && *v != '\200' && re_match_ignore(v, re));
    1975             :                         } else {
    1976          12 :                                 if (anti)
    1977        3842 :                                         candscanloop(v && *v != '\200' && !re_match_no_ignore(v, re));
    1978             :                                 else
    1979         748 :                                         candscanloop(v && *v != '\200' && re_match_no_ignore(v, re));
    1980             :                         }
    1981             :                 }
    1982             :         } else {
                                       /* dense or absent candidate list: scan the oid range [p, q),
                                        * clamped to the oid range covered by b */
    1983        1178 :                 if (s) {
    1984             :                         assert(BATtdense(s));
    1985         731 :                         p = (BUN) s->tseqbase;
    1986         731 :                         q = p + BATcount(s);
    1987         731 :                         if ((oid) p < b->hseqbase)
    1988             :                                 p = b->hseqbase;
    1989         731 :                         if ((oid) q > b->hseqbase + BATcount(b))
    1990             :                                 q = b->hseqbase + BATcount(b);
    1991             :                 } else {
    1992             :                         p = off;
    1993         447 :                         q = BUNlast(b) + off;
    1994             :                 }
    1995        1178 :                 if (use_strcmp) {
    1996         134 :                         if (caseignore) {
    1997          13 :                                 if (anti)
    1998          14 :                                         scanloop(v && *v != '\200' && mywstrcasecmp(v, wpat) != 0);
    1999             :                                 else
    2000          40 :                                         scanloop(v && *v != '\200' && mywstrcasecmp(v, wpat) == 0);
    2001             :                         } else {
    2002         121 :                                 if (anti)
    2003          61 :                                         scanloop(v && *v != '\200' && strcmp(v, pat) != 0);
    2004             :                                 else
    2005         543 :                                         scanloop(v && *v != '\200' && strcmp(v, pat) == 0);
    2006             :                         }
    2007             :                 } else {
    2008        1044 :                         if (caseignore) {
    2009          52 :                                 if (anti)
    2010           0 :                                         scanloop(v && *v != '\200' && !re_match_ignore(v, re));
    2011             :                                 else
    2012        7954 :                                         scanloop(v && *v != '\200' && re_match_ignore(v, re));
    2013             :                         } else {
    2014         992 :                                 if (anti)
    2015       30030 :                                         scanloop(v && *v != '\200' && !re_match_no_ignore(v, re));
    2016             :                                 else
    2017       23516 :                                         scanloop(v && *v != '\200' && re_match_no_ignore(v, re));
    2018             :                         }
    2019             :                 }
    2020             :         }
    2021             : 
    2022         978 : bailout:
    2023        1194 :         re_like_clean(&re, &wpat);
    2024        1194 :         if (bn && !msg) {
    2025        1194 :                 BATsetcount(bn, BATcount(bn)); /* set some properties */
                                       /* both loops append oids in increasing scan order, hence sorted and key */
    2026        1194 :                 bn->tsorted = true;
    2027        1194 :                 bn->trevsorted = bn->batCount <= 1;
    2028        1194 :                 bn->tkey = true;
    2029        1194 :                 bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil;
    2030             :         }
    2031        1194 :         *bnp = bn;
    2032        1194 :         return msg;
    2033             : }
    2034             : 
    2035             : static str
                       /*
                        * LIKE select: stores in *ret the id of a new BAT holding the oids of
                        * the rows of BAT *bid that match *pat.
                        *
                        *   sid        - optional candidate list; NULL or a nil bat id means none
                        *   pat, esc   - LIKE pattern and escape string
                        *   caseignore - cast to bool; true requests case-insensitive matching
                        *   anti       - cast to bool; true selects the non-matching rows
                        *
                        * choose_like_path() decides between re_likeselect() (use_re), an
                        * empty result (empty, regardless of *anti) and pcre_likeselect() on
                        * the translated pattern ppat.
                        *
                        * Returns MAL_SUCCEED or an exception string; on error a result BAT
                        * handed back by the select helper is reclaimed here.
                        */
    2036        1274 : PCRElikeselect2(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *caseignore, const bit *anti)
    2037             : {
    2038        1274 :         BAT *b, *s = NULL, *bn = NULL;
    2039             :         str msg = MAL_SUCCEED;
    2040        1274 :         char *ppat = NULL;
    2041        1274 :         bool use_re = false, use_strcmp = false, empty = false;
    2042             : 
    2043        1274 :         if ((b = BATdescriptor(*bid)) == NULL) {
    2044           0 :                 msg = createException(MAL, "algebra.likeselect", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    2045           0 :                 goto bailout;
    2046             :         }
    2047        1274 :         if (sid && !is_bat_nil(*sid) && (s = BATdescriptor(*sid)) == NULL) {
    2048           0 :                 msg = createException(MAL, "algebra.likeselect", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    2049           0 :                 goto bailout;
    2050             :         }
    2051             : 
    2052             :         assert(ATOMstorage(b->ttype) == TYPE_str);
    2053        1274 :         if ((msg = choose_like_path(&ppat, &use_re, &use_strcmp, &empty, pat, esc)) != MAL_SUCCEED)
    2054           0 :                 goto bailout;
    2055             : 
    2056        1274 :         if (use_re) {
                                       /* the simple matcher works on the original pattern, not on ppat */
    2057        1194 :                 msg = re_likeselect(&bn, b, s, *pat, (bool) *caseignore, (bool) *anti, use_strcmp, (unsigned char) **esc);
    2058          80 :         } else if (empty) {
    2059           1 :                 if (!(bn = BATdense(0, 0, 0)))
    2060           0 :                         msg = createException(MAL, "algebra.likeselect", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    2061             :         } else {
    2062          79 :                 msg = pcre_likeselect(&bn, b, s, ppat, (bool) *caseignore, (bool) *anti);
    2063             :         }
    2064             : 
    2065        1273 : bailout:
    2066        1273 :         if (b)
    2067        1274 :                 BBPunfix(b->batCacheid);
    2068        1274 :         if (s)
    2069         808 :                 BBPunfix(s->batCacheid);
    2070        1274 :         GDKfree(ppat);
    2071        1273 :         if (bn && !msg)
    2072        1273 :                 BBPkeepref(*ret = bn->batCacheid);
    2073           0 :         else if (bn)
    2074           0 :                 BBPreclaim(bn);
    2075        1273 :         return msg;
    2076             : }
    2077             : 
    2078             : static str
                       /* LIKE select with an explicit escape string: forwards to
                        * PCRElikeselect2 with caseignore = TRUE (held in the local named f). */
    2079          69 : PCRElikeselect1(bat *ret, const bat *bid, const bat *cid, const str *pat, const str *esc, const bit *anti)
    2080             : {
    2081          69 :         const bit f = TRUE;
    2082          69 :         return PCRElikeselect2(ret, bid, cid, pat, esc, &f, anti);
    2083             : }
    2084             : 
    2085             : static str
                       /* LIKE select with an explicit escape string: forwards to
                        * PCRElikeselect2 with caseignore = FALSE. */
    2086        1199 : PCRElikeselect3(bat *ret, const bat *bid, const bat *sid, const str *pat, const str *esc, const bit *anti)
    2087             : {
    2088        1199 :         const bit f = FALSE;
    2089        1199 :         return PCRElikeselect2(ret, bid, sid, pat, esc, &f, anti);
    2090             : }
    2091             : 
    2092             : static str
                       /* LIKE select with an empty escape string: forwards to
                        * PCRElikeselect2 with caseignore = TRUE (held in the local named f). */
    2093           0 : PCRElikeselect4(bat *ret, const bat *bid, const bat *cid, const str *pat, const bit *anti)
    2094             : {
    2095           0 :         const bit f = TRUE;
    2096           0 :         const str esc ="";
    2097           0 :         return PCRElikeselect2(ret, bid, cid, pat, &esc, &f, anti);
    2098             : }
    2099             : 
    2100             : static str
                       /* LIKE select with an empty escape string: forwards to
                        * PCRElikeselect2 with caseignore = FALSE. */
    2101           1 : PCRElikeselect5(bat *ret, const bat *bid, const bat *sid, const str *pat, const bit *anti)
    2102             : {
    2103           1 :         const bit f = FALSE;
    2104           1 :         const str esc ="";
    2105           1 :         return PCRElikeselect2(ret, bid, sid, pat, &esc, &f, anti);
    2106             : }
    2107             : 
    2108             : #define APPEND(b, o)    (((oid *) b->theap.base)[b->batCount++] = (o)) /* store o at index batCount of b's heap and bump batCount; no capacity check here */
    2109             : #define VALUE(s, x)             (s##vars + VarHeapVal(s##vals, (x), s##width)) /* pastes s onto vars/vals/width, so locals named <s>vars, <s>vals and <s>width must be in scope */
    2110             : 
    2111             : #ifdef HAVE_LIBPCRE
                       /* PCRE_EXEC runs the compiled pattern (pcrere, plus pcreex with libpcre)
                        * on the string vl and stores the outcome in retval; all of these names
                        * come from the expansion site.  PCRE_EXEC_COND is true when that
                        * outcome is a "no match" result: retval < 0 for pcre_exec(), and
                        * REG_NOMATCH or REG_ENOSYS for regexec(). */
    2112             : #define PCRE_EXEC \
    2113             :         do { \
    2114             :                 retval = pcre_exec(pcrere, pcreex, vl, (int) strlen(vl), 0, 0, NULL, 0); \
    2115             :         } while (0)
    2116             : #define PCRE_EXEC_COND (retval < 0)
    2117             : #else
    2118             : #define PCRE_EXEC \
    2119             :         do { \
    2120             :                 retval = regexec(&pcrere, vl, (size_t) 0, NULL, 0); \
    2121             :         } while (0)
    2122             : #define PCRE_EXEC_COND (retval == REG_NOMATCH || retval == REG_ENOSYS)
    2123             : #endif
    2124             : 
    2125             : /* nested loop implementation for PCRE join
                        *
                        * Expands inside pcrejoin and uses its locals directly (lci, rci, l, r,
                        * r1, r2, vl, vr, lo, ro, nl, lastl, rskipped, newcap, msg, esc,
                        * caseignore, pcrepat, re, wpat, pcrere, pcreex, use_re, use_strcmp,
                        * empty) as well as its "bailout" label.
                        *
                        * The three macro arguments are SKIP conditions: when the one that
                        * applies evaluates to true, the current (lo, ro) pair is not emitted.
                        *   STRCMP    - used when use_re and use_strcmp are both set
                        *   RE_MATCH  - used when use_re is set and use_strcmp is not
                        *   PCRE_COND - used otherwise, evaluated right after PCRE_EXEC
                        *
                        * Outer loop: every candidate ro of r supplies a pattern vr.
                        * choose_like_path picks the matching strategy for it; unless it
                        * reports "empty", the pattern is compiled once (re_like_build, or
                        * pcre_like_build after which pcrepat is freed), the l candidate
                        * iterator is reset, and the inner loop visits every candidate lo of l.
                        * Nil values of l are always skipped.  The compiled pattern is released
                        * again after the inner loop.
                        *
                        * For each emitted pair: r1 (and r2 when present) are extended when r1
                        * is full, the tsorted / trevsorted / tkey / tseqbase properties of the
                        * results are downgraded based on lastl, nl and rskipped, and lo is
                        * appended to r1 and ro to r2.  nl counts the pairs emitted for the
                        * current ro; lastl is the last oid appended to r1.
                        *
                        * Any failure stores an exception in msg and jumps to bailout.
                        */
    2126             : #define pcre_join_loop(STRCMP, RE_MATCH, PCRE_COND) \
    2127             :         do { \
    2128             :                 for (BUN ri = 0; ri < rci.ncand; ri++) { \
    2129             :                         ro = canditer_next(&rci); \
    2130             :                         vr = VALUE(r, ro - r->hseqbase); \
    2131             :                         nl = 0; \
    2132             :                         use_re = use_strcmp = empty = false; \
    2133             :                         if ((msg = choose_like_path(&pcrepat, &use_re, &use_strcmp, &empty, (const str*)&vr, (const str*)&esc))) \
    2134             :                                 goto bailout; \
    2135             :                         if (!empty) { \
    2136             :                                 if (use_re) { \
    2137             :                                         if ((msg = re_like_build(&re, &wpat, vr, caseignore, use_strcmp, (unsigned char) *esc)) != MAL_SUCCEED) \
    2138             :                                                 goto bailout; \
    2139             :                                 } else if (pcrepat) { \
    2140             :                                         if ((msg = pcre_like_build(&pcrere, &pcreex, pcrepat, caseignore, lci.ncand)) != MAL_SUCCEED) \
    2141             :                                                 goto bailout; \
    2142             :                                         GDKfree(pcrepat); \
    2143             :                                         pcrepat = NULL; \
    2144             :                                 } \
    2145             :                                 canditer_reset(&lci); \
    2146             :                                 for (BUN li = 0; li < lci.ncand; li++) { \
    2147             :                                         lo = canditer_next(&lci); \
    2148             :                                         vl = VALUE(l, lo - l->hseqbase); \
    2149             :                                         if (strNil(vl)) { \
    2150             :                                                 continue; \
    2151             :                                         } else if (use_re) { \
    2152             :                                                 if (use_strcmp) { \
    2153             :                                                         if (STRCMP) \
    2154             :                                                                 continue; \
    2155             :                                                 } else { \
    2156             :                                                         assert(re); \
    2157             :                                                         if (RE_MATCH) \
    2158             :                                                                 continue; \
    2159             :                                                 } \
    2160             :                                         } else { \
    2161             :                                                 int retval; \
    2162             :                                                 PCRE_EXEC;  \
    2163             :                                                 if (PCRE_COND) \
    2164             :                                                         continue; \
    2165             :                                         } \
    2166             :                                         if (BUNlast(r1) == BATcapacity(r1)) { \
    2167             :                                                 newcap = BATgrows(r1); \
    2168             :                                                 BATsetcount(r1, BATcount(r1)); \
    2169             :                                                 if (r2) \
    2170             :                                                         BATsetcount(r2, BATcount(r2)); \
    2171             :                                                 if (BATextend(r1, newcap) != GDK_SUCCEED || (r2 && BATextend(r2, newcap) != GDK_SUCCEED)) { \
    2172             :                                                         msg = createException(MAL, "pcre.join", SQLSTATE(HY013) MAL_MALLOC_FAIL); \
    2173             :                                                         goto bailout; \
    2174             :                                                 } \
    2175             :                                                 assert(!r2 || BATcapacity(r1) == BATcapacity(r2)); \
    2176             :                                         } \
    2177             :                                         if (BATcount(r1) > 0) { \
    2178             :                                                 if (lastl + 1 != lo) \
    2179             :                                                         r1->tseqbase = oid_nil; \
    2180             :                                                 if (nl == 0) { \
    2181             :                                                         if (r2) \
    2182             :                                                                 r2->trevsorted = false; \
    2183             :                                                         if (lastl > lo) { \
    2184             :                                                                 r1->tsorted = false; \
    2185             :                                                                 r1->tkey = false; \
    2186             :                                                         } else if (lastl < lo) { \
    2187             :                                                                 r1->trevsorted = false; \
    2188             :                                                         } else { \
    2189             :                                                                 r1->tkey = false; \
    2190             :                                                         } \
    2191             :                                                 } \
    2192             :                                         } \
    2193             :                                         APPEND(r1, lo); \
    2194             :                                         if (r2) \
    2195             :                                                 APPEND(r2, ro); \
    2196             :                                         lastl = lo; \
    2197             :                                         nl++; \
    2198             :                                 } \
    2199             :                                 re_like_clean(&re, &wpat); \
    2200             :                                 pcre_clean(&pcrere, &pcreex); \
    2201             :                         } \
    2202             :                         if (r2) { \
    2203             :                                 if (nl > 1) { \
    2204             :                                         r2->tkey = false; \
    2205             :                                         r2->tseqbase = oid_nil; \
    2206             :                                         r1->trevsorted = false; \
    2207             :                                 } else if (nl == 0) { \
    2208             :                                         rskipped = BATcount(r2) > 0; \
    2209             :                                 } else if (rskipped) { \
    2210             :                                         r2->tseqbase = oid_nil; \
    2211             :                                 } \
    2212             :                         } else if (nl > 1) { \
    2213             :                                 r1->trevsorted = false; \
    2214             :                         } \
    2215             :                 } \
    2216             :         } while (0)
    2217             : 
    /*
     * pcrejoin: nested-loop (I)LIKE join of the string BAT l against the
     * patterns stored in the string BAT r.
     *
     *   r1, r2      result BATs of oids; matching oids of l are appended to r1
     *               and, when r2 is not NULL, the corresponding oids of r to r2
     *   l, r        input BATs; both are asserted to be of string type
     *   sl, sr      optional candidate lists for l and r (asserted sorted)
     *   esc         escape string; its first byte is handed to re_like_build
     *   caseignore  selects the case-insensitive comparison helpers
     *   anti        when set, the skip conditions are inverted so that
     *               non-matching pairs are emitted instead
     *
     * Nil values of l never produce a result pair, with or without anti.
     *
     * Returns MAL_SUCCEED, or an exception string; on the error path the
     * pattern buffers built so far are released before returning.
     */
    2218             : static char *
    2219          16 : pcrejoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr, const char *esc, bool caseignore, bool anti)
    2220             : {
    2221             :         struct canditer lci, rci;
    2222             :         const char *lvals, *rvals, *lvars, *rvars, *vl, *vr;
    2223             :         int lwidth, rwidth, rskipped = 0;       /* whether we skipped values in r */
    2224             :         oid lo, ro, lastl = 0;          /* last value inserted into r1 */
    2225             :         BUN nl, newcap;
    2226          16 :         char *pcrepat = NULL, *msg = MAL_SUCCEED;
    2227          16 :         struct RE *re = NULL;
    2228          16 :         bool use_re = false, use_strcmp = false, empty = false;
    2229          16 :         uint32_t *wpat = NULL;
    2230             : #ifdef HAVE_LIBPCRE
    2231          16 :         pcre *pcrere = NULL;
    2232          16 :         pcre_extra *pcreex = NULL;
    2233             : #else
    2234             :         regex_t pcrere = (regex_t) {0};
    2235             :         void *pcreex = NULL;
    2236             : #endif
    2237             : 
    2238          16 :         TRC_DEBUG(ALGO,
    2239             :                           "pcrejoin(l=%s#" BUNFMT "[%s]%s%s,"
    2240             :                           "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
    2241             :                           "sr=%s#" BUNFMT "%s%s)\n",
    2242             :                           BATgetId(l), BATcount(l), ATOMname(l->ttype),
    2243             :                           l->tsorted ? "-sorted" : "",
    2244             :                           l->trevsorted ? "-revsorted" : "",
    2245             :                           BATgetId(r), BATcount(r), ATOMname(r->ttype),
    2246             :                           r->tsorted ? "-sorted" : "",
    2247             :                           r->trevsorted ? "-revsorted" : "",
    2248             :                           sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
    2249             :                           sl && sl->tsorted ? "-sorted" : "",
    2250             :                           sl && sl->trevsorted ? "-revsorted" : "",
    2251             :                           sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
    2252             :                           sr && sr->tsorted ? "-sorted" : "",
    2253             :                           sr && sr->trevsorted ? "-revsorted" : "");
    2254             : 
    2255             :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    2256             :         assert(ATOMtype(l->ttype) == TYPE_str);
    2257             :         assert(sl == NULL || sl->tsorted);
    2258             :         assert(sr == NULL || sr->tsorted);
    2259             : 
    2260          16 :         canditer_init(&lci, l, sl);
    2261          16 :         canditer_init(&rci, r, sr);
    2262             : 
                               /* the l- and r-prefixed variables below are what the VALUE()
                                * macro token-pastes together inside pcre_join_loop */
    2263          16 :         lvals = (const char *) Tloc(l, 0);
    2264          16 :         rvals = (const char *) Tloc(r, 0);
    2265             :         assert(r->tvarsized && r->ttype);
    2266          16 :         lvars = l->tvheap->base;
    2267          16 :         rvars = r->tvheap->base;
    2268          16 :         lwidth = l->twidth;
    2269          16 :         rwidth = r->twidth;
    2270             : 
                               /* start from the strongest properties; the join loop clears
                                * them as soon as an appended pair contradicts them */
    2271          16 :         r1->tkey = true;
    2272          16 :         r1->tsorted = true;
    2273          16 :         r1->trevsorted = true;
    2274          16 :         if (r2) {
    2275           8 :                 r2->tkey = true;
    2276           8 :                 r2->tsorted = true;
    2277           8 :                 r2->trevsorted = true;
    2278             :         }
    2279             : 
                               /* the macro arguments are skip conditions: for a regular join a
                                * pair is skipped when it does not match, for an anti join when
                                * it does match */
    2280          16 :         if (anti) {
    2281           7 :                 if (caseignore) {
    2282          81 :                         pcre_join_loop(mywstrcasecmp(vl, wpat) == 0, re_match_ignore(vl, re), !PCRE_EXEC_COND);
    2283             :                 } else {
    2284         203 :                         pcre_join_loop(strcmp(vl, vr) == 0, re_match_no_ignore(vl, re), !PCRE_EXEC_COND);
    2285             :                 }
    2286             :         } else {
    2287           9 :                 if (caseignore) {
    2288           4 :                         pcre_join_loop(mywstrcasecmp(vl, wpat) != 0, !re_match_ignore(vl, re), PCRE_EXEC_COND);
    2289             :                 } else {
    2290         103 :                         pcre_join_loop(strcmp(vl, vr) != 0, !re_match_no_ignore(vl, re), PCRE_EXEC_COND);
    2291             :                 }
    2292             :         }
    2293             : 
    2294             :         assert(!r2 || BATcount(r1) == BATcount(r2));
    2295             :         /* also set other bits of heap to correct value to indicate size */
    2296          16 :         BATsetcount(r1, BATcount(r1));
    2297          16 :         if (r2)
    2298           8 :                 BATsetcount(r2, BATcount(r2));
    2299          16 :         if (BATcount(r1) > 0) {
                                       /* a result that is still dense gets its seqbase from its
                                        * first element */
    2300          10 :                 if (BATtdense(r1))
    2301           4 :                         r1->tseqbase = ((oid *) r1->theap.base)[0];
    2302          10 :                 if (r2 && BATtdense(r2))
    2303           2 :                         r2->tseqbase = ((oid *) r2->theap.base)[0];
    2304             :         } else {
    2305           6 :                 r1->tseqbase = 0;
    2306           6 :                 if (r2)
    2307           2 :                         r2->tseqbase = 0;
    2308             :         }
                               /* trace the result BATs; the format strings close the "=(" group
                                * they open */
    2309          16 :         if (r2)
    2310           8 :                 TRC_DEBUG(ALGO,
    2311             :                                 "pcrejoin(l=%s,r=%s)=(%s#"BUNFMT"%s%s,%s#"BUNFMT"%s%s)\n",
    2312             :                                 BATgetId(l), BATgetId(r),
    2313             :                                 BATgetId(r1), BATcount(r1),
    2314             :                                 r1->tsorted ? "-sorted" : "",
    2315             :                                 r1->trevsorted ? "-revsorted" : "",
    2316             :                                 BATgetId(r2), BATcount(r2),
    2317             :                                 r2->tsorted ? "-sorted" : "",
    2318             :                                 r2->trevsorted ? "-revsorted" : "");
    2319             :         else
    2320           8 :                 TRC_DEBUG(ALGO,
    2321             :                         "pcrejoin(l=%s,r=%s)=(%s#"BUNFMT"%s%s)\n",
    2322             :                         BATgetId(l), BATgetId(r),
    2323             :                         BATgetId(r1), BATcount(r1),
    2324             :                         r1->tsorted ? "-sorted" : "",
    2325             :                         r1->trevsorted ? "-revsorted" : "");
    2326             :         return MAL_SUCCEED;
    2327             : 
                       /* error exit used by pcre_join_loop: release whatever pattern state is
                        * still allocated and report the exception stored in msg */
    2328           0 : bailout:
    2329           0 :         GDKfree(pcrepat);
    2330           0 :         re_like_clean(&re, &wpat);
    2331           0 :         pcre_clean(&pcrere, &pcreex);
    2332             :         assert(msg != MAL_SUCCEED);
    2333           0 :         return msg;
    2334             : }
    2335             : 
    /*
     * PCREjoin: shared back end of the LIKEjoin / ILIKEjoin entry points.
     * Resolves the BAT ids, creates the result BATs, runs pcrejoin and returns
     * the ids of the results.
     *
     *   r1, r2      receive the ids of the result BATs; r2 may be NULL when only
     *               the left-hand result is wanted
     *   lid, rid    ids of the string BAT and of the pattern BAT (required)
     *   slid, srid  ids of optional candidate lists for lid and rid
     *   elid        id of an optional BAT holding exactly one escape string
     *   caseignore  case-insensitive matching when true
     *   anti        anti join when true
     *
     * For slid, srid and elid both bat_nil and 0 mean "not supplied": the entry
     * points in this file pass 0 for an omitted argument, and 0 must not be
     * handed to BATdescriptor.  Without an escape BAT the escape string is "".
     *
     * Returns MAL_SUCCEED on success.  Errors: HY002 when a supplied BAT cannot
     * be obtained, HY013 when a result BAT cannot be allocated, 42000 when the
     * escape BAT does not hold exactly one value, or whatever pcrejoin reports.
     * Every BAT reference taken here is released on all paths.
     */
    2336             : static str
    2337          16 : PCREjoin(bat *r1, bat *r2, bat lid, bat rid, bat slid, bat srid, bat elid, bool caseignore, bool anti)
    2338             : {
    2339             :         BAT *left = NULL, *right = NULL, *escape = NULL, *candleft = NULL, *candright = NULL;
    2340             :         BAT *result1 = NULL, *result2 = NULL;
    2341             :         char *msg = MAL_SUCCEED, *esc = "";
    2342             : 
    2343          16 :         if ((left = BATdescriptor(lid)) == NULL)
    2344           0 :                 goto fail;
    2345          16 :         if ((right = BATdescriptor(rid)) == NULL)
    2346           0 :                 goto fail;
                               /* optional inputs: skip the lookup for 0 as well as for bat_nil */
    2347          16 :         if (elid && !is_bat_nil(elid) && (escape = BATdescriptor(elid)) == NULL)
    2348           0 :                 goto fail;
    2349          16 :         if (slid && !is_bat_nil(slid) && (candleft = BATdescriptor(slid)) == NULL)
    2350           0 :                 goto fail;
    2351          16 :         if (srid && !is_bat_nil(srid) && (candright = BATdescriptor(srid)) == NULL)
    2352           0 :                 goto fail;
                               /* initial capacity is the size of the left input; pcrejoin
                                * extends the results when more pairs are produced */
    2353          16 :         result1 = COLnew(0, TYPE_oid, BATcount(left), TRANSIENT);
    2354          16 :         if (r2)
    2355           8 :                 result2 = COLnew(0, TYPE_oid, BATcount(left), TRANSIENT);
    2356          16 :         if (!result1 || (r2 && !result2)) {
    2357           0 :                 msg = createException(MAL, "pcre.join", SQLSTATE(HY013) MAL_MALLOC_FAIL);
    2358           0 :                 goto fail;
    2359             :         }
    2360          16 :         result1->tnil = false;
    2361          16 :         result1->tnonil = true;
    2362          16 :         result1->tkey = true;
    2363          16 :         result1->tsorted = true;
    2364          16 :         result1->trevsorted = true;
    2365          16 :         result1->tseqbase = 0;
    2366          16 :         if (r2) {
    2367           8 :                 result2->tnil = false;
    2368           8 :                 result2->tnonil = true;
    2369           8 :                 result2->tkey = true;
    2370           8 :                 result2->tsorted = true;
    2371           8 :                 result2->trevsorted = true;
    2372           8 :                 result2->tseqbase = 0;
    2373             :         }
    2374          16 :         if (escape) {
    2375          16 :                 if (BATcount(escape) != 1) {
    2376           0 :                         msg = createException(MAL, "pcre.join", SQLSTATE(42000) "At the moment, only one value is allowed for the escape input at pcre join");
    2377           0 :                         goto fail;
    2378             :                 }
                                       /* esc points into the escape BAT, which stays fixed until
                                        * after pcrejoin has returned */
    2379          32 :                 esc = BUNtvar(bat_iterator(escape), 0);
    2380             :         }
    2381          16 :         msg = pcrejoin(result1, result2, left, right, candleft, candright, esc, caseignore, anti);
    2382          16 :         if (msg)
    2383           0 :                 goto fail;
    2384          16 :         *r1 = result1->batCacheid;
    2385          16 :         BBPkeepref(*r1);
    2386          16 :         if (r2) {
    2387           8 :                 *r2 = result2->batCacheid;
    2388           8 :                 BBPkeepref(*r2);
    2389             :         }
    2390          16 :         BBPunfix(left->batCacheid);
    2391          16 :         BBPunfix(right->batCacheid);
    2392          16 :         if (escape)
    2393          16 :                 BBPunfix(escape->batCacheid);
    2394          16 :         if (candleft)
    2395           0 :                 BBPunfix(candleft->batCacheid);
    2396          16 :         if (candright)
    2397           0 :                 BBPunfix(candright->batCacheid);
    2398             :         return MAL_SUCCEED;
    2399             : 
                       /* common error exit: drop every reference obtained so far; when no
                        * message was set the failure was a BAT that could not be obtained */
    2400           0 :   fail:
    2401           0 :         if (left)
    2402           0 :                 BBPunfix(left->batCacheid);
    2403           0 :         if (right)
    2404           0 :                 BBPunfix(right->batCacheid);
    2405           0 :         if (escape)
    2406           0 :                 BBPunfix(escape->batCacheid);
    2407           0 :         if (candleft)
    2408           0 :                 BBPunfix(candleft->batCacheid);
    2409           0 :         if (candright)
    2410           0 :                 BBPunfix(candright->batCacheid);
    2411           0 :         if (result1)
    2412           0 :                 BBPunfix(result1->batCacheid);
    2413           0 :         if (result2)
    2414           0 :                 BBPunfix(result2->batCacheid);
    2415           0 :         if (msg)
    2416             :                 return msg;
    2417           0 :         throw(MAL, "pcre.join", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
    2418             : }
    2419             : 
    /*
     * Case-sensitive LIKE join with an escape BAT, producing two result BATs.
     * lid, rid and anti are dereferenced unconditionally; slid, srid and elid
     * may be NULL pointers, in which case bat id 0 is passed on to PCREjoin.
     * caseignore is passed as 0.  nil_matches and estimate are accepted but
     * ignored.
     */
    2420             : static str
    2421           6 : LIKEjoin_esc(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *elid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate, const bit *anti)
    2422             : {
    2423             :         (void) nil_matches;
    2424             :         (void) estimate;
    2425           6 :         return PCREjoin(r1, r2, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0, elid ? *elid : 0, 0, *anti);
    2426             : }
    2427             : 
    /*
     * Single-result form of the case-sensitive LIKE join with an escape BAT:
     * PCREjoin is called with NULL for the second result.  lid, rid and anti
     * are dereferenced unconditionally; a NULL slid, srid or elid pointer is
     * replaced by bat id 0.  caseignore is passed as 0.  nil_matches and
     * estimate are accepted but ignored.
     */
    2428             : static str
    2429           6 : LIKEjoin_esc1(bat *r1, const bat *lid, const bat *rid, const bat *elid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate, const bit *anti)
    2430             : {
    2431             :         (void) nil_matches;
    2432             :         (void) estimate;
    2433           6 :         return PCREjoin(r1, NULL, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0, elid ? *elid : 0, 0, *anti);
    2434             : }
    2435             : 
    /*
     * Case-sensitive LIKE join without an escape argument, producing two result
     * BATs.  The escape BAT id handed to PCREjoin is the literal 0 and
     * caseignore is 0.  lid, rid and anti are dereferenced unconditionally; a
     * NULL slid or srid pointer is replaced by bat id 0.  nil_matches and
     * estimate are accepted but ignored.
     */
    2436             : static str
    2437           0 : LIKEjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate, const bit *anti)
    2438             : {
    2439             :         (void) nil_matches;
    2440             :         (void) estimate;
    2441           0 :         return PCREjoin(r1, r2, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0, 0, 0, *anti);
    2442             : }
    2443             : 
    /*
     * Single-result form of the case-sensitive LIKE join without an escape
     * argument: PCREjoin is called with NULL for the second result, the literal
     * 0 as escape BAT id and 0 for caseignore.  lid, rid and anti are
     * dereferenced unconditionally; a NULL slid or srid pointer is replaced by
     * bat id 0.  nil_matches and estimate are accepted but ignored.
     */
    2444             : static str
    2445           0 : LIKEjoin1(bat *r1, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate, const bit *anti)
    2446             : {
    2447             :         (void) nil_matches;
    2448             :         (void) estimate;
    2449           0 :         return PCREjoin(r1, NULL, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0, 0, 0, *anti);
    2450             : }
    2451             : 
    /*
     * Case-insensitive counterpart of LIKEjoin_esc: same argument handling, but
     * caseignore is passed to PCREjoin as 1.  Produces two result BATs; a NULL
     * slid, srid or elid pointer is replaced by bat id 0; nil_matches and
     * estimate are accepted but ignored.
     */
    2452             : static str
    2453           2 : ILIKEjoin_esc(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *elid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate, const bit *anti)
    2454             : {
    2455             :         (void) nil_matches;
    2456             :         (void) estimate;
    2457           2 :         return PCREjoin(r1, r2, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0, elid ? *elid : 0, 1, *anti);
    2458             : }
    2459             : 
    /*
     * Single-result, case-insensitive LIKE join with an escape BAT: PCREjoin is
     * called with NULL for the second result and 1 for caseignore.  A NULL
     * slid, srid or elid pointer is replaced by bat id 0; nil_matches and
     * estimate are accepted but ignored.
     */
    2460             : static str
    2461           2 : ILIKEjoin_esc1(bat *r1, const bat *lid, const bat *rid, const bat *elid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate, const bit *anti)
    2462             : {
    2463             :         (void) nil_matches;
    2464             :         (void) estimate;
    2465           2 :         return PCREjoin(r1, NULL, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0, elid ? *elid : 0, 1, *anti);
    2466             : }
    2467             : 
    /*
     * Case-insensitive LIKE join without an escape argument, producing two
     * result BATs.  PCREjoin receives the literal 0 as escape BAT id and 1 for
     * caseignore.  A NULL slid or srid pointer is replaced by bat id 0;
     * nil_matches and estimate are accepted but ignored.
     */
    2468             : static str
    2469           0 : ILIKEjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate, const bit *anti)
    2470             : {
    2471             :         (void) nil_matches;
    2472             :         (void) estimate;
    2473           0 :         return PCREjoin(r1, r2, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0, 0, 1, *anti);
    2474             : }
    2475             : 
    /*
     * Single-result, case-insensitive LIKE join without an escape argument:
     * PCREjoin is called with NULL for the second result, the literal 0 as
     * escape BAT id and 1 for caseignore.  A NULL slid or srid pointer is
     * replaced by bat id 0; nil_matches and estimate are accepted but ignored.
     */
    2476             : static str
    2477           0 : ILIKEjoin1(bat *r1, const bat *lid, const bat *rid, const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate, const bit *anti)
    2478             : {
    2479             :         (void) nil_matches;
    2480             :         (void) estimate;
    2481           0 :         return PCREjoin(r1, NULL, *lid, *rid, slid ? *slid : 0, srid ? *srid : 0, 0, 1, *anti);
    2482             : }
    2483             : 
    2484             : #include "mel.h"
    /*
     * Atom table of this module: one entry naming the "pcre" atom, with no
     * other fields set.  The second entry ({ .cmp=NULL }) is presumably the
     * list terminator expected by the mel.h registration code -- confirm
     * against mel.h.
     */
    2485             : mel_atom pcre_init_atoms[] = {
    2486             :  { .name="pcre", },  { .cmp=NULL }
    2487             : };
    2488             : mel_func pcre_init_funcs[] = {
    2489             :  command("pcre", "index", PCREindex, false, "match a pattern, return matched position (or 0 when not found)", args(1,3, arg("",int),arg("pat",pcre),arg("s",str))),
    2490             :  command("pcre", "match", PCREmatch, false, "Perl Compatible Regular Expression pattern matching against a string", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    2491             :  command("pcre", "imatch", PCREimatch, false, "Caseless Perl Compatible Regular Expression pattern matching against a string", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    2492             :  command("pcre", "patindex", PCREpatindex, false, "Location of the first POSIX pattern matching against a string", args(1,3, arg("",int),arg("pat",str),arg("s",str))),
    2493             :  command("pcre", "replace", PCREreplace_wrap, false, "Replace _all_ matches of \"pattern\" in \"origin_str\" with \"replacement\".\nParameter \"flags\" accept these flags: 'i', 'm', 's', and 'x'.\n'e': if present, an empty string is considered to be a valid match\n'i': if present, the match operates in case-insensitive mode.\nOtherwise, in case-sensitive mode.\n'm': if present, the match operates in multi-line mode.\n's': if present, the match operates in \"dot-all\"\nThe specifications of the flags can be found in \"man pcreapi\"\nThe flag letters may be repeated.\nNo other letters than 'e', 'i', 'm', 's' and 'x' are allowed in \"flags\".\nReturns the replaced string, or if no matches found, the original string.", args(1,5, arg("",str),arg("origin",str),arg("pat",str),arg("repl",str),arg("flags",str))),
    2494             :  command("pcre", "replace_first", PCREreplace_wrap, false, "Replace _the first_ match of \"pattern\" in \"origin_str\" with \"replacement\".\nParameter \"flags\" accept these flags: 'i', 'm', 's', and 'x'.\n'e': if present, an empty string is considered to be a valid match\n'i': if present, the match operates in case-insensitive mode.\nOtherwise, in case-sensitive mode.\n'm': if present, the match operates in multi-line mode.\n's': if present, the match operates in \"dot-all\"\nThe specifications of the flags can be found in \"man pcreapi\"\nThe flag letters may be repeated.\nNo other letters than 'e', 'i', 'm', 's' and 'x' are allowed in \"flags\".\nReturns the replaced string, or if no matches found, the original string.", args(1,5, arg("",str),arg("origin",str),arg("pat",str),arg("repl",str),arg("flags",str))),
    2495             :  command("pcre", "pcre_quote", PCREquote, false, "Return a PCRE pattern string that matches the argument exactly.", args(1,2, arg("",str),arg("s",str))),
    2496             :  command("pcre", "sql2pcre", PCREsql2pcre, false, "Convert a SQL like pattern with the given escape character into a PCRE pattern.", args(1,3, arg("",str),arg("pat",str),arg("esc",str))),
    2497             :  command("pcre", "prelude", pcre_init, false, "Initialize pcre", args(1,1, arg("",void))),
    2498             :  command("str", "replace", PCREreplace_wrap, false, "", args(1,5, arg("",str),arg("origin",str),arg("pat",str),arg("repl",str),arg("flags",str))),
    2499             :  command("algebra", "like", PCRElike3, false, "", args(1,4, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str))),
    2500             :  command("algebra", "like", PCRElike2, false, "", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    2501             :  command("algebra", "not_like", PCREnotlike3, false, "", args(1,4, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str))),
    2502             :  command("algebra", "not_like", PCREnotlike2, false, "", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    2503             :  command("algebra", "ilike", PCREilike3, false, "", args(1,4, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str))),
    2504             :  command("algebra", "ilike", PCREilike2, false, "", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    2505             :  command("algebra", "not_ilike", PCREnotilike3, false, "", args(1,4, arg("",bit),arg("s",str),arg("pat",str),arg("esc",str))),
    2506             :  command("algebra", "not_ilike", PCREnotilike2, false, "", args(1,3, arg("",bit),arg("s",str),arg("pat",str))),
    2507             :  command("batpcre", "replace", PCREreplace_bat_wrap, false, "", args(1,5, batarg("",str),batarg("orig",str),arg("pat",str),arg("repl",str),arg("flag",str))),
    2508             :  command("batpcre", "replace_first", PCREreplacefirst_bat_wrap, false, "", args(1,5, batarg("",str),batarg("orig",str),arg("pat",str),arg("repl",str),arg("flag",str))),
    2509             :  pattern("batalgebra", "like", BATPCRElike, false, "", args(1,4, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str))),
    2510             :  pattern("batalgebra", "like", BATPCRElike, false, "", args(1,4, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str))),
    2511             :  pattern("batalgebra", "like", BATPCRElike, false, "", args(1,4, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str))),
    2512             :  pattern("batalgebra", "like", BATPCRElike2, false, "", args(1,3, batarg("",bit),batarg("s",str),arg("pat",str))),
    2513             :  pattern("batalgebra", "like", BATPCRElike2, false, "", args(1,3, batarg("",bit),arg("s",str),batarg("pat",str))),
    2514             :  pattern("batalgebra", "like", BATPCRElike2, false, "", args(1,3, batarg("",bit),batarg("s",str),batarg("pat",str))),
    2515             :  pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,4, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str))),
    2516             :  pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,4, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str))),
    2517             :  pattern("batalgebra", "not_like", BATPCREnotlike, false, "", args(1,4, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str))),
    2518             :  pattern("batalgebra", "not_like", BATPCREnotlike2, false, "", args(1,3, batarg("",bit),batarg("s",str),arg("pat",str))),
    2519             :  pattern("batalgebra", "not_like", BATPCREnotlike2, false, "", args(1,3, batarg("",bit),arg("s",str),batarg("pat",str))),
    2520             :  pattern("batalgebra", "not_like", BATPCREnotlike2, false, "", args(1,3, batarg("",bit),batarg("s",str),batarg("pat",str))),
    2521             :  pattern("batalgebra", "ilike", BATPCREilike, false, "", args(1,4, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str))),
    2522             :  pattern("batalgebra", "ilike", BATPCREilike, false, "", args(1,4, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str))),
    2523             :  pattern("batalgebra", "ilike", BATPCREilike, false, "", args(1,4, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str))),
    2524             :  pattern("batalgebra", "ilike", BATPCREilike2, false, "", args(1,3, batarg("",bit),batarg("s",str),arg("pat",str))),
    2525             :  pattern("batalgebra", "ilike", BATPCREilike2, false, "", args(1,3, batarg("",bit),arg("s",str),batarg("pat",str))),
    2526             :  pattern("batalgebra", "ilike", BATPCREilike2, false, "", args(1,3, batarg("",bit),batarg("s",str),batarg("pat",str))),
    2527             :  pattern("batalgebra", "not_ilike", BATPCREnotilike, false, "", args(1,4, batarg("",bit),batarg("s",str),arg("pat",str),arg("esc",str))),
    2528             :  pattern("batalgebra", "not_ilike", BATPCREnotilike, false, "", args(1,4, batarg("",bit),arg("s",str),batarg("pat",str),arg("esc",str))),
    2529             :  pattern("batalgebra", "not_ilike", BATPCREnotilike, false, "", args(1,4, batarg("",bit),batarg("s",str),batarg("pat",str),arg("esc",str))),
    2530             :  pattern("batalgebra", "not_ilike", BATPCREnotilike2, false, "", args(1,3, batarg("",bit),batarg("s",str),arg("pat",str))),
    2531             :  pattern("batalgebra", "not_ilike", BATPCREnotilike2, false, "", args(1,3, batarg("",bit),arg("s",str),batarg("pat",str))),
    2532             :  pattern("batalgebra", "not_ilike", BATPCREnotilike2, false, "", args(1,3, batarg("",bit),batarg("s",str),batarg("pat",str))),
    2533             :  command("algebra", "likeselect", PCRElikeselect2, false, "Select all head values of the first input BAT for which the\ntail value is \"like\" the given (SQL-style) pattern and for\nwhich the head value occurs in the tail of the second input\nBAT.\nInput is a dense-headed BAT, output is a dense-headed BAT with in\nthe tail the head value of the input BAT for which the\nrelationship holds.  The output BAT is sorted on the tail value.", args(1,7, batarg("",oid),batarg("b",str),batarg("s",oid),arg("pat",str),arg("esc",str),arg("caseignore",bit),arg("anti",bit))),
    2534             :  command("algebra", "likeselect", PCRElikeselect3, false, "", args(1,6, batarg("",oid),batarg("b",str),batarg("cand",oid),arg("pat",str),arg("esc",str),arg("anti",bit))),
    2535             :  command("algebra", "ilikeselect", PCRElikeselect1, false, "", args(1,6, batarg("",oid),batarg("b",str),batarg("cand",oid),arg("pat",str),arg("esc",str),arg("anti",bit))),
    2536             :  command("algebra", "likeselect", PCRElikeselect5, false, "", args(1,5, batarg("",oid),batarg("b",str),batarg("cand",oid),arg("pat",str),arg("anti",bit))),
    2537             :  command("algebra", "ilikeselect", PCRElikeselect4, false, "", args(1,5, batarg("",oid),batarg("b",str),batarg("cand",oid),arg("pat",str),arg("anti",bit))),
    2538             :  command("algebra", "likejoin", LIKEjoin_esc, false, "Join the string bat L with the pattern bat R\nwith optional candidate lists SL and SR using pattern escape string ESC\nand doing a case sensitive match.\nThe result is two aligned bats with oids of matching rows.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("esc",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    2539             :  command("algebra", "likejoin", LIKEjoin_esc1, false, "The same as LIKEjoin_esc, but only produce one output", args(1,9,batarg("",oid),batarg("l",str),batarg("r",str),batarg("esc",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng), arg("anti",bit))),
    2540             :  command("algebra", "ilikejoin", ILIKEjoin_esc, false, "Join the string bat L with the pattern bat R\nwith optional candidate lists SL and SR using pattern escape string ESC\nand doing a case insensitive match.\nThe result is two aligned bats with oids of matching rows.", args(2,10, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("esc",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    2541             :  command("algebra", "ilikejoin", ILIKEjoin_esc1, false, "The same as ILIKEjoin_esc, but only produce one output", args(1,9, batarg("",oid),batarg("l",str),batarg("r",str),batarg("esc",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    2542             :  command("algebra", "likejoin", LIKEjoin, false, "", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    2543             :  command("algebra", "likejoin", LIKEjoin1, false, "", args(1,8, batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    2544             :  command("algebra", "ilikejoin", ILIKEjoin, false, "", args(2,9, batarg("",oid),batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    2545             :  command("algebra", "ilikejoin", ILIKEjoin1, false, "", args(1,8, batarg("",oid),batarg("l",str),batarg("r",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng),arg("anti",bit))),
    2546             :  { .imp=NULL }
    2547             : };
    2548             : #include "mal_import.h"
    /*
     * MSVC-only preamble: any macro named `read` is undefined first, so the
     * bare token `read` reaches the section pragma below unexpanded.
     * NOTE(review): the ".CRT$XCU" section is presumably what
     * LIB_STARTUP_FUNC uses on MSVC to get code run at load time --
     * confirm against the macro's definition, which is not visible here.
     */
    2549             : #ifdef _MSC_VER
    2550             : #undef read
    2551             : #pragma section(".CRT$XCU",read)
    2552             : #endif
    /*
     * Startup hook for this module.  The body makes a single call to
     * mal_module(), passing the module name "pcre" together with
     * pcre_init_atoms and pcre_init_funcs.
     * NOTE(review): LIB_STARTUP_FUNC is a macro defined outside this view;
     * presumably it arranges for init_pcre_mal to run when the library is
     * loaded -- confirm against its definition.
     * NOTE(review): pcre_init_funcs is presumably the table that ends with
     * the { .imp=NULL } sentinel entry just before this section; its
     * declaration is not visible here.
     */
    2553         255 : LIB_STARTUP_FUNC(init_pcre_mal)
    2554         255 : { mal_module("pcre", pcre_init_atoms, pcre_init_funcs); }

Generated by: LCOV version 1.14