LCOV - code coverage report
Current view: top level - monetdb5/modules/atoms - url.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 273 325 84.0 %
Date: 2021-10-13 02:24:04 Functions: 23 24 95.8 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  *  M. Kersten
      11             :  *  Y. Zhang
      12             :  * The URL module
      13             :  * The URL module contains a collection of commands to manipulate
      14             :  * Uniform Resource Locators - a resource on the World Wide Web-
      15             :  * represented as a string in Monet. The URL can represent
      16             :  * anything from a file, a directory or a complete movie.
      17             :  * This module is geared towards manipulation of their name only.
      18             :  * A complementary module can be used to gain access.[IOgate]
      19             :  *
      20             :  * The URL syntax is specified in RFC2396, Uniform Resource Identifiers
      21             :  * (URI): Generic Syntax. The URL syntax is dependent upon the scheme.
      22             :  * In general, a URL has the form <scheme>:<scheme-specific-part>.
      23             :  * Thus, accepting a valid URL is a simple process, unless the scheme
      24             :  * is known and scheme-specific syntax is checked (e.g., http or ftp
      25             :  * scheme). For the URL module implemented here, we assume some common
      26             :  * fields of the <scheme-specific-part> that are shared among different
      27             :  * schemes.
      28             :  *
      29             :  * The core of the extension involves several operators to extract
      30             :  * portions of the URLs for further manipulation. In particular,
      31             :  * the domain, the server, and the protocol, and the file extension
      32             :  * can be extracted without copying the complete URL from the heap
      33             :  * into a string variable first.
      34             :  *
      35             :  * The commands provided are based on the corresponding Java class.
      36             :  *
      37             :  * A future version should use a special atom, because this may save
      38             :  * considerable space. Alternatively, break the URL strings into
      39             :  * components and represent them with a bunch of BATs. An intermediate
      40             :  * step would be to refine the atom STR, then it would be possible to
      41             :  * redefine hashing.
      42             :  */
      43             : 
      44             : #include "monetdb_config.h"
      45             : #include "mal.h"
      46             : #include "gdk.h"
      47             : #include <ctype.h>
      48             : #include "mal_exception.h"
      49             : 
      50             : typedef str url;
      51             : 
      52             : /* SCHEME "://" AUTHORITY [ PATH ] [ "?" SEARCH ] [ "#" FRAGMENT ]
      53             :  * AUTHORITY is: [ USER [ ":" PASSWORD ] "@" ] HOST [ ":" PORT ] */
      54             : 
      55             : /* return pointer to string after the scheme and colon; input: pointer
      56             :  * to start of URI */
      57             : static const char *
      58          54 : skip_scheme(const char *uri)
      59             : {
      60          54 :         if (('a' <= *uri && *uri <= 'z') || ('A' <= *uri && *uri <= 'Z')) {
      61          54 :                 uri++;
      62         228 :                 while (('a' <= *uri && *uri <= 'z') ||
      63          54 :                            ('A' <= *uri && *uri <= 'Z') ||
      64          54 :                            isdigit((unsigned char) *uri) ||
      65          54 :                            *uri == '+' || *uri == '-' || *uri == '.')
      66         174 :                         uri++;
      67          54 :                 if (*uri == ':')
      68          54 :                         return uri + 1;
      69             :         }
      70             :         return NULL;
      71             : }
      72             : 
      73             : #define ishex(c)                isxdigit((unsigned char) (c))
      74             : #define isreserved(c)   ((c) == ';' || (c) == '/' || (c) == '?' || \
      75             :                                                  (c) == ':' || (c) == '@' || (c) == '&' || \
      76             :                                                  (c) == '=' || (c) == '+' || (c) == '$' || \
      77             :                                                  (c) == ',')
      78             : #define isunreserved(c) (('a' <= (c) && (c) <= 'z') || \
      79             :                                                  ('A' <= (c) && (c) <= 'Z') || \
      80             :                                                  isdigit((unsigned char) (c)) || \
      81             :                                                  (c) == '-' || (c) == '_' || (c) == '.' || \
      82             :                                                  (c) == '!' || (c) == '~' || (c) == '*' || \
      83             :                                                  (c) == '\'' || (c) == '(' || (c) == ')')
      84             : 
      85             : /* return pointer to string after the authority, filling in pointers
      86             :  * to start of user, password, host, and port, if provided; input:
      87             :  * result of skip_scheme() */
      88             : static const char *
      89          47 : skip_authority(const char *uri, const char **userp, const char **passp, const char **hostp, const char **portp)
      90             : {
      91             :         const char *user = NULL, *pass = NULL, *host = NULL, *port = NULL;
      92             : 
      93          47 :         if (uri[0] == '/' && uri[1] == '/') {
      94          47 :                 uri += 2;
      95             :                 user = host = uri;
      96         828 :                 while (isunreserved(*uri) ||
      97          83 :                            (*uri == '%' && ishex(uri[1]) && ishex(uri[2])) ||
      98          83 :                            *uri == ';' || *uri == ':' || *uri == '=' || *uri == '+'|| *uri == '$' || *uri == ',' ||
      99             :                            *uri == '@') {
     100         781 :                         if (*uri == ':') {
     101          23 :                                 if (user == host)
     102          12 :                                         port = pass = uri + 1;
     103             :                                 else
     104          11 :                                         port = uri + 1;
     105         758 :                         } else if (*uri == '@')
     106          13 :                                 host = uri + 1;
     107        1562 :                         uri += *uri == '%' ? 3 : 1;
     108             :                 }
     109          47 :                 if (user == host) {
     110             :                         /* no "@", so no user info */
     111          34 :                         if (userp)
     112           4 :                                 *userp = NULL;
     113          34 :                         if (passp)
     114           4 :                                 *passp = NULL;
     115             :                 } else {
     116          13 :                         if (userp)
     117           3 :                                 *userp = user;
     118          13 :                         if (passp)
     119           3 :                                 *passp = pass;
     120             :                 }
     121          47 :                 if (portp)
     122          12 :                         *portp = port;
     123          47 :                 if (hostp)
     124          15 :                         *hostp = host;
     125          47 :                 return uri;
     126             :         }
     127             :         return NULL;
     128             : }
     129             : 
     130             : /* return pointer to string after the path, filling in pointer to
     131             :  * start of last component and extension of that component; input:
     132             :  * result of skip_authority() */
     133             : static const char *
     134          24 : skip_path(const char *uri, const char **basep, const char **extp)
     135             : {
     136             :         const char *base = NULL, *ext = NULL;
     137             : 
     138          24 :         if (*uri == '/') {
     139          18 :                 uri++;
     140             :                 base = uri;
     141         504 :                 while (isunreserved(*uri) ||
     142          48 :                            (*uri == '%' && ishex(uri[1]) && ishex(uri[2])) ||
     143          48 :                            *uri == ':' || *uri == '@' || *uri == '&' || *uri == '=' || *uri == '+' || *uri == '$' || *uri == ',' ||
     144          48 :                            *uri == ';' ||
     145             :                            *uri == '/') {
     146         486 :                         if (*uri == '/') {
     147          30 :                                 base = uri + 1;
     148             :                                 ext = NULL;
     149         456 :                         } else if (*uri == '.' && ext == NULL && uri != base) {
     150             :                                 ext = uri;
     151             :                         }
     152         972 :                         uri += *uri == '%' ? 3 : 1;
     153             :                 }
     154             :         }
     155          24 :         if (basep)
     156           8 :                 *basep = base;
     157          24 :         if (extp)
     158           8 :                 *extp = ext;
     159          24 :         return uri;
     160             : }
     161             : 
     162             : /* return pointer to string after the search string; input: result of
     163             :  * skip_path() */
     164             : static const char *
     165           8 : skip_search(const char *uri)
     166             : {
     167           8 :         if (*uri == '?') {
     168           4 :                 uri++;
     169          42 :                 while (isreserved(*uri) || isunreserved(*uri) ||
     170           0 :                            (*uri == '%' && ishex(uri[1]) && ishex(uri[2]))) {
     171          76 :                         uri += *uri == '%' ? 3 : 1;
     172             :                 }
     173             :         }
     174           8 :         return uri;
     175             : }
     176             : 
     177             : #if 0
     178             : /*
     179             :  * Utilities
     180             :  */
     181             : 
     182             : static char
     183             : x2c(char *what)
     184             : {
     185             :         char digit;
     186             : 
     187             :         digit = (what[0] >= 'A' ? ((what[0] & 0xdf) - 'A') + 10 : (what[0] - '0'));
     188             :         digit *= 16;
     189             :         digit += (what[1] >= 'A' ? ((what[1] & 0xdf) - 'A') + 10 : (what[1] - '0'));
     190             :         return (digit);
     191             : }
     192             : 
     193             : static int needEscape(char c){
     194             :         if( isalnum((unsigned char)c) )
     195             :                 return 0;
     196             :         if( c == '#' || c == '-' || c == '_' || c == '.' || c == '!' ||
     197             :                 c == '~' || c == '*' || c == '\'' || c == '(' || c == ')' )
     198             :                 return 0;
     199             :         return 1;
     200             : }
     201             : 
     202             : /* COMMAND "escape": this function applies the URI escaping rules defined in
     203             :  * section 2 of [RFC 3986] to the string supplied as 's'.
     204             :  * The effect of the function is to escape a set of identified characters in
     205             :  * the string. Each such character is replaced in the string by an escape
     206             :  * sequence, which is formed by encoding the character as a sequence of octets
     207             :  * in UTF-8, and then representing each of these octets in the form %HH.
     208             :  *
     209             :  * All characters are escaped other than:
     210             :  * [a-z], [A-Z], [0-9], "#", "-", "_", ".", "!", "~", "*", "'", "(", ")"
     211             :  *
     212             :  * This function must always generate hexadecimal values using the upper-case
     213             :  * letters A-F.
     214             :  *
     215             :  * SIGNATURE: escape(str) : str; */
     216             : static str
     217             : escape_str(str *retval, str s)
     218             : {
     219             :         int x, y;
     220             :         str res;
     221             : 
     222             :         if (!s)
     223             :                 throw(ILLARG, "url.escape", "url missing");
     224             : 
     225             :         if (!( res = (str) GDKmalloc( strlen(s) * 3 ) ))
     226             :                 throw(MAL, "url.escape", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     227             :         for (x = 0, y = 0; s[x]; ++x, ++y) {
     228             :                 if (needEscape(s[x])) {
     229             :                         if (s[x] == ' ') {
     230             :                                 res[y] = '+';
     231             :                         } else {
     232             :                                 sprintf(res+y, "%%%2x", (uint8_t) s[x]);
     233             :                                 y += 2;
     234             :                         }
     235             :                 } else {
     236             :                         res[y] = s[x];
     237             :                 }
     238             :         }
     239             :         res[y] = '\0';
     240             : 
     241             :         if ((*retval = GDKrealloc(res, strlen(res)+1)) == NULL) {
     242             :                 GDKfree(res);
     243             :                 throw(MAL, "url.escape", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     244             :         }
     245             :         return MAL_SUCCEED;
     246             : }
     247             : 
     248             : /* COMMAND "unescape": Convert hexadecimal representations to ASCII characters.
     249             :  *                     All sequences of the form "% HEX HEX" are unescaped.
     250             :  * SIGNATURE: unescape(str) : str; */
     251             : static str
     252             : unescape_str(str *retval, str s)
     253             : {
     254             :         int x, y;
     255             :         str res;
     256             : 
     257             :         if (!s)
     258             :                 throw(ILLARG, "url.escape", "url missing");
     259             : 
     260             :         res = (str) GDKmalloc(strlen(s));
     261             :         if (!res)
     262             :                 throw(MAL, "url.unescape", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     263             : 
     264             :         for (x = 0, y = 0; s[x]; ++x, ++y) {
     265             :                 if (s[x] == '%') {
     266             :                         res[y] = x2c(&s[x + 1]);
     267             :                         x += 2;
     268             :                 } else {
     269             :                         res[y] = s[x];
     270             :                 }
     271             :         }
     272             :         res[y] = '\0';
     273             : 
     274             :         if ((*retval = GDKrealloc(res, strlen(res)+1)) == NULL) {
     275             :                 GDKfree(res);
     276             :                 throw(MAL, "url.unescape", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     277             :         }
     278             :         return MAL_SUCCEED;
     279             : }
     280             : #endif
     281             : 
     282             : /*
     283             :  * Wrapping
     284             :  * Here you find the wrappers around the V4 url library included above.
     285             :  */
     286             : 
     287             : static ssize_t
     288          13 : URLfromString(const char *src, size_t *len, void **U, bool external)
     289             : {
     290             :         char **u = (char **) U;
     291          13 :         size_t l = strlen(src) + 1;
     292             : 
     293          13 :         if (*len < l || *u == NULL) {
     294          13 :                 GDKfree(*u);
     295          13 :                 *u = GDKmalloc(l);
     296          13 :                 if (*u == NULL)
     297             :                         return -1;
     298          13 :                 *len = l;
     299             :         }
     300             : 
     301             :         /* actually parse the message for valid url */
     302             : 
     303          13 :         if (external && strcmp(src, "nil") == 0)
     304           0 :                 strcpy(*u, str_nil);
     305             :         else
     306          13 :                 memcpy(*u, src, l);
     307          13 :         return (ssize_t) l - 1;
     308             : }
     309             : 
     310             : static ssize_t
     311         185 : URLtoString(str *s, size_t *len, const void *SRC, bool external)
     312             : {
     313             :         const char *src = SRC;
     314         185 :         size_t l = strlen(src);
     315             : 
     316         185 :         if (external)
     317         181 :                 l += 2;
     318         185 :         if (l >= *len || *s == NULL) {
     319          18 :                 GDKfree(*s);
     320          18 :                 *s = GDKmalloc(l + 1);
     321          18 :                 if (*s == NULL)
     322             :                         return -1;
     323          18 :                 *len = l + 1;
     324             :         }
     325             : 
     326         185 :         if (external) {
     327         181 :                 if (strNil(src)) {
     328           0 :                         strcpy(*s, "nil");
     329           0 :                         return 3;
     330             :                 }
     331         181 :                 snprintf(*s, l + 1, "\"%s\"", src);
     332             :         } else {
     333           4 :                 strcpy(*s, src);
     334             :         }
     335         185 :         return (ssize_t) l;
     336             : }
     337             : 
     338             : /* COMMAND "getAnchor": Extract an anchor (reference) from the URL
     339             :  * SIGNATURE: getAnchor(url) : str; */
     340             : static str
     341           5 : URLgetAnchor(str *retval, url *val)
     342             : {
     343             :         const char *s;
     344             : 
     345           5 :         if (val == NULL || *val == NULL)
     346           0 :                 throw(ILLARG, "url.getAnchor", "url missing");
     347             : 
     348           5 :         if (strNil(*val)) {
     349             :                 s = str_nil;
     350             :         } else {
     351           8 :                 if ((s = skip_scheme(*val)) == NULL ||
     352           8 :                         (s = skip_authority(s, NULL, NULL, NULL, NULL)) == NULL ||
     353           8 :                         (s = skip_path(s, NULL, NULL)) == NULL ||
     354           4 :                         (s = skip_search(s)) == NULL)
     355           0 :                         throw(ILLARG, "url.getAnchor", "bad url");
     356           4 :                 if (*s == '#')
     357           1 :                         s++;
     358             :                 else
     359             :                         s = str_nil;
     360             :         }
     361             : 
     362           5 :         if ((*retval = GDKstrdup(s)) == NULL)
     363           0 :                 throw(MAL, "url.getAnchor", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     364             :         return MAL_SUCCEED;
     365             : }
     366             : 
     367             : /* COMMAND "getBasename": Extract the base of the last file name of the URL,
     368             :  *                        thus, excluding the file extension.
     369             :  * SIGNATURE: getBasename(str) : str; */
     370             : static str
     371           5 : URLgetBasename(str *retval, url *val)
     372             : {
     373             :         const char *s;
     374           5 :         const char *b = NULL;
     375           5 :         const char *e = NULL;
     376             : 
     377           5 :         if (val == NULL || *val == NULL)
     378           0 :                 throw(ILLARG, "url.getBasename", "url missing");
     379             : 
     380           5 :         if (strNil(*val)) {
     381           1 :                 *retval = GDKstrdup(str_nil);
     382             :         } else {
     383           8 :                 if ((s = skip_scheme(*val)) == NULL ||
     384           8 :                         (s = skip_authority(s, NULL, NULL, NULL, NULL)) == NULL ||
     385           4 :                         (s = skip_path(s, &b, &e)) == NULL)
     386           0 :                         throw(ILLARG, "url.getBasename", "bad url");
     387           4 :                 if (b == NULL) {
     388           1 :                         *retval = GDKstrdup(str_nil);
     389             :                 } else {
     390             :                         size_t l;
     391             : 
     392           3 :                         if (e != NULL) {
     393           2 :                                 l = e - b;
     394             :                         } else {
     395           1 :                                 l = s - b;
     396             :                         }
     397           3 :                         if ((*retval = GDKmalloc(l + 1)) != NULL) {
     398           3 :                                 strcpy_len(*retval, b, l + 1);
     399             :                         }
     400             :                 }
     401             :         }
     402             : 
     403           5 :         if (*retval == NULL)
     404           0 :                 throw(MAL, "url.getBasename", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     405             :         return MAL_SUCCEED;
     406             : }
     407             : 
     408             : /* COMMAND "getContext": Extract the path context from the URL
     409             :  * SIGNATURE: getContext(str) : str; */
     410             : static str
     411           5 : URLgetContext(str *retval, url *val)
     412             : {
     413             :         const char *s;
     414             :         const char *p;
     415             : 
     416           5 :         if (val == NULL || *val == NULL)
     417           0 :                 throw(ILLARG, "url.getContext", "url missing");
     418             : 
     419           5 :         if (strNil(*val)) {
     420           1 :                 *retval = GDKstrdup(str_nil);
     421             :         } else {
     422           8 :                 if ((s = skip_scheme(*val)) == NULL ||
     423           8 :                         (p = skip_authority(s, NULL, NULL, NULL, NULL)) == NULL ||
     424           4 :                         (s = skip_path(p, NULL, NULL)) == NULL)
     425           0 :                         throw(ILLARG, "url.getContext", "bad url");
     426           4 :                 if (p == s) {
     427           1 :                         *retval = GDKstrdup(str_nil);
     428           3 :                 } else if ((*retval = GDKmalloc(s - p + 1)) != NULL) {
     429           3 :                         strcpy_len(*retval, p, s - p + 1);
     430             :                 }
     431             :         }
     432             : 
     433           5 :         if (*retval == NULL)
     434           0 :                 throw(MAL, "url.getContext", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     435             :         return MAL_SUCCEED;
     436             : }
     437             : 
     438             : /* COMMAND "getExtension": Extract the file extension of the URL
     439             :  * SIGNATURE: getExtension(str) : str; */
     440             : static str
     441           5 : URLgetExtension(str *retval, url *val)
     442             : {
     443             :         const char *s;
     444           5 :         const char *e = NULL;
     445             : 
     446           5 :         if (val == NULL || *val == NULL)
     447           0 :                 throw(ILLARG, "url.getExtension", "url missing");
     448             : 
     449           5 :         if (strNil(*val)) {
     450           1 :                 *retval = GDKstrdup(str_nil);
     451             :         } else {
     452           8 :                 if ((s = skip_scheme(*val)) == NULL ||
     453           8 :                         (s = skip_authority(s, NULL, NULL, NULL, NULL)) == NULL ||
     454           4 :                         (s = skip_path(s, NULL, &e)) == NULL)
     455           0 :                         throw(ILLARG, "url.getExtension", "bad url");
     456           4 :                 if (e == NULL) {
     457           2 :                         *retval = GDKstrdup(str_nil);
     458             :                 } else {
     459           2 :                         size_t l = s - e;
     460             : 
     461           2 :                         assert(*e == '.');
     462           2 :                         if ((*retval = GDKmalloc(l)) != NULL) {
     463           2 :                                 strcpy_len(*retval, e + 1, l);
     464             :                         }
     465             :                 }
     466             :         }
     467             : 
     468           5 :         if (*retval == NULL)
     469           0 :                 throw(MAL, "url.getExtension", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     470             :         return MAL_SUCCEED;
     471             : }
     472             : 
     473             : /* COMMAND "getFile": Extract the last file name of the URL
     474             :  * SIGNATURE: getFile(str) : str; */
     475             : static str
     476           5 : URLgetFile(str *retval, url *val)
     477             : {
     478             :         const char *s;
     479           5 :         const char *b = NULL;
     480             : 
     481           5 :         if (val == NULL || *val == NULL)
     482           0 :                 throw(ILLARG, "url.getFile", "url missing");
     483             : 
     484           5 :         if (strNil(*val)) {
     485           1 :                 *retval = GDKstrdup(str_nil);
     486             :         } else {
     487           8 :                 if ((s = skip_scheme(*val)) == NULL ||
     488           8 :                         (s = skip_authority(s, NULL, NULL, NULL, NULL)) == NULL ||
     489           4 :                         (s = skip_path(s, &b, NULL)) == NULL)
     490           0 :                         throw(ILLARG, "url.getFile", "bad url");
     491           4 :                 if (b == NULL) {
     492           1 :                         *retval = GDKstrdup(str_nil);
     493             :                 } else {
     494             :                         size_t l;
     495             : 
     496           3 :                         l = s - b;
     497           3 :                         if ((*retval = GDKmalloc(l + 1)) != NULL) {
     498           3 :                                 strcpy_len(*retval, b, l + 1);
     499             :                         }
     500             :                 }
     501             :         }
     502             : 
     503           5 :         if (*retval == NULL)
     504           0 :                 throw(MAL, "url.getFile", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     505             :         return MAL_SUCCEED;
     506             : }
     507             : 
     508             : /* COMMAND "getHost": Extract the server identity from the URL */
     509             : /* SIGNATURE: getHost(str) : str; */
     510             : static str
     511           5 : URLgetHost(str *retval, url *val)
     512             : {
     513             :         const char *s;
     514           5 :         const char *h = NULL;
     515           5 :         const char *p = NULL;
     516             : 
     517           5 :         if (val == NULL || *val == NULL)
     518           0 :                 throw(ILLARG, "url.getHost", "url missing");
     519             : 
     520           5 :         if (strNil(*val)) {
     521           1 :                 *retval = GDKstrdup(str_nil);
     522             :         } else {
     523           8 :                 if ((s = skip_scheme(*val)) == NULL ||
     524           4 :                         (s = skip_authority(s, NULL, NULL, &h, &p)) == NULL)
     525           0 :                         throw(ILLARG, "url.getHost", "bad url");
     526           4 :                 if (h == NULL) {
     527           0 :                         *retval = GDKstrdup(str_nil);
     528             :                 } else {
     529             :                         size_t l;
     530             : 
     531           4 :                         if (p != NULL) {
     532           2 :                                 l = p - h - 1;
     533             :                         } else {
     534           2 :                                 l = s - h;
     535             :                         }
     536           4 :                         if ((*retval = GDKmalloc(l + 1)) != NULL) {
     537           4 :                                 strcpy_len(*retval, h, l + 1);
     538             :                         }
     539             :                 }
     540             :         }
     541             : 
     542           5 :         if (*retval == NULL)
     543           0 :                 throw(MAL, "url.getHost", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     544             :         return MAL_SUCCEED;
     545             : }
     546             : 
     547             : /* COMMAND "getDomain": Extract the Internet domain from the URL
     548             :  * SIGNATURE: getDomain(str) : str; */
     549             : static str
     550           5 : URLgetDomain(str *retval, url *val)
     551             : {
     552             :         const char *s;
     553           5 :         const char *h = NULL;
     554           5 :         const char *p = NULL;
     555             : 
     556           5 :         if (val == NULL || *val == NULL)
     557           0 :                 throw(ILLARG, "url.getDomain", "url missing");
     558             : 
     559           5 :         if (strNil(*val)) {
     560           1 :                 *retval = GDKstrdup(str_nil);
     561             :         } else {
     562           8 :                 if ((s = skip_scheme(*val)) == NULL ||
     563           4 :                         (s = skip_authority(s, NULL, NULL, &h, &p)) == NULL)
     564           0 :                         throw(ILLARG, "url.getDomain", "bad url");
     565           4 :                 if (h == NULL) {
     566           0 :                         *retval = GDKstrdup(str_nil);
     567             :                 } else {
     568             :                         size_t l;
     569             : 
     570           4 :                         if (p != NULL)
     571           2 :                                 p--;
     572             :                         else
     573           2 :                                 p = s;
     574             :                         l = 0;
     575          15 :                         while (p > h && p[-1] != '.') {
     576          11 :                                 p--;
     577          11 :                                 l++;
     578             :                         }
     579           4 :                         if ((*retval = GDKmalloc(l + 1)) != NULL) {
     580           4 :                                 strcpy_len(*retval, p, l + 1);
     581             :                         }
     582             :                 }
     583             :         }
     584             : 
     585           5 :         if (*retval == NULL)
     586           0 :                 throw(MAL, "url.getDomain", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     587             :         return MAL_SUCCEED;
     588             : }
     589             : 
     590             : /* COMMAND "getPort": Extract the port id from the URL
     591             :  * SIGNATURE: getPort(str) : str; */
     592             : static str
     593           5 : URLgetPort(str *retval, url *val)
     594             : {
     595             :         const char *s;
     596           5 :         const char *p = NULL;
     597             : 
     598           5 :         if (val == NULL || *val == NULL)
     599           0 :                 throw(ILLARG, "url.getPort", "url missing");
     600             : 
     601           5 :         if (strNil(*val)) {
     602           1 :                 *retval = GDKstrdup(str_nil);
     603             :         } else {
     604           8 :                 if ((s = skip_scheme(*val)) == NULL ||
     605           4 :                         (s = skip_authority(s, NULL, NULL, NULL, &p)) == NULL)
     606           0 :                         throw(ILLARG, "url.getPort", "bad url");
     607           4 :                 if (p == NULL) {
     608           2 :                         *retval = GDKstrdup(str_nil);
     609             :                 } else {
     610           2 :                         size_t l = s - p;
     611             : 
     612           2 :                         if ((*retval = GDKmalloc(l + 1)) != NULL) {
     613           2 :                                 strcpy_len(*retval, p, l + 1);
     614             :                         }
     615             :                 }
     616             :         }
     617             : 
     618           5 :         if (*retval == NULL)
     619           0 :                 throw(MAL, "url.getPort", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     620             :         return MAL_SUCCEED;
     621             : }
     622             : 
     623             : /* COMMAND "getProtocol": Extract the protocol from the URL
     624             :  * SIGNATURE: getProtocol(str) : str; */
     625             : static str
     626           2 : URLgetProtocol(str *retval, url *val)
     627             : {
     628             :         const char *s;
     629             : 
     630           2 :         if (val == NULL || *val == NULL)
     631           0 :                 throw(ILLARG, "url.getProtocol", "url missing");
     632             : 
     633           2 :         if (strNil(*val)) {
     634           1 :                 *retval = GDKstrdup(str_nil);
     635             :         } else {
     636           1 :                 if ((s = skip_scheme(*val)) == NULL)
     637           0 :                         throw(ILLARG, "url.getProtocol", "bad url");
     638           1 :                 size_t l = s - *val;
     639             : 
     640           1 :                 if ((*retval = GDKmalloc(l)) != NULL) {
     641           1 :                         strcpy_len(*retval, *val, l);
     642             :                 }
     643             :         }
     644             : 
     645           2 :         if (*retval == NULL)
     646           0 :                 throw(MAL, "url.getProtocol", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     647             :         return MAL_SUCCEED;
     648             : }
     649             : 
     650             : /* COMMAND "getQuery": Extract the query part from the URL
     651             :  * SIGNATURE: getQuery(str) : str; */
     652             : static str
     653           5 : URLgetQuery(str *retval, url *val)
     654             : {
     655             :         const char *s;
     656             :         const char *q;
     657             : 
     658           5 :         if (val == NULL || *val == NULL)
     659           0 :                 throw(ILLARG, "url.getQuery", "url missing");
     660             : 
     661           5 :         if (strNil(*val)) {
     662           1 :                 *retval = GDKstrdup(str_nil);
     663             :         } else {
     664           8 :                 if ((s = skip_scheme(*val)) == NULL ||
     665           8 :                         (s = skip_authority(s, NULL, NULL, NULL, NULL)) == NULL ||
     666           8 :                         (q = skip_path(s, NULL, NULL)) == NULL ||
     667           4 :                         (s = skip_search(q)) == NULL)
     668           0 :                         throw(ILLARG, "url.getQuery", "bad url");
     669           4 :                 if (*q == '?') {
     670             :                         size_t l;
     671             : 
     672           2 :                         q++;
     673           2 :                         l = s - q;
     674           2 :                         if ((*retval = GDKmalloc(l + 1)) != NULL) {
     675           2 :                                 strcpy_len(*retval, q, l + 1);
     676             :                         }
     677             :                 } else {
     678           2 :                         *retval = GDKstrdup(str_nil);
     679             :                 }
     680             :         }
     681             : 
     682           5 :         if (*retval == NULL)
     683           0 :                 throw(MAL, "url.getQuery", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     684             :         return MAL_SUCCEED;
     685             : }
     686             : 
     687             : /* COMMAND "getRobotURL": Extract the location of the robot control file
     688             :  * SIGNATURE: getRobotURL(str) : str; */
     689             : static str
     690           5 : URLgetRobotURL(str *retval, url *val)
     691             : {
     692             :         const char *s;
     693             :         size_t l;
     694             : 
     695           5 :         if (val == NULL || *val == NULL)
     696           0 :                 throw(ILLARG, "url.getQuery", "url missing");
     697             : 
     698           5 :         if (strNil(*val)) {
     699           1 :                 *retval = GDKstrdup(str_nil);
     700             :         } else {
     701           8 :                 if ((s = skip_scheme(*val)) == NULL ||
     702           4 :                         (s = skip_authority(s, NULL, NULL, NULL, NULL)) == NULL)
     703           0 :                         throw(ILLARG, "url.getQuery", "bad url");
     704           4 :                 l = s - *val;
     705             : 
     706           4 :                 if ((*retval = GDKmalloc(l + sizeof("/robots.txt"))) != NULL) {
     707           4 :                         sprintf(*retval, "%.*s/robots.txt", (int) l, *val);
     708             :                 }
     709             :         }
     710             : 
     711           5 :         if (*retval == NULL)
     712           0 :                 throw(MAL, "url.getQuery", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     713             :         return MAL_SUCCEED;
     714             : }
     715             : 
     716             : /* COMMAND "getUser": Extract the user identity from the URL
     717             :  * SIGNATURE: getUser(str) : str; */
     718             : static str
     719           8 : URLgetUser(str *retval, url *val)
     720             : {
     721             :         const char *s, *h, *u, *p;
     722             : 
     723           8 :         if (val == NULL || *val == NULL)
     724           0 :                 throw(ILLARG, "url.getUser", "url missing");
     725             : 
     726           8 :         if (strNil(*val)) {
     727           1 :                 *retval = GDKstrdup(str_nil);
     728             :         } else {
     729          14 :                 if ((s = skip_scheme(*val)) == NULL ||
     730           7 :                         (s = skip_authority(s, &u, &p, &h, NULL)) == NULL)
     731           0 :                         throw(ILLARG, "url.getHost", "bad url");
     732           7 :                 if (u == NULL || h == NULL) {
     733           4 :                         *retval = GDKstrdup(str_nil);
     734             :                 } else {
     735             :                         size_t l;
     736             : 
     737           3 :                         if (p) {
     738           1 :                                 l = p - u - 1;
     739             :                         } else {
     740           2 :                                 l = h - u - 1;
     741             :                         }
     742           3 :                         if ((*retval = GDKmalloc(l + 1)) != NULL) {
     743           3 :                                 strcpy_len(*retval, u, l + 1);
     744             :                         }
     745             :                 }
     746             :         }
     747             : 
     748           8 :         if (*retval == NULL)
     749           0 :                 throw(MAL, "url.getUser", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     750             :         return MAL_SUCCEED;
     751             : }
     752             : 
     753             : /* COMMAND "isaURL": Check conformity of the URL syntax
     754             :  * SIGNATURE: isaURL(str) : bit; */
     755             : static str
     756           6 : URLisaURL(bit *retval, str *val)
     757             : {
     758           6 :         if (val == NULL || *val == NULL)
     759           0 :                 throw(ILLARG, "url.isaURL", "url missing");
     760           6 :         if (strNil(*val))
     761           0 :                 *retval = bit_nil;
     762             :         else
     763           6 :                 *retval = skip_scheme(*val) != NULL;
     764             :         return MAL_SUCCEED;
     765             : }
     766             : 
     767             : static str
     768          48 : URLnew(url *u, str *val)
     769             : {
     770          48 :         *u = GDKstrdup(*val);
     771          48 :         if (*u == NULL)
     772           0 :                 throw(MAL, "url.new", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     773             :         return MAL_SUCCEED;
     774             : }
     775             : 
     776             : static str
     777           1 : URLnew3(url *u, str *protocol, str *server, str *file)
     778             : {
     779             :         size_t l;
     780             : 
     781           1 :         l = strLen(*file) + strLen(*server) + strLen(*protocol) + 10;
     782           1 :         *u = GDKmalloc(l);
     783           1 :         if (*u == NULL)
     784           0 :                 throw(MAL, "url.newurl", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     785           1 :         snprintf(*u, l, "%s://%s/%s", *protocol, *server, *file);
     786           1 :         return MAL_SUCCEED;
     787             : }
     788             : 
     789             : static str
     790           1 : URLnew4(url *u, str *protocol, str *server, int *port, str *file)
     791             : {
     792           1 :         str Protocol = *protocol;
     793           1 :         str Server = *server;
     794           1 :         str File = *file;
     795             :         size_t l;
     796             : 
     797           1 :         if (strNil(File))
     798             :                 File = "";
     799           1 :         else if (*File == '/')
     800           0 :                 File++;
     801           1 :         if (strNil(Server))
     802             :                 Server = "";
     803           1 :         if (strNil(Protocol))
     804             :                 Protocol = "";
     805           1 :         l = strlen(File) + strlen(Server) + strlen(Protocol) + 20;
     806           1 :         *u = GDKmalloc(l);
     807           1 :         if (*u == NULL)
     808           0 :                 throw(MAL, "url.newurl", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     809           1 :         snprintf(*u, l, "%s://%s:%d/%s", Protocol, Server, *port, File);
     810           1 :         return MAL_SUCCEED;
     811             : }
     812             : 
     813           0 : static str URLnoop(url *u, url *val)
     814             : {
     815           0 :         *u = GDKstrdup(*val);
     816           0 :         if (*u == NULL)
     817           0 :                 throw(MAL, "url.noop", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     818             :         return MAL_SUCCEED;
     819             : }
     820             : 
     821             : #include "mel.h"
/* Atom table handed to mal_module() below: declares the atom "url" with
 * base type "str" and URLfromString/URLtoString as its string conversion
 * routines.  The trailing entry with .cmp=NULL presumably acts as the
 * end-of-table marker -- confirm against mel.h. */
     822             : mel_atom url_init_atoms[] = {
     823             :  { .name="url", .basetype="str", .fromstr=URLfromString, .tostr=URLtoString, },  { .cmp=NULL }
     824             : };
/* Function table handed to mal_module() below.  Each command() entry
 * names a module ("url" or "calc"), a function name, the C routine that
 * implements it, a help text and its argument list; by the look of the
 * entries the first arg() describes the result and the others the
 * inputs.  The same routine may appear under several names (URLnew and
 * URLnoop are listed for both "url" and "calc"), and "new" is listed
 * twice with different argument lists (URLnew4 and URLnew3).  The final
 * entry with .imp=NULL presumably acts as the end-of-table marker --
 * confirm against mel.h. */
     825             : mel_func url_init_funcs[] = {
     826             :  command("url", "url", URLnew, false, "Create an URL from a string literal", args(1,2, arg("",url),arg("s",str))),
     827             :  command("url", "url", URLnoop, false, "Create an URL from a string literal", args(1,2, arg("",url),arg("s",url))),
     828             :  command("calc", "url", URLnew, false, "Create an URL from a string literal", args(1,2, arg("",url),arg("s",str))),
     829             :  command("calc", "url", URLnoop, false, "Create an URL from a string literal", args(1,2, arg("",url),arg("s",url))),
     830             :  command("url", "getAnchor", URLgetAnchor, false, "Extract the URL anchor (reference)", args(1,2, arg("",str),arg("u",url))),
     831             :  command("url", "getBasename", URLgetBasename, false, "Extract the URL base file name", args(1,2, arg("",str),arg("u",url))),
     832             :  command("url", "getContext", URLgetContext, false, "Get the path context of a URL", args(1,2, arg("",str),arg("u",url))),
     833             :  command("url", "getDomain", URLgetDomain, false, "Extract Internet domain from the URL", args(1,2, arg("",str),arg("u",url))),
     834             :  command("url", "getExtension", URLgetExtension, false, "Extract the file extension of the URL", args(1,2, arg("",str),arg("u",url))),
     835             :  command("url", "getFile", URLgetFile, false, "Extract the last file name of the URL", args(1,2, arg("",str),arg("u",url))),
     836             :  command("url", "getHost", URLgetHost, false, "Extract the server name from the URL", args(1,2, arg("",str),arg("u",url))),
     837             :  command("url", "getPort", URLgetPort, false, "Extract the port id from the URL", args(1,2, arg("",str),arg("u",url))),
     838             :  command("url", "getProtocol", URLgetProtocol, false, "Extract the protocol from the URL", args(1,2, arg("",str),arg("u",url))),
     839             :  command("url", "getQuery", URLgetQuery, false, "Extract the query string from the URL", args(1,2, arg("",str),arg("u",url))),
     840             :  command("url", "getUser", URLgetUser, false, "Extract the user identity from the URL", args(1,2, arg("",str),arg("u",url))),
     841             :  command("url", "getRobotURL", URLgetRobotURL, false, "Extract the location of the robot control file", args(1,2, arg("",str),arg("u",url))),
     842             :  command("url", "isaURL", URLisaURL, false, "Check conformity of the URL syntax", args(1,2, arg("",bit),arg("u",str))),
     843             :  command("url", "new", URLnew4, false, "Construct URL from protocol, host, port, and file", args(1,5, arg("",url),arg("p",str),arg("h",str),arg("prt",int),arg("f",str))),
     844             :  command("url", "new", URLnew3, false, "Construct URL from protocol, host,and file", args(1,4, arg("",url),arg("prot",str),arg("host",str),arg("fnme",str))),
     845             :  { .imp=NULL }
     846             : };
     847             : #include "mal_import.h"
     848             : #ifdef _MSC_VER
     849             : #undef read
     850             : #pragma section(".CRT$XCU",read)
     851             : #endif
/* Startup hook: passes the module name "url" together with the atom and
 * function tables defined above to mal_module().  LIB_STARTUP_FUNC
 * presumably arranges for this to run when the library is loaded --
 * confirm against mal_import.h. */
     852         259 : LIB_STARTUP_FUNC(init_url_mal)
     853         259 : { mal_module("url", url_init_atoms, url_init_funcs); }

Generated by: LCOV version 1.14