LCOV - code coverage report
Current view: top level - sql/backends/monet5/UDF/pyapi3 - formatinput3.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 105 171 61.4 %
Date: 2021-10-13 02:24:04 Functions: 1 3 33.3 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : #include "monetdb_config.h"
      10             : #include "formatinput.h"
      11             : #include "type_conversion.h"
      12             : 
      13             : //! Parse a PyCodeObject from a string. The string is expected to be in the
      14             : //! format {@<encoded_function>};, where <encoded_function> is the marshalled
      15             : //! code object. The first two and last two characters are skipped, the escapes in between are decoded, and the bytes are handed to marshal_loads. Returns the loaded object, or NULL with *msg set to an exception.
      16             : PyObject *PyCodeObject_ParseString(char *string, char **msg);
      17           0 : PyObject *PyCodeObject_ParseString(char *string, char **msg)
      18             : {
      19           0 :         size_t length = strlen(string);
      20             :         PyObject *code_object, *tuple, *mystr;
      21           0 :         char *code_copy = GDKmalloc(length * sizeof(char)); // scratch buffer for the decoded bytes, freed below before returning
      22             :         char hex[3];
      23             :         size_t i, j;
      24           0 :         hex[2] = '\0'; // hex holds two digits plus this terminator for strtol
      25           0 :         if (code_copy == NULL) {
      26           0 :                 *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
      27           0 :                 return NULL;
      28             :         }
      29             :         // decode escape sequences in the string (doubled backslash, tab, newline and
      30             :         // two-digit hex codes such as \x00) to the bytes they represent; i reads from string, j writes into code_copy
      31           0 :         for (i = 2, j = 0; i < length - 2; i++) { // NOTE(review): length is size_t, so length - 2 wraps around when the input is shorter than 2 characters
      32           0 :                 if (string[i] == '\\' && string[i + 1] == '\\')
      33             :                         i++; // NOTE(review): string[i] is still a backslash after this skip, so the escape checks below are applied to it - confirm this is intended
      34           0 :                 if (string[i] == '\\' && string[i + 1] == 't') {
      35           0 :                         code_copy[j++] = '\t';
      36           0 :                         i++;
      37           0 :                 } else if (string[i] == '\\' && string[i + 1] == 'n') {
      38           0 :                         code_copy[j++] = '\n';
      39           0 :                         i++;
      40           0 :                 } else if (string[i] == '\\' && string[i + 1] == 'x') {
      41           0 :                         hex[0] = string[i + 2];
      42           0 :                         hex[1] = string[i + 3];
      43           0 :                         code_copy[j++] = (char)strtol(hex, NULL, 16); // the two characters are not validated as hex digits before conversion
      44           0 :                         i += 3; // together with the loop increment this consumes all four characters of the escape
      45             :                 } else {
      46           0 :                         code_copy[j++] = string[i];
      47             :                 }
      48             :         }
      49           0 :         code_copy[j] = '\0';
      50           0 :         tuple = PyTuple_New(1); // NOTE(review): result is used below without a NULL check
      51           0 :         mystr = PyString_FromStringAndSize(
      52             :                 code_copy,
      53             :                 j); // the explicit length j is passed; decoded bytes may include NULs (e.g. from \x00), presumably why the sized variant is used. NOTE(review): result is not checked for NULL
      54           0 :         PyTuple_SetItem(tuple, 0, mystr);
      55           0 :         code_object = PyObject_CallObject(marshal_loads, tuple); // marshal_loads is not declared in this file; presumably provided by an included header - confirm
      56             :         Py_DECREF(tuple);
      57           0 :         GDKfree(code_copy);
      58           0 :         if (code_object == NULL) {
      59           0 :                 PyErr_Print();
      60           0 :                 *msg = createException(MAL, "pyapi3.eval",
      61             :                                                            SQLSTATE(PY000) "Failed to marshal.loads() encoded object");
      62           0 :                 return NULL;
      63             :         }
      64           0 :         *msg = MAL_SUCCEED;
      65           0 :         return code_object;
      66             : }
      67             : 
      68         168 : char *FormatCode(char *code, char **args, size_t argcount, size_t tabwidth,
      69             :                                  PyObject **code_object, char **msg, char **additional_args,
      70             :                                  size_t additional_argcount)
      71             : {
      72             :         // Build the source of a Python function def pyfun(<args>): whose body is code with normalised indentation.
      73             :         // We do two passes, first we get the length of the resulting formatted code
      74             :         // and then we actually create the resulting code. Returns the new buffer (NULL if it was never allocated); *msg is set to an exception on failure and left NULL otherwise.
      75             :         size_t i = 0, j = 0, k = 0;
      76         168 :         size_t length = strlen(code);
      77             :         size_t size = 0;
      78             :         size_t spaces_per_level = 2;
      79             : 
      80             :         size_t code_location = 0; // write position in newcode
      81             :         char *newcode = NULL;
      82             : 
      83             :         size_t indentation_count = 0; // number of entries in use in the two arrays below
      84             :         size_t max_indentation = 100; // current capacity of the two arrays below
      85             :         // This keeps track of the different indentation levels
      86             :         // indentation_levels is a sorted array with how many spaces of indentation
      87             :         // that specific level has
      88             :         // so indentation_levels[0] = 4 means that the first level (level 0) has 4
      89             :         // spaces in the source code
      90             :         // after this array is constructed we can count the amount of spaces before
      91             :         // a statement and look in this
      92             :         // array to immediately find the indentation level of the statement
      93             :         size_t *indentation_levels;
      94             :         // statements_per_level keeps track of how many statements are at the
      95             :         // specified indentation level
      96             :         // this is needed to compute the size of the resulting formatted code
      97             :         // for every indentation level i, we add statements_per_level[i] * (i + 1) *
      98             :         // spaces_per_level spaces
      99             :         size_t *statements_per_level;
     100             : 
     101             :         size_t initial_spaces = 0; // leading whitespace width of the current line (a tab counts as tabwidth)
     102             :         size_t statement_size = 0; // characters counted for the current statement in the first pass
     103             :         bool seen_statement = false;
     104             :         bool multiline_statement = false; // true while inside a triple-double-quoted string
     105             :         int multiline_quotes = 0; // running count of consecutive double quotes
     106             : 
     107         168 :         char base_start[] = "def pyfun(";
     108         168 :         char base_end[] = "):\n";
     109         168 :         *msg = NULL;
     110             :         (void)code_object; // parameter is not used by this function
     111             : 
     112         168 :         indentation_levels = (size_t *)GDKzalloc(max_indentation * sizeof(size_t));
     113             :         statements_per_level =
     114         168 :                 (size_t *)GDKzalloc(max_indentation * sizeof(size_t));
     115         168 :         if (indentation_levels == NULL || statements_per_level == NULL) {
     116           0 :                 *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     117           0 :                 goto finally;
     118             :         }
     119             : 
     120             :         // Base function definition size
     121             :         // For every argument, add a comma, and add another entry for the '\0'
     122         168 :         size += strlen(base_start) + strlen(base_end) + argcount + 1;
     123        1132 :         for (i = 0; i < argcount; i++) {
     124         964 :                 if (args[i] != NULL) { // NULL entries in args are skipped, here and when copying
     125         399 :                         size += strlen(args[i]) + 1;
     126             :                 }
     127             :         }
     128             :         // Additional parameters
     129         647 :         for (i = 0; i < additional_argcount; i++)
     130         479 :                 size += strlen(additional_args[i]) + 1; // NOTE(review): no NULL check here, unlike the copy loop for additional_args further down
     131             : 
     132             :         // First remove the "{" at the start and the "};" at the end of the
     133             :         // function, this is added when we have a function created through SQL and
     134             :         // python doesn't like them (they are overwritten with spaces in code itself)
     135             :         // We need to be careful to only remove ones at the start/end, otherwise we
     136             :         // might invalidate some otherwise valid python code containing them
     137         510 :         for (i = length - 1, j = 0; i > 0; i--) { // j is a small state flag here: 0 = still looking for the semicolon, 1 = semicolon blanked. NOTE(review): length is size_t, so length - 1 wraps when code is empty
     138         510 :                 if (code[i] != '\n' && code[i] != ' ' && code[i] != '\t' &&
     139         168 :                         code[i] != ';' && code[i] != '}')
     140             :                         break;
     141         498 :                 if (j == 0) {
     142         189 :                         if (code[i] == ';') {
     143         157 :                                 code[i] = ' ';
     144             :                                 j = 1;
     145             :                         }
     146         309 :                 } else if (j == 1) {
     147         309 :                         if (code[i] == '}') {
     148         156 :                                 code[i] = ' ';
     149         156 :                                 break;
     150             :                         }
     151             :                 }
     152             :         }
     153         895 :         for (i = 0; i < length; i++) {
     154         895 :                 if (code[i] != '\n' && code[i] != ' ' && code[i] != '\t' &&
     155             :                         code[i] != '{')
     156             :                         break;
     157         727 :                 if (code[i] == '{') {
     158         156 :                         code[i] = ' '; // every opening brace within the leading whitespace run is blanked
     159             :                 }
     160             :         }
     161             :         // We indent using spaces, spaces_per_level (2) spaces per level
     162             :         // We also erase empty lines
     163       20750 :         for (i = 0; i < length; i++) {
     164             :                 // handle multiline strings (strings that start with """)
     165       20582 :                 if (code[i] == '\"') {
     166          46 :                         if (!multiline_statement) {
     167          39 :                                 multiline_quotes++;
     168          39 :                                 multiline_statement = multiline_quotes == 3;
     169             :                         } else {
     170           7 :                                 multiline_quotes--;
     171           7 :                                 multiline_statement = multiline_quotes != 0;
     172             :                         }
     173             :                 } else {
     174       20536 :                         multiline_quotes = multiline_statement ? 3 : 0; // any other character restarts the run of consecutive quotes
     175             :                 }
     176             : 
     177       20582 :                 if (!seen_statement) {
     178             :                         // We have not seen a statement on this line yet
     179        3802 :                         if (code[i] == '\n') {
     180             :                                 // Empty line, skip to the next one
     181             :                                 initial_spaces = 0;
     182        3511 :                         } else if (code[i] == ' ') {
     183        2495 :                                 initial_spaces++;
     184        1016 :                         } else if (code[i] == '\t') {
     185         366 :                                 initial_spaces += tabwidth;
     186             :                         } else {
     187             :                                 // Statement starts here
     188             :                                 seen_statement = true;
     189             :                         }
     190             :                 }
     191       19932 :                 if (seen_statement) {
     192             :                         // We have seen a statement on this line, check the indentation
     193             :                         // level
     194       17430 :                         statement_size++;
     195             : 
     196       17430 :                         if (code[i] == '\n' || i == length - 1) {
     197             :                                 // Statement ends here
     198             :                                 bool placed = false;
     199             :                                 size_t level = 0;
     200             : 
     201         654 :                                 if (multiline_statement) {
     202             :                                         // if we are in a multiline statement, we don't want to mess
     203             :                                         // with the indentation
     204           4 :                                         size += statement_size;
     205             :                                         initial_spaces = 0;
     206             :                                         statement_size = 0;
     207           4 :                                         continue; // seen_statement stays true, so the following line is counted character by character as well
     208             :                                 }
     209             :                                 // First put the indentation in the indentation table
     210         650 :                                 if (indentation_count >= max_indentation) {
     211             :                                         // If there is no room in the indentation arrays we will
     212             :                                         // extend them
     213             :                                         // This probably will never happen unless in really extreme
     214             :                                         // code (or if max_indentation is set very low)
     215             :                                         size_t *new_indentation =
     216           0 :                                                 GDKzalloc(2 * max_indentation * sizeof(size_t));
     217             :                                         size_t *new_statements_per_level;
     218           0 :                                         if (new_indentation == NULL) {
     219           0 :                                                 *msg =
     220           0 :                                                         createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     221           0 :                                                 goto finally;
     222             :                                         }
     223             :                                         new_statements_per_level =
     224           0 :                                                 GDKzalloc(2 * max_indentation * sizeof(size_t));
     225           0 :                                         if (new_statements_per_level == NULL) {
     226           0 :                                                 *msg =
     227           0 :                                                         createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     228           0 :                                                 goto finally; // NOTE(review): new_indentation is not freed on this path
     229             :                                         }
     230             : 
     231           0 :                                         for (i = 0; i < max_indentation; i++) { // NOTE(review): this reuses the outer scan index i, which is left at max_indentation afterwards, so the position in code is lost
     232           0 :                                                 new_indentation[i] = indentation_levels[i];
     233           0 :                                                 new_statements_per_level[i] = statements_per_level[i];
     234             :                                         }
     235           0 :                                         GDKfree(indentation_levels);
     236           0 :                                         GDKfree(statements_per_level);
     237             :                                         indentation_levels = new_indentation;
     238             :                                         statements_per_level = new_statements_per_level;
     239           0 :                                         max_indentation *= 2;
     240             :                                 }
     241             : 
     242         880 :                                 for (j = 0; j < indentation_count; j++) {
     243         621 :                                         if (initial_spaces == indentation_levels[j]) {
     244             :                                                 // The exact space count is already in the array, so we
     245             :                                                 // can stop
     246             :                                                 level = j;
     247             :                                                 placed = true;
     248             :                                                 break;
     249             :                                         }
     250             : 
     251         230 :                                         if (initial_spaces < indentation_levels[j]) {
     252             :                                                 // The indentation level is smaller than this level (but
     253             :                                                 // bigger than the previous level)
     254             :                                                 // So the indentation level belongs here, so we move
     255             :                                                 // every level past this one upward one level
     256             :                                                 // and put the indentation level here
     257           0 :                                                 for (k = indentation_count; k > j; k--) {
     258           0 :                                                         indentation_levels[k] = indentation_levels[k - 1];
     259           0 :                                                         statements_per_level[k] =
     260           0 :                                                                 statements_per_level[k - 1];
     261             :                                                 }
     262           0 :                                                 indentation_count++;
     263           0 :                                                 statements_per_level[j] = 0;
     264           0 :                                                 indentation_levels[j] = initial_spaces;
     265             :                                                 level = j;
     266             :                                                 placed = true;
     267           0 :                                                 break;
     268             :                                         }
     269             :                                 }
     270         650 :                                 if (!placed) {
     271             :                                         // The space count is the biggest we have seen, so we add it
     272             :                                         // to the end of the array
     273             :                                         level = indentation_count;
     274         259 :                                         indentation_levels[indentation_count++] = initial_spaces;
     275             :                                 }
     276         650 :                                 statements_per_level[level]++;
     277         650 :                                 size += statement_size;
     278             :                                 seen_statement = false;
     279             :                                 initial_spaces = 0;
     280             :                                 statement_size = 0;
     281             :                         }
     282             :                 }
     283             :         }
     284             :         // Add the amount of spaces we will add to the size
     285         427 :         for (i = 0; i < indentation_count; i++) {
     286         259 :                 size += (i + 1) * spaces_per_level * statements_per_level[i];
     287             :         }
     288             : 
     289             :         // Allocate space for the function
     290         168 :         newcode = GDKzalloc(size);
     291         168 :         if (newcode == NULL) {
     292           0 :                 *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     293           0 :                 goto finally;
     294             :         }
     295             :         initial_spaces = 0;
     296             :         seen_statement = false; // multiline_statement and multiline_quotes are not reset here; they carry over from the first pass
     297             : 
     298             :         // First print in the function definition and arguments
     299        1848 :         for (i = 0; i < strlen(base_start); i++) {
     300        1680 :                 newcode[code_location++] = base_start[i];
     301             :         }
     302             :         // Add user-defined parameters
     303        1132 :         for (i = 0; i < argcount; i++) {
     304         964 :                 if (args[i] != NULL) {
     305        1929 :                         for (j = 0; j < strlen(args[i]); j++) {
     306        1530 :                                 newcode[code_location++] = args[i][j];
     307             :                         }
     308         399 :                         if (i != argcount - 1 || additional_argcount > 0) { // comma unless this is the last slot of args and no additional parameters follow
     309         399 :                                 newcode[code_location++] = ',';
     310             :                         }
     311             :                 }
     312             :         }
     313             :         // Add additional parameters
     314         647 :         for (i = 0; i < additional_argcount; i++) {
     315         479 :                 if (additional_args[i] != NULL) {
     316        4447 :                         for (j = 0; j < strlen(additional_args[i]); j++) {
     317        3968 :                                 newcode[code_location++] = additional_args[i][j];
     318             :                         }
     319         479 :                         if (i != additional_argcount - 1) {
     320         311 :                                 newcode[code_location++] = ',';
     321             :                         }
     322             :                 }
     323             :         }
     324         672 :         for (i = 0; i < strlen(base_end); i++) {
     325         504 :                 newcode[code_location++] = base_end[i];
     326             :         }
     327             : 
     328             :         // Now the second pass, actually construct the code
     329       20750 :         for (i = 0; i < length; i++) {
     330             :                 // handle multiline strings, with the same quote counting as the first pass
     331       20582 :                 if (code[i] == '\"') {
     332          46 :                         if (!multiline_statement) {
     333          39 :                                 multiline_quotes++;
     334          39 :                                 multiline_statement = multiline_quotes == 3;
     335             :                         } else {
     336           7 :                                 multiline_quotes--;
     337           7 :                                 multiline_statement = multiline_quotes != 0;
     338             :                         }
     339             :                 } else {
     340       20536 :                         multiline_quotes = multiline_statement ? 3 : 0;
     341             :                 }
     342             : 
     343       20582 :                 if (!seen_statement) {
     344        3806 :                         if (multiline_statement)
     345             :                                 seen_statement = true; // if we are in a multiline string, we
     346             :                                                                            // simply want to copy everything
     347             :                                                                            // (including indentation)
     348             :                         // We have not seen a statement on this line yet
     349        3802 :                         else if (code[i] == '\n') {
     350             :                                 // Empty line, skip to the next one
     351             :                                 initial_spaces = 0;
     352        3511 :                         } else if (code[i] == ' ') {
     353        2495 :                                 initial_spaces++;
     354        1016 :                         } else if (code[i] == '\t') {
     355         366 :                                 initial_spaces += tabwidth;
     356             :                         } else {
     357             :                                 // Look through the indentation_levels array to find the level
     358             :                                 // of the statement
     359             :                                 // from the amount of initial spaces
     360             :                                 bool placed = false;
     361             :                                 size_t level = 0;
     362             :                                 // Statement starts here
     363             :                                 seen_statement = true;
     364         880 :                                 for (j = 0; j < indentation_count; j++) {
     365         880 :                                         if (initial_spaces == indentation_levels[j]) {
     366             :                                                 level = j;
     367             :                                                 placed = true;
     368             :                                                 break;
     369             :                                         }
     370             :                                 }
     371         650 :                                 if (!placed) {
     372             :                                         // This should never happen, because it means the initial
     373             :                                         // spaces was not present in the array
     374             :                                         // When we just did exactly the same loop over the array, we
     375             :                                         // should have encountered this statement
     376             :                                         // This means that something happened to either the
     377             :                                         // indentation_levels array or something happened to the
     378             :                                         // code
     379           0 :                                         *msg = createException(MAL, "pyapi3.eval",
     380             :                                                                                    SQLSTATE(PY000) "If you see this error something "
     381             :                                                                                    "went wrong in the code. Sorry.");
     382           0 :                                         goto finally; // NOTE(review): newcode is already allocated here and is still returned by the finally path
     383             :                                 }
     384        2410 :                                 for (j = 0; j < (level + 1) * spaces_per_level; j++) { // level 0 gets one unit of indentation because the code sits inside the def
     385             :                                         // Add spaces to the code
     386        1760 :                                         newcode[code_location++] = ' ';
     387             :                                 }
     388             :                         }
     389             :                 }
     390       20582 :                 if (seen_statement) {
     391             :                         // We have seen a statement on this line, copy it
     392       17430 :                         newcode[code_location++] = code[i];
     393       17430 :                         if (code[i] == '\n') {
     394             :                                 // The statement has ended, move on to the next line
     395             :                                 seen_statement = false;
     396             :                                 initial_spaces = 0;
     397             :                                 statement_size = 0;
     398             :                         }
     399             :                 }
     400             :         }
     401         168 :         newcode[code_location] = '\0'; // NOTE(review): this write happens before the bounds check on the next line
     402         168 :         if (code_location >= size) {
     403             :                 // Something went wrong with our size computation, this also should
     404             :                 // never happen
     405           0 :                 *msg = createException(MAL, "pyapi3.eval",
     406             :                                                            SQLSTATE(PY000) "If you see this error something went wrong in "
     407             :                                                            "the code (size computation). Sorry.");
     408           0 :                 goto finally;
     409             :         }
     410         168 : finally: // only the two bookkeeping arrays are freed; newcode is returned as is, also when *msg was set
     411         168 :         GDKfree(indentation_levels);
     412         168 :         GDKfree(statements_per_level);
     413         168 :         return newcode;
     414             : }
     415             : 
     416           0 : void _formatinput_init(void) { _import_array(); } // Initialisation hook for this file: its only action is calling _import_array() (presumably the NumPy C-API import - confirm)

Generated by: LCOV version 1.14