LCOV - code coverage report
Current view: top level - sql/backends/monet5/UDF/pyapi3 - formatinput3.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 138 216 63.9 %
Date: 2020-06-29 20:00:14 Functions: 1 3 33.3 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2020 MonetDB B.V.
       7             :  */
       8             : 
       9             : #include "monetdb_config.h"
      10             : #include "formatinput.h"
      11             : #include "type_conversion.h"
      12             : 
      13             : //! Parse a PyCodeObject from a string, the string is expected to be in the
      14             : //! format {@<encoded_function>};, where <encoded_function> is the Marshalled
      15             : //! code object
      16             : PyObject *PyCodeObject_ParseString(char *string, char **msg);
      17           0 : PyObject *PyCodeObject_ParseString(char *string, char **msg)
      18             : {
      19           0 :         size_t length = strlen(string);
      20           0 :         PyObject *code_object, *tuple, *mystr;
      21           0 :         char *code_copy = GDKmalloc(length * sizeof(char));
      22           0 :         char hex[3];
      23           0 :         size_t i, j;
      24           0 :         hex[2] = '\0';
      25           0 :         if (code_copy == NULL) {
      26           0 :                 *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
      27           0 :                 return NULL;
      28             :         }
      29             :         // decode hex codes (e.g. \x00) in the string to the actual numeric
      30             :         // representation
      31           0 :         for (i = 2, j = 0; i < length - 2; i++) {
      32           0 :                 if (string[i] == '\\' && string[i + 1] == '\\')
      33           0 :                         i++;
      34           0 :                 if (string[i] == '\\' && string[i + 1] == 't') {
      35           0 :                         code_copy[j++] = '\t';
      36           0 :                         i++;
      37           0 :                 } else if (string[i] == '\\' && string[i + 1] == 'n') {
      38           0 :                         code_copy[j++] = '\n';
      39           0 :                         i++;
      40           0 :                 } else if (string[i] == '\\' && string[i + 1] == 'x') {
      41           0 :                         hex[0] = string[i + 2];
      42           0 :                         hex[1] = string[i + 3];
      43           0 :                         code_copy[j++] = (char)strtol(hex, NULL, 16);
      44           0 :                         i += 3;
      45             :                 } else {
      46           0 :                         code_copy[j++] = string[i];
      47             :                 }
      48             :         }
      49           0 :         code_copy[j] = '\0';
      50           0 :         tuple = PyTuple_New(1);
      51           0 :         mystr = PyString_FromStringAndSize(
      52             :                 code_copy,
      53             :                 j); // use FromStringAndSize because the string is not null-terminated
      54           0 :         PyTuple_SetItem(tuple, 0, mystr);
      55           0 :         code_object = PyObject_CallObject(marshal_loads, tuple);
      56           0 :         Py_DECREF(tuple);
      57           0 :         GDKfree(code_copy);
      58           0 :         if (code_object == NULL) {
      59           0 :                 PyErr_Print();
      60           0 :                 *msg = createException(MAL, "pyapi3.eval",
      61             :                                                            SQLSTATE(PY000) "Failed to marshal.loads() encoded object");
      62           0 :                 return NULL;
      63             :         }
      64           0 :         *msg = MAL_SUCCEED;
      65           0 :         return code_object;
      66             : }
      67             : 
      68         164 : char *FormatCode(char *code, char **args, size_t argcount, size_t tabwidth,
      69             :                                  PyObject **code_object, char **msg, char **additional_args,
      70             :                                  size_t additional_argcount)
      71             : {
      72             :         // Format the python code by fixing the indentation levels
      73             :         // We do two passes, first we get the length of the resulting formatted code
      74             :         // and then we actually create the resulting code
      75         164 :         size_t i = 0, j = 0, k = 0;
      76         164 :         size_t length = strlen(code);
      77         164 :         size_t size = 0;
      78         164 :         size_t spaces_per_level = 2;
      79             : 
      80         164 :         size_t code_location = 0;
      81         164 :         char *newcode = NULL;
      82             : 
      83         164 :         size_t indentation_count = 0;
      84         164 :         size_t max_indentation = 100;
      85             :         // This keeps track of the different indentation levels
      86             :         // indentation_levels is a sorted array with how many spaces of indentation
      87             :         // that specific array has
      88             :         // so indentation_levels[0] = 4 means that the first level (level 0) has 4
      89             :         // spaces in the source code
      90             :         // after this array is constructed we can count the amount of spaces before
      91             :         // a statement and look in this
      92             :         // array to immediately find the indentation level of the statement
      93         164 :         size_t *indentation_levels;
      94             :         // statements_per_level keeps track of how many statements are at the
      95             :         // specified indentation level
      96             :         // this is needed to compute the size of the resulting formatted code
      97             :         // for every indentation level i, we add statements_per_level[i] * (i + 1) *
      98             :         // spaces_per_level spaces
      99         164 :         size_t *statements_per_level;
     100             : 
     101         164 :         size_t initial_spaces = 0;
     102         164 :         size_t statement_size = 0;
     103         164 :         bool seen_statement = false;
     104         164 :         bool multiline_statement = false;
     105         164 :         int multiline_quotes = 0;
     106             : 
     107         164 :         char base_start[] = "def pyfun(";
     108         164 :         char base_end[] = "):\n";
     109         164 :         *msg = NULL;
     110             : #ifndef IS_PY3K
     111             :         if (code[1] == '@') {
     112             :                 *code_object = PyCodeObject_ParseString(code, msg);
     113             :                 return NULL;
     114             :         }
     115             : #else
     116         164 :         (void)code_object;
     117             : #endif
     118             : 
     119         164 :         indentation_levels = (size_t *)GDKzalloc(max_indentation * sizeof(size_t));
     120         328 :         statements_per_level =
     121         164 :                 (size_t *)GDKzalloc(max_indentation * sizeof(size_t));
     122         164 :         if (indentation_levels == NULL || statements_per_level == NULL) {
     123           0 :                 *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     124           0 :                 goto finally;
     125             :         }
     126             : 
     127             :         // Base function definition size
     128             :         // For every argument, add a comma, and add another entry for the '\0'
     129         164 :         size += strlen(base_start) + strlen(base_end) + argcount + 1;
     130        1110 :         for (i = 0; i < argcount; i++) {
     131         946 :                 if (args[i] != NULL) {
     132         393 :                         size += strlen(args[i]) + 1;
     133             :                 }
     134             :         }
     135             :         // Additional parameters
     136         631 :         for (i = 0; i < additional_argcount; i++)
     137         467 :                 size += strlen(additional_args[i]) + 1;
     138             : 
     139             :         // First remove the "{" at the start and the "};" at the end of the
     140             :         // function, this is added when we have a function created through SQL and
     141             :         // python doesn't like them
     142             :         // We need to be careful to only remove ones at the start/end, otherwise we
     143             :         // might invalidate some otherwise valid python code containing them
     144         346 :         for (i = length - 1, j = 0; i > 0; i--) {
     145         346 :                 if (code[i] != '\n' && code[i] != ' ' && code[i] != '\t' &&
     146         164 :                         code[i] != ';' && code[i] != '}')
     147             :                         break;
     148         334 :                 if (j == 0) {
     149         182 :                         if (code[i] == ';') {
     150         153 :                                 code[i] = ' ';
     151         153 :                                 j = 1;
     152             :                         }
     153         152 :                 } else if (j == 1) {
     154         152 :                         if (code[i] == '}') {
     155         152 :                                 code[i] = ' ';
     156         152 :                                 break;
     157             :                         }
     158             :                 }
     159             :         }
     160         881 :         for (i = 0; i < length; i++) {
     161         881 :                 if (code[i] != '\n' && code[i] != ' ' && code[i] != '\t' &&
     162             :                         code[i] != '{')
     163             :                         break;
     164         717 :                 if (code[i] == '{') {
     165         152 :                         code[i] = ' ';
     166             :                 }
     167             :         }
     168             :         // We indent using spaces, four spaces per level
     169             :         // We also erase empty lines
     170       20535 :         for (i = 0; i < length; i++) {
     171             :                 // handle multiline strings (strings that start with """)
     172       20371 :                 if (code[i] == '\"') {
     173          46 :                         if (!multiline_statement) {
     174          39 :                                 multiline_quotes++;
     175          39 :                                 multiline_statement = multiline_quotes == 3;
     176             :                         } else {
     177           7 :                                 multiline_quotes--;
     178           7 :                                 multiline_statement = multiline_quotes != 0;
     179             :                         }
     180             :                 } else {
     181       20325 :                         multiline_quotes = multiline_statement ? 3 : 0;
     182             :                 }
     183             : 
     184       20371 :                 if (!seen_statement) {
     185             :                         // We have not seen a statement on this line yet
     186        3624 :                         if (code[i] == '\n') {
     187             :                                 // Empty line, skip to the next one
     188             :                                 initial_spaces = 0;
     189        3481 :                         } else if (code[i] == ' ') {
     190        2469 :                                 initial_spaces++;
     191        1012 :                         } else if (code[i] == '\t') {
     192         366 :                                 initial_spaces += tabwidth;
     193             :                         } else {
     194             :                                 // Statement starts here
     195             :                                 seen_statement = true;
     196             :                         }
     197             :                 }
     198       19725 :                 if (seen_statement) {
     199             :                         // We have seen a statement on this line, check the indentation
     200             :                         // level
     201       17393 :                         statement_size++;
     202             : 
     203       17393 :                         if (code[i] == '\n' || i == length - 1) {
     204             :                                 // Statement ends here
     205         650 :                                 bool placed = false;
     206         650 :                                 size_t level = 0;
     207             : 
     208         650 :                                 if (multiline_statement) {
     209             :                                         // if we are in a multiline statement, we don't want to mess
     210             :                                         // with the indentation
     211           4 :                                         size += statement_size;
     212           4 :                                         initial_spaces = 0;
     213           4 :                                         statement_size = 0;
     214           4 :                                         continue;
     215             :                                 }
     216             :                                 // First put the indentation in the indentation table
     217         646 :                                 if (indentation_count >= max_indentation) {
     218             :                                         // If there is no room in the indentation arrays we will
     219             :                                         // extend them
     220             :                                         // This probably will never happen unless in really extreme
     221             :                                         // code (or if max_indentation is set very low)
     222           0 :                                         size_t *new_indentation =
     223           0 :                                                 GDKzalloc(2 * max_indentation * sizeof(size_t));
     224           0 :                                         size_t *new_statements_per_level;
     225           0 :                                         if (new_indentation == NULL) {
     226           0 :                                                 *msg =
     227           0 :                                                         createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     228           0 :                                                 goto finally;
     229             :                                         }
     230           0 :                                         new_statements_per_level =
     231           0 :                                                 GDKzalloc(2 * max_indentation * sizeof(size_t));
     232           0 :                                         if (new_statements_per_level == NULL) {
     233           0 :                                                 *msg =
     234           0 :                                                         createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     235           0 :                                                 goto finally;
     236             :                                         }
     237             : 
     238           0 :                                         for (i = 0; i < max_indentation; i++) {
     239           0 :                                                 new_indentation[i] = indentation_levels[i];
     240           0 :                                                 new_statements_per_level[i] = statements_per_level[i];
     241             :                                         }
     242           0 :                                         GDKfree(indentation_levels);
     243           0 :                                         GDKfree(statements_per_level);
     244           0 :                                         indentation_levels = new_indentation;
     245           0 :                                         statements_per_level = new_statements_per_level;
     246           0 :                                         max_indentation *= 2;
     247             :                                 }
     248             : 
     249         876 :                                 for (j = 0; j < indentation_count; j++) {
     250         621 :                                         if (initial_spaces == indentation_levels[j]) {
     251             :                                                 // The exact space count is already in the array, so we
     252             :                                                 // can stop
     253             :                                                 level = j;
     254             :                                                 placed = true;
     255             :                                                 break;
     256             :                                         }
     257             : 
     258         230 :                                         if (initial_spaces < indentation_levels[j]) {
     259             :                                                 // The indentation level is smaller than this level (but
     260             :                                                 // bigger than the previous level)
     261             :                                                 // So the indentation level belongs here, so we move
     262             :                                                 // every level past this one upward one level
     263             :                                                 // and put the indentation level here
     264           0 :                                                 for (k = indentation_count; k > j; k--) {
     265           0 :                                                         indentation_levels[k] = indentation_levels[k - 1];
     266           0 :                                                         statements_per_level[k] =
     267           0 :                                                                 statements_per_level[k - 1];
     268             :                                                 }
     269           0 :                                                 indentation_count++;
     270           0 :                                                 statements_per_level[j] = 0;
     271           0 :                                                 indentation_levels[j] = initial_spaces;
     272           0 :                                                 level = j;
     273           0 :                                                 placed = true;
     274           0 :                                                 break;
     275             :                                         }
     276             :                                 }
     277         646 :                                 if (!placed) {
     278             :                                         // The space count is the biggest we have seen, so we add it
     279             :                                         // to the end of the array
     280         255 :                                         level = indentation_count;
     281         255 :                                         indentation_levels[indentation_count++] = initial_spaces;
     282             :                                 }
     283         646 :                                 statements_per_level[level]++;
     284         646 :                                 size += statement_size;
     285         646 :                                 seen_statement = false;
     286         646 :                                 initial_spaces = 0;
     287         646 :                                 statement_size = 0;
     288             :                         }
     289             :                 }
     290             :         }
     291             :         // Add the amount of spaces we will add to the size
     292         419 :         for (i = 0; i < indentation_count; i++) {
     293         255 :                 size += (i + 1) * spaces_per_level * statements_per_level[i];
     294             :         }
     295             : 
     296             :         // Allocate space for the function
     297         164 :         newcode = GDKzalloc(size);
     298         164 :         if (newcode == NULL) {
     299           0 :                 *msg = createException(MAL, "pyapi3.eval", SQLSTATE(HY013) MAL_MALLOC_FAIL);
     300           0 :                 goto finally;
     301             :         }
     302        1804 :         initial_spaces = 0;
     303        1804 :         seen_statement = false;
     304             : 
     305             :         // First print in the function definition and arguments
     306        1804 :         for (i = 0; i < strlen(base_start); i++) {
     307        1640 :                 newcode[code_location++] = base_start[i];
     308             :         }
     309             :         // Add user-defined parameters
     310        1110 :         for (i = 0; i < argcount; i++) {
     311         946 :                 if (args[i] != NULL) {
     312        1905 :                         for (j = 0; j < strlen(args[i]); j++) {
     313        1512 :                                 newcode[code_location++] = args[i][j];
     314             :                         }
     315         393 :                         if (i != argcount - 1 || additional_argcount > 0) {
     316         393 :                                 newcode[code_location++] = ',';
     317             :                         }
     318             :                 }
     319             :         }
     320             :         // Add additional parameters
     321         631 :         for (i = 0; i < additional_argcount; i++) {
     322         467 :                 if (additional_args[i] != NULL) {
     323        4331 :                         for (j = 0; j < strlen(additional_args[i]); j++) {
     324        3864 :                                 newcode[code_location++] = additional_args[i][j];
     325             :                         }
     326         467 :                         if (i != additional_argcount - 1) {
     327         303 :                                 newcode[code_location++] = ',';
     328             :                         }
     329             :                 }
     330             :         }
     331         656 :         for (i = 0; i < strlen(base_end); i++) {
     332         492 :                 newcode[code_location++] = base_end[i];
     333             :         }
     334             : 
     335             :         // Now the second pass, actually construct the code
     336       20535 :         for (i = 0; i < length; i++) {
     337             :                 // handle multiline statements
     338       20371 :                 if (code[i] == '\"') {
     339          46 :                         if (!multiline_statement) {
     340          39 :                                 multiline_quotes++;
     341          39 :                                 multiline_statement = multiline_quotes == 3;
     342             :                         } else {
     343           7 :                                 multiline_quotes--;
     344           7 :                                 multiline_statement = multiline_quotes != 0;
     345             :                         }
     346             :                 } else {
     347       20325 :                         multiline_quotes = multiline_statement ? 3 : 0;
     348             :                 }
     349             : 
     350       20371 :                 if (!seen_statement) {
     351        3628 :                         if (multiline_statement)
     352             :                                 seen_statement = true; // if we are in a multiline string, we
     353             :                                                                            // simply want to copy everything
     354             :                                                                            // (including indentation)
     355             :                         // We have not seen a statement on this line yet
     356        3624 :                         else if (code[i] == '\n') {
     357             :                                 // Empty line, skip to the next one
     358             :                                 initial_spaces = 0;
     359        3481 :                         } else if (code[i] == ' ') {
     360        2469 :                                 initial_spaces++;
     361        1012 :                         } else if (code[i] == '\t') {
     362         366 :                                 initial_spaces += tabwidth;
     363             :                         } else {
     364             :                                 // Look through the indentation_levels array to find the level
     365             :                                 // of the statement
     366             :                                 // from the amount of initial spaces
     367         876 :                                 bool placed = false;
     368         876 :                                 size_t level = 0;
     369             :                                 // Statement starts here
     370         876 :                                 seen_statement = true;
     371         876 :                                 for (j = 0; j < indentation_count; j++) {
     372         876 :                                         if (initial_spaces == indentation_levels[j]) {
     373             :                                                 level = j;
     374             :                                                 placed = true;
     375             :                                                 break;
     376             :                                         }
     377             :                                 }
     378         646 :                                 if (!placed) {
     379             :                                         // This should never happen, because it means the initial
     380             :                                         // spaces was not present in the array
     381             :                                         // When we just did exactly the same loop over the array, we
     382             :                                         // should have encountered this statement
     383             :                                         // This means that something happened to either the
     384             :                                         // indentation_levels array or something happened to the
     385             :                                         // code
     386           0 :                                         *msg = createException(MAL, "pyapi3.eval",
     387             :                                                                                    SQLSTATE(PY000) "If you see this error something "
     388             :                                                                                    "went wrong in the code. Sorry.");
     389           0 :                                         goto finally;
     390             :                                 }
     391        2398 :                                 for (j = 0; j < (level + 1) * spaces_per_level; j++) {
     392             :                                         // Add spaces to the code
     393        1752 :                                         newcode[code_location++] = ' ';
     394             :                                 }
     395             :                         }
     396             :                 }
     397       20371 :                 if (seen_statement) {
     398             :                         // We have seen a statement on this line, copy it
     399       17393 :                         newcode[code_location++] = code[i];
     400       17393 :                         if (code[i] == '\n') {
     401             :                                 // The statement has ended, move on to the next line
     402         629 :                                 seen_statement = false;
     403         629 :                                 initial_spaces = 0;
     404         629 :                                 statement_size = 0;
     405             :                         }
     406             :                 }
     407             :         }
     408         164 :         newcode[code_location] = '\0';
     409         164 :         if (code_location >= size) {
     410             :                 // Something went wrong with our size computation, this also should
     411             :                 // never happen
     412           0 :                 *msg = createException(MAL, "pyapi3.eval",
     413             :                                                            SQLSTATE(PY000) "If you see this error something went wrong in "
     414             :                                                            "the code (size computation). Sorry.");
     415           0 :                 goto finally;
     416             :         }
     417         164 : finally:
     418         164 :         GDKfree(indentation_levels);
     419         164 :         GDKfree(statements_per_level);
     420         164 :         return newcode;
     421             : }
     422             : 
     423           0 : void _formatinput_init(void) { _import_array(); }

Generated by: LCOV version 1.14