LCOV - code coverage report
Current view: top level - common/stream - text_stream.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 104 130 80.0 %
Date: 2021-10-13 02:24:04 Functions: 12 13 92.3 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : #include "monetdb_config.h"
      10             : #include "stream.h"
      11             : #include "stream_internal.h"
      12             : #include "pump.h"
      13             : 
      14             : /* When reading, text streams convert \r\n to \n regardless of operating system,
      15             :  * and they drop the leading UTF-8 BOM marker if found.
      16             :  * When writing on Windows, \n is translated back to \r\n.
      17             :  *
      18             :  * Currently, skipping the BOM happens when opening, not on the first read action.
      19             :  */
      20             : 
      21             : #define UTF8BOM         "\xEF\xBB\xBF"        /* UTF-8 encoding of Unicode BOM */
      22             : #define UTF8BOMLENGTH   3       /* length of above */
      23             : 
      24             : #define BUFFER_SIZE (65536)
      25             : struct inner_state {
      26             :         pump_buffer src_win;
      27             :         pump_buffer dst_win;
      28             :         pump_buffer putback_win;
      29             :         char putback_buf[UTF8BOMLENGTH];
      30             :         bool crlf_pending;
      31             :         char buffer[BUFFER_SIZE];
      32             : };
      33             : 
      34             : 
      35             : static pump_buffer
      36      326109 : get_src_win(inner_state_t *inner_state)
      37             : {
      38      326109 :         return inner_state->src_win;
      39             : }
      40             : 
      41             : 
      42             : static void
      43       34927 : set_src_win(inner_state_t *inner_state, pump_buffer buf)
      44             : {
      45       34927 :         inner_state->src_win = buf;
      46       34927 : }
      47             : 
      48             : 
      49             : static pump_buffer
      50      618070 : get_dst_win(inner_state_t *inner_state)
      51             : {
      52      618070 :         return inner_state->dst_win;
      53             : }
      54             : 
      55             : 
      56             : static void
      57      146015 : set_dst_win(inner_state_t *inner_state, pump_buffer buf)
      58             : {
      59      146015 :         inner_state->dst_win = buf;
      60      146015 : }
      61             : 
      62             : 
      63             : static pump_buffer
      64      146327 : get_buffer(inner_state_t *inner_state)
      65             : {
      66      146327 :         return (pump_buffer) { .start = inner_state->buffer, .count = BUFFER_SIZE };
      67             : }
      68             : 
      69             : inline static void
      70  2238067088 : put_byte(inner_state_t *ist, char byte)
      71             : {
      72  2238067088 :         *ist->dst_win.start++ = byte;
      73  2238067088 :         assert(ist->dst_win.count > 0);
      74  2238067088 :         ist->dst_win.count--;
      75  2238067088 : }
      76             : 
      77             : inline static char
      78             : take_byte(inner_state_t *ist)
      79             : {
      80  2238097953 :         ist->src_win.count--;
      81  2238097953 :         return *ist->src_win.start++;
      82             : }
      83             : 
      84             : static pump_result
      85      180608 : text_pump_in(inner_state_t *ist, pump_action action)
      86             : {
      87      180608 :         bool crlf_pending = ist->crlf_pending;
      88             : 
      89  2238278561 :         while (ist->src_win.count > 0 && ist->dst_win.count > 0) {
      90             :                 char c = take_byte(ist);
      91  2238097953 :                 switch (c) {
      92       31465 :                         case '\r':
      93       31465 :                                 if (crlf_pending) {
      94             :                                         // put the previous one, which is clearly not followed by an \n
      95         435 :                                         put_byte(ist, '\r');
      96             :                                 }
      97             :                                 crlf_pending = true;
      98       31465 :                                 continue;
      99   140167584 :                         case '\n':
     100   140167584 :                                 put_byte(ist, c);
     101             :                                 crlf_pending = false;
     102   140167584 :                                 continue;
     103  2097898904 :                         default:
     104  2097898904 :                                 if (crlf_pending) {
     105         165 :                                         put_byte(ist, '\r');
     106             :                                         crlf_pending = false;
     107             :                                         // if dst_win.count was 1, there is no room for another put_byte().
     108         165 :                                         if (ist->dst_win.count > 0) {
     109         165 :                                                 put_byte(ist, c);
     110             :                                         } else {
     111             :                                                 // no room anymore for char c, put it back!
     112           0 :                                                 ist->src_win.start--;
     113           0 :                                                 ist->src_win.count++;
     114             :                                         }
     115             :                                 } else {
     116  2097898739 :                                         put_byte(ist, c);
     117             :                                 }
     118  2097898904 :                                 continue;
     119             :                 }
     120             :         }
     121             : 
     122      180608 :         ist->crlf_pending = crlf_pending;
     123             : 
     124      180608 :         if (action == PUMP_FINISH) {
     125         367 :                 if (ist->src_win.count > 0)
     126             :                         // More work to do
     127             :                         return PUMP_OK;
     128         367 :                 if (!ist->crlf_pending)
     129             :                         // Completely done
     130             :                         return PUMP_END;
     131           0 :                 if (ist->dst_win.count > 0) {
     132           0 :                         put_byte(ist, '\r');
     133           0 :                         ist->crlf_pending = false; // not strictly necessary
     134             :                         // Now we're completely done
     135           0 :                         return PUMP_END;
     136             :                 } else
     137             :                         // Come back another time to flush the pending CR
     138             :                         return PUMP_OK;
     139             :         } else
     140             :                 // There is no error and we are not finishing so clearly we
     141             :                 // must return PUMP_OK
     142             :                 return PUMP_OK;
     143             : }
     144             : 
     145             : 
     146             : static pump_result
     147      180337 : text_pump_in_with_putback(inner_state_t *ist, pump_action action)
     148             : {
     149      180337 :         if (ist->putback_win.count > 0) {
     150         271 :                 pump_buffer tmp = ist->src_win;
     151         271 :                 ist->src_win = ist->putback_win;
     152         271 :                 pump_result ret = text_pump_in(ist, PUMP_NO_FLUSH);
     153         271 :                 ist->putback_win = ist->src_win;
     154         271 :                 ist->src_win = tmp;
     155         271 :                 if (ret == PUMP_ERROR)
     156             :                         return PUMP_ERROR;
     157             :         }
     158      180337 :         return text_pump_in(ist, action);
     159             : }
     160             : 
     161             : 
     162             : static pump_result
     163          34 : text_pump_out(inner_state_t *ist, pump_action action)
     164             : {
     165          34 :         size_t src_count = ist->src_win.count;
     166          34 :         size_t dst_count = ist->dst_win.count;
     167          34 :         size_t ncopy = src_count < dst_count ? src_count : dst_count;
     168             : 
     169          34 :         memcpy(ist->dst_win.start, ist->src_win.start, ncopy);
     170          34 :         ist->dst_win.start += ncopy;
     171          34 :         ist->dst_win.count -= ncopy;
     172          34 :         ist->src_win.start += ncopy;
     173          34 :         ist->src_win.count -= ncopy;
     174             : 
     175          34 :         if (ist->src_win.count > 0)
     176             :                 // definitely not done
     177             :                 return PUMP_OK;
     178          34 :         if (action == PUMP_NO_FLUSH)
     179             :                 // never return PUMP_END
     180             :                 return PUMP_OK;
     181           6 :         if (ist->crlf_pending)
     182             :                 // src win empty but cr still pending so not done
     183           0 :                 return PUMP_OK;
     184             :         // src win empty and no cr pending and flush or finish requested
     185             :         return PUMP_END;
     186             : }
     187             : 
     188             : 
     189             : static pump_result
     190             : text_pump_out_crlf(inner_state_t *ist, pump_action action)
     191             : {
     192             :         if (ist->crlf_pending && ist->dst_win.count > 0) {
     193             :                 put_byte(ist, '\n');
     194             :                 ist->crlf_pending = false;
     195             :         }
     196             : 
     197             :         while (ist->src_win.count > 0 && ist->dst_win.count > 0) {
     198             :                 char c = take_byte(ist);
     199             :                 if (c != '\n') {
     200             :                         put_byte(ist, c);
     201             :                         continue;
     202             :                 }
     203             :                 put_byte(ist, '\r');
     204             :                 if (ist->dst_win.count > 0)
     205             :                         put_byte(ist, '\n');
     206             :                 else {
     207             :                         ist->crlf_pending = true;
     208             :                         break;
     209             :                 }
     210             :         }
     211             : 
     212             :         if (ist->src_win.count > 0)
     213             :                 // definitely not done
     214             :                 return PUMP_OK;
     215             :         if (action == PUMP_NO_FLUSH)
     216             :                 // never return PUMP_END
     217             :                 return PUMP_OK;
     218             :         if (ist->crlf_pending)
     219             :                 // src win empty but cr still pending so not done
     220             :                 return PUMP_OK;
     221             :         // src win empty and no cr pending and flush or finish requested
     222             :         return PUMP_END;
     223             : }
     224             : 
     225             : 
     226             : static void
     227         284 : text_end(inner_state_t *s)
     228             : {
     229         284 :         free(s);
     230         284 : }
     231             : 
     232             : 
     233             : static const char*
     234           0 : get_error(inner_state_t *s)
     235             : {
     236             :         (void)s;
     237           0 :         return "line ending conversion failure";
     238             : }
     239             : 
     240             : static ssize_t
     241         279 : skip_bom(stream *s)
     242             : {
     243         279 :         pump_state *state = (pump_state*) s->stream_data.p;
     244         279 :         stream *inner = s->inner;
     245         279 :         inner_state_t *ist = state->inner_state;
     246             : 
     247         279 :         ssize_t nread = mnstr_read(inner, ist->putback_buf, 1, UTF8BOMLENGTH);
     248         279 :         if (nread < 0) {
     249           0 :                 mnstr_copy_error(s, inner);
     250           0 :                 return nread;
     251             :         }
     252             : 
     253         279 :         if (nread == UTF8BOMLENGTH &&  memcmp(ist->putback_buf, UTF8BOM, nread) == 0) {
     254             :                 // Bingo! Skip it!
     255           1 :                 s->isutf8 = true;
     256           1 :                 return 3;
     257             :         }
     258             : 
     259             :         // We have consumed some bytes that have to be unconsumed.
     260             :         // skip_bom left them in the putback_buf.
     261         278 :         ist->putback_win.start = ist->putback_buf;
     262         278 :         ist->putback_win.count = nread;
     263             : 
     264         278 :         return 0;
     265             : }
     266             : 
     267             : 
     268             : stream *
     269         284 : create_text_stream(stream *inner)
     270             : {
     271         284 :         inner_state_t *inner_state = calloc(1, sizeof(inner_state_t));
     272         284 :         if (inner_state == NULL) {
     273           0 :                 mnstr_set_open_error(inner->name, errno, NULL);
     274           0 :                 return NULL;
     275             :         }
     276             : 
     277         284 :         pump_state *state = calloc(1, sizeof(pump_state));
     278         284 :         if (inner_state == NULL || state == NULL) {
     279           0 :                 free(inner_state);
     280           0 :                 mnstr_set_open_error(inner->name, errno, NULL);
     281           0 :                 return NULL;
     282             :         }
     283             : 
     284         284 :         state->inner_state = inner_state;
     285         284 :         state->get_src_win = get_src_win;
     286         284 :         state->set_src_win = set_src_win;
     287         284 :         state->get_dst_win = get_dst_win;
     288         284 :         state->set_dst_win = set_dst_win;
     289         284 :         state->get_buffer = get_buffer;
     290         284 :         state->finalizer = text_end;
     291         284 :         state->get_error = get_error;
     292             : 
     293         284 :         inner_state->putback_win.start = inner_state->putback_buf;
     294         284 :         inner_state->putback_win.count = 0;
     295         284 :         if (inner->readonly) {
     296         279 :                 inner_state->src_win.start = inner_state->buffer;
     297         279 :                 inner_state->src_win.count = 0;
     298         279 :                 state->worker = text_pump_in_with_putback;
     299             :         } else {
     300           5 :                 inner_state->dst_win.start = inner_state->buffer;
     301           5 :                 inner_state->dst_win.count = BUFFER_SIZE;
     302             : #ifdef _MSC_VER
     303             :                 state->worker = text_pump_out_crlf;
     304             :                 (void) text_pump_out;
     305             : #else
     306           5 :                 state->worker = text_pump_out;
     307             :                 (void) text_pump_out_crlf;
     308             : #endif
     309             :         }
     310             : 
     311         284 :         stream *s = pump_stream(inner, state);
     312         284 :         if (s == NULL) {
     313           0 :                 free(inner_state);
     314           0 :                 free(state);
     315           0 :                 return NULL;
     316             :         }
     317             : 
     318         284 :         s->binary = false;
     319             : 
     320         284 :         if (s->readonly)
     321         279 :                 if (skip_bom(s) < 0) {
     322           0 :                         free(inner_state);
     323           0 :                         free(state);
     324           0 :                         char *err = mnstr_error(s);
     325           0 :                         mnstr_set_open_error(inner->name, 0, "while looking for a byte order mark: %s", err);
     326           0 :                         free(err);
     327           0 :                         destroy_stream(s);
     328           0 :                         return NULL;
     329             :                 }
     330             : 
     331             :         return s;
     332             : }

Generated by: LCOV version 1.14