LCOV - code coverage report
Current view: top level - common/stream - iconv_stream.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 83 173 48.0 %
Date: 2021-10-27 03:06:47 Functions: 6 8 75.0 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
        9             : /* Stream wrapper that converts character sets using iconv */
      10             : 
      11             : #include "monetdb_config.h"
      12             : #include "stream.h"
      13             : #include "stream_internal.h"
      14             : 
      15             : 
      16             : /* ------------------------------------------------------------------ */
      17             : /* streams working on a substream, converting character sets using iconv */
      18             : 
      19             : #ifdef HAVE_ICONV
      20             : 
       21             : struct icstream { /* per-stream state of an iconv conversion wrapper */
       22             :         iconv_t cd; /* conversion descriptor handed to iconv() and iconv_close() */
       23             :         char buffer[BUFSIZ]; /* writer: iconv output scratch and unconverted input tail; reader: raw bytes awaiting conversion */
       24             :         size_t buflen; /* number of pending, not yet converted bytes held in buffer */
       25             :         bool eof; /* set by ic_read once the inner stream returned 0 bytes */
       26             : };
      27             : 
      28             : static ssize_t
      29         332 : ic_write(stream *restrict s, const void *restrict buf, size_t elmsize, size_t cnt)
      30             : {
      31         332 :         struct icstream *ic = (struct icstream *) s->stream_data.p;
      32             :         ICONV_CONST char *inbuf;
      33         332 :         size_t inbytesleft = elmsize * cnt;
      34             :         char *bf = NULL;
      35             : 
      36         332 :         if (ic == NULL) {
      37           0 :                 mnstr_set_error(s, MNSTR_WRITE_ERROR, "stream already ended");
      38           0 :                 goto bailout;
      39             :         }
      40             : 
      41             :         /* if unconverted data from a previous call remains, add it to
      42             :          * the start of the new data, using temporary space */
      43         332 :         if (ic->buflen > 0) {
      44           7 :                 bf = malloc(ic->buflen + inbytesleft);
      45           7 :                 if (bf == NULL) {
      46             :                         /* cannot allocate memory */
      47           0 :                         mnstr_set_error(s, MNSTR_WRITE_ERROR, "out of memory");
      48           0 :                         goto bailout;
      49             :                 }
      50           7 :                 memcpy(bf, ic->buffer, ic->buflen);
      51           7 :                 memcpy(bf + ic->buflen, buf, inbytesleft);
      52             :                 buf = bf;
      53           7 :                 inbytesleft += ic->buflen;
      54           7 :                 ic->buflen = 0;
      55             :         }
      56         332 :         inbuf = (ICONV_CONST char *) buf;
      57         655 :         while (inbytesleft > 0) {
      58         332 :                 char *outbuf = ic->buffer;
      59         332 :                 size_t outbytesleft = sizeof(ic->buffer);
      60             : 
      61         332 :                 if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
      62           9 :                         switch (errno) {
      63           2 :                         case EILSEQ:
      64             :                                 /* invalid multibyte sequence encountered */
      65           2 :                                 mnstr_set_error(s, MNSTR_WRITE_ERROR, "invalid multibyte sequence");
      66           2 :                                 goto bailout;
      67           7 :                         case EINVAL:
      68             :                                 /* incomplete multibyte sequence
      69             :                                  * encountered flush what has been
      70             :                                  * converted */
      71           7 :                                 if (outbytesleft < sizeof(ic->buffer) &&
      72           0 :                                     mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
      73           0 :                                         mnstr_set_error(s, MNSTR_WRITE_ERROR, "incomplete multibyte sequence");
      74           0 :                                         goto bailout;
      75             :                                 }
      76             :                                 /* remember what hasn't been converted */
      77           7 :                                 if (inbytesleft > sizeof(ic->buffer)) {
      78             :                                         /* ridiculously long multibyte
      79             :                                          * sequence, so return
      80             :                                          * error */
      81           0 :                                         mnstr_set_error(s, MNSTR_WRITE_ERROR, "multibyte sequence too long");
      82           0 :                                         goto bailout;
      83             :                                 }
      84           7 :                                 memcpy(ic->buffer, inbuf, inbytesleft);
      85           7 :                                 ic->buflen = inbytesleft;
      86           7 :                                 if (bf)
      87           0 :                                         free(bf);
      88           7 :                                 return (ssize_t) cnt;
      89             :                         case E2BIG:
      90             :                                 /* not enough space in output buffer */
      91             :                                 break;
      92           0 :                         default:
      93             :                                 /* cannot happen (according to manual) */
      94           0 :                                 mnstr_set_error(s, MNSTR_WRITE_ERROR, "iconv internal error %d", errno);
      95           0 :                                 goto bailout;
      96             :                         }
      97         323 :                 }
      98         323 :                 if (mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
      99           0 :                         mnstr_copy_error(s, s->inner);
     100           0 :                         goto bailout;
     101             :                 }
     102             :         }
     103         323 :         if (bf)
     104           6 :                 free(bf);
     105         323 :         return (ssize_t) cnt;
     106             : 
     107           2 :         bailout:
     108           2 :         assert(s->errkind != MNSTR_NO__ERROR);
     109           2 :         if (bf)
     110           1 :                 free(bf);
     111             :         return -1;
     112             : }
     113             : 
     114             : static ssize_t
     115           0 : ic_read(stream *restrict s, void *restrict buf, size_t elmsize, size_t cnt)
     116             : {
     117           0 :         struct icstream *ic = (struct icstream *) s->stream_data.p;
     118             :         ICONV_CONST char *inbuf;
     119             :         size_t inbytesleft;
     120             :         char *outbuf;
     121             :         size_t outbytesleft;
     122             : 
     123           0 :         if (ic == NULL) {
     124           0 :                 mnstr_set_error(s, MNSTR_READ_ERROR, "stream already ended");
     125           0 :                 return -1;
     126             :         }
     127           0 :         inbuf = ic->buffer;
     128           0 :         inbytesleft = ic->buflen;
     129           0 :         outbuf = (char *) buf;
     130           0 :         outbytesleft = elmsize * cnt;
     131           0 :         if (outbytesleft == 0)
     132             :                 return 0;
     133           0 :         while (outbytesleft > 0 && !ic->eof) {
     134           0 :                 if (ic->buflen == sizeof(ic->buffer)) {
     135             :                         /* ridiculously long multibyte sequence, return error */
     136           0 :                         mnstr_set_error(s, MNSTR_READ_ERROR, "multibyte sequence too long");
     137           0 :                         return -1;
     138             :                 }
     139             : 
     140           0 :                 switch (mnstr_read(s->inner, ic->buffer + ic->buflen, 1, 1)) {
     141           0 :                 case 1:
     142             :                         /* expected: read one byte */
     143           0 :                         ic->buflen++;
     144           0 :                         inbytesleft++;
     145             :                         break;
     146           0 :                 case 0:
     147             :                         /* end of file */
     148           0 :                         ic->eof = true;
     149           0 :                         if (ic->buflen > 0) {
     150             :                                 /* incomplete input */
     151           0 :                                 mnstr_set_error(s, MNSTR_READ_ERROR, "incomplete input");
     152           0 :                                 return -1;
     153             :                         }
     154           0 :                         if (iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1) {
     155             :                                 /* some error occurred */
     156           0 :                                 mnstr_set_error(s, MNSTR_READ_ERROR, "unspecified iconv error occurred");
     157           0 :                                 return -1;
     158             :                         }
     159           0 :                         goto exit_func; /* double break */
     160           0 :                 default:
     161             :                         /* error */
     162           0 :                         mnstr_copy_error(s, s->inner);
     163           0 :                         return -1;
     164             :                 }
     165           0 :                 if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
     166           0 :                         switch (errno) {
     167           0 :                         case EILSEQ:
     168           0 :                                 mnstr_set_error(s, MNSTR_READ_ERROR, "invalid multibyte sequence");
     169           0 :                                 return -1;
     170             :                         case EINVAL:
     171             :                                 /* incomplete multibyte sequence encountered */
     172             :                                 break;
     173           0 :                         case E2BIG:
     174             :                                 /* not enough space in output buffer,
     175             :                                  * return what we have, saving what's in
     176             :                                  * the buffer */
     177           0 :                                 goto exit_func;
     178           0 :                         default:
     179             :                                 /* cannot happen (according to manual) */
     180           0 :                                 mnstr_set_error(s, MNSTR_READ_ERROR, "inconv stream: internal error");
     181           0 :                                 return -1;
     182             :                         }
     183           0 :                 }
     184           0 :                 if (inbytesleft == 0) {
     185             :                         /* converted complete buffer */
     186           0 :                         inbuf = ic->buffer;
     187           0 :                         ic->buflen = 0;
     188             :                 }
     189             :         }
     190           0 :       exit_func:
     191           0 :         if (inbuf > ic->buffer)
     192           0 :                 memmove(ic->buffer, inbuf, inbytesleft);
     193           0 :         ic->buflen = inbytesleft;
     194           0 :         if (outbytesleft == elmsize * cnt && !s->inner->eof) {
     195             :                 /* if we're returning data, we must pass on EOF on the
     196             :                  * next call (i.e. keep ic->eof set), otherwise we
     197             :                  * must clear it so that the next call will cause the
     198             :                  * underlying stream to be read again */
     199           0 :                 ic->eof = false;
     200             :         }
     201           0 :         return (ssize_t) ((elmsize * cnt - outbytesleft) / elmsize);
     202             : }
     203             : 
     204             : static int
     205           8 : ic_flush(stream *s, mnstr_flush_level flush_level)
     206             : {
     207           8 :         struct icstream *ic = (struct icstream *) s->stream_data.p;
     208             :         char *outbuf;
     209             :         size_t outbytesleft;
     210             : 
     211           8 :         if (ic == NULL)
     212             :                 return -1;
     213           8 :         outbuf = ic->buffer;
     214           8 :         outbytesleft = sizeof(ic->buffer);
     215             :         /* if unconverted data from a previous call remains, it was an
     216             :          * incomplete multibyte sequence, so an error */
     217          16 :         if (ic->buflen > 0 ||
     218           8 :             iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1 ||
     219           8 :             (outbytesleft < sizeof(ic->buffer) &&
     220           0 :              mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0)) {
     221           0 :                 mnstr_copy_error(s, s->inner);
     222           0 :                 return -1;
     223             :         }
     224           8 :         return mnstr_flush(s->inner, flush_level);
     225             : }
     226             : 
     227             : static void
     228           4 : ic_close(stream *s)
     229             : {
     230           4 :         struct icstream *ic = (struct icstream *) s->stream_data.p;
     231             : 
     232           4 :         if (ic) {
     233           4 :                 if (!s->readonly)
     234           4 :                         ic_flush(s, MNSTR_FLUSH_DATA);
     235           4 :                 iconv_close(ic->cd);
     236           4 :                 close_stream(s->inner);
     237           4 :                 s->inner = NULL;
     238           4 :                 free(s->stream_data.p);
     239           4 :                 s->stream_data.p = NULL;
     240             :         }
     241           4 : }
     242             : 
      243             : static void /* destroy callback: close the wrapper, then release it */
      244           4 : ic_destroy(stream *s)
      245             : {
      246           4 :         ic_close(s); /* when conversion state exists this also closes the inner stream and sets s->inner to NULL */
      247           4 :         mnstr_destroy(s->inner); /* NOTE(review): s->inner is NULL here if ic_close ran its body; presumably mnstr_destroy accepts NULL - confirm */
      248           4 :         destroy_stream(s); /* presumably frees the wrapper stream object itself - confirm */
      249           4 : }
     250             : 
     251             : static stream *
     252           4 : ic_open(iconv_t cd, stream *restrict ss, const char *restrict name)
     253             : {
     254             :         stream *s;
     255             :         struct icstream *ic;
     256             : 
     257           4 :         if (ss->isutf8)
     258             :                 return ss;
     259           4 :         if ((s = create_wrapper_stream(name, ss)) == NULL)
     260             :                 return NULL;
     261           4 :         s->read = ic_read;
     262           4 :         s->write = ic_write;
     263           4 :         s->close = ic_close;
     264           4 :         s->destroy = ic_destroy;
     265           4 :         s->flush = ic_flush;
     266           4 :         ic = malloc(sizeof(struct icstream));
     267           4 :         if (ic == NULL) {
     268           0 :                 mnstr_destroy(s);
     269           0 :                 mnstr_set_open_error(s->name, errno, NULL);
     270           0 :                 return NULL;
     271             :         }
     272           4 :         s->stream_data.p = ic;
     273           4 :         *ic = (struct icstream) {
     274             :                 .cd = cd,
     275             :                 .buflen = 0,
     276             :                 .eof = false,
     277             :         };
     278           4 :         return s;
     279             : }
     280             : 
     281             : stream *
     282           0 : iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
     283             : {
     284             :         stream *s;
     285             :         iconv_t cd;
     286             : 
     287           0 :         if (ss == NULL || charset == NULL || name == NULL)
     288             :                 return NULL;
     289             : #ifdef STREAM_DEBUG
     290             :         fprintf(stderr, "iconv_rstream %s %s\n", charset, name);
     291             : #endif
     292           0 :         if (ss->isutf8)
     293             :                 return ss;
     294           0 :         cd = iconv_open("utf-8", charset);
     295           0 :         if (cd == (iconv_t) -1) {
     296           0 :                 mnstr_set_open_error(name, errno, "iconv_open");
     297           0 :                 return NULL;
     298             :         }
     299           0 :         s = ic_open(cd, ss, name);
     300           0 :         if (s == NULL) {
     301           0 :                 iconv_close(cd);
     302           0 :                 return NULL;
     303             :         }
     304           0 :         s->readonly = true;
     305           0 :         s->isutf8 = true;
     306           0 :         return s;
     307             : }
     308             : 
     309             : stream *
     310           4 : iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
     311             : {
     312             :         stream *s;
     313             :         iconv_t cd;
     314             : 
     315           4 :         if (ss == NULL || charset == NULL || name == NULL)
     316             :                 return NULL;
     317             : #ifdef STREAM_DEBUG
     318             :         fprintf(stderr, "iconv_wstream %s %s\n", charset, name);
     319             : #endif
     320           4 :         if (ss->isutf8)
     321             :                 return ss;
     322           4 :         cd = iconv_open(charset, "utf-8");
     323           4 :         if (cd == (iconv_t) -1) {
     324           0 :                 mnstr_set_open_error(name, errno, "iconv_open");
     325           0 :                 return NULL;
     326             :         }
     327           4 :         s = ic_open(cd, ss, name);
     328           4 :         if (s == NULL) {
     329           0 :                 iconv_close(cd);
     330           0 :                 return NULL;
     331             :         }
     332           4 :         s->readonly = false;
     333           4 :         return s;
     334             : }
     335             : 
     336             : #else
     337             : stream *
     338             : iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
     339             : {
     340             :         if (ss == NULL || charset == NULL || name == NULL)
     341             :                 return NULL;
     342             :         if (ss->isutf8 ||
     343             :             strcmp(charset, "utf-8") == 0 ||
     344             :             strcmp(charset, "UTF-8") == 0 ||
     345             :             strcmp(charset, "UTF8") == 0)
     346             :                 return ss;
     347             : 
     348             :         mnstr_set_open_error(name, 0, "ICONV support has been left out of this MonetDB");
     349             :         return NULL;
     350             : }
     351             : 
     352             : stream *
     353             : iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
     354             : {
     355             :         if (ss == NULL || charset == NULL || name == NULL)
     356             :                 return NULL;
     357             :         if (ss->isutf8 ||
     358             :             strcmp(charset, "utf-8") == 0 ||
     359             :             strcmp(charset, "UTF-8") == 0 ||
     360             :             strcmp(charset, "UTF8") == 0)
     361             :                 return ss;
     362             : 
     363             :         mnstr_set_open_error(name, 0, "ICONV support has been left out of this MonetDB");
     364             :         return NULL;
     365             : }
     366             : #endif /* HAVE_ICONV */

Generated by: LCOV version 1.14