Line data Source code
1 : /*
2 : * This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 : *
6 : * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
7 : */
8 :
9 : /* Generic stream handling code such as init and close */
10 :
11 : #include "monetdb_config.h"
12 : #include "stream.h"
13 : #include "stream_internal.h"
14 :
15 :
16 : /* ------------------------------------------------------------------ */
17 : /* streams working on a substream, converting character sets using iconv */
18 :
19 : #ifdef HAVE_ICONV
20 :
/* Per-stream state of an iconv wrapper stream. */
struct icstream {
	/* conversion descriptor, released with iconv_close() in ic_close() */
	iconv_t cd;
	/* scratch space: the writer uses it as conversion output buffer
	 * and to park unconverted trailing input between calls; the
	 * reader collects raw bytes from the inner stream in it */
	char buffer[BUFSIZ];
	/* number of pending (not yet converted) bytes kept in buffer */
	size_t buflen;
	/* set by the reader once the inner stream returned 0 bytes */
	bool eof;
};
27 :
28 : static ssize_t
29 332 : ic_write(stream *restrict s, const void *restrict buf, size_t elmsize, size_t cnt)
30 : {
31 332 : struct icstream *ic = (struct icstream *) s->stream_data.p;
32 : ICONV_CONST char *inbuf;
33 332 : size_t inbytesleft = elmsize * cnt;
34 : char *bf = NULL;
35 :
36 332 : if (ic == NULL) {
37 0 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "stream already ended");
38 0 : goto bailout;
39 : }
40 :
41 : /* if unconverted data from a previous call remains, add it to
42 : * the start of the new data, using temporary space */
43 332 : if (ic->buflen > 0) {
44 7 : bf = malloc(ic->buflen + inbytesleft);
45 7 : if (bf == NULL) {
46 : /* cannot allocate memory */
47 0 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "out of memory");
48 0 : goto bailout;
49 : }
50 7 : memcpy(bf, ic->buffer, ic->buflen);
51 7 : memcpy(bf + ic->buflen, buf, inbytesleft);
52 : buf = bf;
53 7 : inbytesleft += ic->buflen;
54 7 : ic->buflen = 0;
55 : }
56 332 : inbuf = (ICONV_CONST char *) buf;
57 655 : while (inbytesleft > 0) {
58 332 : char *outbuf = ic->buffer;
59 332 : size_t outbytesleft = sizeof(ic->buffer);
60 :
61 332 : if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
62 9 : switch (errno) {
63 2 : case EILSEQ:
64 : /* invalid multibyte sequence encountered */
65 2 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "invalid multibyte sequence");
66 2 : goto bailout;
67 7 : case EINVAL:
68 : /* incomplete multibyte sequence
69 : * encountered flush what has been
70 : * converted */
71 7 : if (outbytesleft < sizeof(ic->buffer) &&
72 0 : mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
73 0 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "incomplete multibyte sequence");
74 0 : goto bailout;
75 : }
76 : /* remember what hasn't been converted */
77 7 : if (inbytesleft > sizeof(ic->buffer)) {
78 : /* ridiculously long multibyte
79 : * sequence, so return
80 : * error */
81 0 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "multibyte sequence too long");
82 0 : goto bailout;
83 : }
84 7 : memcpy(ic->buffer, inbuf, inbytesleft);
85 7 : ic->buflen = inbytesleft;
86 7 : if (bf)
87 0 : free(bf);
88 7 : return (ssize_t) cnt;
89 : case E2BIG:
90 : /* not enough space in output buffer */
91 : break;
92 0 : default:
93 : /* cannot happen (according to manual) */
94 0 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "iconv internal error %d", errno);
95 0 : goto bailout;
96 : }
97 323 : }
98 323 : if (mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
99 0 : mnstr_copy_error(s, s->inner);
100 0 : goto bailout;
101 : }
102 : }
103 323 : if (bf)
104 6 : free(bf);
105 323 : return (ssize_t) cnt;
106 :
107 2 : bailout:
108 2 : assert(s->errkind != MNSTR_NO__ERROR);
109 2 : if (bf)
110 1 : free(bf);
111 : return -1;
112 : }
113 :
114 : static ssize_t
115 0 : ic_read(stream *restrict s, void *restrict buf, size_t elmsize, size_t cnt)
116 : {
117 0 : struct icstream *ic = (struct icstream *) s->stream_data.p;
118 : ICONV_CONST char *inbuf;
119 : size_t inbytesleft;
120 : char *outbuf;
121 : size_t outbytesleft;
122 :
123 0 : if (ic == NULL) {
124 0 : mnstr_set_error(s, MNSTR_READ_ERROR, "stream already ended");
125 0 : return -1;
126 : }
127 0 : inbuf = ic->buffer;
128 0 : inbytesleft = ic->buflen;
129 0 : outbuf = (char *) buf;
130 0 : outbytesleft = elmsize * cnt;
131 0 : if (outbytesleft == 0)
132 : return 0;
133 0 : while (outbytesleft > 0 && !ic->eof) {
134 0 : if (ic->buflen == sizeof(ic->buffer)) {
135 : /* ridiculously long multibyte sequence, return error */
136 0 : mnstr_set_error(s, MNSTR_READ_ERROR, "multibyte sequence too long");
137 0 : return -1;
138 : }
139 :
140 0 : switch (mnstr_read(s->inner, ic->buffer + ic->buflen, 1, 1)) {
141 0 : case 1:
142 : /* expected: read one byte */
143 0 : ic->buflen++;
144 0 : inbytesleft++;
145 : break;
146 0 : case 0:
147 : /* end of file */
148 0 : ic->eof = true;
149 0 : if (ic->buflen > 0) {
150 : /* incomplete input */
151 0 : mnstr_set_error(s, MNSTR_READ_ERROR, "incomplete input");
152 0 : return -1;
153 : }
154 0 : if (iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1) {
155 : /* some error occurred */
156 0 : mnstr_set_error(s, MNSTR_READ_ERROR, "unspecified iconv error occurred");
157 0 : return -1;
158 : }
159 0 : goto exit_func; /* double break */
160 0 : default:
161 : /* error */
162 0 : mnstr_copy_error(s, s->inner);
163 0 : return -1;
164 : }
165 0 : if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
166 0 : switch (errno) {
167 0 : case EILSEQ:
168 0 : mnstr_set_error(s, MNSTR_READ_ERROR, "invalid multibyte sequence");
169 0 : return -1;
170 : case EINVAL:
171 : /* incomplete multibyte sequence encountered */
172 : break;
173 0 : case E2BIG:
174 : /* not enough space in output buffer,
175 : * return what we have, saving what's in
176 : * the buffer */
177 0 : goto exit_func;
178 0 : default:
179 : /* cannot happen (according to manual) */
180 0 : mnstr_set_error(s, MNSTR_READ_ERROR, "inconv stream: internal error");
181 0 : return -1;
182 : }
183 0 : }
184 0 : if (inbytesleft == 0) {
185 : /* converted complete buffer */
186 0 : inbuf = ic->buffer;
187 0 : ic->buflen = 0;
188 : }
189 : }
190 0 : exit_func:
191 0 : if (inbuf > ic->buffer)
192 0 : memmove(ic->buffer, inbuf, inbytesleft);
193 0 : ic->buflen = inbytesleft;
194 0 : if (outbytesleft == elmsize * cnt && !s->inner->eof) {
195 : /* if we're returning data, we must pass on EOF on the
196 : * next call (i.e. keep ic->eof set), otherwise we
197 : * must clear it so that the next call will cause the
198 : * underlying stream to be read again */
199 0 : ic->eof = false;
200 : }
201 0 : return (ssize_t) ((elmsize * cnt - outbytesleft) / elmsize);
202 : }
203 :
204 : static int
205 8 : ic_flush(stream *s, mnstr_flush_level flush_level)
206 : {
207 8 : struct icstream *ic = (struct icstream *) s->stream_data.p;
208 : char *outbuf;
209 : size_t outbytesleft;
210 :
211 8 : if (ic == NULL)
212 : return -1;
213 8 : outbuf = ic->buffer;
214 8 : outbytesleft = sizeof(ic->buffer);
215 : /* if unconverted data from a previous call remains, it was an
216 : * incomplete multibyte sequence, so an error */
217 16 : if (ic->buflen > 0 ||
218 8 : iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1 ||
219 8 : (outbytesleft < sizeof(ic->buffer) &&
220 0 : mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0)) {
221 0 : mnstr_copy_error(s, s->inner);
222 0 : return -1;
223 : }
224 8 : return mnstr_flush(s->inner, flush_level);
225 : }
226 :
227 : static void
228 4 : ic_close(stream *s)
229 : {
230 4 : struct icstream *ic = (struct icstream *) s->stream_data.p;
231 :
232 4 : if (ic) {
233 4 : if (!s->readonly)
234 4 : ic_flush(s, MNSTR_FLUSH_DATA);
235 4 : iconv_close(ic->cd);
236 4 : close_stream(s->inner);
237 4 : s->inner = NULL;
238 4 : free(s->stream_data.p);
239 4 : s->stream_data.p = NULL;
240 : }
241 4 : }
242 :
243 : static void
244 4 : ic_destroy(stream *s)
245 : {
246 4 : ic_close(s);
247 4 : mnstr_destroy(s->inner);
248 4 : destroy_stream(s);
249 4 : }
250 :
251 : static stream *
252 4 : ic_open(iconv_t cd, stream *restrict ss, const char *restrict name)
253 : {
254 : stream *s;
255 : struct icstream *ic;
256 :
257 4 : if (ss->isutf8)
258 : return ss;
259 4 : if ((s = create_wrapper_stream(name, ss)) == NULL)
260 : return NULL;
261 4 : s->read = ic_read;
262 4 : s->write = ic_write;
263 4 : s->close = ic_close;
264 4 : s->destroy = ic_destroy;
265 4 : s->flush = ic_flush;
266 4 : ic = malloc(sizeof(struct icstream));
267 4 : if (ic == NULL) {
268 0 : mnstr_destroy(s);
269 0 : mnstr_set_open_error(s->name, errno, NULL);
270 0 : return NULL;
271 : }
272 4 : s->stream_data.p = ic;
273 4 : *ic = (struct icstream) {
274 : .cd = cd,
275 : .buflen = 0,
276 : .eof = false,
277 : };
278 4 : return s;
279 : }
280 :
281 : stream *
282 0 : iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
283 : {
284 : stream *s;
285 : iconv_t cd;
286 :
287 0 : if (ss == NULL || charset == NULL || name == NULL)
288 : return NULL;
289 : #ifdef STREAM_DEBUG
290 : fprintf(stderr, "iconv_rstream %s %s\n", charset, name);
291 : #endif
292 0 : if (ss->isutf8)
293 : return ss;
294 0 : cd = iconv_open("utf-8", charset);
295 0 : if (cd == (iconv_t) -1) {
296 0 : mnstr_set_open_error(name, errno, "iconv_open");
297 0 : return NULL;
298 : }
299 0 : s = ic_open(cd, ss, name);
300 0 : if (s == NULL) {
301 0 : iconv_close(cd);
302 0 : return NULL;
303 : }
304 0 : s->readonly = true;
305 0 : s->isutf8 = true;
306 0 : return s;
307 : }
308 :
309 : stream *
310 4 : iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
311 : {
312 : stream *s;
313 : iconv_t cd;
314 :
315 4 : if (ss == NULL || charset == NULL || name == NULL)
316 : return NULL;
317 : #ifdef STREAM_DEBUG
318 : fprintf(stderr, "iconv_wstream %s %s\n", charset, name);
319 : #endif
320 4 : if (ss->isutf8)
321 : return ss;
322 4 : cd = iconv_open(charset, "utf-8");
323 4 : if (cd == (iconv_t) -1) {
324 0 : mnstr_set_open_error(name, errno, "iconv_open");
325 0 : return NULL;
326 : }
327 4 : s = ic_open(cd, ss, name);
328 4 : if (s == NULL) {
329 0 : iconv_close(cd);
330 0 : return NULL;
331 : }
332 4 : s->readonly = false;
333 4 : return s;
334 : }
335 :
336 : #else
337 : stream *
338 : iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
339 : {
340 : if (ss == NULL || charset == NULL || name == NULL)
341 : return NULL;
342 : if (ss->isutf8 ||
343 : strcmp(charset, "utf-8") == 0 ||
344 : strcmp(charset, "UTF-8") == 0 ||
345 : strcmp(charset, "UTF8") == 0)
346 : return ss;
347 :
348 : mnstr_set_open_error(name, 0, "ICONV support has been left out of this MonetDB");
349 : return NULL;
350 : }
351 :
352 : stream *
353 : iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
354 : {
355 : if (ss == NULL || charset == NULL || name == NULL)
356 : return NULL;
357 : if (ss->isutf8 ||
358 : strcmp(charset, "utf-8") == 0 ||
359 : strcmp(charset, "UTF-8") == 0 ||
360 : strcmp(charset, "UTF8") == 0)
361 : return ss;
362 :
363 : mnstr_set_open_error(name, 0, "ICONV support has been left out of this MonetDB");
364 : return NULL;
365 : }
366 : #endif /* HAVE_ICONV */
|