LCOV - code coverage report
Current view: top level - common/utils - mstring.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 35 37 94.6 %
Date: 2021-10-27 03:06:47 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : #ifndef _MSTRING_H_
      10             : #define _MSTRING_H_
      11             : 
      12             : #include <stdarg.h>               /* va_list etc. */
      13             : #include <string.h>               /* strlen */
      14             : 
      15             : #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4))
      16             : /* not on CentOS 6 (GCC 4.4.7) */
      17             : #define GCC_Pragma(pragma)      _Pragma(pragma)
      18             : #else
      19             : #define GCC_Pragma(pragma)
      20             : #endif
      21             : 
      22             : /* copy at most (n-1) bytes from src to dst and add a terminating NUL
      23             :  * byte; return length of src (i.e. can be more than what is copied) */
      24             : static inline size_t
      25   577729069 : strcpy_len(char *restrict dst, const char *restrict src, size_t n)
      26             : {
      27   577729069 :         if (dst != NULL && n != 0) {
      28 10366086995 :                 for (size_t i = 0; i < n; i++) {
      29 10361927515 :                         if ((dst[i] = src[i]) == 0)
      30   573569589 :                                 return i;
      31             :                 }
      32             :                 /* for correctness, the decrement isn't needed (just assigning 0
      33             :                  * to dst[n-1] would be sufficient), but to work around a too
      34             :                  * strict GNU C compiler, we do need it */
      35     4159480 :                 dst[--n] = 0;
      36             : /* in some versions of GCC (at least gcc (Ubuntu 7.5.0-3ubuntu1~18.04)
      37             :  * 7.5.0), the error just can't be turned off when using
      38             :  * --enable-strict, so we just use the (more) expensive way of getting the
      39             :  * right answer (rescan the whole string) */
      40             : #if !defined(__GNUC__) || __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 5)
      41             : /* This code is correct, but GCC gives a warning in certain
      42             :  * conditions, so we disable the warning temporarily.
      43             :  * The warning happens e.g. in
      44             :  *   strcpy_len(buf, "fixed string", sizeof(buf))
      45             :  * where buf is larger than the string. In that case we never get here
      46             :  * since return is executed in the loop above, but the compiler
      47             :  * complains anyway about reading out-of-bounds.
      48             :  * For GCC we use _Pragma to disable the warning (and hence error).
      49             :  * Since other compilers may warn (and hence error out) on
      50             :  * unrecognized pragmas, we use some preprocessor trickery. */
      51             : GCC_Pragma("GCC diagnostic push")
      52             : GCC_Pragma("GCC diagnostic ignored \"-Warray-bounds\"")
      53     4159480 :                 return n + strlen(src + n);
      54             : GCC_Pragma("GCC diagnostic pop")
      55             : #endif
      56             :         }
      57           0 :         return strlen(src);
      58             : }
      59             : 
      60             : /* copy the src strings (an argument list ended by a NULL pointer) to
      61             :  * dst, writing at most n bytes; return the combined length of the src strings */
      62             : static inline size_t
      63   111253598 : strconcat_len(char *restrict dst, size_t n, const char *restrict src, ...)
      64             : {
      65             :         va_list ap;
      66             :         size_t i = 0;
      67             : 
      68   111253598 :         va_start(ap, src);
      69   599752899 :         while (src) {
      70             :                 size_t l;
      71   488523815 :                 if (dst && i < n)
      72   488523042 :                         l = strcpy_len(dst + i, src, n - i);
      73             :                 else
      74         773 :                         l = strlen(src);
      75   488467109 :                 i += l;
      76   488467109 :                 src = va_arg(ap, const char *);
      77             :         }
      78   111229084 :         va_end(ap);
      79   111229084 :         return i;
      80             : }
      81             : 
      82             : #ifndef __GNUC__
      83             : /* __builtin_expect returns its first argument; it is expected to be
      84             :  * equal to the second argument */
      85             : #define __builtin_expect(expr, expect)  (expr)
      86             : #endif
      87             : 
      88             : /*
      89             :  * UTF-8 encoding is as follows:
      90             :  * U-00000000 - U-0000007F: 0xxxxxxx
      91             :  * U-00000080 - U-000007FF: 110zzzzx 10xxxxxx
      92             :  * U-00000800 - U-0000FFFF: 1110zzzz 10zxxxxx 10xxxxxx
      93             :  * U-00010000 - U-0010FFFF: 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx
      94             :  *
      95             :  * To be correctly coded UTF-8, the sequence should be the shortest
      96             :  * possible encoding of the value being encoded.  This means that at
      97             :  * least one of the z bits must be non-zero.  Also note that the four
      98             :  * byte sequence can encode more than is allowed and that the values
      99             :  * U+D800..U+DFFF are not allowed to be encoded.
     100             :  */
     101             : static inline bool
     102    37371890 : checkUTF8(const char *v)
     103             : {
     104             :         /* It is unlikely that this function returns false, because
     105             :          * it is likely that the string presented is a correctly coded
     106             :          * UTF-8 string.  So we annotate the tests that are very
     107             :          * unlikely to succeed, i.e. the ones that lead to a return of
     108             :          * false, as being expected to return 0 using the
     109             :          * __builtin_expect function. */
     110    37371890 :         if (v != NULL) {
     111    37371890 :                 if (v[0] != '\200' || v[1] != '\0') {
     112             :                         /* check that string is correctly encoded UTF-8 */
     113  2576820218 :                         for (size_t i = 0; v[i]; i++) {
     114             :                                 /* we do not annotate all tests, only the ones
     115             :                                  * leading directly to an unlikely return
     116             :                                  * statement */
     117  2539475525 :                                 if ((v[i] & 0x80) == 0) {
     118             :                                         ;
     119       13651 :                                 } else if ((v[i] & 0xE0) == 0xC0) {
     120        1354 :                                         if (__builtin_expect(((v[i] & 0x1E) == 0), 0))
     121             :                                                 return false;
     122        1354 :                                         if (__builtin_expect(((v[++i] & 0xC0) != 0x80), 0))
     123             :                                                 return false;
     124       12297 :                                 } else if ((v[i] & 0xF0) == 0xE0) {
     125       12252 :                                         if ((v[i++] & 0x0F) == 0) {
     126           0 :                                                 if (__builtin_expect(((v[i] & 0xE0) != 0xA0), 0))
     127             :                                                         return false;
     128             :                                         } else {
     129       12252 :                                                 if (__builtin_expect(((v[i] & 0xC0) != 0x80), 0))
     130             :                                                         return false;
     131             :                                         }
     132       12252 :                                         if (__builtin_expect(((v[++i] & 0xC0) != 0x80), 0))
     133             :                                                 return false;
     134          45 :                                 } else if (__builtin_expect(((v[i] & 0xF8) == 0xF0), 1)) {
     135          45 :                                         if ((v[i++] & 0x07) == 0) {
     136          45 :                                                 if (__builtin_expect(((v[i] & 0x30) == 0), 0))
     137             :                                                         return false;
     138             :                                         }
     139          45 :                                         if (__builtin_expect(((v[i] & 0xC0) != 0x80), 0))
     140             :                                                 return false;
     141          45 :                                         if (__builtin_expect(((v[++i] & 0xC0) != 0x80), 0))
     142             :                                                 return false;
     143          45 :                                         if (__builtin_expect(((v[++i] & 0xC0) != 0x80), 0))
     144             :                                                 return false;
     145             :                                 } else {
     146             :                                         return false;
     147             :                                 }
     148             :                         }
     149             :                 }
     150             :         }
     151             :         return true;
     152             : }
     153             : 
     154             : #ifndef __GNUC__
     155             : #undef __builtin_expect
     156             : #endif
     157             : 
     158             : #endif

Generated by: LCOV version 1.14