LCOV - code coverage report
Current view: top level - lib/gl - striconv.c (source / functions) Hit Total Coverage
Test: GNU Libidn Lines: 91 143 63.6 %
Date: 2020-07-22 17:53:13 Functions: 3 3 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Charset conversion.
       2             :    Copyright (C) 2001-2007, 2010-2020 Free Software Foundation, Inc.
       3             :    Written by Bruno Haible and Simon Josefsson.
       4             : 
       5             :    This program is free software; you can redistribute it and/or modify
       6             :    it under the terms of the GNU Lesser General Public License as published by
       7             :    the Free Software Foundation; either version 2.1, or (at your option)
       8             :    any later version.
       9             : 
      10             :    This program is distributed in the hope that it will be useful,
      11             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :    GNU Lesser General Public License for more details.
      14             : 
      15             :    You should have received a copy of the GNU Lesser General Public License
      16             :    along with this program; if not, see <https://www.gnu.org/licenses/>.  */
      17             : 
      18             : #include <config.h>
      19             : 
      20             : /* Specification.  */
      21             : #include "striconv.h"
      22             : 
      23             : #include <errno.h>
      24             : #include <stdlib.h>
      25             : #include <string.h>
      26             : 
      27             : #if HAVE_ICONV
      28             : # include <iconv.h>
      29             : /* Get MB_LEN_MAX, CHAR_BIT.  */
      30             : # include <limits.h>
      31             : #endif
      32             : 
      33             : #include "c-strcase.h"
      34             : 
      35             : #ifndef SIZE_MAX
      36             : # define SIZE_MAX ((size_t) -1)
      37             : #endif
      38             : 
      39             : 
      40             : #if HAVE_ICONV
      41             : 
      42             : int
      43           4 : mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
      44             :               char **resultp, size_t *lengthp)
      45             : {
      46             : # define tmpbufsize 4096
      47             :   size_t length;
      48             :   char *result;
      49             : 
      50             :   /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
      51             : # if defined _LIBICONV_VERSION \
      52             :      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
      53             :           || defined __sun)
      54             :   /* Set to the initial state.  */
      55           4 :   iconv (cd, NULL, NULL, NULL, NULL);
      56             : # endif
      57             : 
      58             :   /* Determine the length we need.  */
      59             :   {
      60           4 :     size_t count = 0;
      61             :     /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
      62             :        libiconv's UCS-4-INTERNAL encoding.  */
      63             :     union { unsigned int align; char buf[tmpbufsize]; } tmp;
      64             : # define tmpbuf tmp.buf
      65           4 :     const char *inptr = src;
      66           4 :     size_t insize = srclen;
      67             : 
      68           6 :     while (insize > 0)
      69             :       {
      70           4 :         char *outptr = tmpbuf;
      71           4 :         size_t outsize = tmpbufsize;
      72           4 :         size_t res = iconv (cd,
      73             :                             (ICONV_CONST char **) &inptr, &insize,
      74             :                             &outptr, &outsize);
      75             : 
      76           4 :         if (res == (size_t)(-1))
      77             :           {
      78           2 :             if (errno == E2BIG)
      79             :               ;
      80           2 :             else if (errno == EINVAL)
      81           1 :               break;
      82             :             else
      83           1 :               return -1;
      84             :           }
      85             : # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
      86             :         /* Irix iconv() inserts a NUL byte if it cannot convert.
      87             :            NetBSD iconv() inserts a question mark if it cannot convert.
      88             :            Only GNU libiconv and GNU libc are known to prefer to fail rather
      89             :            than doing a lossy conversion.  */
      90             :         else if (res > 0)
      91             :           {
      92             :             errno = EILSEQ;
      93             :             return -1;
      94             :           }
      95             : # endif
      96           2 :         count += outptr - tmpbuf;
      97             :       }
      98             :     /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
      99             : # if defined _LIBICONV_VERSION \
     100             :      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     101             :           || defined __sun)
     102             :     {
     103           3 :       char *outptr = tmpbuf;
     104           3 :       size_t outsize = tmpbufsize;
     105           3 :       size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
     106             : 
     107           3 :       if (res == (size_t)(-1))
     108           0 :         return -1;
     109           3 :       count += outptr - tmpbuf;
     110             :     }
     111             : # endif
     112           3 :     length = count;
     113             : # undef tmpbuf
     114             :   }
     115             : 
     116           3 :   if (length == 0)
     117             :     {
     118           1 :       *lengthp = 0;
     119           1 :       return 0;
     120             :     }
     121           2 :   if (*resultp != NULL && *lengthp >= length)
     122           0 :     result = *resultp;
     123             :   else
     124             :     {
     125           2 :       result = (char *) malloc (length);
     126           2 :       if (result == NULL)
     127             :         {
     128           0 :           errno = ENOMEM;
     129           0 :           return -1;
     130             :         }
     131             :     }
     132             : 
     133             :   /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
     134             : # if defined _LIBICONV_VERSION \
     135             :      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     136             :           || defined __sun)
     137             :   /* Return to the initial state.  */
     138           2 :   iconv (cd, NULL, NULL, NULL, NULL);
     139             : # endif
     140             : 
     141             :   /* Do the conversion for real.  */
     142             :   {
     143           2 :     const char *inptr = src;
     144           2 :     size_t insize = srclen;
     145           2 :     char *outptr = result;
     146           2 :     size_t outsize = length;
     147             : 
     148           4 :     while (insize > 0)
     149             :       {
     150           2 :         size_t res = iconv (cd,
     151             :                             (ICONV_CONST char **) &inptr, &insize,
     152             :                             &outptr, &outsize);
     153             : 
     154           2 :         if (res == (size_t)(-1))
     155             :           {
     156           0 :             if (errno == EINVAL)
     157           0 :               break;
     158             :             else
     159           0 :               goto fail;
     160             :           }
     161             : # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
     162             :         /* Irix iconv() inserts a NUL byte if it cannot convert.
     163             :            NetBSD iconv() inserts a question mark if it cannot convert.
     164             :            Only GNU libiconv and GNU libc are known to prefer to fail rather
     165             :            than doing a lossy conversion.  */
     166             :         else if (res > 0)
     167             :           {
     168             :             errno = EILSEQ;
     169             :             goto fail;
     170             :           }
     171             : # endif
     172             :       }
     173             :     /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
     174             : # if defined _LIBICONV_VERSION \
     175             :      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     176             :           || defined __sun)
     177             :     {
     178           2 :       size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
     179             : 
     180           2 :       if (res == (size_t)(-1))
     181           0 :         goto fail;
     182             :     }
     183             : # endif
     184           2 :     if (outsize != 0)
     185           0 :       abort ();
     186             :   }
     187             : 
     188           2 :   *resultp = result;
     189           2 :   *lengthp = length;
     190             : 
     191           2 :   return 0;
     192             : 
     193           0 :  fail:
     194             :   {
     195           0 :     if (result != *resultp)
     196             :       {
     197           0 :         int saved_errno = errno;
     198           0 :         free (result);
     199           0 :         errno = saved_errno;
     200             :       }
     201           0 :     return -1;
     202             :   }
     203             : # undef tmpbufsize
     204             : }
     205             : 
     206             : char *
     207        3617 : str_cd_iconv (const char *src, iconv_t cd)
     208             : {
     209             :   /* For most encodings, a trailing NUL byte in the input will be converted
     210             :      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
     211             :      function is usable for UTF-7, we have to exclude the NUL byte from the
     212             :      conversion and add it by hand afterwards.  */
     213             : # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
     214             :   /* Irix iconv() inserts a NUL byte if it cannot convert.
     215             :      NetBSD iconv() inserts a question mark if it cannot convert.
     216             :      Only GNU libiconv and GNU libc are known to prefer to fail rather
     217             :      than doing a lossy conversion.  For other iconv() implementations,
     218             :      we have to look at the number of irreversible conversions returned;
     219             :      but this information is lost when iconv() returns for an E2BIG reason.
     220             :      Therefore we cannot use the second, faster algorithm.  */
     221             : 
     222             :   char *result = NULL;
     223             :   size_t length = 0;
     224             :   int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
     225             :   char *final_result;
     226             : 
     227             :   if (retval < 0)
     228             :     {
     229             :       if (result != NULL)
     230             :         abort ();
     231             :       return NULL;
     232             :     }
     233             : 
     234             :   /* Add the terminating NUL byte.  */
     235             :   final_result =
     236             :     (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
     237             :   if (final_result == NULL)
     238             :     {
     239             :       free (result);
     240             :       errno = ENOMEM;
     241             :       return NULL;
     242             :     }
     243             :   final_result[length] = '\0';
     244             : 
     245             :   return final_result;
     246             : 
     247             : # else
     248             :   /* This algorithm is likely faster than the one above.  But it may produce
     249             :      iconv() returns for an E2BIG reason, when the output size guess is too
     250             :      small.  Therefore it can only be used when we don't need the number of
     251             :      irreversible conversions performed.  */
     252             :   char *result;
     253             :   size_t result_size;
     254             :   size_t length;
     255        3617 :   const char *inptr = src;
     256        3617 :   size_t inbytes_remaining = strlen (src);
     257             : 
     258             :   /* Make a guess for the worst-case output size, in order to avoid a
     259             :      realloc.  It's OK if the guess is wrong as long as it is not zero and
     260             :      doesn't lead to an integer overflow.  */
     261        3617 :   result_size = inbytes_remaining;
     262             :   {
     263        3617 :     size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
     264        3617 :     if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
     265        3617 :       result_size *= MB_LEN_MAX;
     266             :   }
     267        3617 :   result_size += 1; /* for the terminating NUL */
     268             : 
     269        3617 :   result = (char *) malloc (result_size);
     270        3617 :   if (result == NULL)
     271             :     {
     272           0 :       errno = ENOMEM;
     273           0 :       return NULL;
     274             :     }
     275             : 
     276             :   /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
     277             : # if defined _LIBICONV_VERSION \
     278             :      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     279             :           || defined __sun)
     280             :   /* Set to the initial state.  */
     281        3617 :   iconv (cd, NULL, NULL, NULL, NULL);
     282             : # endif
     283             : 
     284             :   /* Do the conversion.  */
     285             :   {
     286        3617 :     char *outptr = result;
     287        3617 :     size_t outbytes_remaining = result_size - 1;
     288             : 
     289             :     for (;;)
     290           0 :       {
     291             :         /* Here inptr + inbytes_remaining = src + strlen (src),
     292             :                 outptr + outbytes_remaining = result + result_size - 1.  */
     293        3617 :         size_t res = iconv (cd,
     294             :                             (ICONV_CONST char **) &inptr, &inbytes_remaining,
     295             :                             &outptr, &outbytes_remaining);
     296             : 
     297        3617 :         if (res == (size_t)(-1))
     298             :           {
     299        1776 :             if (errno == EINVAL)
     300           2 :               break;
     301        1774 :             else if (errno == E2BIG)
     302             :               {
     303           0 :                 size_t used = outptr - result;
     304           0 :                 size_t newsize = result_size * 2;
     305             :                 char *newresult;
     306             : 
     307           0 :                 if (!(newsize > result_size))
     308             :                   {
     309           0 :                     errno = ENOMEM;
     310        1774 :                     goto failed;
     311             :                   }
     312           0 :                 newresult = (char *) realloc (result, newsize);
     313           0 :                 if (newresult == NULL)
     314             :                   {
     315           0 :                     errno = ENOMEM;
     316           0 :                     goto failed;
     317             :                   }
     318           0 :                 result = newresult;
     319           0 :                 result_size = newsize;
     320           0 :                 outptr = result + used;
     321           0 :                 outbytes_remaining = result_size - 1 - used;
     322             :               }
     323             :             else
     324        1774 :               goto failed;
     325             :           }
     326             :         else
     327        1841 :           break;
     328             :       }
     329             :     /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
     330             : # if defined _LIBICONV_VERSION \
     331             :      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     332             :           || defined __sun)
     333             :     for (;;)
     334           0 :       {
     335             :         /* Here outptr + outbytes_remaining = result + result_size - 1.  */
     336        1843 :         size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
     337             : 
     338        1843 :         if (res == (size_t)(-1))
     339             :           {
     340           0 :             if (errno == E2BIG)
     341             :               {
     342           0 :                 size_t used = outptr - result;
     343           0 :                 size_t newsize = result_size * 2;
     344             :                 char *newresult;
     345             : 
     346           0 :                 if (!(newsize > result_size))
     347             :                   {
     348           0 :                     errno = ENOMEM;
     349           0 :                     goto failed;
     350             :                   }
     351           0 :                 newresult = (char *) realloc (result, newsize);
     352           0 :                 if (newresult == NULL)
     353             :                   {
     354           0 :                     errno = ENOMEM;
     355           0 :                     goto failed;
     356             :                   }
     357           0 :                 result = newresult;
     358           0 :                 result_size = newsize;
     359           0 :                 outptr = result + used;
     360           0 :                 outbytes_remaining = result_size - 1 - used;
     361             :               }
     362             :             else
     363           0 :               goto failed;
     364             :           }
     365             :         else
     366        1843 :           break;
     367             :       }
     368             : # endif
     369             : 
     370             :     /* Add the terminating NUL byte.  */
     371        1843 :     *outptr++ = '\0';
     372             : 
     373        1843 :     length = outptr - result;
     374             :   }
     375             : 
     376             :   /* Give away unused memory.  */
     377        1843 :   if (length < result_size)
     378             :     {
     379        1843 :       char *smaller_result = (char *) realloc (result, length);
     380             : 
     381        1843 :       if (smaller_result != NULL)
     382        1843 :         result = smaller_result;
     383             :     }
     384             : 
     385        1843 :   return result;
     386             : 
     387        1774 :  failed:
     388             :   {
     389        1774 :     int saved_errno = errno;
     390        1774 :     free (result);
     391        1774 :     errno = saved_errno;
     392        1774 :     return NULL;
     393             :   }
     394             : 
     395             : # endif
     396             : }
     397             : 
     398             : #endif
     399             : 
     400             : char *
     401        3783 : str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
     402             : {
     403        3783 :   if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
     404             :     {
     405         170 :       char *result = strdup (src);
     406             : 
     407         170 :       if (result == NULL)
     408           0 :         errno = ENOMEM;
     409         170 :       return result;
     410             :     }
     411             :   else
     412             :     {
     413             : #if HAVE_ICONV
     414             :       iconv_t cd;
     415             :       char *result;
     416             : 
     417             :       /* Avoid glibc-2.1 bug with EUC-KR.  */
     418             : # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     419             :      && !defined _LIBICONV_VERSION
     420             :       if (c_strcasecmp (from_codeset, "EUC-KR") == 0
     421             :           || c_strcasecmp (to_codeset, "EUC-KR") == 0)
     422             :         {
     423             :           errno = EINVAL;
     424             :           return NULL;
     425             :         }
     426             : # endif
     427        3613 :       cd = iconv_open (to_codeset, from_codeset);
     428        3613 :       if (cd == (iconv_t) -1)
     429           0 :         return NULL;
     430             : 
     431        3613 :       result = str_cd_iconv (src, cd);
     432             : 
     433        3613 :       if (result == NULL)
     434             :         {
     435             :           /* Close cd, but preserve the errno from str_cd_iconv.  */
     436        1773 :           int saved_errno = errno;
     437        1773 :           iconv_close (cd);
     438        1773 :           errno = saved_errno;
     439             :         }
     440             :       else
     441             :         {
     442        1840 :           if (iconv_close (cd) < 0)
     443             :             {
     444             :               /* Return NULL, but free the allocated memory, and while doing
     445             :                  that, preserve the errno from iconv_close.  */
     446           0 :               int saved_errno = errno;
     447           0 :               free (result);
     448           0 :               errno = saved_errno;
     449           0 :               return NULL;
     450             :             }
     451             :         }
     452        3613 :       return result;
     453             : #else
     454             :       /* This is a different error code than if iconv_open existed but didn't
     455             :          support from_codeset and to_codeset, so that the caller can emit
     456             :          an error message such as
     457             :            "iconv() is not supported. Installing GNU libiconv and
     458             :             then reinstalling this package would fix this."  */
     459             :       errno = ENOSYS;
     460             :       return NULL;
     461             : #endif
     462             :     }
     463             : }

Generated by: LCOV version 1.13