LCOV - code coverage report
Current view: top level - lib - idna.c (source / functions) Hit Total Coverage
Test: GNU Libidn Lines: 235 252 93.3 %
Date: 2020-07-22 17:53:13 Functions: 11 11 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* idna.c --- Prototypes for Internationalized Domain Name library.
       2             :    Copyright (C) 2002-2020 Simon Josefsson
       3             : 
       4             :    This file is part of GNU Libidn.
       5             : 
       6             :    GNU Libidn is free software: you can redistribute it and/or
       7             :    modify it under the terms of either:
       8             : 
       9             :      * the GNU Lesser General Public License as published by the Free
      10             :        Software Foundation; either version 3 of the License, or (at
      11             :        your option) any later version.
      12             : 
      13             :    or
      14             : 
      15             :      * the GNU General Public License as published by the Free
      16             :        Software Foundation; either version 2 of the License, or (at
      17             :        your option) any later version.
      18             : 
      19             :    or both in parallel, as here.
      20             : 
      21             :    GNU Libidn is distributed in the hope that it will be useful,
      22             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      23             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      24             :    General Public License for more details.
      25             : 
      26             :    You should have received copies of the GNU General Public License and
      27             :    the GNU Lesser General Public License along with this program.  If
      28             :    not, see <http://www.gnu.org/licenses/>. */
      29             : 
      30             : #ifdef HAVE_CONFIG_H
      31             : # include "config.h"
      32             : #endif
      33             : 
      34             : #include <stdlib.h>
      35             : #include <string.h>
      36             : #include <stringprep.h>
      37             : #include <punycode.h>
      38             : 
      39             : #include "idna.h"
      40             : 
      41             : /* Get c_strcasecmp. */
      42             : #include <c-strcase.h>
      43             : 
      44             : #define DOTP(c) ((c) == 0x002E || (c) == 0x3002 ||      \
      45             :                  (c) == 0xFF0E || (c) == 0xFF61)
      46             : 
      47             : /* Core functions */
      48             : 
      49             : /**
      50             :  * idna_to_ascii_4i:
      51             :  * @in: input array with unicode code points.
      52             :  * @inlen: length of input array with unicode code points.
      53             :  * @out: output zero terminated string that must have room for at
      54             :  *       least 63 characters plus the terminating zero.
      55             :  * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
      56             :  *   %IDNA_USE_STD3_ASCII_RULES.
      57             :  *
      58             :  * The ToASCII operation takes a sequence of Unicode code points that
      59             :  * make up one domain label and transforms it into a sequence of code
      60             :  * points in the ASCII range (0..7F). If ToASCII succeeds, the
      61             :  * original sequence and the resulting sequence are equivalent labels.
      62             :  *
      63             :  * It is important to note that the ToASCII operation can fail. ToASCII
      64             :  * fails if any step of it fails. If any step of the ToASCII operation
      65             :  * fails on any label in a domain name, that domain name MUST NOT be used
      66             :  * as an internationalized domain name. The method for dealing with this
      67             :  * failure is application-specific.
      68             :  *
      69             :  * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
      70             :  * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
      71             :  * sequence of ASCII code points or a failure condition.
      72             :  *
      73             :  * ToASCII never alters a sequence of code points that are all in the ASCII
      74             :  * range to begin with (although it could fail). Applying the ToASCII
      75             :  * operation multiple times has exactly the same effect as applying it just
      76             :  * once.
      77             :  *
      78             :  * Return value: Returns 0 on success, or an #Idna_rc error code.
      79             :  */
      80             : int
      81       24164 : idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
      82             : {
      83             :   size_t len, outlen;
      84             :   uint32_t *src;                /* XXX don't need to copy data? */
      85             :   int rc;
      86             : 
      87             :   /*
      88             :    * ToASCII consists of the following steps:
      89             :    *
      90             :    * 1. If all code points in the sequence are in the ASCII range (0..7F)
      91             :    * then skip to step 3.
      92             :    */
      93             : 
      94             :   {
      95             :     size_t i;
      96             :     int inasciirange;
      97             : 
      98       24164 :     inasciirange = 1;
      99      134428 :     for (i = 0; i < inlen; i++)
     100      110264 :       if (in[i] > 0x7F)
     101       63414 :         inasciirange = 0;
     102       24164 :     if (inasciirange)
     103             :       {
     104       10806 :         src = malloc (sizeof (in[0]) * (inlen + 1));
     105       10806 :         if (src == NULL)
     106           0 :           return IDNA_MALLOC_ERROR;
     107             : 
     108       10806 :         memcpy (src, in, sizeof (in[0]) * inlen);
     109       10806 :         src[inlen] = 0;
     110             : 
     111       10806 :         goto step3;
     112             :       }
     113             :   }
     114             : 
     115             :   /*
     116             :    * 2. Perform the steps specified in [NAMEPREP] and fail if there is
     117             :    * an error. The AllowUnassigned flag is used in [NAMEPREP].
     118             :    */
     119             : 
     120             :   {
     121             :     char *p;
     122             : 
     123       13358 :     p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
     124       13358 :     if (p == NULL)
     125         176 :       return IDNA_MALLOC_ERROR;
     126             : 
     127       13182 :     len = strlen (p);
     128             :     do
     129             :       {
     130             :         char *newp;
     131             : 
     132       14588 :         len = 2 * len + 10;     /* XXX better guess? */
     133       14588 :         newp = realloc (p, len);
     134       14588 :         if (newp == NULL)
     135             :           {
     136           0 :             free (p);
     137           0 :             return IDNA_MALLOC_ERROR;
     138             :           }
     139       14588 :         p = newp;
     140             : 
     141       14588 :         if (flags & IDNA_ALLOW_UNASSIGNED)
     142        7059 :           rc = stringprep_nameprep (p, len);
     143             :         else
     144        7529 :           rc = stringprep_nameprep_no_unassigned (p, len);
     145             :       }
     146       14588 :     while (rc == STRINGPREP_TOO_SMALL_BUFFER);
     147             : 
     148       13182 :     if (rc != STRINGPREP_OK)
     149             :       {
     150        1644 :         free (p);
     151        1644 :         return IDNA_STRINGPREP_ERROR;
     152             :       }
     153             : 
     154       11538 :     src = stringprep_utf8_to_ucs4 (p, -1, NULL);
     155             : 
     156       11538 :     free (p);
     157             : 
     158       11538 :     if (!src)
     159           0 :       return IDNA_MALLOC_ERROR;
     160             :   }
     161             : 
     162       11538 : step3:
     163             :   /*
     164             :    * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
     165             :    *
     166             :    * (a) Verify the absence of non-LDH ASCII code points; that is,
     167             :    * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
     168             :    *
     169             :    * (b) Verify the absence of leading and trailing hyphen-minus;
     170             :    * that is, the absence of U+002D at the beginning and end of
     171             :    * the sequence.
     172             :    */
     173             : 
     174       22344 :   if (flags & IDNA_USE_STD3_ASCII_RULES)
     175             :     {
     176             :       size_t i;
     177             : 
     178       66795 :       for (i = 0; src[i]; i++)
     179       57960 :         if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
     180       56974 :             (src[i] >= 0x3A && src[i] <= 0x40) ||
     181       56427 :             (src[i] >= 0x5B && src[i] <= 0x60) ||
     182       55865 :             (src[i] >= 0x7B && src[i] <= 0x7F))
     183             :           {
     184        2313 :             free (src);
     185        2313 :             return IDNA_CONTAINS_NON_LDH;
     186             :           }
     187             : 
     188        8835 :       if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
     189             :         {
     190         638 :           free (src);
     191         638 :           return IDNA_CONTAINS_MINUS;
     192             :         }
     193             :     }
     194             : 
     195             :   /*
     196             :    * 4. If all code points in the sequence are in the ASCII range
     197             :    * (0..7F), then skip to step 8.
     198             :    */
     199             : 
     200             :   {
     201             :     size_t i;
     202             :     int inasciirange;
     203             : 
     204       19393 :     inasciirange = 1;
     205      133950 :     for (i = 0; src[i]; i++)
     206             :       {
     207      114557 :         if (src[i] > 0x7F)
     208       69363 :           inasciirange = 0;
     209             :         /* copy string to output buffer if we are about to skip to step8 */
     210      114557 :         if (i < 64)
     211      106299 :           out[i] = src[i];
     212             :       }
     213       19393 :     if (i < 64)
     214       19104 :       out[i] = '\0';
     215             :     else
     216             :       {
     217         289 :         free (src);
     218         289 :         return IDNA_INVALID_LENGTH;
     219             :       }
     220       19104 :     if (inasciirange)
     221        9574 :       goto step8;
     222             :   }
     223             : 
     224             :   /*
     225             :    * 5. Verify that the sequence does NOT begin with the ACE prefix.
     226             :    *
     227             :    */
     228             : 
     229             :   {
     230             :     size_t i;
     231             :     int match;
     232             : 
     233        9530 :     match = 1;
     234       20473 :     for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
     235       10943 :       if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
     236        9242 :         match = 0;
     237        9530 :     if (match)
     238             :       {
     239         288 :         free (src);
     240         288 :         return IDNA_CONTAINS_ACE_PREFIX;
     241             :       }
     242             :   }
     243             : 
     244             :   /*
     245             :    * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
     246             :    * and fail if there is an error.
     247             :    */
     248       82901 :   for (len = 0; src[len]; len++)
     249             :     ;
     250        9242 :   src[len] = '\0';
     251        9242 :   outlen = 63 - strlen (IDNA_ACE_PREFIX);
     252        9242 :   rc = punycode_encode (len, src, NULL,
     253             :                         &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
     254        9242 :   if (rc != PUNYCODE_SUCCESS)
     255             :     {
     256         851 :       free (src);
     257         851 :       return IDNA_PUNYCODE_ERROR;
     258             :     }
     259        8391 :   out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
     260             : 
     261             :   /*
     262             :    * 7. Prepend the ACE prefix.
     263             :    */
     264             : 
     265        8391 :   memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
     266             : 
     267             :   /*
     268             :    * 8. Verify that the number of code points is in the range 1 to 63
     269             :    * inclusive (0 is excluded).
     270             :    */
     271             : 
     272       17965 : step8:
     273       17965 :   free (src);
     274       17965 :   if (strlen (out) < 1)
     275        4581 :     return IDNA_INVALID_LENGTH;
     276             : 
     277       13384 :   return IDNA_SUCCESS;
     278             : }
     279             : 
     280             : /* ToUnicode().  May realloc() utf8in.  Will free utf8in unconditionally. */
     281             : static int
     282       44238 : idna_to_unicode_internal (char *utf8in,
     283             :                           uint32_t * out, size_t *outlen, int flags)
     284             : {
     285             :   int rc;
     286             :   char tmpout[64];
     287       44238 :   size_t utf8len = strlen (utf8in) + 1;
     288       44238 :   size_t addlen = 0, addinc = utf8len / 10 + 1;
     289             : 
     290             :   /*
     291             :    * ToUnicode consists of the following steps:
     292             :    *
     293             :    * 1. If the sequence contains any code points outside the ASCII range
     294             :    * (0..7F) then proceed to step 2, otherwise skip to step 3.
     295             :    */
     296             : 
     297             :   {
     298             :     size_t i;
     299             :     int inasciirange;
     300             : 
     301       44238 :     inasciirange = 1;
     302      492505 :     for (i = 0; utf8in[i]; i++)
     303      448267 :       if (utf8in[i] & ~0x7F)
     304      169582 :         inasciirange = 0;
     305       44238 :     if (inasciirange)
     306       38640 :       goto step3;
     307             :   }
     308             : 
     309             :   /*
     310             :    * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
     311             :    * error. (If step 3 of ToASCII is also performed here, it will not
     312             :    * affect the overall behavior of ToUnicode, but it is not
     313             :    * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
     314             :    */
     315             :   do
     316             :     {
     317       11611 :       char *newp = realloc (utf8in, utf8len + addlen);
     318       11611 :       if (newp == NULL)
     319             :         {
     320           0 :           free (utf8in);
     321           0 :           return IDNA_MALLOC_ERROR;
     322             :         }
     323       11611 :       utf8in = newp;
     324       11611 :       if (flags & IDNA_ALLOW_UNASSIGNED)
     325        6460 :         rc = stringprep_nameprep (utf8in, utf8len + addlen);
     326             :       else
     327        5151 :         rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
     328       11611 :       addlen += addinc;
     329       11611 :       addinc *= 2;
     330             :     }
     331       11611 :   while (rc == STRINGPREP_TOO_SMALL_BUFFER);
     332             : 
     333        5598 :   if (rc != STRINGPREP_OK)
     334             :     {
     335        2113 :       free (utf8in);
     336        2113 :       return IDNA_STRINGPREP_ERROR;
     337             :     }
     338             : 
     339             :   /* 3. Verify that the sequence begins with the ACE prefix, and save a
     340             :    * copy of the sequence.
     341             :    * ... The ToASCII and ToUnicode operations MUST recognize the ACE
     342             :    * prefix in a case-insensitive manner.
     343             :    */
     344             : 
     345        3485 : step3:
     346       42125 :   if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)) != 0)
     347             :     {
     348       20880 :       free (utf8in);
     349       20880 :       return IDNA_NO_ACE_PREFIX;
     350             :     }
     351             : 
     352             :   /* 4. Remove the ACE prefix.
     353             :    */
     354             : 
     355       21245 :   memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
     356       21245 :            strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
     357             : 
     358             :   /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
     359             :    * and fail if there is an error. Save a copy of the result of
     360             :    * this step.
     361             :    */
     362             : 
     363       21245 :   (*outlen)--;                  /* reserve one for the zero */
     364             : 
     365       21245 :   rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
     366       21245 :   if (rc != PUNYCODE_SUCCESS)
     367             :     {
     368        3349 :       free (utf8in);
     369        3349 :       return IDNA_PUNYCODE_ERROR;
     370             :     }
     371             : 
     372       17896 :   out[*outlen] = 0;             /* add zero */
     373             : 
     374             :   /* 6. Apply ToASCII.
     375             :    */
     376             : 
     377       17896 :   rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
     378       17896 :   if (rc != IDNA_SUCCESS)
     379             :     {
     380        9531 :       free (utf8in);
     381        9531 :       return rc;
     382             :     }
     383             : 
     384             :   /* 7. Verify that the result of step 6 matches the saved copy from
     385             :    * step 3, using a case-insensitive ASCII comparison.
     386             :    */
     387             : 
     388        8365 :   if (c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
     389             :     {
     390        4692 :       free (utf8in);
     391        4692 :       return IDNA_ROUNDTRIP_VERIFY_ERROR;
     392             :     }
     393             : 
     394             :   /* 8. Return the saved copy from step 5.
     395             :    */
     396             : 
     397        3673 :   free (utf8in);
     398        3673 :   return IDNA_SUCCESS;
     399             : }
     400             : 
     401             : /**
     402             :  * idna_to_unicode_44i:
     403             :  * @in: input array with unicode code points.
     404             :  * @inlen: length of input array with unicode code points.
     405             :  * @out: output array with unicode code points.
     406             :  * @outlen: on input, maximum size of output array with unicode code points,
     407             :  *          on exit, actual size of output array with unicode code points.
     408             :  * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
     409             :  *   %IDNA_USE_STD3_ASCII_RULES.
     410             :  *
     411             :  * The ToUnicode operation takes a sequence of Unicode code points
     412             :  * that make up one domain label and returns a sequence of Unicode
     413             :  * code points. If the input sequence is a label in ACE form, then the
     414             :  * result is an equivalent internationalized label that is not in ACE
     415             :  * form, otherwise the original sequence is returned unaltered.
     416             :  *
     417             :  * ToUnicode never fails. If any step fails, then the original input
     418             :  * sequence is returned immediately in that step.
     419             :  *
     420             :  * The Punycode decoder can never output more code points than it
     421             :  * inputs, but Nameprep can, and therefore ToUnicode can.  Note that
     422             :  * the number of octets needed to represent a sequence of code points
     423             :  * depends on the particular character encoding used.
     424             :  *
     425             :  * The inputs to ToUnicode are a sequence of code points, the
     426             :  * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
     427             :  * ToUnicode is always a sequence of Unicode code points.
     428             :  *
     429             :  * Return value: Returns #Idna_rc error condition, but it must only be
     430             :  *   used for debugging purposes.  The output buffer is always
     431             :  *   guaranteed to contain the correct data according to the
     432             :  *   specification (sans malloc induced errors).  NB!  This means that
     433             :  *   you normally ignore the return code from this function, as
     434             :  *   checking it means breaking the standard.
     435             :  */
     436             : int
     437       45130 : idna_to_unicode_44i (const uint32_t * in, size_t inlen,
     438             :                      uint32_t * out, size_t *outlen, int flags)
     439             : {
     440             :   int rc;
     441       45130 :   size_t outlensave = *outlen;
     442             :   char *p;
     443             : 
     444       45130 :   p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
     445       45130 :   if (p == NULL)
     446         892 :     return IDNA_MALLOC_ERROR;
     447             : 
     448       44238 :   rc = idna_to_unicode_internal (p, out, outlen, flags);
     449       44238 :   if (rc != IDNA_SUCCESS)
     450             :     {
     451       40565 :       memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
     452       40565 :                                          inlen : outlensave));
     453       40565 :       *outlen = inlen;
     454             :     }
     455             : 
     456             :   /* p is freed in idna_to_unicode_internal.  */
     457             : 
     458       44238 :   return rc;
     459             : }
     460             : 
     461             : /* Wrappers that handle several labels */
     462             : 
     463             : /**
     464             :  * idna_to_ascii_4z:
     465             :  * @input: zero terminated input Unicode string.
     466             :  * @output: pointer to newly allocated output string.
     467             :  * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
     468             :  *   %IDNA_USE_STD3_ASCII_RULES.
     469             :  *
     470             :  * Convert UCS-4 domain name to ASCII string.  The domain name may
     471             :  * contain several labels, separated by dots.  The output buffer must
     472             :  * be deallocated by the caller.
     473             :  *
     474             :  * Return value: Returns %IDNA_SUCCESS on success, or error code.
     475             :  **/
     476             : int
     477        1568 : idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
     478             : {
     479        1568 :   const uint32_t *start = input;
     480             :   const uint32_t *end;
     481             :   char buf[64];
     482        1568 :   char *out = NULL;
     483             :   int rc;
     484             : 
     485             :   /* 1) Whenever dots are used as label separators, the following
     486             :      characters MUST be recognized as dots: U+002E (full stop),
     487             :      U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
     488             :      U+FF61 (halfwidth ideographic full stop). */
     489             : 
     490        1568 :   if (input[0] == 0)
     491             :     {
     492             :       /* Handle implicit zero-length root label. */
     493         131 :       *output = malloc (1);
     494         131 :       if (!*output)
     495           0 :         return IDNA_MALLOC_ERROR;
     496         131 :       strcpy (*output, "");
     497         131 :       return IDNA_SUCCESS;
     498             :     }
     499             : 
     500        1437 :   if (DOTP (input[0]) && input[1] == 0)
     501             :     {
     502             :       /* Handle explicit zero-length root label. */
     503          14 :       *output = malloc (2);
     504          14 :       if (!*output)
     505           0 :         return IDNA_MALLOC_ERROR;
     506          14 :       strcpy (*output, ".");
     507          14 :       return IDNA_SUCCESS;
     508             :     }
     509             : 
     510        1423 :   *output = NULL;
     511             :   do
     512             :     {
     513        5752 :       end = start;
     514             : 
     515       23774 :       for (; *end && !DOTP (*end); end++)
     516             :         ;
     517             : 
     518        5752 :       if (*end == '\0' && start == end)
     519             :         {
     520             :           /* Handle explicit zero-length root label. */
     521          18 :           buf[0] = '\0';
     522             :         }
     523             :       else
     524             :         {
     525        5734 :           rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags);
     526        5734 :           if (rc != IDNA_SUCCESS)
     527             :             {
     528         847 :               free (out);
     529         847 :               return rc;
     530             :             }
     531             :         }
     532             : 
     533        4905 :       if (out)
     534             :         {
     535        4199 :           size_t l = strlen (out) + 1 + strlen (buf) + 1;
     536        4199 :           char *newp = realloc (out, l);
     537        4199 :           if (!newp)
     538             :             {
     539           0 :               free (out);
     540           0 :               return IDNA_MALLOC_ERROR;
     541             :             }
     542        4199 :           out = newp;
     543        4199 :           strcat (out, ".");
     544        4199 :           strcat (out, buf);
     545             :         }
     546             :       else
     547             :         {
     548         706 :           out = strdup (buf);
     549         706 :           if (!out)
     550           0 :             return IDNA_MALLOC_ERROR;
     551             :         }
     552             : 
     553        4905 :       start = end + 1;
     554             :     }
     555        4905 :   while (*end);
     556             : 
     557         576 :   *output = out;
     558             : 
     559         576 :   return IDNA_SUCCESS;
     560             : }
     561             : 
     562             : /**
     563             :  * idna_to_ascii_8z:
     564             :  * @input: zero terminated input UTF-8 string.
     565             :  * @output: pointer to newly allocated output string.
     566             :  * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
     567             :  *   %IDNA_USE_STD3_ASCII_RULES.
     568             :  *
     569             :  * Convert UTF-8 domain name to ASCII string.  The domain name may
     570             :  * contain several labels, separated by dots.  The output buffer must
     571             :  * be deallocated by the caller.
     572             :  *
     573             :  * Return value: Returns %IDNA_SUCCESS on success, or error code.
     574             :  **/
     575             : int
     576        1186 : idna_to_ascii_8z (const char *input, char **output, int flags)
     577             : {
     578             :   uint32_t *ucs4;
     579             :   size_t ucs4len;
     580             :   int rc;
     581             : 
     582        1186 :   ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
     583        1186 :   if (!ucs4)
     584         131 :     return IDNA_ICONV_ERROR;
     585             : 
     586        1055 :   rc = idna_to_ascii_4z (ucs4, output, flags);
     587             : 
     588        1055 :   free (ucs4);
     589             : 
     590        1055 :   return rc;
     591             : 
     592             : }
     593             : 
     594             : /**
     595             :  * idna_to_ascii_lz:
     596             :  * @input: zero terminated input string encoded in the current locale's
     597             :  *   character set.
     598             :  * @output: pointer to newly allocated output string.
     599             :  * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
     600             :  *   %IDNA_USE_STD3_ASCII_RULES.
     601             :  *
     602             :  * Convert domain name in the locale's encoding to ASCII string.  The
     603             :  * domain name may contain several labels, separated by dots.  The
     604             :  * output buffer must be deallocated by the caller.
     605             :  *
     606             :  * Return value: Returns %IDNA_SUCCESS on success, or error code.
     607             :  **/
     608             : int
     609         724 : idna_to_ascii_lz (const char *input, char **output, int flags)
     610             : {
     611             :   char *utf8;
     612             :   int rc;
     613             : 
     614         724 :   utf8 = stringprep_locale_to_utf8 (input);
     615         724 :   if (!utf8)
     616         364 :     return IDNA_ICONV_ERROR;
     617             : 
     618         360 :   rc = idna_to_ascii_8z (utf8, output, flags);
     619             : 
     620         360 :   free (utf8);
     621             : 
     622         360 :   return rc;
     623             : }
     624             : 
     625             : /**
     626             :  * idna_to_unicode_4z4z:
     627             :  * @input: zero-terminated Unicode string.
     628             :  * @output: pointer to newly allocated output Unicode string.
     629             :  * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
     630             :  *   %IDNA_USE_STD3_ASCII_RULES.
     631             :  *
     632             :  * Convert possibly ACE encoded domain name in UCS-4 format into a
     633             :  * UCS-4 string.  The domain name may contain several labels,
     634             :  * separated by dots.  The output buffer must be deallocated by the
     635             :  * caller.
     636             :  *
     637             :  * Return value: Returns %IDNA_SUCCESS on success, or error code.
     638             :  **/
     639             : int
     640        4047 : idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
     641             : {
     642        4047 :   const uint32_t *start = input;
     643             :   const uint32_t *end;
     644             :   uint32_t *buf;
     645             :   size_t buflen;
     646        4047 :   uint32_t *out = NULL;
     647        4047 :   size_t outlen = 0;
     648             : 
     649        4047 :   *output = NULL;
     650             : 
     651             :   do
     652             :     {
     653       44328 :       end = start;
     654             : 
     655      350537 :       for (; *end && !DOTP (*end); end++)
     656             :         ;
     657             : 
     658       44328 :       buflen = (size_t) (end - start);
     659       44328 :       buf = malloc (sizeof (buf[0]) * (buflen + 1));
     660       44328 :       if (!buf)
     661             :         {
     662           0 :           free (out);
     663           0 :           return IDNA_MALLOC_ERROR;
     664             :         }
     665             : 
     666             :       /* don't check return code as per specification! */
     667       44328 :       idna_to_unicode_44i (start, (size_t) (end - start),
     668             :                            buf, &buflen, flags);
     669             : 
     670       44328 :       if (out)
     671             :         {
     672       40281 :           uint32_t *newp = realloc (out,
     673             :                                     sizeof (out[0])
     674       40281 :                                     * (outlen + 1 + buflen + 1));
     675       40281 :           if (!newp)
     676             :             {
     677           0 :               free (buf);
     678           0 :               free (out);
     679           0 :               return IDNA_MALLOC_ERROR;
     680             :             }
     681       40281 :           out = newp;
     682       40281 :           out[outlen++] = 0x002E;       /* '.' (full stop) */
     683       40281 :           memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
     684       40281 :           outlen += buflen;
     685       40281 :           out[outlen] = 0x0;
     686       40281 :           free (buf);
     687             :         }
     688             :       else
     689             :         {
     690        4047 :           out = buf;
     691        4047 :           outlen = buflen;
     692        4047 :           out[outlen] = 0x0;
     693             :         }
     694             : 
     695       44328 :       start = end + 1;
     696             :     }
     697       44328 :   while (*end);
     698             : 
     699        4047 :   *output = out;
     700             : 
     701        4047 :   return IDNA_SUCCESS;
     702             : }
     703             : 
     704             : /**
     705             :  * idna_to_unicode_8z4z:
     706             :  * @input: zero-terminated UTF-8 string.
     707             :  * @output: pointer to newly allocated output Unicode string.
     708             :  * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
     709             :  *   %IDNA_USE_STD3_ASCII_RULES.
     710             :  *
     711             :  * Convert possibly ACE encoded domain name in UTF-8 format into a
     712             :  * UCS-4 string.  The domain name may contain several labels,
     713             :  * separated by dots.  The output buffer must be deallocated by the
     714             :  * caller.
     715             :  *
     716             :  * Return value: Returns %IDNA_SUCCESS on success, or error code.
     717             :  **/
     718             : int
     719        3580 : idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
     720             : {
     721             :   uint32_t *ucs4;
     722             :   size_t ucs4len;
     723             :   int rc;
     724             : 
     725        3580 :   ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
     726        3580 :   if (!ucs4)
     727         313 :     return IDNA_ICONV_ERROR;
     728             : 
     729        3267 :   rc = idna_to_unicode_4z4z (ucs4, output, flags);
     730        3267 :   free (ucs4);
     731             : 
     732        3267 :   return rc;
     733             : }
     734             : 
     735             : /**
     736             :  * idna_to_unicode_8z8z:
     737             :  * @input: zero-terminated UTF-8 string.
     738             :  * @output: pointer to newly allocated output UTF-8 string.
     739             :  * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
     740             :  *   %IDNA_USE_STD3_ASCII_RULES.
     741             :  *
     742             :  * Convert possibly ACE encoded domain name in UTF-8 format into a
     743             :  * UTF-8 string.  The domain name may contain several labels,
     744             :  * separated by dots.  The output buffer must be deallocated by the
     745             :  * caller.
     746             :  *
     747             :  * Return value: Returns %IDNA_SUCCESS on success, or error code.
     748             :  **/
     749             : int
     750        2800 : idna_to_unicode_8z8z (const char *input, char **output, int flags)
     751             : {
     752             :   uint32_t *ucs4;
     753             :   int rc;
     754             : 
     755        2800 :   rc = idna_to_unicode_8z4z (input, &ucs4, flags);
     756        2800 :   if (rc != IDNA_SUCCESS)
     757         217 :     return rc;
     758             : 
     759        2583 :   *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
     760        2583 :   free (ucs4);
     761             : 
     762        2583 :   if (!*output)
     763           0 :     return IDNA_ICONV_ERROR;
     764             : 
     765        2583 :   return IDNA_SUCCESS;
     766             : }
     767             : 
     768             : /**
     769             :  * idna_to_unicode_8zlz:
     770             :  * @input: zero-terminated UTF-8 string.
     771             :  * @output: pointer to newly allocated output string encoded in the
     772             :  *   current locale's character set.
     773             :  * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
     774             :  *   %IDNA_USE_STD3_ASCII_RULES.
     775             :  *
     776             :  * Convert possibly ACE encoded domain name in UTF-8 format into a
     777             :  * string encoded in the current locale's character set.  The domain
     778             :  * name may contain several labels, separated by dots.  The output
     779             :  * buffer must be deallocated by the caller.
     780             :  *
     781             :  * Return value: Returns %IDNA_SUCCESS on success, or error code.
     782             :  **/
     783             : int
     784        1680 : idna_to_unicode_8zlz (const char *input, char **output, int flags)
     785             : {
     786             :   char *utf8;
     787             :   int rc;
     788             : 
     789        1680 :   rc = idna_to_unicode_8z8z (input, &utf8, flags);
     790        1680 :   if (rc != IDNA_SUCCESS)
     791         108 :     return rc;
     792             : 
     793        1572 :   *output = stringprep_utf8_to_locale (utf8);
     794        1572 :   free (utf8);
     795             : 
     796        1572 :   if (!*output)
     797         589 :     return IDNA_ICONV_ERROR;
     798             : 
     799         983 :   return IDNA_SUCCESS;
     800             : }
     801             : 
     802             : /**
     803             :  * idna_to_unicode_lzlz:
     804             :  * @input: zero-terminated string encoded in the current locale's
     805             :  *   character set.
     806             :  * @output: pointer to newly allocated output string encoded in the
     807             :  *   current locale's character set.
     808             :  * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
     809             :  *   %IDNA_USE_STD3_ASCII_RULES.
     810             :  *
     811             :  * Convert possibly ACE encoded domain name in the locale's character
     812             :  * set into a string encoded in the current locale's character set.
     813             :  * The domain name may contain several labels, separated by dots.  The
     814             :  * output buffer must be deallocated by the caller.
     815             :  *
     816             :  * Return value: Returns %IDNA_SUCCESS on success, or error code.
     817             :  **/
     818             : int
     819        1116 : idna_to_unicode_lzlz (const char *input, char **output, int flags)
     820             : {
     821             :   char *utf8;
     822             :   int rc;
     823             : 
     824        1116 :   utf8 = stringprep_locale_to_utf8 (input);
     825        1116 :   if (!utf8)
     826         552 :     return IDNA_ICONV_ERROR;
     827             : 
     828         564 :   rc = idna_to_unicode_8zlz (utf8, output, flags);
     829         564 :   free (utf8);
     830             : 
     831         564 :   return rc;
     832             : }
     833             : 
     834             : /**
     835             :  * IDNA_ACE_PREFIX
     836             :  *
     837             :  * The IANA allocated prefix to use for IDNA. "xn--"
     838             :  */
     839             : 
     840             : /**
     841             :  * Idna_rc:
     842             :  * @IDNA_SUCCESS: Successful operation.  This value is guaranteed to
     843             :  *   always be zero, the remaining ones are only guaranteed to hold
     844             :  *   non-zero values, for logical comparison purposes.
     845             :  * @IDNA_STRINGPREP_ERROR:  Error during string preparation.
     846             :  * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
     847             :  * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
     848             :  *   the string contains non-LDH ASCII characters.
     849             :  * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility
     850             :  *   with typo in earlier versions.
     851             :  * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
     852             :  *   the string contains a leading or trailing hyphen-minus (U+002D).
     853             :  * @IDNA_INVALID_LENGTH: The final output string is not within the
     854             :  *   (inclusive) range 1 to 63 characters.
     855             :  * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
     856             :  *   (for ToUnicode).
     857             :  * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
     858             :  *   string does not equal the input.
     859             :  * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
     860             :  *   ToASCII).
     861             :  * @IDNA_ICONV_ERROR: Character encoding conversion error.
     862             :  * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
     863             :  *   fatal error).
     864             :  * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
     865             :  *   internally in libc).
     866             :  *
     867             :  * Enumerated return codes of idna_to_ascii_4i(),
     868             :  * idna_to_unicode_44i() functions (and functions derived from those
     869             :  * functions).  The value 0 is guaranteed to always correspond to
     870             :  * success.
     871             :  */
     872             : 
     873             : 
     874             : /**
     875             :  * Idna_flags:
     876             :  * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
     877             :  *   Unicode code points.
     878             :  * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
     879             :  *   rules (i.e., normal host name rules).
     880             :  *
     881             :  * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
     882             :  */

Generated by: LCOV version 1.13