Line data Source code
1 : /* idna.c --- Implementation of the Internationalized Domain Name library.
2 : Copyright (C) 2002-2020 Simon Josefsson
3 :
4 : This file is part of GNU Libidn.
5 :
6 : GNU Libidn is free software: you can redistribute it and/or
7 : modify it under the terms of either:
8 :
9 : * the GNU Lesser General Public License as published by the Free
10 : Software Foundation; either version 3 of the License, or (at
11 : your option) any later version.
12 :
13 : or
14 :
15 : * the GNU General Public License as published by the Free
16 : Software Foundation; either version 2 of the License, or (at
17 : your option) any later version.
18 :
19 : or both in parallel, as here.
20 :
21 : GNU Libidn is distributed in the hope that it will be useful,
22 : but WITHOUT ANY WARRANTY; without even the implied warranty of
23 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 : General Public License for more details.
25 :
26 : You should have received copies of the GNU General Public License and
27 : the GNU Lesser General Public License along with this program. If
28 : not, see <http://www.gnu.org/licenses/>. */
29 :
30 : #ifdef HAVE_CONFIG_H
31 : # include "config.h"
32 : #endif
33 :
34 : #include <stdlib.h>
35 : #include <string.h>
36 : #include <stringprep.h>
37 : #include <punycode.h>
38 :
39 : #include "idna.h"
40 :
41 : /* Get c_strcasecmp. */
42 : #include <c-strcase.h>
43 :
/* Nonzero when code point C is one of the four characters that must be
   recognized as a label separator: U+002E, U+3002, U+FF0E or U+FF61.
   C is evaluated more than once, so pass an expression without side
   effects.  */
#define DOTP(c)                 \
  (   (c) == 0x002E             \
   || (c) == 0x3002             \
   || (c) == 0xFF0E             \
   || (c) == 0xFF61)
46 :
47 : /* Core functions */
48 :
49 : /**
50 : * idna_to_ascii_4i:
51 : * @in: input array with unicode code points.
52 : * @inlen: length of input array with unicode code points.
53 : * @out: output zero terminated string that must have room for at
54 : * least 63 characters plus the terminating zero.
55 : * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
56 : * %IDNA_USE_STD3_ASCII_RULES.
57 : *
58 : * The ToASCII operation takes a sequence of Unicode code points that
59 : * make up one domain label and transforms it into a sequence of code
60 : * points in the ASCII range (0..7F). If ToASCII succeeds, the
61 : * original sequence and the resulting sequence are equivalent labels.
62 : *
63 : * It is important to note that the ToASCII operation can fail. ToASCII
64 : * fails if any step of it fails. If any step of the ToASCII operation
65 : * fails on any label in a domain name, that domain name MUST NOT be used
66 : * as an internationalized domain name. The method for dealing with this
67 : * failure is application-specific.
68 : *
69 : * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
70 : * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
71 : * sequence of ASCII code points or a failure condition.
72 : *
73 : * ToASCII never alters a sequence of code points that are all in the ASCII
74 : * range to begin with (although it could fail). Applying the ToASCII
75 : * operation multiple times has exactly the same effect as applying it just
76 : * once.
77 : *
78 : * Return value: Returns 0 on success, or an #Idna_rc error code.
79 : */
80 : int
81 24164 : idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
82 : {
83 : size_t len, outlen;
84 : uint32_t *src; /* XXX don't need to copy data? */
85 : int rc;
86 :
87 : /*
88 : * ToASCII consists of the following steps:
89 : *
90 : * 1. If all code points in the sequence are in the ASCII range (0..7F)
91 : * then skip to step 3.
92 : */
93 :
94 : {
95 : size_t i;
96 : int inasciirange;
97 :
98 24164 : inasciirange = 1;
99 134428 : for (i = 0; i < inlen; i++)
100 110264 : if (in[i] > 0x7F)
101 63414 : inasciirange = 0;
102 24164 : if (inasciirange)
103 : {
104 10806 : src = malloc (sizeof (in[0]) * (inlen + 1));
105 10806 : if (src == NULL)
106 0 : return IDNA_MALLOC_ERROR;
107 :
108 10806 : memcpy (src, in, sizeof (in[0]) * inlen);
109 10806 : src[inlen] = 0;
110 :
111 10806 : goto step3;
112 : }
113 : }
114 :
115 : /*
116 : * 2. Perform the steps specified in [NAMEPREP] and fail if there is
117 : * an error. The AllowUnassigned flag is used in [NAMEPREP].
118 : */
119 :
120 : {
121 : char *p;
122 :
123 13358 : p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
124 13358 : if (p == NULL)
125 176 : return IDNA_MALLOC_ERROR;
126 :
127 13182 : len = strlen (p);
128 : do
129 : {
130 : char *newp;
131 :
132 14588 : len = 2 * len + 10; /* XXX better guess? */
133 14588 : newp = realloc (p, len);
134 14588 : if (newp == NULL)
135 : {
136 0 : free (p);
137 0 : return IDNA_MALLOC_ERROR;
138 : }
139 14588 : p = newp;
140 :
141 14588 : if (flags & IDNA_ALLOW_UNASSIGNED)
142 7059 : rc = stringprep_nameprep (p, len);
143 : else
144 7529 : rc = stringprep_nameprep_no_unassigned (p, len);
145 : }
146 14588 : while (rc == STRINGPREP_TOO_SMALL_BUFFER);
147 :
148 13182 : if (rc != STRINGPREP_OK)
149 : {
150 1644 : free (p);
151 1644 : return IDNA_STRINGPREP_ERROR;
152 : }
153 :
154 11538 : src = stringprep_utf8_to_ucs4 (p, -1, NULL);
155 :
156 11538 : free (p);
157 :
158 11538 : if (!src)
159 0 : return IDNA_MALLOC_ERROR;
160 : }
161 :
162 11538 : step3:
163 : /*
164 : * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
165 : *
166 : * (a) Verify the absence of non-LDH ASCII code points; that is,
167 : * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
168 : *
169 : * (b) Verify the absence of leading and trailing hyphen-minus;
170 : * that is, the absence of U+002D at the beginning and end of
171 : * the sequence.
172 : */
173 :
174 22344 : if (flags & IDNA_USE_STD3_ASCII_RULES)
175 : {
176 : size_t i;
177 :
178 66795 : for (i = 0; src[i]; i++)
179 57960 : if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
180 56974 : (src[i] >= 0x3A && src[i] <= 0x40) ||
181 56427 : (src[i] >= 0x5B && src[i] <= 0x60) ||
182 55865 : (src[i] >= 0x7B && src[i] <= 0x7F))
183 : {
184 2313 : free (src);
185 2313 : return IDNA_CONTAINS_NON_LDH;
186 : }
187 :
188 8835 : if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
189 : {
190 638 : free (src);
191 638 : return IDNA_CONTAINS_MINUS;
192 : }
193 : }
194 :
195 : /*
196 : * 4. If all code points in the sequence are in the ASCII range
197 : * (0..7F), then skip to step 8.
198 : */
199 :
200 : {
201 : size_t i;
202 : int inasciirange;
203 :
204 19393 : inasciirange = 1;
205 133950 : for (i = 0; src[i]; i++)
206 : {
207 114557 : if (src[i] > 0x7F)
208 69363 : inasciirange = 0;
209 : /* copy string to output buffer if we are about to skip to step8 */
210 114557 : if (i < 64)
211 106299 : out[i] = src[i];
212 : }
213 19393 : if (i < 64)
214 19104 : out[i] = '\0';
215 : else
216 : {
217 289 : free (src);
218 289 : return IDNA_INVALID_LENGTH;
219 : }
220 19104 : if (inasciirange)
221 9574 : goto step8;
222 : }
223 :
224 : /*
225 : * 5. Verify that the sequence does NOT begin with the ACE prefix.
226 : *
227 : */
228 :
229 : {
230 : size_t i;
231 : int match;
232 :
233 9530 : match = 1;
234 20473 : for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
235 10943 : if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
236 9242 : match = 0;
237 9530 : if (match)
238 : {
239 288 : free (src);
240 288 : return IDNA_CONTAINS_ACE_PREFIX;
241 : }
242 : }
243 :
244 : /*
245 : * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
246 : * and fail if there is an error.
247 : */
248 82901 : for (len = 0; src[len]; len++)
249 : ;
250 9242 : src[len] = '\0';
251 9242 : outlen = 63 - strlen (IDNA_ACE_PREFIX);
252 9242 : rc = punycode_encode (len, src, NULL,
253 : &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
254 9242 : if (rc != PUNYCODE_SUCCESS)
255 : {
256 851 : free (src);
257 851 : return IDNA_PUNYCODE_ERROR;
258 : }
259 8391 : out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
260 :
261 : /*
262 : * 7. Prepend the ACE prefix.
263 : */
264 :
265 8391 : memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
266 :
267 : /*
268 : * 8. Verify that the number of code points is in the range 1 to 63
269 : * inclusive (0 is excluded).
270 : */
271 :
272 17965 : step8:
273 17965 : free (src);
274 17965 : if (strlen (out) < 1)
275 4581 : return IDNA_INVALID_LENGTH;
276 :
277 13384 : return IDNA_SUCCESS;
278 : }
279 :
280 : /* ToUnicode(). May realloc() utf8in. Will free utf8in unconditionally. */
281 : static int
282 44238 : idna_to_unicode_internal (char *utf8in,
283 : uint32_t * out, size_t *outlen, int flags)
284 : {
285 : int rc;
286 : char tmpout[64];
287 44238 : size_t utf8len = strlen (utf8in) + 1;
288 44238 : size_t addlen = 0, addinc = utf8len / 10 + 1;
289 :
290 : /*
291 : * ToUnicode consists of the following steps:
292 : *
293 : * 1. If the sequence contains any code points outside the ASCII range
294 : * (0..7F) then proceed to step 2, otherwise skip to step 3.
295 : */
296 :
297 : {
298 : size_t i;
299 : int inasciirange;
300 :
301 44238 : inasciirange = 1;
302 492505 : for (i = 0; utf8in[i]; i++)
303 448267 : if (utf8in[i] & ~0x7F)
304 169582 : inasciirange = 0;
305 44238 : if (inasciirange)
306 38640 : goto step3;
307 : }
308 :
309 : /*
310 : * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
311 : * error. (If step 3 of ToASCII is also performed here, it will not
312 : * affect the overall behavior of ToUnicode, but it is not
313 : * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
314 : */
315 : do
316 : {
317 11611 : char *newp = realloc (utf8in, utf8len + addlen);
318 11611 : if (newp == NULL)
319 : {
320 0 : free (utf8in);
321 0 : return IDNA_MALLOC_ERROR;
322 : }
323 11611 : utf8in = newp;
324 11611 : if (flags & IDNA_ALLOW_UNASSIGNED)
325 6460 : rc = stringprep_nameprep (utf8in, utf8len + addlen);
326 : else
327 5151 : rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
328 11611 : addlen += addinc;
329 11611 : addinc *= 2;
330 : }
331 11611 : while (rc == STRINGPREP_TOO_SMALL_BUFFER);
332 :
333 5598 : if (rc != STRINGPREP_OK)
334 : {
335 2113 : free (utf8in);
336 2113 : return IDNA_STRINGPREP_ERROR;
337 : }
338 :
339 : /* 3. Verify that the sequence begins with the ACE prefix, and save a
340 : * copy of the sequence.
341 : * ... The ToASCII and ToUnicode operations MUST recognize the ACE
342 : prefix in a case-insensitive manner.
343 : */
344 :
345 3485 : step3:
346 42125 : if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)) != 0)
347 : {
348 20880 : free (utf8in);
349 20880 : return IDNA_NO_ACE_PREFIX;
350 : }
351 :
352 : /* 4. Remove the ACE prefix.
353 : */
354 :
355 21245 : memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
356 21245 : strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
357 :
358 : /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
359 : * and fail if there is an error. Save a copy of the result of
360 : * this step.
361 : */
362 :
363 21245 : (*outlen)--; /* reserve one for the zero */
364 :
365 21245 : rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
366 21245 : if (rc != PUNYCODE_SUCCESS)
367 : {
368 3349 : free (utf8in);
369 3349 : return IDNA_PUNYCODE_ERROR;
370 : }
371 :
372 17896 : out[*outlen] = 0; /* add zero */
373 :
374 : /* 6. Apply ToASCII.
375 : */
376 :
377 17896 : rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
378 17896 : if (rc != IDNA_SUCCESS)
379 : {
380 9531 : free (utf8in);
381 9531 : return rc;
382 : }
383 :
384 : /* 7. Verify that the result of step 6 matches the saved copy from
385 : * step 3, using a case-insensitive ASCII comparison.
386 : */
387 :
388 8365 : if (c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
389 : {
390 4692 : free (utf8in);
391 4692 : return IDNA_ROUNDTRIP_VERIFY_ERROR;
392 : }
393 :
394 : /* 8. Return the saved copy from step 5.
395 : */
396 :
397 3673 : free (utf8in);
398 3673 : return IDNA_SUCCESS;
399 : }
400 :
401 : /**
402 : * idna_to_unicode_44i:
403 : * @in: input array with unicode code points.
404 : * @inlen: length of input array with unicode code points.
405 : * @out: output array with unicode code points.
406 : * @outlen: on input, maximum size of output array with unicode code points,
407 : * on exit, actual size of output array with unicode code points.
408 : * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
409 : * %IDNA_USE_STD3_ASCII_RULES.
410 : *
411 : * The ToUnicode operation takes a sequence of Unicode code points
412 : * that make up one domain label and returns a sequence of Unicode
413 : * code points. If the input sequence is a label in ACE form, then the
414 : * result is an equivalent internationalized label that is not in ACE
415 : * form, otherwise the original sequence is returned unaltered.
416 : *
417 : * ToUnicode never fails. If any step fails, then the original input
418 : * sequence is returned immediately in that step.
419 : *
420 : * The Punycode decoder can never output more code points than it
421 : * inputs, but Nameprep can, and therefore ToUnicode can. Note that
422 : * the number of octets needed to represent a sequence of code points
423 : * depends on the particular character encoding used.
424 : *
425 : * The inputs to ToUnicode are a sequence of code points, the
426 : * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
427 : * ToUnicode is always a sequence of Unicode code points.
428 : *
429 : * Return value: Returns #Idna_rc error condition, but it must only be
430 : * used for debugging purposes. The output buffer is always
431 : * guaranteed to contain the correct data according to the
432 : * specification (sans malloc induced errors). NB! This means that
433 : * you normally ignore the return code from this function, as
434 : * checking it means breaking the standard.
435 : */
436 : int
437 45130 : idna_to_unicode_44i (const uint32_t * in, size_t inlen,
438 : uint32_t * out, size_t *outlen, int flags)
439 : {
440 : int rc;
441 45130 : size_t outlensave = *outlen;
442 : char *p;
443 :
444 45130 : p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
445 45130 : if (p == NULL)
446 892 : return IDNA_MALLOC_ERROR;
447 :
448 44238 : rc = idna_to_unicode_internal (p, out, outlen, flags);
449 44238 : if (rc != IDNA_SUCCESS)
450 : {
451 40565 : memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
452 40565 : inlen : outlensave));
453 40565 : *outlen = inlen;
454 : }
455 :
456 : /* p is freed in idna_to_unicode_internal. */
457 :
458 44238 : return rc;
459 : }
460 :
461 : /* Wrappers that handle several labels */
462 :
463 : /**
464 : * idna_to_ascii_4z:
465 : * @input: zero terminated input Unicode string.
466 : * @output: pointer to newly allocated output string.
467 : * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
468 : * %IDNA_USE_STD3_ASCII_RULES.
469 : *
470 : * Convert UCS-4 domain name to ASCII string. The domain name may
471 : * contain several labels, separated by dots. The output buffer must
472 : * be deallocated by the caller.
473 : *
474 : * Return value: Returns %IDNA_SUCCESS on success, or error code.
475 : **/
476 : int
477 1568 : idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
478 : {
479 1568 : const uint32_t *start = input;
480 : const uint32_t *end;
481 : char buf[64];
482 1568 : char *out = NULL;
483 : int rc;
484 :
485 : /* 1) Whenever dots are used as label separators, the following
486 : characters MUST be recognized as dots: U+002E (full stop),
487 : U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
488 : U+FF61 (halfwidth ideographic full stop). */
489 :
490 1568 : if (input[0] == 0)
491 : {
492 : /* Handle implicit zero-length root label. */
493 131 : *output = malloc (1);
494 131 : if (!*output)
495 0 : return IDNA_MALLOC_ERROR;
496 131 : strcpy (*output, "");
497 131 : return IDNA_SUCCESS;
498 : }
499 :
500 1437 : if (DOTP (input[0]) && input[1] == 0)
501 : {
502 : /* Handle explicit zero-length root label. */
503 14 : *output = malloc (2);
504 14 : if (!*output)
505 0 : return IDNA_MALLOC_ERROR;
506 14 : strcpy (*output, ".");
507 14 : return IDNA_SUCCESS;
508 : }
509 :
510 1423 : *output = NULL;
511 : do
512 : {
513 5752 : end = start;
514 :
515 23774 : for (; *end && !DOTP (*end); end++)
516 : ;
517 :
518 5752 : if (*end == '\0' && start == end)
519 : {
520 : /* Handle explicit zero-length root label. */
521 18 : buf[0] = '\0';
522 : }
523 : else
524 : {
525 5734 : rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags);
526 5734 : if (rc != IDNA_SUCCESS)
527 : {
528 847 : free (out);
529 847 : return rc;
530 : }
531 : }
532 :
533 4905 : if (out)
534 : {
535 4199 : size_t l = strlen (out) + 1 + strlen (buf) + 1;
536 4199 : char *newp = realloc (out, l);
537 4199 : if (!newp)
538 : {
539 0 : free (out);
540 0 : return IDNA_MALLOC_ERROR;
541 : }
542 4199 : out = newp;
543 4199 : strcat (out, ".");
544 4199 : strcat (out, buf);
545 : }
546 : else
547 : {
548 706 : out = strdup (buf);
549 706 : if (!out)
550 0 : return IDNA_MALLOC_ERROR;
551 : }
552 :
553 4905 : start = end + 1;
554 : }
555 4905 : while (*end);
556 :
557 576 : *output = out;
558 :
559 576 : return IDNA_SUCCESS;
560 : }
561 :
562 : /**
563 : * idna_to_ascii_8z:
564 : * @input: zero terminated input UTF-8 string.
565 : * @output: pointer to newly allocated output string.
566 : * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
567 : * %IDNA_USE_STD3_ASCII_RULES.
568 : *
569 : * Convert UTF-8 domain name to ASCII string. The domain name may
570 : * contain several labels, separated by dots. The output buffer must
571 : * be deallocated by the caller.
572 : *
573 : * Return value: Returns %IDNA_SUCCESS on success, or error code.
574 : **/
575 : int
576 1186 : idna_to_ascii_8z (const char *input, char **output, int flags)
577 : {
578 : uint32_t *ucs4;
579 : size_t ucs4len;
580 : int rc;
581 :
582 1186 : ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
583 1186 : if (!ucs4)
584 131 : return IDNA_ICONV_ERROR;
585 :
586 1055 : rc = idna_to_ascii_4z (ucs4, output, flags);
587 :
588 1055 : free (ucs4);
589 :
590 1055 : return rc;
591 :
592 : }
593 :
594 : /**
595 : * idna_to_ascii_lz:
596 : * @input: zero terminated input string encoded in the current locale's
597 : * character set.
598 : * @output: pointer to newly allocated output string.
599 : * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
600 : * %IDNA_USE_STD3_ASCII_RULES.
601 : *
602 : * Convert domain name in the locale's encoding to ASCII string. The
603 : * domain name may contain several labels, separated by dots. The
604 : * output buffer must be deallocated by the caller.
605 : *
606 : * Return value: Returns %IDNA_SUCCESS on success, or error code.
607 : **/
608 : int
609 724 : idna_to_ascii_lz (const char *input, char **output, int flags)
610 : {
611 : char *utf8;
612 : int rc;
613 :
614 724 : utf8 = stringprep_locale_to_utf8 (input);
615 724 : if (!utf8)
616 364 : return IDNA_ICONV_ERROR;
617 :
618 360 : rc = idna_to_ascii_8z (utf8, output, flags);
619 :
620 360 : free (utf8);
621 :
622 360 : return rc;
623 : }
624 :
625 : /**
626 : * idna_to_unicode_4z4z:
627 : * @input: zero-terminated Unicode string.
628 : * @output: pointer to newly allocated output Unicode string.
629 : * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
630 : * %IDNA_USE_STD3_ASCII_RULES.
631 : *
632 : * Convert possibly ACE encoded domain name in UCS-4 format into a
633 : * UCS-4 string. The domain name may contain several labels,
634 : * separated by dots. The output buffer must be deallocated by the
635 : * caller.
636 : *
637 : * Return value: Returns %IDNA_SUCCESS on success, or error code.
638 : **/
639 : int
640 4047 : idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
641 : {
642 4047 : const uint32_t *start = input;
643 : const uint32_t *end;
644 : uint32_t *buf;
645 : size_t buflen;
646 4047 : uint32_t *out = NULL;
647 4047 : size_t outlen = 0;
648 :
649 4047 : *output = NULL;
650 :
651 : do
652 : {
653 44328 : end = start;
654 :
655 350537 : for (; *end && !DOTP (*end); end++)
656 : ;
657 :
658 44328 : buflen = (size_t) (end - start);
659 44328 : buf = malloc (sizeof (buf[0]) * (buflen + 1));
660 44328 : if (!buf)
661 : {
662 0 : free (out);
663 0 : return IDNA_MALLOC_ERROR;
664 : }
665 :
666 : /* don't check return code as per specification! */
667 44328 : idna_to_unicode_44i (start, (size_t) (end - start),
668 : buf, &buflen, flags);
669 :
670 44328 : if (out)
671 : {
672 40281 : uint32_t *newp = realloc (out,
673 : sizeof (out[0])
674 40281 : * (outlen + 1 + buflen + 1));
675 40281 : if (!newp)
676 : {
677 0 : free (buf);
678 0 : free (out);
679 0 : return IDNA_MALLOC_ERROR;
680 : }
681 40281 : out = newp;
682 40281 : out[outlen++] = 0x002E; /* '.' (full stop) */
683 40281 : memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
684 40281 : outlen += buflen;
685 40281 : out[outlen] = 0x0;
686 40281 : free (buf);
687 : }
688 : else
689 : {
690 4047 : out = buf;
691 4047 : outlen = buflen;
692 4047 : out[outlen] = 0x0;
693 : }
694 :
695 44328 : start = end + 1;
696 : }
697 44328 : while (*end);
698 :
699 4047 : *output = out;
700 :
701 4047 : return IDNA_SUCCESS;
702 : }
703 :
704 : /**
705 : * idna_to_unicode_8z4z:
706 : * @input: zero-terminated UTF-8 string.
707 : * @output: pointer to newly allocated output Unicode string.
708 : * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
709 : * %IDNA_USE_STD3_ASCII_RULES.
710 : *
711 : * Convert possibly ACE encoded domain name in UTF-8 format into a
712 : * UCS-4 string. The domain name may contain several labels,
713 : * separated by dots. The output buffer must be deallocated by the
714 : * caller.
715 : *
716 : * Return value: Returns %IDNA_SUCCESS on success, or error code.
717 : **/
718 : int
719 3580 : idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
720 : {
721 : uint32_t *ucs4;
722 : size_t ucs4len;
723 : int rc;
724 :
725 3580 : ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
726 3580 : if (!ucs4)
727 313 : return IDNA_ICONV_ERROR;
728 :
729 3267 : rc = idna_to_unicode_4z4z (ucs4, output, flags);
730 3267 : free (ucs4);
731 :
732 3267 : return rc;
733 : }
734 :
735 : /**
736 : * idna_to_unicode_8z8z:
737 : * @input: zero-terminated UTF-8 string.
738 : * @output: pointer to newly allocated output UTF-8 string.
739 : * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
740 : * %IDNA_USE_STD3_ASCII_RULES.
741 : *
742 : * Convert possibly ACE encoded domain name in UTF-8 format into a
743 : * UTF-8 string. The domain name may contain several labels,
744 : * separated by dots. The output buffer must be deallocated by the
745 : * caller.
746 : *
747 : * Return value: Returns %IDNA_SUCCESS on success, or error code.
748 : **/
749 : int
750 2800 : idna_to_unicode_8z8z (const char *input, char **output, int flags)
751 : {
752 : uint32_t *ucs4;
753 : int rc;
754 :
755 2800 : rc = idna_to_unicode_8z4z (input, &ucs4, flags);
756 2800 : if (rc != IDNA_SUCCESS)
757 217 : return rc;
758 :
759 2583 : *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
760 2583 : free (ucs4);
761 :
762 2583 : if (!*output)
763 0 : return IDNA_ICONV_ERROR;
764 :
765 2583 : return IDNA_SUCCESS;
766 : }
767 :
768 : /**
769 : * idna_to_unicode_8zlz:
770 : * @input: zero-terminated UTF-8 string.
771 : * @output: pointer to newly allocated output string encoded in the
772 : * current locale's character set.
773 : * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
774 : * %IDNA_USE_STD3_ASCII_RULES.
775 : *
776 : * Convert possibly ACE encoded domain name in UTF-8 format into a
777 : * string encoded in the current locale's character set. The domain
778 : * name may contain several labels, separated by dots. The output
779 : * buffer must be deallocated by the caller.
780 : *
781 : * Return value: Returns %IDNA_SUCCESS on success, or error code.
782 : **/
783 : int
784 1680 : idna_to_unicode_8zlz (const char *input, char **output, int flags)
785 : {
786 : char *utf8;
787 : int rc;
788 :
789 1680 : rc = idna_to_unicode_8z8z (input, &utf8, flags);
790 1680 : if (rc != IDNA_SUCCESS)
791 108 : return rc;
792 :
793 1572 : *output = stringprep_utf8_to_locale (utf8);
794 1572 : free (utf8);
795 :
796 1572 : if (!*output)
797 589 : return IDNA_ICONV_ERROR;
798 :
799 983 : return IDNA_SUCCESS;
800 : }
801 :
802 : /**
803 : * idna_to_unicode_lzlz:
804 : * @input: zero-terminated string encoded in the current locale's
805 : * character set.
806 : * @output: pointer to newly allocated output string encoded in the
807 : * current locale's character set.
808 : * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
809 : * %IDNA_USE_STD3_ASCII_RULES.
810 : *
811 : * Convert possibly ACE encoded domain name in the locale's character
812 : * set into a string encoded in the current locale's character set.
813 : * The domain name may contain several labels, separated by dots. The
814 : * output buffer must be deallocated by the caller.
815 : *
816 : * Return value: Returns %IDNA_SUCCESS on success, or error code.
817 : **/
818 : int
819 1116 : idna_to_unicode_lzlz (const char *input, char **output, int flags)
820 : {
821 : char *utf8;
822 : int rc;
823 :
824 1116 : utf8 = stringprep_locale_to_utf8 (input);
825 1116 : if (!utf8)
826 552 : return IDNA_ICONV_ERROR;
827 :
828 564 : rc = idna_to_unicode_8zlz (utf8, output, flags);
829 564 : free (utf8);
830 :
831 564 : return rc;
832 : }
833 :
834 : /**
835 : * IDNA_ACE_PREFIX
836 : *
837 : * The IANA allocated prefix to use for IDNA. "xn--"
838 : */
839 :
840 : /**
841 : * Idna_rc:
842 : * @IDNA_SUCCESS: Successful operation. This value is guaranteed to
843 : * always be zero, the remaining ones are only guaranteed to hold
844 : * non-zero values, for logical comparison purposes.
845 : * @IDNA_STRINGPREP_ERROR: Error during string preparation.
846 : * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
847 : * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
848 : * the string contains non-LDH ASCII characters.
849 : * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility
850 : * with typo in earlier versions.
851 : * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
852 : * the string contains a leading or trailing hyphen-minus (U+002D).
853 : * @IDNA_INVALID_LENGTH: The final output string is not within the
854 : * (inclusive) range 1 to 63 characters.
855 : * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
856 : * (for ToUnicode).
857 : * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
858 : * string does not equal the input.
859 : * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
860 : * ToASCII).
861 : * @IDNA_ICONV_ERROR: Character encoding conversion error.
862 : * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
863 : * fatal error).
864 : * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
865 : * internally in libc).
866 : *
867 : * Enumerated return codes of idna_to_ascii_4i(),
868 : * idna_to_unicode_44i() functions (and functions derived from those
869 : * functions). The value 0 is guaranteed to always correspond to
870 : * success.
871 : */
872 :
873 :
874 : /**
875 : * Idna_flags:
876 : * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
877 : * Unicode code points.
878 : * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
879 : * rules (i.e., normal host name rules).
880 : *
881 : * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
882 : */
|