File: | lib/idna.c |
Location: | line 660, column 9 |
Description: | Potential leak of memory pointed to by 'out' |
1 | /* idna.c --- Prototypes for Internationalized Domain Name library. | |||
2 | Copyright (C) 2002-2016 Simon Josefsson | |||
3 | ||||
4 | This file is part of GNU Libidn. | |||
5 | ||||
6 | GNU Libidn is free software: you can redistribute it and/or | |||
7 | modify it under the terms of either: | |||
8 | ||||
9 | * the GNU Lesser General Public License as published by the Free | |||
10 | Software Foundation; either version 3 of the License, or (at | |||
11 | your option) any later version. | |||
12 | ||||
13 | or | |||
14 | ||||
15 | * the GNU General Public License as published by the Free | |||
16 | Software Foundation; either version 2 of the License, or (at | |||
17 | your option) any later version. | |||
18 | ||||
19 | or both in parallel, as here. | |||
20 | ||||
21 | GNU Libidn is distributed in the hope that it will be useful, | |||
22 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
23 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
24 | General Public License for more details. | |||
25 | ||||
26 | You should have received copies of the GNU General Public License and | |||
27 | the GNU Lesser General Public License along with this program. If | |||
28 | not, see <http://www.gnu.org/licenses/>. */ | |||
29 | ||||
30 | #ifdef HAVE_CONFIG_H1 | |||
31 | # include "config.h" | |||
32 | #endif | |||
33 | ||||
34 | #include <stdlib.h> | |||
35 | #include <string.h> | |||
36 | #include <stringprep.h> | |||
37 | #include <punycode.h> | |||
38 | ||||
39 | #include "idna.h" | |||
40 | ||||
41 | /* Get c_strcasecmp. */ | |||
42 | #include <c-strcase.h> | |||
43 | ||||
/* Nonzero when code point C is one of the four characters that must be
   recognized as a label-separating dot: U+002E (full stop), U+3002
   (ideographic full stop), U+FF0E (fullwidth full stop) or U+FF61
   (halfwidth ideographic full stop).  C is evaluated more than once,
   so it must not have side effects.  */
#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \
                 (c) == 0xFF0E || (c) == 0xFF61)
46 | ||||
47 | /* Core functions */ | |||
48 | ||||
49 | /** | |||
50 | * idna_to_ascii_4i: | |||
51 | * @in: input array with unicode code points. | |||
52 | * @inlen: length of input array with unicode code points. | |||
53 | * @out: output zero terminated string that must have room for at | |||
54 | * least 63 characters plus the terminating zero. | |||
55 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
56 | * %IDNA_USE_STD3_ASCII_RULES. | |||
57 | * | |||
58 | * The ToASCII operation takes a sequence of Unicode code points that | |||
59 | * make up one domain label and transforms it into a sequence of code | |||
60 | * points in the ASCII range (0..7F). If ToASCII succeeds, the | |||
61 | * original sequence and the resulting sequence are equivalent labels. | |||
62 | * | |||
63 | * It is important to note that the ToASCII operation can fail. ToASCII | |||
64 | * fails if any step of it fails. If any step of the ToASCII operation | |||
65 | * fails on any label in a domain name, that domain name MUST NOT be used | |||
66 | * as an internationalized domain name. The method for deadling with this | |||
67 | * failure is application-specific. | |||
68 | * | |||
69 | * The inputs to ToASCII are a sequence of code points, the AllowUnassigned | |||
70 | * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a | |||
71 | * sequence of ASCII code points or a failure condition. | |||
72 | * | |||
73 | * ToASCII never alters a sequence of code points that are all in the ASCII | |||
74 | * range to begin with (although it could fail). Applying the ToASCII | |||
75 | * operation multiple times has exactly the same effect as applying it just | |||
76 | * once. | |||
77 | * | |||
78 | * Return value: Returns 0 on success, or an #Idna_rc error code. | |||
79 | */ | |||
80 | int | |||
81 | idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags) | |||
82 | { | |||
83 | size_t len, outlen; | |||
84 | uint32_t *src; /* XXX don't need to copy data? */ | |||
85 | int rc; | |||
86 | ||||
87 | /* | |||
88 | * ToASCII consists of the following steps: | |||
89 | * | |||
90 | * 1. If all code points in the sequence are in the ASCII range (0..7F) | |||
91 | * then skip to step 3. | |||
92 | */ | |||
93 | ||||
94 | { | |||
95 | size_t i; | |||
96 | int inasciirange; | |||
97 | ||||
98 | inasciirange = 1; | |||
99 | for (i = 0; i < inlen; i++) | |||
100 | if (in[i] > 0x7F) | |||
101 | inasciirange = 0; | |||
102 | if (inasciirange) | |||
103 | { | |||
104 | src = malloc (sizeof (in[0]) * (inlen + 1)); | |||
105 | if (src == NULL((void*)0)) | |||
106 | return IDNA_MALLOC_ERROR; | |||
107 | ||||
108 | memcpy (src, in, sizeof (in[0]) * inlen); | |||
109 | src[inlen] = 0; | |||
110 | ||||
111 | goto step3; | |||
112 | } | |||
113 | } | |||
114 | ||||
115 | /* | |||
116 | * 2. Perform the steps specified in [NAMEPREP] and fail if there is | |||
117 | * an error. The AllowUnassigned flag is used in [NAMEPREP]. | |||
118 | */ | |||
119 | ||||
120 | { | |||
121 | char *p; | |||
122 | ||||
123 | p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL((void*)0), NULL((void*)0)); | |||
124 | if (p == NULL((void*)0)) | |||
125 | return IDNA_MALLOC_ERROR; | |||
126 | ||||
127 | len = strlen (p); | |||
128 | do | |||
129 | { | |||
130 | char *newp; | |||
131 | ||||
132 | len = 2 * len + 10; /* XXX better guess? */ | |||
133 | newp = realloc (p, len); | |||
134 | if (newp == NULL((void*)0)) | |||
135 | { | |||
136 | free (p); | |||
137 | return IDNA_MALLOC_ERROR; | |||
138 | } | |||
139 | p = newp; | |||
140 | ||||
141 | if (flags & IDNA_ALLOW_UNASSIGNED) | |||
142 | rc = stringprep_nameprep (p, len)stringprep(p, len, 0, stringprep_nameprep); | |||
143 | else | |||
144 | rc = stringprep_nameprep_no_unassigned (p, len)stringprep(p, len, STRINGPREP_NO_UNASSIGNED, stringprep_nameprep ); | |||
145 | } | |||
146 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); | |||
147 | ||||
148 | if (rc != STRINGPREP_OK) | |||
149 | { | |||
150 | free (p); | |||
151 | return IDNA_STRINGPREP_ERROR; | |||
152 | } | |||
153 | ||||
154 | src = stringprep_utf8_to_ucs4 (p, -1, NULL((void*)0)); | |||
155 | ||||
156 | free (p); | |||
157 | ||||
158 | if (!src) | |||
159 | return IDNA_MALLOC_ERROR; | |||
160 | } | |||
161 | ||||
162 | step3: | |||
163 | /* | |||
164 | * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks: | |||
165 | * | |||
166 | * (a) Verify the absence of non-LDH ASCII code points; that is, | |||
167 | * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. | |||
168 | * | |||
169 | * (b) Verify the absence of leading and trailing hyphen-minus; | |||
170 | * that is, the absence of U+002D at the beginning and end of | |||
171 | * the sequence. | |||
172 | */ | |||
173 | ||||
174 | if (flags & IDNA_USE_STD3_ASCII_RULES) | |||
175 | { | |||
176 | size_t i; | |||
177 | ||||
178 | for (i = 0; src[i]; i++) | |||
179 | if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F || | |||
180 | (src[i] >= 0x3A && src[i] <= 0x40) || | |||
181 | (src[i] >= 0x5B && src[i] <= 0x60) || | |||
182 | (src[i] >= 0x7B && src[i] <= 0x7F)) | |||
183 | { | |||
184 | free (src); | |||
185 | return IDNA_CONTAINS_NON_LDH; | |||
186 | } | |||
187 | ||||
188 | if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D)) | |||
189 | { | |||
190 | free (src); | |||
191 | return IDNA_CONTAINS_MINUS; | |||
192 | } | |||
193 | } | |||
194 | ||||
195 | /* | |||
196 | * 4. If all code points in the sequence are in the ASCII range | |||
197 | * (0..7F), then skip to step 8. | |||
198 | */ | |||
199 | ||||
200 | { | |||
201 | size_t i; | |||
202 | int inasciirange; | |||
203 | ||||
204 | inasciirange = 1; | |||
205 | for (i = 0; src[i]; i++) | |||
206 | { | |||
207 | if (src[i] > 0x7F) | |||
208 | inasciirange = 0; | |||
209 | /* copy string to output buffer if we are about to skip to step8 */ | |||
210 | if (i < 64) | |||
211 | out[i] = src[i]; | |||
212 | } | |||
213 | if (i < 64) | |||
214 | out[i] = '\0'; | |||
215 | else | |||
216 | { | |||
217 | free (src); | |||
218 | return IDNA_INVALID_LENGTH; | |||
219 | } | |||
220 | if (inasciirange) | |||
221 | goto step8; | |||
222 | } | |||
223 | ||||
224 | /* | |||
225 | * 5. Verify that the sequence does NOT begin with the ACE prefix. | |||
226 | * | |||
227 | */ | |||
228 | ||||
229 | { | |||
230 | size_t i; | |||
231 | int match; | |||
232 | ||||
233 | match = 1; | |||
234 | for (i = 0; match && i < strlen (IDNA_ACE_PREFIX"xn--"); i++) | |||
235 | if (((uint32_t) IDNA_ACE_PREFIX"xn--"[i] & 0xFF) != src[i]) | |||
236 | match = 0; | |||
237 | if (match) | |||
238 | { | |||
239 | free (src); | |||
240 | return IDNA_CONTAINS_ACE_PREFIX; | |||
241 | } | |||
242 | } | |||
243 | ||||
244 | /* | |||
245 | * 6. Encode the sequence using the encoding algorithm in [PUNYCODE] | |||
246 | * and fail if there is an error. | |||
247 | */ | |||
248 | for (len = 0; src[len]; len++) | |||
249 | ; | |||
250 | src[len] = '\0'; | |||
251 | outlen = 63 - strlen (IDNA_ACE_PREFIX"xn--"); | |||
252 | rc = punycode_encode (len, src, NULL((void*)0), | |||
253 | &outlen, &out[strlen (IDNA_ACE_PREFIX"xn--")]); | |||
254 | if (rc != PUNYCODE_SUCCESS) | |||
255 | { | |||
256 | free (src); | |||
257 | return IDNA_PUNYCODE_ERROR; | |||
258 | } | |||
259 | out[strlen (IDNA_ACE_PREFIX"xn--") + outlen] = '\0'; | |||
260 | ||||
261 | /* | |||
262 | * 7. Prepend the ACE prefix. | |||
263 | */ | |||
264 | ||||
265 | memcpy (out, IDNA_ACE_PREFIX"xn--", strlen (IDNA_ACE_PREFIX"xn--")); | |||
266 | ||||
267 | /* | |||
268 | * 8. Verify that the number of code points is in the range 1 to 63 | |||
269 | * inclusive (0 is excluded). | |||
270 | */ | |||
271 | ||||
272 | step8: | |||
273 | free (src); | |||
274 | if (strlen (out) < 1) | |||
275 | return IDNA_INVALID_LENGTH; | |||
276 | ||||
277 | return IDNA_SUCCESS; | |||
278 | } | |||
279 | ||||
280 | /* ToUnicode(). May realloc() utf8in. Will free utf8in unconditionally. */ | |||
281 | static int | |||
282 | idna_to_unicode_internal (char *utf8in, | |||
283 | uint32_t * out, size_t * outlen, int flags) | |||
284 | { | |||
285 | int rc; | |||
286 | char tmpout[64]; | |||
287 | size_t utf8len = strlen (utf8in) + 1; | |||
288 | size_t addlen = 0; | |||
289 | ||||
290 | /* | |||
291 | * ToUnicode consists of the following steps: | |||
292 | * | |||
293 | * 1. If the sequence contains any code points outside the ASCII range | |||
294 | * (0..7F) then proceed to step 2, otherwise skip to step 3. | |||
295 | */ | |||
296 | ||||
297 | { | |||
298 | size_t i; | |||
299 | int inasciirange; | |||
300 | ||||
301 | inasciirange = 1; | |||
302 | for (i = 0; utf8in[i]; i++) | |||
303 | if (utf8in[i] & ~0x7F) | |||
304 | inasciirange = 0; | |||
305 | if (inasciirange) | |||
306 | goto step3; | |||
307 | } | |||
308 | ||||
309 | /* | |||
310 | * 2. Perform the steps specified in [NAMEPREP] and fail if there is an | |||
311 | * error. (If step 3 of ToASCII is also performed here, it will not | |||
312 | * affect the overall behavior of ToUnicode, but it is not | |||
313 | * necessary.) The AllowUnassigned flag is used in [NAMEPREP]. | |||
314 | */ | |||
315 | do | |||
316 | { | |||
317 | char *newp = realloc (utf8in, utf8len + addlen); | |||
318 | if (newp == NULL((void*)0)) | |||
319 | { | |||
320 | free (utf8in); | |||
321 | return IDNA_MALLOC_ERROR; | |||
322 | } | |||
323 | utf8in = newp; | |||
324 | if (flags & IDNA_ALLOW_UNASSIGNED) | |||
325 | rc = stringprep_nameprep (utf8in, utf8len + addlen)stringprep(utf8in, utf8len + addlen, 0, stringprep_nameprep); | |||
326 | else | |||
327 | rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen)stringprep(utf8in, utf8len + addlen, STRINGPREP_NO_UNASSIGNED , stringprep_nameprep); | |||
328 | addlen += 1; | |||
329 | } | |||
330 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); | |||
331 | ||||
332 | if (rc != STRINGPREP_OK) | |||
333 | { | |||
334 | free (utf8in); | |||
335 | return IDNA_STRINGPREP_ERROR; | |||
336 | } | |||
337 | ||||
338 | /* 3. Verify that the sequence begins with the ACE prefix, and save a | |||
339 | * copy of the sequence. | |||
340 | * ... The ToASCII and ToUnicode operations MUST recognize the ACE | |||
341 | prefix in a case-insensitive manner. | |||
342 | */ | |||
343 | ||||
344 | step3: | |||
345 | if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX"xn--", strlen (IDNA_ACE_PREFIX"xn--")) != 0) | |||
346 | { | |||
347 | free (utf8in); | |||
348 | return IDNA_NO_ACE_PREFIX; | |||
349 | } | |||
350 | ||||
351 | /* 4. Remove the ACE prefix. | |||
352 | */ | |||
353 | ||||
354 | memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX"xn--")], | |||
355 | strlen (utf8in) - strlen (IDNA_ACE_PREFIX"xn--") + 1); | |||
356 | ||||
357 | /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE] | |||
358 | * and fail if there is an error. Save a copy of the result of | |||
359 | * this step. | |||
360 | */ | |||
361 | ||||
362 | (*outlen)--; /* reserve one for the zero */ | |||
363 | ||||
364 | rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL((void*)0)); | |||
365 | if (rc != PUNYCODE_SUCCESS) | |||
366 | { | |||
367 | free (utf8in); | |||
368 | return IDNA_PUNYCODE_ERROR; | |||
369 | } | |||
370 | ||||
371 | out[*outlen] = 0; /* add zero */ | |||
372 | ||||
373 | /* 6. Apply ToASCII. | |||
374 | */ | |||
375 | ||||
376 | rc = idna_to_ascii_4i (out, *outlen, tmpout, flags); | |||
377 | if (rc != IDNA_SUCCESS) | |||
378 | { | |||
379 | free (utf8in); | |||
380 | return rc; | |||
381 | } | |||
382 | ||||
383 | /* 7. Verify that the result of step 6 matches the saved copy from | |||
384 | * step 3, using a case-insensitive ASCII comparison. | |||
385 | */ | |||
386 | ||||
387 | if (c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX"xn--")) != 0) | |||
388 | { | |||
389 | free (utf8in); | |||
390 | return IDNA_ROUNDTRIP_VERIFY_ERROR; | |||
391 | } | |||
392 | ||||
393 | /* 8. Return the saved copy from step 5. | |||
394 | */ | |||
395 | ||||
396 | free (utf8in); | |||
397 | return IDNA_SUCCESS; | |||
398 | } | |||
399 | ||||
400 | /** | |||
401 | * idna_to_unicode_44i: | |||
402 | * @in: input array with unicode code points. | |||
403 | * @inlen: length of input array with unicode code points. | |||
404 | * @out: output array with unicode code points. | |||
405 | * @outlen: on input, maximum size of output array with unicode code points, | |||
406 | * on exit, actual size of output array with unicode code points. | |||
407 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
408 | * %IDNA_USE_STD3_ASCII_RULES. | |||
409 | * | |||
410 | * The ToUnicode operation takes a sequence of Unicode code points | |||
411 | * that make up one domain label and returns a sequence of Unicode | |||
412 | * code points. If the input sequence is a label in ACE form, then the | |||
413 | * result is an equivalent internationalized label that is not in ACE | |||
414 | * form, otherwise the original sequence is returned unaltered. | |||
415 | * | |||
416 | * ToUnicode never fails. If any step fails, then the original input | |||
417 | * sequence is returned immediately in that step. | |||
418 | * | |||
419 | * The Punycode decoder can never output more code points than it | |||
420 | * inputs, but Nameprep can, and therefore ToUnicode can. Note that | |||
421 | * the number of octets needed to represent a sequence of code points | |||
422 | * depends on the particular character encoding used. | |||
423 | * | |||
424 | * The inputs to ToUnicode are a sequence of code points, the | |||
425 | * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of | |||
426 | * ToUnicode is always a sequence of Unicode code points. | |||
427 | * | |||
428 | * Return value: Returns #Idna_rc error condition, but it must only be | |||
429 | * used for debugging purposes. The output buffer is always | |||
430 | * guaranteed to contain the correct data according to the | |||
431 | * specification (sans malloc induced errors). NB! This means that | |||
432 | * you normally ignore the return code from this function, as | |||
433 | * checking it means breaking the standard. | |||
434 | */ | |||
435 | int | |||
436 | idna_to_unicode_44i (const uint32_t * in, size_t inlen, | |||
437 | uint32_t * out, size_t * outlen, int flags) | |||
438 | { | |||
439 | int rc; | |||
440 | size_t outlensave = *outlen; | |||
441 | char *p; | |||
442 | ||||
443 | p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL((void*)0), NULL((void*)0)); | |||
444 | if (p == NULL((void*)0)) | |||
445 | return IDNA_MALLOC_ERROR; | |||
446 | ||||
447 | rc = idna_to_unicode_internal (p, out, outlen, flags); | |||
448 | if (rc != IDNA_SUCCESS) | |||
449 | { | |||
450 | memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ? | |||
451 | inlen : outlensave)); | |||
452 | *outlen = inlen; | |||
453 | } | |||
454 | ||||
455 | /* p is freed in idna_to_unicode_internal. */ | |||
456 | ||||
457 | return rc; | |||
458 | } | |||
459 | ||||
460 | /* Wrappers that handle several labels */ | |||
461 | ||||
462 | /** | |||
463 | * idna_to_ascii_4z: | |||
464 | * @input: zero terminated input Unicode string. | |||
465 | * @output: pointer to newly allocated output string. | |||
466 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
467 | * %IDNA_USE_STD3_ASCII_RULES. | |||
468 | * | |||
469 | * Convert UCS-4 domain name to ASCII string. The domain name may | |||
470 | * contain several labels, separated by dots. The output buffer must | |||
471 | * be deallocated by the caller. | |||
472 | * | |||
473 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
474 | **/ | |||
475 | int | |||
476 | idna_to_ascii_4z (const uint32_t * input, char **output, int flags) | |||
477 | { | |||
478 | const uint32_t *start = input; | |||
479 | const uint32_t *end; | |||
480 | char buf[64]; | |||
481 | char *out = NULL((void*)0); | |||
482 | int rc; | |||
483 | ||||
484 | /* 1) Whenever dots are used as label separators, the following | |||
485 | characters MUST be recognized as dots: U+002E (full stop), | |||
486 | U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), | |||
487 | U+FF61 (halfwidth ideographic full stop). */ | |||
488 | ||||
489 | if (input[0] == 0) | |||
490 | { | |||
491 | /* Handle implicit zero-length root label. */ | |||
492 | *output = malloc (1); | |||
493 | if (!*output) | |||
494 | return IDNA_MALLOC_ERROR; | |||
495 | strcpy (*output, ""); | |||
496 | return IDNA_SUCCESS; | |||
497 | } | |||
498 | ||||
499 | if (DOTP (input[0])((input[0]) == 0x002E || (input[0]) == 0x3002 || (input[0]) == 0xFF0E || (input[0]) == 0xFF61) && input[1] == 0) | |||
500 | { | |||
501 | /* Handle explicit zero-length root label. */ | |||
502 | *output = malloc (2); | |||
503 | if (!*output) | |||
504 | return IDNA_MALLOC_ERROR; | |||
505 | strcpy (*output, "."); | |||
506 | return IDNA_SUCCESS; | |||
507 | } | |||
508 | ||||
509 | *output = NULL((void*)0); | |||
510 | do | |||
511 | { | |||
512 | end = start; | |||
513 | ||||
514 | for (; *end && !DOTP (*end)((*end) == 0x002E || (*end) == 0x3002 || (*end) == 0xFF0E || ( *end) == 0xFF61); end++) | |||
515 | ; | |||
516 | ||||
517 | if (*end == '\0' && start == end) | |||
518 | { | |||
519 | /* Handle explicit zero-length root label. */ | |||
520 | buf[0] = '\0'; | |||
521 | } | |||
522 | else | |||
523 | { | |||
524 | rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags); | |||
525 | if (rc != IDNA_SUCCESS) | |||
526 | { | |||
527 | free (out); | |||
528 | return rc; | |||
529 | } | |||
530 | } | |||
531 | ||||
532 | if (out) | |||
533 | { | |||
534 | size_t l = strlen (out) + 1 + strlen (buf) + 1; | |||
535 | char *newp = realloc (out, l); | |||
536 | if (!newp) | |||
537 | { | |||
538 | free (out); | |||
539 | return IDNA_MALLOC_ERROR; | |||
540 | } | |||
541 | out = newp; | |||
542 | strcat (out, "."); | |||
543 | strcat (out, buf); | |||
544 | } | |||
545 | else | |||
546 | { | |||
547 | out = strdup (buf)(__extension__ (__builtin_constant_p (buf) && ((size_t )(const void *)((buf) + 1) - (size_t)(const void *)(buf) == 1 ) ? (((const char *) (buf))[0] == '\0' ? (char *) calloc ((size_t ) 1, (size_t) 1) : ({ size_t __len = strlen (buf) + 1; char * __retval = (char *) malloc (__len); if (__retval != ((void*)0 )) __retval = (char *) memcpy (__retval, buf, __len); __retval ; })) : __strdup (buf))); | |||
548 | if (!out) | |||
549 | return IDNA_MALLOC_ERROR; | |||
550 | } | |||
551 | ||||
552 | start = end + 1; | |||
553 | } | |||
554 | while (*end); | |||
555 | ||||
556 | *output = out; | |||
557 | ||||
558 | return IDNA_SUCCESS; | |||
559 | } | |||
560 | ||||
561 | /** | |||
562 | * idna_to_ascii_8z: | |||
563 | * @input: zero terminated input UTF-8 string. | |||
564 | * @output: pointer to newly allocated output string. | |||
565 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
566 | * %IDNA_USE_STD3_ASCII_RULES. | |||
567 | * | |||
568 | * Convert UTF-8 domain name to ASCII string. The domain name may | |||
569 | * contain several labels, separated by dots. The output buffer must | |||
570 | * be deallocated by the caller. | |||
571 | * | |||
572 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
573 | **/ | |||
574 | int | |||
575 | idna_to_ascii_8z (const char *input, char **output, int flags) | |||
576 | { | |||
577 | uint32_t *ucs4; | |||
578 | size_t ucs4len; | |||
579 | int rc; | |||
580 | ||||
581 | ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); | |||
582 | if (!ucs4) | |||
583 | return IDNA_ICONV_ERROR; | |||
584 | ||||
585 | rc = idna_to_ascii_4z (ucs4, output, flags); | |||
586 | ||||
587 | free (ucs4); | |||
588 | ||||
589 | return rc; | |||
590 | ||||
591 | } | |||
592 | ||||
593 | /** | |||
594 | * idna_to_ascii_lz: | |||
595 | * @input: zero terminated input string encoded in the current locale's | |||
596 | * character set. | |||
597 | * @output: pointer to newly allocated output string. | |||
598 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
599 | * %IDNA_USE_STD3_ASCII_RULES. | |||
600 | * | |||
601 | * Convert domain name in the locale's encoding to ASCII string. The | |||
602 | * domain name may contain several labels, separated by dots. The | |||
603 | * output buffer must be deallocated by the caller. | |||
604 | * | |||
605 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
606 | **/ | |||
607 | int | |||
608 | idna_to_ascii_lz (const char *input, char **output, int flags) | |||
609 | { | |||
610 | char *utf8; | |||
611 | int rc; | |||
612 | ||||
613 | utf8 = stringprep_locale_to_utf8 (input); | |||
614 | if (!utf8) | |||
615 | return IDNA_ICONV_ERROR; | |||
616 | ||||
617 | rc = idna_to_ascii_8z (utf8, output, flags); | |||
618 | ||||
619 | free (utf8); | |||
620 | ||||
621 | return rc; | |||
622 | } | |||
623 | ||||
624 | /** | |||
625 | * idna_to_unicode_4z4z: | |||
626 | * @input: zero-terminated Unicode string. | |||
627 | * @output: pointer to newly allocated output Unicode string. | |||
628 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
629 | * %IDNA_USE_STD3_ASCII_RULES. | |||
630 | * | |||
631 | * Convert possibly ACE encoded domain name in UCS-4 format into a | |||
632 | * UCS-4 string. The domain name may contain several labels, | |||
633 | * separated by dots. The output buffer must be deallocated by the | |||
634 | * caller. | |||
635 | * | |||
636 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
637 | **/ | |||
638 | int | |||
639 | idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags) | |||
640 | { | |||
641 | const uint32_t *start = input; | |||
642 | const uint32_t *end; | |||
643 | uint32_t *buf; | |||
644 | size_t buflen; | |||
645 | uint32_t *out = NULL((void*)0); | |||
646 | size_t outlen = 0; | |||
647 | ||||
648 | *output = NULL((void*)0); | |||
649 | ||||
650 | do | |||
651 | { | |||
652 | end = start; | |||
653 | ||||
654 | for (; *end && !DOTP (*end)((*end) == 0x002E || (*end) == 0x3002 || (*end) == 0xFF0E || ( *end) == 0xFF61); end++) | |||
655 | ; | |||
656 | ||||
657 | buflen = (size_t) (end - start); | |||
658 | buf = malloc (sizeof (buf[0]) * (buflen + 1)); | |||
659 | if (!buf) | |||
660 | return IDNA_MALLOC_ERROR; | |||
| ||||
661 | ||||
662 | /* don't check return code as per specification! */ | |||
663 | idna_to_unicode_44i (start, (size_t) (end - start), | |||
664 | buf, &buflen, flags); | |||
665 | ||||
666 | if (out) | |||
667 | { | |||
668 | uint32_t *newp = realloc (out, | |||
669 | sizeof (out[0]) | |||
670 | * (outlen + 1 + buflen + 1)); | |||
671 | if (!newp) | |||
672 | { | |||
673 | free (buf); | |||
674 | free (out); | |||
675 | return IDNA_MALLOC_ERROR; | |||
676 | } | |||
677 | out = newp; | |||
678 | out[outlen++] = 0x002E; /* '.' (full stop) */ | |||
679 | memcpy (out + outlen, buf, sizeof (buf[0]) * buflen); | |||
680 | outlen += buflen; | |||
681 | out[outlen] = 0x0; | |||
682 | free (buf); | |||
683 | } | |||
684 | else | |||
685 | { | |||
686 | out = buf; | |||
687 | outlen = buflen; | |||
688 | out[outlen] = 0x0; | |||
689 | } | |||
690 | ||||
691 | start = end + 1; | |||
692 | } | |||
693 | while (*end); | |||
694 | ||||
695 | *output = out; | |||
696 | ||||
697 | return IDNA_SUCCESS; | |||
698 | } | |||
699 | ||||
700 | /** | |||
701 | * idna_to_unicode_8z4z: | |||
702 | * @input: zero-terminated UTF-8 string. | |||
703 | * @output: pointer to newly allocated output Unicode string. | |||
704 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
705 | * %IDNA_USE_STD3_ASCII_RULES. | |||
706 | * | |||
707 | * Convert possibly ACE encoded domain name in UTF-8 format into a | |||
708 | * UCS-4 string. The domain name may contain several labels, | |||
709 | * separated by dots. The output buffer must be deallocated by the | |||
710 | * caller. | |||
711 | * | |||
712 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
713 | **/ | |||
714 | int | |||
715 | idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags) | |||
716 | { | |||
717 | uint32_t *ucs4; | |||
718 | size_t ucs4len; | |||
719 | int rc; | |||
720 | ||||
721 | ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); | |||
722 | if (!ucs4) | |||
723 | return IDNA_ICONV_ERROR; | |||
724 | ||||
725 | rc = idna_to_unicode_4z4z (ucs4, output, flags); | |||
726 | free (ucs4); | |||
727 | ||||
728 | return rc; | |||
729 | } | |||
730 | ||||
731 | /** | |||
732 | * idna_to_unicode_8z8z: | |||
733 | * @input: zero-terminated UTF-8 string. | |||
734 | * @output: pointer to newly allocated output UTF-8 string. | |||
735 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
736 | * %IDNA_USE_STD3_ASCII_RULES. | |||
737 | * | |||
738 | * Convert possibly ACE encoded domain name in UTF-8 format into a | |||
739 | * UTF-8 string. The domain name may contain several labels, | |||
740 | * separated by dots. The output buffer must be deallocated by the | |||
741 | * caller. | |||
742 | * | |||
743 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
744 | **/ | |||
745 | int | |||
746 | idna_to_unicode_8z8z (const char *input, char **output, int flags) | |||
747 | { | |||
748 | uint32_t *ucs4; | |||
749 | int rc; | |||
750 | ||||
751 | rc = idna_to_unicode_8z4z (input, &ucs4, flags); | |||
752 | if (rc != IDNA_SUCCESS) | |||
753 | return rc; | |||
754 | ||||
755 | *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL((void*)0), NULL((void*)0)); | |||
756 | free (ucs4); | |||
757 | ||||
758 | if (!*output) | |||
759 | return IDNA_ICONV_ERROR; | |||
760 | ||||
761 | return IDNA_SUCCESS; | |||
762 | } | |||
763 | ||||
764 | /** | |||
765 | * idna_to_unicode_8zlz: | |||
766 | * @input: zero-terminated UTF-8 string. | |||
767 | * @output: pointer to newly allocated output string encoded in the | |||
768 | * current locale's character set. | |||
769 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
770 | * %IDNA_USE_STD3_ASCII_RULES. | |||
771 | * | |||
772 | * Convert possibly ACE encoded domain name in UTF-8 format into a | |||
773 | * string encoded in the current locale's character set. The domain | |||
774 | * name may contain several labels, separated by dots. The output | |||
775 | * buffer must be deallocated by the caller. | |||
776 | * | |||
777 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
778 | **/ | |||
779 | int | |||
780 | idna_to_unicode_8zlz (const char *input, char **output, int flags) | |||
781 | { | |||
782 | char *utf8; | |||
783 | int rc; | |||
784 | ||||
785 | rc = idna_to_unicode_8z8z (input, &utf8, flags); | |||
786 | if (rc != IDNA_SUCCESS) | |||
787 | return rc; | |||
788 | ||||
789 | *output = stringprep_utf8_to_locale (utf8); | |||
790 | free (utf8); | |||
791 | ||||
792 | if (!*output) | |||
793 | return IDNA_ICONV_ERROR; | |||
794 | ||||
795 | return IDNA_SUCCESS; | |||
796 | } | |||
797 | ||||
798 | /** | |||
799 | * idna_to_unicode_lzlz: | |||
800 | * @input: zero-terminated string encoded in the current locale's | |||
801 | * character set. | |||
802 | * @output: pointer to newly allocated output string encoded in the | |||
803 | * current locale's character set. | |||
804 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
805 | * %IDNA_USE_STD3_ASCII_RULES. | |||
806 | * | |||
807 | * Convert possibly ACE encoded domain name in the locale's character | |||
808 | * set into a string encoded in the current locale's character set. | |||
809 | * The domain name may contain several labels, separated by dots. The | |||
810 | * output buffer must be deallocated by the caller. | |||
811 | * | |||
812 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
813 | **/ | |||
814 | int | |||
815 | idna_to_unicode_lzlz (const char *input, char **output, int flags) | |||
816 | { | |||
817 | char *utf8; | |||
818 | int rc; | |||
819 | ||||
820 | utf8 = stringprep_locale_to_utf8 (input); | |||
821 | if (!utf8) | |||
| ||||
822 | return IDNA_ICONV_ERROR; | |||
823 | ||||
824 | rc = idna_to_unicode_8zlz (utf8, output, flags); | |||
825 | free (utf8); | |||
826 | ||||
827 | return rc; | |||
828 | } | |||
829 | ||||
830 | /** | |||
831 | * IDNA_ACE_PREFIX | |||
832 | * | |||
833 | * The IANA allocated prefix to use for IDNA. "xn--" | |||
834 | */ | |||
835 | ||||
836 | /** | |||
837 | * Idna_rc: | |||
838 | * @IDNA_SUCCESS: Successful operation. This value is guaranteed to | |||
839 | * always be zero, the remaining ones are only guaranteed to hold | |||
840 | * non-zero values, for logical comparison purposes. | |||
841 | * @IDNA_STRINGPREP_ERROR: Error during string preparation. | |||
842 | * @IDNA_PUNYCODE_ERROR: Error during punycode operation. | |||
843 | * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that | |||
844 | * the string contains non-LDH ASCII characters. | |||
845 | * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility | |||
846 | * with typo in earlier versions. | |||
847 | * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that | |||
848 | * the string contains a leading or trailing hyphen-minus (U+002D). | |||
849 | * @IDNA_INVALID_LENGTH: The final output string is not within the | |||
850 | * (inclusive) range 1 to 63 characters. | |||
851 | * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix | |||
852 | * (for ToUnicode). | |||
853 | * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output | |||
854 | * string does not equal the input. | |||
855 | * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for | |||
856 | * ToASCII). | |||
857 | * @IDNA_ICONV_ERROR: Could not convert string in locale encoding. | |||
858 | * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a | |||
859 | * fatal error). | |||
860 | * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used | |||
861 | * internally in libc). | |||
862 | * | |||
863 | * Enumerated return codes of idna_to_ascii_4i(), | |||
864 | * idna_to_unicode_44i() functions (and functions derived from those | |||
865 | * functions). The value 0 is guaranteed to always correspond to | |||
866 | * success. | |||
867 | */ | |||
868 | ||||
869 | ||||
870 | /** | |||
871 | * Idna_flags: | |||
872 | * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned | |||
873 | * Unicode code points. | |||
874 | * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3 | |||
875 | * rules (i.e., normal host name rules). | |||
876 | * | |||
877 | * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc. | |||
878 | */ |