File: | lib/idna.c |
Location: | line 655, column 9 |
Description: | Potential leak of memory pointed to by 'out' |
1 | /* idna.c --- Prototypes for Internationalized Domain Name library. | |||
2 | Copyright (C) 2002-2015 Simon Josefsson | |||
3 | ||||
4 | This file is part of GNU Libidn. | |||
5 | ||||
6 | GNU Libidn is free software: you can redistribute it and/or | |||
7 | modify it under the terms of either: | |||
8 | ||||
9 | * the GNU Lesser General Public License as published by the Free | |||
10 | Software Foundation; either version 3 of the License, or (at | |||
11 | your option) any later version. | |||
12 | ||||
13 | or | |||
14 | ||||
15 | * the GNU General Public License as published by the Free | |||
16 | Software Foundation; either version 2 of the License, or (at | |||
17 | your option) any later version. | |||
18 | ||||
19 | or both in parallel, as here. | |||
20 | ||||
21 | GNU Libidn is distributed in the hope that it will be useful, | |||
22 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
23 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
24 | General Public License for more details. | |||
25 | ||||
26 | You should have received copies of the GNU General Public License and | |||
27 | the GNU Lesser General Public License along with this program. If | |||
28 | not, see <http://www.gnu.org/licenses/>. */ | |||
29 | ||||
30 | #ifdef HAVE_CONFIG_H1 | |||
31 | # include "config.h" | |||
32 | #endif | |||
33 | ||||
34 | #include <stdlib.h> | |||
35 | #include <string.h> | |||
36 | #include <stringprep.h> | |||
37 | #include <punycode.h> | |||
38 | ||||
39 | #include "idna.h" | |||
40 | ||||
41 | /* Get c_strcasecmp. */ | |||
42 | #include <c-strcase.h> | |||
43 | ||||
/* True if code point C is one of the four characters that must be
   recognized as a label-separating dot: U+002E (full stop), U+3002
   (ideographic full stop), U+FF0E (fullwidth full stop) or U+FF61
   (halfwidth ideographic full stop).  C is evaluated more than once,
   so the argument must not have side effects.  */
#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 ||      \
                 (c) == 0xFF0E || (c) == 0xFF61)
46 | ||||
47 | /* Core functions */ | |||
48 | ||||
49 | /** | |||
50 | * idna_to_ascii_4i: | |||
51 | * @in: input array with unicode code points. | |||
52 | * @inlen: length of input array with unicode code points. | |||
53 | * @out: output zero terminated string that must have room for at | |||
54 | * least 63 characters plus the terminating zero. | |||
55 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
56 | * %IDNA_USE_STD3_ASCII_RULES. | |||
57 | * | |||
58 | * The ToASCII operation takes a sequence of Unicode code points that | |||
59 | * make up one domain label and transforms it into a sequence of code | |||
60 | * points in the ASCII range (0..7F). If ToASCII succeeds, the | |||
61 | * original sequence and the resulting sequence are equivalent labels. | |||
62 | * | |||
63 | * It is important to note that the ToASCII operation can fail. ToASCII | |||
64 | * fails if any step of it fails. If any step of the ToASCII operation | |||
65 | * fails on any label in a domain name, that domain name MUST NOT be used | |||
66 | * as an internationalized domain name. The method for deadling with this | |||
67 | * failure is application-specific. | |||
68 | * | |||
69 | * The inputs to ToASCII are a sequence of code points, the AllowUnassigned | |||
70 | * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a | |||
71 | * sequence of ASCII code points or a failure condition. | |||
72 | * | |||
73 | * ToASCII never alters a sequence of code points that are all in the ASCII | |||
74 | * range to begin with (although it could fail). Applying the ToASCII | |||
75 | * operation multiple times has exactly the same effect as applying it just | |||
76 | * once. | |||
77 | * | |||
78 | * Return value: Returns 0 on success, or an #Idna_rc error code. | |||
79 | */ | |||
80 | int | |||
81 | idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags) | |||
82 | { | |||
83 | size_t len, outlen; | |||
84 | uint32_t *src; /* XXX don't need to copy data? */ | |||
85 | int rc; | |||
86 | ||||
87 | /* | |||
88 | * ToASCII consists of the following steps: | |||
89 | * | |||
90 | * 1. If all code points in the sequence are in the ASCII range (0..7F) | |||
91 | * then skip to step 3. | |||
92 | */ | |||
93 | ||||
94 | { | |||
95 | size_t i; | |||
96 | int inasciirange; | |||
97 | ||||
98 | inasciirange = 1; | |||
99 | for (i = 0; i < inlen; i++) | |||
100 | if (in[i] > 0x7F) | |||
101 | inasciirange = 0; | |||
102 | if (inasciirange) | |||
103 | { | |||
104 | src = malloc (sizeof (in[0]) * (inlen + 1)); | |||
105 | if (src == NULL((void*)0)) | |||
106 | return IDNA_MALLOC_ERROR; | |||
107 | ||||
108 | memcpy (src, in, sizeof (in[0]) * inlen); | |||
109 | src[inlen] = 0; | |||
110 | ||||
111 | goto step3; | |||
112 | } | |||
113 | } | |||
114 | ||||
115 | /* | |||
116 | * 2. Perform the steps specified in [NAMEPREP] and fail if there is | |||
117 | * an error. The AllowUnassigned flag is used in [NAMEPREP]. | |||
118 | */ | |||
119 | ||||
120 | { | |||
121 | char *p; | |||
122 | ||||
123 | p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL((void*)0), NULL((void*)0)); | |||
124 | if (p == NULL((void*)0)) | |||
125 | return IDNA_MALLOC_ERROR; | |||
126 | ||||
127 | len = strlen (p); | |||
128 | do | |||
129 | { | |||
130 | char *newp; | |||
131 | ||||
132 | len = 2 * len + 10; /* XXX better guess? */ | |||
133 | newp = realloc (p, len); | |||
134 | if (newp == NULL((void*)0)) | |||
135 | { | |||
136 | free (p); | |||
137 | return IDNA_MALLOC_ERROR; | |||
138 | } | |||
139 | p = newp; | |||
140 | ||||
141 | if (flags & IDNA_ALLOW_UNASSIGNED) | |||
142 | rc = stringprep_nameprep (p, len)stringprep(p, len, 0, stringprep_nameprep); | |||
143 | else | |||
144 | rc = stringprep_nameprep_no_unassigned (p, len)stringprep(p, len, STRINGPREP_NO_UNASSIGNED, stringprep_nameprep ); | |||
145 | } | |||
146 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); | |||
147 | ||||
148 | if (rc != STRINGPREP_OK) | |||
149 | { | |||
150 | free (p); | |||
151 | return IDNA_STRINGPREP_ERROR; | |||
152 | } | |||
153 | ||||
154 | src = stringprep_utf8_to_ucs4 (p, -1, NULL((void*)0)); | |||
155 | ||||
156 | free (p); | |||
157 | ||||
158 | if (!src) | |||
159 | return IDNA_MALLOC_ERROR; | |||
160 | } | |||
161 | ||||
162 | step3: | |||
163 | /* | |||
164 | * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks: | |||
165 | * | |||
166 | * (a) Verify the absence of non-LDH ASCII code points; that is, | |||
167 | * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. | |||
168 | * | |||
169 | * (b) Verify the absence of leading and trailing hyphen-minus; | |||
170 | * that is, the absence of U+002D at the beginning and end of | |||
171 | * the sequence. | |||
172 | */ | |||
173 | ||||
174 | if (flags & IDNA_USE_STD3_ASCII_RULES) | |||
175 | { | |||
176 | size_t i; | |||
177 | ||||
178 | for (i = 0; src[i]; i++) | |||
179 | if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F || | |||
180 | (src[i] >= 0x3A && src[i] <= 0x40) || | |||
181 | (src[i] >= 0x5B && src[i] <= 0x60) || | |||
182 | (src[i] >= 0x7B && src[i] <= 0x7F)) | |||
183 | { | |||
184 | free (src); | |||
185 | return IDNA_CONTAINS_NON_LDH; | |||
186 | } | |||
187 | ||||
188 | if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D)) | |||
189 | { | |||
190 | free (src); | |||
191 | return IDNA_CONTAINS_MINUS; | |||
192 | } | |||
193 | } | |||
194 | ||||
195 | /* | |||
196 | * 4. If all code points in the sequence are in the ASCII range | |||
197 | * (0..7F), then skip to step 8. | |||
198 | */ | |||
199 | ||||
200 | { | |||
201 | size_t i; | |||
202 | int inasciirange; | |||
203 | ||||
204 | inasciirange = 1; | |||
205 | for (i = 0; src[i]; i++) | |||
206 | { | |||
207 | if (src[i] > 0x7F) | |||
208 | inasciirange = 0; | |||
209 | /* copy string to output buffer if we are about to skip to step8 */ | |||
210 | if (i < 64) | |||
211 | out[i] = src[i]; | |||
212 | } | |||
213 | if (i < 64) | |||
214 | out[i] = '\0'; | |||
215 | if (inasciirange) | |||
216 | goto step8; | |||
217 | } | |||
218 | ||||
219 | /* | |||
220 | * 5. Verify that the sequence does NOT begin with the ACE prefix. | |||
221 | * | |||
222 | */ | |||
223 | ||||
224 | { | |||
225 | size_t i; | |||
226 | int match; | |||
227 | ||||
228 | match = 1; | |||
229 | for (i = 0; match && i < strlen (IDNA_ACE_PREFIX"xn--"); i++) | |||
230 | if (((uint32_t) IDNA_ACE_PREFIX"xn--"[i] & 0xFF) != src[i]) | |||
231 | match = 0; | |||
232 | if (match) | |||
233 | { | |||
234 | free (src); | |||
235 | return IDNA_CONTAINS_ACE_PREFIX; | |||
236 | } | |||
237 | } | |||
238 | ||||
239 | /* | |||
240 | * 6. Encode the sequence using the encoding algorithm in [PUNYCODE] | |||
241 | * and fail if there is an error. | |||
242 | */ | |||
243 | for (len = 0; src[len]; len++) | |||
244 | ; | |||
245 | src[len] = '\0'; | |||
246 | outlen = 63 - strlen (IDNA_ACE_PREFIX"xn--"); | |||
247 | rc = punycode_encode (len, src, NULL((void*)0), | |||
248 | &outlen, &out[strlen (IDNA_ACE_PREFIX"xn--")]); | |||
249 | if (rc != PUNYCODE_SUCCESS) | |||
250 | { | |||
251 | free (src); | |||
252 | return IDNA_PUNYCODE_ERROR; | |||
253 | } | |||
254 | out[strlen (IDNA_ACE_PREFIX"xn--") + outlen] = '\0'; | |||
255 | ||||
256 | /* | |||
257 | * 7. Prepend the ACE prefix. | |||
258 | */ | |||
259 | ||||
260 | memcpy (out, IDNA_ACE_PREFIX"xn--", strlen (IDNA_ACE_PREFIX"xn--")); | |||
261 | ||||
262 | /* | |||
263 | * 8. Verify that the number of code points is in the range 1 to 63 | |||
264 | * inclusive (0 is excluded). | |||
265 | */ | |||
266 | ||||
267 | step8: | |||
268 | free (src); | |||
269 | if (strlen (out) < 1 || strlen (out) > 63) | |||
270 | return IDNA_INVALID_LENGTH; | |||
271 | ||||
272 | return IDNA_SUCCESS; | |||
273 | } | |||
274 | ||||
275 | /* ToUnicode(). May realloc() utf8in. Will free utf8in unconditionally. */ | |||
276 | static int | |||
277 | idna_to_unicode_internal (char *utf8in, | |||
278 | uint32_t * out, size_t * outlen, int flags) | |||
279 | { | |||
280 | int rc; | |||
281 | char tmpout[64]; | |||
282 | size_t utf8len = strlen (utf8in) + 1; | |||
283 | size_t addlen = 0; | |||
284 | ||||
285 | /* | |||
286 | * ToUnicode consists of the following steps: | |||
287 | * | |||
288 | * 1. If the sequence contains any code points outside the ASCII range | |||
289 | * (0..7F) then proceed to step 2, otherwise skip to step 3. | |||
290 | */ | |||
291 | ||||
292 | { | |||
293 | size_t i; | |||
294 | int inasciirange; | |||
295 | ||||
296 | inasciirange = 1; | |||
297 | for (i = 0; utf8in[i]; i++) | |||
298 | if (utf8in[i] & ~0x7F) | |||
299 | inasciirange = 0; | |||
300 | if (inasciirange) | |||
301 | goto step3; | |||
302 | } | |||
303 | ||||
304 | /* | |||
305 | * 2. Perform the steps specified in [NAMEPREP] and fail if there is an | |||
306 | * error. (If step 3 of ToASCII is also performed here, it will not | |||
307 | * affect the overall behavior of ToUnicode, but it is not | |||
308 | * necessary.) The AllowUnassigned flag is used in [NAMEPREP]. | |||
309 | */ | |||
310 | do | |||
311 | { | |||
312 | char *newp = realloc (utf8in, utf8len + addlen); | |||
313 | if (newp == NULL((void*)0)) | |||
314 | { | |||
315 | free (utf8in); | |||
316 | return IDNA_MALLOC_ERROR; | |||
317 | } | |||
318 | utf8in = newp; | |||
319 | if (flags & IDNA_ALLOW_UNASSIGNED) | |||
320 | rc = stringprep_nameprep (utf8in, utf8len + addlen)stringprep(utf8in, utf8len + addlen, 0, stringprep_nameprep); | |||
321 | else | |||
322 | rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen)stringprep(utf8in, utf8len + addlen, STRINGPREP_NO_UNASSIGNED , stringprep_nameprep); | |||
323 | addlen += 1; | |||
324 | } | |||
325 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); | |||
326 | ||||
327 | if (rc != STRINGPREP_OK) | |||
328 | { | |||
329 | free (utf8in); | |||
330 | return IDNA_STRINGPREP_ERROR; | |||
331 | } | |||
332 | ||||
333 | /* 3. Verify that the sequence begins with the ACE prefix, and save a | |||
334 | * copy of the sequence. | |||
335 | * ... The ToASCII and ToUnicode operations MUST recognize the ACE | |||
336 | prefix in a case-insensitive manner. | |||
337 | */ | |||
338 | ||||
339 | step3: | |||
340 | if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX"xn--", strlen (IDNA_ACE_PREFIX"xn--")) != 0) | |||
341 | { | |||
342 | free (utf8in); | |||
343 | return IDNA_NO_ACE_PREFIX; | |||
344 | } | |||
345 | ||||
346 | /* 4. Remove the ACE prefix. | |||
347 | */ | |||
348 | ||||
349 | memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX"xn--")], | |||
350 | strlen (utf8in) - strlen (IDNA_ACE_PREFIX"xn--") + 1); | |||
351 | ||||
352 | /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE] | |||
353 | * and fail if there is an error. Save a copy of the result of | |||
354 | * this step. | |||
355 | */ | |||
356 | ||||
357 | (*outlen)--; /* reserve one for the zero */ | |||
358 | ||||
359 | rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL((void*)0)); | |||
360 | if (rc != PUNYCODE_SUCCESS) | |||
361 | { | |||
362 | free (utf8in); | |||
363 | return IDNA_PUNYCODE_ERROR; | |||
364 | } | |||
365 | ||||
366 | out[*outlen] = 0; /* add zero */ | |||
367 | ||||
368 | /* 6. Apply ToASCII. | |||
369 | */ | |||
370 | ||||
371 | rc = idna_to_ascii_4i (out, *outlen, tmpout, flags); | |||
372 | if (rc != IDNA_SUCCESS) | |||
373 | { | |||
374 | free (utf8in); | |||
375 | return rc; | |||
376 | } | |||
377 | ||||
378 | /* 7. Verify that the result of step 6 matches the saved copy from | |||
379 | * step 3, using a case-insensitive ASCII comparison. | |||
380 | */ | |||
381 | ||||
382 | if (c_strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX"xn--")) != 0) | |||
383 | { | |||
384 | free (utf8in); | |||
385 | return IDNA_ROUNDTRIP_VERIFY_ERROR; | |||
386 | } | |||
387 | ||||
388 | /* 8. Return the saved copy from step 5. | |||
389 | */ | |||
390 | ||||
391 | free (utf8in); | |||
392 | return IDNA_SUCCESS; | |||
393 | } | |||
394 | ||||
395 | /** | |||
396 | * idna_to_unicode_44i: | |||
397 | * @in: input array with unicode code points. | |||
398 | * @inlen: length of input array with unicode code points. | |||
399 | * @out: output array with unicode code points. | |||
400 | * @outlen: on input, maximum size of output array with unicode code points, | |||
401 | * on exit, actual size of output array with unicode code points. | |||
402 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
403 | * %IDNA_USE_STD3_ASCII_RULES. | |||
404 | * | |||
405 | * The ToUnicode operation takes a sequence of Unicode code points | |||
406 | * that make up one domain label and returns a sequence of Unicode | |||
407 | * code points. If the input sequence is a label in ACE form, then the | |||
408 | * result is an equivalent internationalized label that is not in ACE | |||
409 | * form, otherwise the original sequence is returned unaltered. | |||
410 | * | |||
411 | * ToUnicode never fails. If any step fails, then the original input | |||
412 | * sequence is returned immediately in that step. | |||
413 | * | |||
414 | * The Punycode decoder can never output more code points than it | |||
415 | * inputs, but Nameprep can, and therefore ToUnicode can. Note that | |||
416 | * the number of octets needed to represent a sequence of code points | |||
417 | * depends on the particular character encoding used. | |||
418 | * | |||
419 | * The inputs to ToUnicode are a sequence of code points, the | |||
420 | * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of | |||
421 | * ToUnicode is always a sequence of Unicode code points. | |||
422 | * | |||
423 | * Return value: Returns #Idna_rc error condition, but it must only be | |||
424 | * used for debugging purposes. The output buffer is always | |||
425 | * guaranteed to contain the correct data according to the | |||
426 | * specification (sans malloc induced errors). NB! This means that | |||
427 | * you normally ignore the return code from this function, as | |||
428 | * checking it means breaking the standard. | |||
429 | */ | |||
430 | int | |||
431 | idna_to_unicode_44i (const uint32_t * in, size_t inlen, | |||
432 | uint32_t * out, size_t * outlen, int flags) | |||
433 | { | |||
434 | int rc; | |||
435 | size_t outlensave = *outlen; | |||
436 | char *p; | |||
437 | ||||
438 | p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL((void*)0), NULL((void*)0)); | |||
439 | if (p == NULL((void*)0)) | |||
440 | return IDNA_MALLOC_ERROR; | |||
441 | ||||
442 | rc = idna_to_unicode_internal (p, out, outlen, flags); | |||
443 | if (rc != IDNA_SUCCESS) | |||
444 | { | |||
445 | memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ? | |||
446 | inlen : outlensave)); | |||
447 | *outlen = inlen; | |||
448 | } | |||
449 | ||||
450 | /* p is freed in idna_to_unicode_internal. */ | |||
451 | ||||
452 | return rc; | |||
453 | } | |||
454 | ||||
455 | /* Wrappers that handle several labels */ | |||
456 | ||||
457 | /** | |||
458 | * idna_to_ascii_4z: | |||
459 | * @input: zero terminated input Unicode string. | |||
460 | * @output: pointer to newly allocated output string. | |||
461 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
462 | * %IDNA_USE_STD3_ASCII_RULES. | |||
463 | * | |||
464 | * Convert UCS-4 domain name to ASCII string. The domain name may | |||
465 | * contain several labels, separated by dots. The output buffer must | |||
466 | * be deallocated by the caller. | |||
467 | * | |||
468 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
469 | **/ | |||
470 | int | |||
471 | idna_to_ascii_4z (const uint32_t * input, char **output, int flags) | |||
472 | { | |||
473 | const uint32_t *start = input; | |||
474 | const uint32_t *end; | |||
475 | char buf[64]; | |||
476 | char *out = NULL((void*)0); | |||
477 | int rc; | |||
478 | ||||
479 | /* 1) Whenever dots are used as label separators, the following | |||
480 | characters MUST be recognized as dots: U+002E (full stop), | |||
481 | U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), | |||
482 | U+FF61 (halfwidth ideographic full stop). */ | |||
483 | ||||
484 | if (input[0] == 0) | |||
485 | { | |||
486 | /* Handle implicit zero-length root label. */ | |||
487 | *output = malloc (1); | |||
488 | if (!*output) | |||
489 | return IDNA_MALLOC_ERROR; | |||
490 | strcpy (*output, ""); | |||
491 | return IDNA_SUCCESS; | |||
492 | } | |||
493 | ||||
494 | if (DOTP (input[0])((input[0]) == 0x002E || (input[0]) == 0x3002 || (input[0]) == 0xFF0E || (input[0]) == 0xFF61) && input[1] == 0) | |||
495 | { | |||
496 | /* Handle explicit zero-length root label. */ | |||
497 | *output = malloc (2); | |||
498 | if (!*output) | |||
499 | return IDNA_MALLOC_ERROR; | |||
500 | strcpy (*output, "."); | |||
501 | return IDNA_SUCCESS; | |||
502 | } | |||
503 | ||||
504 | *output = NULL((void*)0); | |||
505 | do | |||
506 | { | |||
507 | end = start; | |||
508 | ||||
509 | for (; *end && !DOTP (*end)((*end) == 0x002E || (*end) == 0x3002 || (*end) == 0xFF0E || ( *end) == 0xFF61); end++) | |||
510 | ; | |||
511 | ||||
512 | if (*end == '\0' && start == end) | |||
513 | { | |||
514 | /* Handle explicit zero-length root label. */ | |||
515 | buf[0] = '\0'; | |||
516 | } | |||
517 | else | |||
518 | { | |||
519 | rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags); | |||
520 | if (rc != IDNA_SUCCESS) | |||
521 | { | |||
522 | free (out); | |||
523 | return rc; | |||
524 | } | |||
525 | } | |||
526 | ||||
527 | if (out) | |||
528 | { | |||
529 | size_t l = strlen (out) + 1 + strlen (buf) + 1; | |||
530 | char *newp = realloc (out, l); | |||
531 | if (!newp) | |||
532 | { | |||
533 | free (out); | |||
534 | return IDNA_MALLOC_ERROR; | |||
535 | } | |||
536 | out = newp; | |||
537 | strcat (out, "."); | |||
538 | strcat (out, buf); | |||
539 | } | |||
540 | else | |||
541 | { | |||
542 | out = strdup (buf)(__extension__ (__builtin_constant_p (buf) && ((size_t )(const void *)((buf) + 1) - (size_t)(const void *)(buf) == 1 ) ? (((const char *) (buf))[0] == '\0' ? (char *) calloc ((size_t ) 1, (size_t) 1) : ({ size_t __len = strlen (buf) + 1; char * __retval = (char *) malloc (__len); if (__retval != ((void*)0 )) __retval = (char *) memcpy (__retval, buf, __len); __retval ; })) : __strdup (buf))); | |||
543 | if (!out) | |||
544 | return IDNA_MALLOC_ERROR; | |||
545 | } | |||
546 | ||||
547 | start = end + 1; | |||
548 | } | |||
549 | while (*end); | |||
550 | ||||
551 | *output = out; | |||
552 | ||||
553 | return IDNA_SUCCESS; | |||
554 | } | |||
555 | ||||
556 | /** | |||
557 | * idna_to_ascii_8z: | |||
558 | * @input: zero terminated input UTF-8 string. | |||
559 | * @output: pointer to newly allocated output string. | |||
560 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
561 | * %IDNA_USE_STD3_ASCII_RULES. | |||
562 | * | |||
563 | * Convert UTF-8 domain name to ASCII string. The domain name may | |||
564 | * contain several labels, separated by dots. The output buffer must | |||
565 | * be deallocated by the caller. | |||
566 | * | |||
567 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
568 | **/ | |||
569 | int | |||
570 | idna_to_ascii_8z (const char *input, char **output, int flags) | |||
571 | { | |||
572 | uint32_t *ucs4; | |||
573 | size_t ucs4len; | |||
574 | int rc; | |||
575 | ||||
576 | ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); | |||
577 | if (!ucs4) | |||
578 | return IDNA_ICONV_ERROR; | |||
579 | ||||
580 | rc = idna_to_ascii_4z (ucs4, output, flags); | |||
581 | ||||
582 | free (ucs4); | |||
583 | ||||
584 | return rc; | |||
585 | ||||
586 | } | |||
587 | ||||
588 | /** | |||
589 | * idna_to_ascii_lz: | |||
590 | * @input: zero terminated input string encoded in the current locale's | |||
591 | * character set. | |||
592 | * @output: pointer to newly allocated output string. | |||
593 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
594 | * %IDNA_USE_STD3_ASCII_RULES. | |||
595 | * | |||
596 | * Convert domain name in the locale's encoding to ASCII string. The | |||
597 | * domain name may contain several labels, separated by dots. The | |||
598 | * output buffer must be deallocated by the caller. | |||
599 | * | |||
600 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
601 | **/ | |||
602 | int | |||
603 | idna_to_ascii_lz (const char *input, char **output, int flags) | |||
604 | { | |||
605 | char *utf8; | |||
606 | int rc; | |||
607 | ||||
608 | utf8 = stringprep_locale_to_utf8 (input); | |||
609 | if (!utf8) | |||
610 | return IDNA_ICONV_ERROR; | |||
611 | ||||
612 | rc = idna_to_ascii_8z (utf8, output, flags); | |||
613 | ||||
614 | free (utf8); | |||
615 | ||||
616 | return rc; | |||
617 | } | |||
618 | ||||
619 | /** | |||
620 | * idna_to_unicode_4z4z: | |||
621 | * @input: zero-terminated Unicode string. | |||
622 | * @output: pointer to newly allocated output Unicode string. | |||
623 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
624 | * %IDNA_USE_STD3_ASCII_RULES. | |||
625 | * | |||
626 | * Convert possibly ACE encoded domain name in UCS-4 format into a | |||
627 | * UCS-4 string. The domain name may contain several labels, | |||
628 | * separated by dots. The output buffer must be deallocated by the | |||
629 | * caller. | |||
630 | * | |||
631 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
632 | **/ | |||
633 | int | |||
634 | idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags) | |||
635 | { | |||
636 | const uint32_t *start = input; | |||
637 | const uint32_t *end; | |||
638 | uint32_t *buf; | |||
639 | size_t buflen; | |||
640 | uint32_t *out = NULL((void*)0); | |||
641 | size_t outlen = 0; | |||
642 | ||||
643 | *output = NULL((void*)0); | |||
644 | ||||
645 | do | |||
646 | { | |||
647 | end = start; | |||
648 | ||||
649 | for (; *end && !DOTP (*end)((*end) == 0x002E || (*end) == 0x3002 || (*end) == 0xFF0E || ( *end) == 0xFF61); end++) | |||
650 | ; | |||
651 | ||||
652 | buflen = (size_t) (end - start); | |||
653 | buf = malloc (sizeof (buf[0]) * (buflen + 1)); | |||
654 | if (!buf) | |||
655 | return IDNA_MALLOC_ERROR; | |||
| ||||
656 | ||||
657 | /* don't check return code as per specification! */ | |||
658 | idna_to_unicode_44i (start, (size_t) (end - start), | |||
659 | buf, &buflen, flags); | |||
660 | ||||
661 | if (out) | |||
662 | { | |||
663 | uint32_t *newp = realloc (out, | |||
664 | sizeof (out[0]) | |||
665 | * (outlen + 1 + buflen + 1)); | |||
666 | if (!newp) | |||
667 | { | |||
668 | free (buf); | |||
669 | free (out); | |||
670 | return IDNA_MALLOC_ERROR; | |||
671 | } | |||
672 | out = newp; | |||
673 | out[outlen++] = 0x002E; /* '.' (full stop) */ | |||
674 | memcpy (out + outlen, buf, sizeof (buf[0]) * buflen); | |||
675 | outlen += buflen; | |||
676 | out[outlen] = 0x0; | |||
677 | free (buf); | |||
678 | } | |||
679 | else | |||
680 | { | |||
681 | out = buf; | |||
682 | outlen = buflen; | |||
683 | out[outlen] = 0x0; | |||
684 | } | |||
685 | ||||
686 | start = end + 1; | |||
687 | } | |||
688 | while (*end); | |||
689 | ||||
690 | *output = out; | |||
691 | ||||
692 | return IDNA_SUCCESS; | |||
693 | } | |||
694 | ||||
695 | /** | |||
696 | * idna_to_unicode_8z4z: | |||
697 | * @input: zero-terminated UTF-8 string. | |||
698 | * @output: pointer to newly allocated output Unicode string. | |||
699 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
700 | * %IDNA_USE_STD3_ASCII_RULES. | |||
701 | * | |||
702 | * Convert possibly ACE encoded domain name in UTF-8 format into a | |||
703 | * UCS-4 string. The domain name may contain several labels, | |||
704 | * separated by dots. The output buffer must be deallocated by the | |||
705 | * caller. | |||
706 | * | |||
707 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
708 | **/ | |||
709 | int | |||
710 | idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags) | |||
711 | { | |||
712 | uint32_t *ucs4; | |||
713 | size_t ucs4len; | |||
714 | int rc; | |||
715 | ||||
716 | ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); | |||
717 | if (!ucs4) | |||
718 | return IDNA_ICONV_ERROR; | |||
719 | ||||
720 | rc = idna_to_unicode_4z4z (ucs4, output, flags); | |||
721 | free (ucs4); | |||
722 | ||||
723 | return rc; | |||
724 | } | |||
725 | ||||
726 | /** | |||
727 | * idna_to_unicode_8z8z: | |||
728 | * @input: zero-terminated UTF-8 string. | |||
729 | * @output: pointer to newly allocated output UTF-8 string. | |||
730 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
731 | * %IDNA_USE_STD3_ASCII_RULES. | |||
732 | * | |||
733 | * Convert possibly ACE encoded domain name in UTF-8 format into a | |||
734 | * UTF-8 string. The domain name may contain several labels, | |||
735 | * separated by dots. The output buffer must be deallocated by the | |||
736 | * caller. | |||
737 | * | |||
738 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
739 | **/ | |||
740 | int | |||
741 | idna_to_unicode_8z8z (const char *input, char **output, int flags) | |||
742 | { | |||
743 | uint32_t *ucs4; | |||
744 | int rc; | |||
745 | ||||
746 | rc = idna_to_unicode_8z4z (input, &ucs4, flags); | |||
747 | if (rc != IDNA_SUCCESS) | |||
748 | return rc; | |||
749 | ||||
750 | *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL((void*)0), NULL((void*)0)); | |||
751 | free (ucs4); | |||
752 | ||||
753 | if (!*output) | |||
754 | return IDNA_ICONV_ERROR; | |||
755 | ||||
756 | return IDNA_SUCCESS; | |||
757 | } | |||
758 | ||||
759 | /** | |||
760 | * idna_to_unicode_8zlz: | |||
761 | * @input: zero-terminated UTF-8 string. | |||
762 | * @output: pointer to newly allocated output string encoded in the | |||
763 | * current locale's character set. | |||
764 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
765 | * %IDNA_USE_STD3_ASCII_RULES. | |||
766 | * | |||
767 | * Convert possibly ACE encoded domain name in UTF-8 format into a | |||
768 | * string encoded in the current locale's character set. The domain | |||
769 | * name may contain several labels, separated by dots. The output | |||
770 | * buffer must be deallocated by the caller. | |||
771 | * | |||
772 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
773 | **/ | |||
774 | int | |||
775 | idna_to_unicode_8zlz (const char *input, char **output, int flags) | |||
776 | { | |||
777 | char *utf8; | |||
778 | int rc; | |||
779 | ||||
780 | rc = idna_to_unicode_8z8z (input, &utf8, flags); | |||
781 | if (rc != IDNA_SUCCESS) | |||
782 | return rc; | |||
783 | ||||
784 | *output = stringprep_utf8_to_locale (utf8); | |||
785 | free (utf8); | |||
786 | ||||
787 | if (!*output) | |||
788 | return IDNA_ICONV_ERROR; | |||
789 | ||||
790 | return IDNA_SUCCESS; | |||
791 | } | |||
792 | ||||
793 | /** | |||
794 | * idna_to_unicode_lzlz: | |||
795 | * @input: zero-terminated string encoded in the current locale's | |||
796 | * character set. | |||
797 | * @output: pointer to newly allocated output string encoded in the | |||
798 | * current locale's character set. | |||
799 | * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or | |||
800 | * %IDNA_USE_STD3_ASCII_RULES. | |||
801 | * | |||
802 | * Convert possibly ACE encoded domain name in the locale's character | |||
803 | * set into a string encoded in the current locale's character set. | |||
804 | * The domain name may contain several labels, separated by dots. The | |||
805 | * output buffer must be deallocated by the caller. | |||
806 | * | |||
807 | * Return value: Returns %IDNA_SUCCESS on success, or error code. | |||
808 | **/ | |||
809 | int | |||
810 | idna_to_unicode_lzlz (const char *input, char **output, int flags) | |||
811 | { | |||
812 | char *utf8; | |||
813 | int rc; | |||
814 | ||||
815 | utf8 = stringprep_locale_to_utf8 (input); | |||
816 | if (!utf8) | |||
| ||||
817 | return IDNA_ICONV_ERROR; | |||
818 | ||||
819 | rc = idna_to_unicode_8zlz (utf8, output, flags); | |||
820 | free (utf8); | |||
821 | ||||
822 | return rc; | |||
823 | } | |||
824 | ||||
825 | /** | |||
826 | * IDNA_ACE_PREFIX | |||
827 | * | |||
828 | * The IANA allocated prefix to use for IDNA. "xn--" | |||
829 | */ | |||
830 | ||||
831 | /** | |||
832 | * Idna_rc: | |||
833 | * @IDNA_SUCCESS: Successful operation. This value is guaranteed to | |||
834 | * always be zero, the remaining ones are only guaranteed to hold | |||
835 | * non-zero values, for logical comparison purposes. | |||
836 | * @IDNA_STRINGPREP_ERROR: Error during string preparation. | |||
837 | * @IDNA_PUNYCODE_ERROR: Error during punycode operation. | |||
838 | * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that | |||
839 | * the string contains non-LDH ASCII characters. | |||
840 | * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility | |||
841 | * with typo in earlier versions. | |||
842 | * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that | |||
843 | * the string contains a leading or trailing hyphen-minus (U+002D). | |||
844 | * @IDNA_INVALID_LENGTH: The final output string is not within the | |||
845 | * (inclusive) range 1 to 63 characters. | |||
846 | * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix | |||
847 | * (for ToUnicode). | |||
848 | * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output | |||
849 | * string does not equal the input. | |||
850 | * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for | |||
851 | * ToASCII). | |||
852 | * @IDNA_ICONV_ERROR: Could not convert string in locale encoding. | |||
853 | * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a | |||
854 | * fatal error). | |||
855 | * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used | |||
856 | * internally in libc). | |||
857 | * | |||
858 | * Enumerated return codes of idna_to_ascii_4i(), | |||
859 | * idna_to_unicode_44i() functions (and functions derived from those | |||
860 | * functions). The value 0 is guaranteed to always correspond to | |||
861 | * success. | |||
862 | */ | |||
863 | ||||
864 | ||||
865 | /** | |||
866 | * Idna_flags: | |||
867 | * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned | |||
868 | * Unicode code points. | |||
869 | * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3 | |||
870 | * rules (i.e., normal host name rules). | |||
871 | * | |||
872 | * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc. | |||
873 | */ |