libidn  1.28
stringprep.c
Go to the documentation of this file.
1 /* stringprep.c --- Core stringprep implementation.
2  Copyright (C) 2002-2013 Simon Josefsson
3 
4  This file is part of GNU Libidn.
5 
6  GNU Libidn is free software: you can redistribute it and/or
7  modify it under the terms of either:
8 
9  * the GNU Lesser General Public License as published by the Free
10  Software Foundation; either version 3 of the License, or (at
11  your option) any later version.
12 
13  or
14 
15  * the GNU General Public License as published by the Free
16  Software Foundation; either version 2 of the License, or (at
17  your option) any later version.
18 
19  or both in parallel, as here.
20 
21  GNU Libidn is distributed in the hope that it will be useful,
22  but WITHOUT ANY WARRANTY; without even the implied warranty of
23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24  General Public License for more details.
25 
26  You should have received copies of the GNU General Public License and
27  the GNU Lesser General Public License along with this program. If
28  not, see <http://www.gnu.org/licenses/>. */
29 
30 #ifdef HAVE_CONFIG_H
31 # include "config.h"
32 #endif
33 
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #include "stringprep.h"
38 
39 static ssize_t
40 stringprep_find_character_in_table (uint32_t ucs4,
41  const Stringprep_table_element * table)
42 {
43  ssize_t i;
44 
45  /* This is where typical uses of Libidn spends very close to all CPU
46  time and causes most cache misses. One could easily do a binary
47  search instead. Before rewriting this, I want hard evidence this
48  slowness is at all relevant in typical applications. (I don't
49  dispute optimization may improve matters significantly, I'm
50  mostly interested in having someone give real-world benchmark on
51  the impact of libidn.) */
52 
53  for (i = 0; table[i].start || table[i].end; i++)
54  if (ucs4 >= table[i].start &&
55  ucs4 <= (table[i].end ? table[i].end : table[i].start))
56  return i;
57 
58  return -1;
59 }
60 
61 static ssize_t
62 stringprep_find_string_in_table (uint32_t * ucs4,
63  size_t ucs4len,
64  size_t * tablepos,
65  const Stringprep_table_element * table)
66 {
67  size_t j;
68  ssize_t pos;
69 
70  for (j = 0; j < ucs4len; j++)
71  if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
72  {
73  if (tablepos)
74  *tablepos = pos;
75  return j;
76  }
77 
78  return -1;
79 }
80 
81 static int
82 stringprep_apply_table_to_string (uint32_t * ucs4,
83  size_t * ucs4len,
84  size_t maxucs4len,
85  const Stringprep_table_element * table)
86 {
87  ssize_t pos;
88  size_t i, maplen;
89 
90  while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
91  &i, table)) != -1)
92  {
93  for (maplen = STRINGPREP_MAX_MAP_CHARS;
94  maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
95  ;
96 
97  if (*ucs4len - 1 + maplen >= maxucs4len)
99 
100  memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
101  sizeof (uint32_t) * (*ucs4len - pos - 1));
102  memcpy (&ucs4[pos], table[i].map, sizeof (uint32_t) * maplen);
103  *ucs4len = *ucs4len - 1 + maplen;
104  }
105 
106  return STRINGPREP_OK;
107 }
108 
/* INVERTED(x) yields X with the most significant bit of an unsigned
   long cleared.  It is used below only for its truth value.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* UNAPPLICAPLEFLAGS(flags, profileflags) evaluates to 1 when either
     - INVERTED(profileflags) is zero, PROFILEFLAGS is non-zero, and
       PROFILEFLAGS has no bit in common with FLAGS; or
     - INVERTED(profileflags) is non-zero and PROFILEFLAGS has at
       least one bit in common with FLAGS;
   and to 0 otherwise.

   Every use of an argument is parenthesized so that compound
   expressions expand with the intended precedence.  Both arguments
   are evaluated more than once and must be free of side effects.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags) \
  ((!INVERTED (profileflags) && !((profileflags) & (flags)) \
    && (profileflags)) \
   || (INVERTED (profileflags) && ((profileflags) & (flags))))
113 
145 int
146 stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
148  const Stringprep_profile * profile)
149 {
150  size_t i, j;
151  ssize_t k;
152  size_t ucs4len = *len;
153  int rc;
154 
155  for (i = 0; profile[i].operation; i++)
156  {
157  switch (profile[i].operation)
158  {
159  case STRINGPREP_NFKC:
160  {
161  uint32_t *q = 0;
162 
163  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
164  break;
165 
166  if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
167  /* Profile requires NFKC, but callee asked for no NFKC. */
168  return STRINGPREP_FLAG_ERROR;
169 
170  q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
171  if (!q)
172  return STRINGPREP_NFKC_FAILED;
173 
174  for (ucs4len = 0; q[ucs4len]; ucs4len++)
175  ;
176 
177  if (ucs4len >= maxucs4len)
178  {
179  free (q);
181  }
182 
183  memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
184 
185  free (q);
186  }
187  break;
188 
190  k = stringprep_find_string_in_table (ucs4, ucs4len,
191  NULL, profile[i].table);
192  if (k != -1)
194  break;
195 
197  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
198  break;
199  if (flags & STRINGPREP_NO_UNASSIGNED)
200  {
201  k = stringprep_find_string_in_table
202  (ucs4, ucs4len, NULL, profile[i].table);
203  if (k != -1)
205  }
206  break;
207 
209  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
210  break;
211  rc = stringprep_apply_table_to_string
212  (ucs4, &ucs4len, maxucs4len, profile[i].table);
213  if (rc != STRINGPREP_OK)
214  return rc;
215  break;
216 
220  break;
221 
222  case STRINGPREP_BIDI:
223  {
224  int done_prohibited = 0;
225  int done_ral = 0;
226  int done_l = 0;
227  size_t contains_ral = SIZE_MAX;
228  size_t contains_l = SIZE_MAX;
229 
230  for (j = 0; profile[j].operation; j++)
231  if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
232  {
233  done_prohibited = 1;
234  k = stringprep_find_string_in_table (ucs4, ucs4len,
235  NULL,
236  profile[j].table);
237  if (k != -1)
239  }
240  else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
241  {
242  done_ral = 1;
243  if (stringprep_find_string_in_table
244  (ucs4, ucs4len, NULL, profile[j].table) != -1)
245  contains_ral = j;
246  }
247  else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
248  {
249  done_l = 1;
250  if (stringprep_find_string_in_table
251  (ucs4, ucs4len, NULL, profile[j].table) != -1)
252  contains_l = j;
253  }
254 
255  if (!done_prohibited || !done_ral || !done_l)
257 
258  if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX)
260 
261  if (contains_ral != SIZE_MAX)
262  {
263  if (!(stringprep_find_character_in_table
264  (ucs4[0], profile[contains_ral].table) != -1 &&
265  stringprep_find_character_in_table
266  (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
268  }
269  }
270  break;
271 
272  default:
274  break;
275  }
276  }
277 
278  *len = ucs4len;
279 
280  return STRINGPREP_OK;
281 }
282 
283 static int
284 stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
286  const Stringprep_profile * profile)
287 {
288  int rc;
289 
290  rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
291  if (rc != STRINGPREP_OK)
292  return rc;
293 
294  if (ucs4len >= maxucs4len)
296 
297  ucs4[ucs4len] = 0;
298 
299  return STRINGPREP_OK;
300 }
301 
326 int
327 stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
329  const Stringprep_profile * profile)
330 {
331  size_t ucs4len;
332 
333  for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
334  ;
335 
336  return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
337 }
338 
366 int
367 stringprep (char *in,
368  size_t maxlen,
370  const Stringprep_profile * profile)
371 {
372  int rc;
373  char *utf8 = NULL;
374  uint32_t *ucs4 = NULL;
375  size_t ucs4len, maxucs4len, adducs4len = 50;
376 
377  do
378  {
379  uint32_t *newp;
380 
381  free (ucs4);
382  ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
383  maxucs4len = ucs4len + adducs4len;
384  newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
385  if (!newp)
386  {
387  free (ucs4);
389  }
390  ucs4 = newp;
391 
392  rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
393  adducs4len += 50;
394  }
395  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
396  if (rc != STRINGPREP_OK)
397  {
398  free (ucs4);
399  return rc;
400  }
401 
402  utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
403  free (ucs4);
404  if (!utf8)
406 
407  if (strlen (utf8) >= maxlen)
408  {
409  free (utf8);
411  }
412 
413  strcpy (in, utf8); /* flawfinder: ignore */
414 
415  free (utf8);
416 
417  return STRINGPREP_OK;
418 }
419 
444 int
445 stringprep_profile (const char *in,
446  char **out,
447  const char *profile, Stringprep_profile_flags flags)
448 {
449  const Stringprep_profiles *p;
450  char *str = NULL;
451  size_t len = strlen (in) + 1;
452  int rc;
453 
454  for (p = &stringprep_profiles[0]; p->name; p++)
455  if (strcmp (p->name, profile) == 0)
456  break;
457 
458  if (!p || !p->name || !p->tables)
460 
461  do
462  {
463  free (str);
464  str = (char *) malloc (len);
465  if (str == NULL)
467 
468  strcpy (str, in);
469 
470  rc = stringprep (str, len, flags, p->tables);
471  len += 50;
472  }
473  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
474 
475  if (rc == STRINGPREP_OK)
476  *out = str;
477  else
478  free (str);
479 
480  return rc;
481 }
482