libidn  1.38
stringprep.c
Go to the documentation of this file.
1 /* stringprep.c --- Core stringprep implementation.
2  Copyright (C) 2002-2021 Simon Josefsson
3 
4  This file is part of GNU Libidn.
5 
6  GNU Libidn is free software: you can redistribute it and/or
7  modify it under the terms of either:
8 
9  * the GNU Lesser General Public License as published by the Free
10  Software Foundation; either version 3 of the License, or (at
11  your option) any later version.
12 
13  or
14 
15  * the GNU General Public License as published by the Free
16  Software Foundation; either version 2 of the License, or (at
17  your option) any later version.
18 
19  or both in parallel, as here.
20 
21  GNU Libidn is distributed in the hope that it will be useful,
22  but WITHOUT ANY WARRANTY; without even the implied warranty of
23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24  General Public License for more details.
25 
26  You should have received copies of the GNU General Public License and
27  the GNU Lesser General Public License along with this program. If
28  not, see <http://www.gnu.org/licenses/>. */
29 
30 #ifdef HAVE_CONFIG_H
31 # include "config.h"
32 #endif
33 
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #include "stringprep.h"
38 
39 static int
40 _compare_table_element (const uint32_t * c,
41  const Stringprep_table_element * e)
42 {
43  if (*c < e->start)
44  return -1;
45  if (*c > e->end)
46  return 1;
47  return 0;
48 }
49 
50 static ssize_t
51 stringprep_find_character_in_table (uint32_t ucs4,
52  const Stringprep_table_element * table,
53  size_t table_size)
54 {
55  /* This is where typical uses of Libidn spends very close to all CPU
56  time and causes most cache misses. One could easily do a binary
57  search instead. Before rewriting this, I want hard evidence this
58  slowness is at all relevant in typical applications. (I don't
59  dispute optimization may improve matters significantly, I'm
60  mostly interested in having someone give real-world benchmark on
61  the impact of libidn.)
62  *
63  * Answer (Tim Rühsen rockdaboot@gmx.de):
64  * Testing the fuzz corpora just once via make check takes ~54 billion CPU cycles.
65  * That is almost 20s on my Intel i3 3.1GHz !!!
66  * That even makes fuzzing almost useless, eating up CPU cycles for nothing.
67  *
68  * The bsearch() approach takes ~3 billion CPU cycles.
69  * Almost a factor of 20 faster (but still pretty slow).
70  * There are still ~2 million calls to bsearch() which make ~30% of CPU time used.
71  * Most time is spent in _g_utf8_normalize_wc().
72 
73  ssize_t i;
74 
75  for (i = 0; table[i].start || table[i].end; i++)
76  if (ucs4 >= table[i].start &&
77  ucs4 <= (table[i].end ? table[i].end : table[i].start))
78  return i;
79  */
80 
81  const Stringprep_table_element *p =
82  bsearch (&ucs4, table, table_size, sizeof (Stringprep_table_element),
83  (int (*)(const void *, const void *)) _compare_table_element);
84 
85  return p ? (p - table) : -1;
86 }
87 
88 static ssize_t
89 stringprep_find_string_in_table (uint32_t * ucs4,
90  size_t ucs4len,
91  size_t *tablepos,
92  const Stringprep_table_element * table,
93  size_t table_size)
94 {
95  size_t j;
96  ssize_t pos;
97 
98  for (j = 0; j < ucs4len; j++)
99  if ((pos =
100  stringprep_find_character_in_table (ucs4[j], table,
101  table_size)) != -1)
102  {
103  if (tablepos)
104  *tablepos = pos;
105  return j;
106  }
107 
108  return -1;
109 }
110 
111 static int
112 stringprep_apply_table_to_string (uint32_t * ucs4,
113  size_t *ucs4len,
114  size_t maxucs4len,
115  const Stringprep_table_element * table,
116  size_t table_size)
117 {
118  ssize_t pos;
119  size_t i, maplen;
120  uint32_t *src = ucs4; /* points to unprocessed data */
121  size_t srclen = *ucs4len; /* length of unprocessed data */
122 
123  while ((pos = stringprep_find_string_in_table (src, srclen,
124  &i, table,
125  table_size)) != -1)
126  {
127  for (maplen = STRINGPREP_MAX_MAP_CHARS;
128  maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
129  ;
130 
131  if (*ucs4len - 1 + maplen >= maxucs4len)
133 
134  memmove (src + pos + maplen, src + pos + 1,
135  sizeof (uint32_t) * (srclen - pos - 1));
136  memcpy (src + pos, table[i].map, sizeof (uint32_t) * maplen);
137  *ucs4len = *ucs4len - 1 + maplen;
138  src += pos + maplen;
139  srclen -= pos + 1;
140  }
141 
142  return STRINGPREP_OK;
143 }
144 
145 #define INVERTED(x) ((x) & ((~0UL) >> 1))
146 #define UNAPPLICAPLEFLAGS(flags, profileflags) \
147  ((!INVERTED(profileflags) && !(profileflags & flags) && profileflags) || \
148  ( INVERTED(profileflags) && (profileflags & flags)))
149 
181 int
182 stringprep_4i (uint32_t * ucs4, size_t *len, size_t maxucs4len,
184  const Stringprep_profile * profile)
185 {
186  size_t i, j;
187  ssize_t k;
188  size_t ucs4len = *len;
189  int rc;
190 
191  for (i = 0; profile[i].operation; i++)
192  {
193  switch (profile[i].operation)
194  {
195  case STRINGPREP_NFKC:
196  {
197  uint32_t *q = 0;
198 
199  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
200  break;
201 
202  if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
203  /* Profile requires NFKC, but callee asked for no NFKC. */
204  return STRINGPREP_FLAG_ERROR;
205 
206  q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
207  if (!q)
208  return STRINGPREP_NFKC_FAILED;
209 
210  for (ucs4len = 0; q[ucs4len]; ucs4len++)
211  ;
212 
213  if (ucs4len >= maxucs4len)
214  {
215  free (q);
217  }
218 
219  memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
220 
221  free (q);
222  }
223  break;
224 
226  k = stringprep_find_string_in_table (ucs4, ucs4len,
227  NULL, profile[i].table,
228  profile[i].table_size);
229  if (k != -1)
231  break;
232 
234  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
235  break;
236  if (flags & STRINGPREP_NO_UNASSIGNED)
237  {
238  k = stringprep_find_string_in_table
239  (ucs4, ucs4len, NULL, profile[i].table,
240  profile[i].table_size);
241  if (k != -1)
243  }
244  break;
245 
247  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
248  break;
249  rc = stringprep_apply_table_to_string
250  (ucs4, &ucs4len, maxucs4len, profile[i].table,
251  profile[i].table_size);
252  if (rc != STRINGPREP_OK)
253  return rc;
254  break;
255 
259  break;
260 
261  case STRINGPREP_BIDI:
262  {
263  int done_prohibited = 0;
264  int done_ral = 0;
265  int done_l = 0;
266  size_t contains_ral = SIZE_MAX;
267  size_t contains_l = SIZE_MAX;
268 
269  for (j = 0; profile[j].operation; j++)
270  if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
271  {
272  done_prohibited = 1;
273  k = stringprep_find_string_in_table (ucs4, ucs4len,
274  NULL,
275  profile[j].table,
276  profile[j].table_size);
277  if (k != -1)
279  }
280  else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
281  {
282  done_ral = 1;
283  if (stringprep_find_string_in_table
284  (ucs4, ucs4len, NULL, profile[j].table,
285  profile[j].table_size) != -1)
286  contains_ral = j;
287  }
288  else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
289  {
290  done_l = 1;
291  if (stringprep_find_string_in_table
292  (ucs4, ucs4len, NULL, profile[j].table,
293  profile[j].table_size) != -1)
294  contains_l = j;
295  }
296 
297  if (!done_prohibited || !done_ral || !done_l)
299 
300  if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX)
302 
303  if (contains_ral != SIZE_MAX)
304  {
305  if (!(stringprep_find_character_in_table
306  (ucs4[0], profile[contains_ral].table,
307  profile[contains_ral].table_size) != -1
308  &&
309  stringprep_find_character_in_table (ucs4[ucs4len - 1],
310  profile
311  [contains_ral].table,
312  profile
313  [contains_ral].table_size)
314  != -1))
316  }
317  }
318  break;
319 
320  default:
322  break;
323  }
324  }
325 
326  *len = ucs4len;
327 
328  return STRINGPREP_OK;
329 }
330 
331 static int
332 stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
334  const Stringprep_profile * profile)
335 {
336  int rc;
337 
338  rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
339  if (rc != STRINGPREP_OK)
340  return rc;
341 
342  if (ucs4len >= maxucs4len)
344 
345  ucs4[ucs4len] = 0;
346 
347  return STRINGPREP_OK;
348 }
349 
374 int
375 stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
377  const Stringprep_profile * profile)
378 {
379  size_t ucs4len;
380 
381  for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
382  ;
383 
384  return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
385 }
386 
414 int
415 stringprep (char *in,
416  size_t maxlen,
418  const Stringprep_profile * profile)
419 {
420  int rc;
421  char *utf8 = NULL;
422  uint32_t *ucs4 = NULL;
423  size_t ucs4len, maxucs4len, adducs4len = strlen (in) / 10 + 1;
424 
425  do
426  {
427  uint32_t *newp;
428 
429  free (ucs4);
430  ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
431  if (ucs4 == NULL)
432  return STRINGPREP_ICONV_ERROR;
433  maxucs4len = ucs4len + adducs4len;
434  newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
435  if (!newp)
436  {
437  free (ucs4);
439  }
440  ucs4 = newp;
441 
442  rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
443  adducs4len *= 2;
444  }
445  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
446  if (rc != STRINGPREP_OK)
447  {
448  free (ucs4);
449  return rc;
450  }
451 
452  utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
453  free (ucs4);
454  if (!utf8)
455  return STRINGPREP_ICONV_ERROR;
456 
457  if (strlen (utf8) >= maxlen)
458  {
459  free (utf8);
461  }
462 
463  strcpy (in, utf8); /* flawfinder: ignore */
464 
465  free (utf8);
466 
467  return STRINGPREP_OK;
468 }
469 
494 int
495 stringprep_profile (const char *in,
496  char **out,
497  const char *profile, Stringprep_profile_flags flags)
498 {
499  const Stringprep_profiles *p;
500  char *str = NULL;
501  size_t len = strlen (in) + 1, addlen = len / 10 + 1;
502  int rc;
503 
504  for (p = &stringprep_profiles[0]; p->name; p++)
505  if (strcmp (p->name, profile) == 0)
506  break;
507 
508  if (!p || !p->name || !p->tables)
510 
511  do
512  {
513  free (str);
514  str = (char *) malloc (len);
515  if (str == NULL)
517 
518  strcpy (str, in);
519 
520  rc = stringprep (str, len, flags, p->tables);
521  len += addlen;
522  addlen *= 2;
523  }
524  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
525 
526  if (rc == STRINGPREP_OK)
527  *out = str;
528  else
529  free (str);
530 
531  return rc;
532 }
533 
char * stringprep_ucs4_to_utf8(const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
Definition: nfkc.c:1040
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
Definition: nfkc.c:1007
uint32_t * stringprep_ucs4_nfkc_normalize(const uint32_t *str, ssize_t len)
Definition: nfkc.c:1098
const Stringprep_profiles stringprep_profiles[]
Definition: profiles.c:34
int stringprep_4zi(uint32_t *ucs4, size_t maxucs4len, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:375
int stringprep_4i(uint32_t *ucs4, size_t *len, size_t maxucs4len, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:182
int stringprep_profile(const char *in, char **out, const char *profile, Stringprep_profile_flags flags)
Definition: stringprep.c:495
#define UNAPPLICAPLEFLAGS(flags, profileflags)
Definition: stringprep.c:146
int stringprep(char *in, size_t maxlen, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:415
@ STRINGPREP_BIDI_PROHIBIT_TABLE
Definition: stringprep.h:101
@ STRINGPREP_BIDI_RAL_TABLE
Definition: stringprep.h:102
@ STRINGPREP_NFKC
Definition: stringprep.h:96
@ STRINGPREP_PROHIBIT_TABLE
Definition: stringprep.h:100
@ STRINGPREP_UNASSIGNED_TABLE
Definition: stringprep.h:99
@ STRINGPREP_BIDI
Definition: stringprep.h:97
@ STRINGPREP_MAP_TABLE
Definition: stringprep.h:98
@ STRINGPREP_BIDI_L_TABLE
Definition: stringprep.h:103
Stringprep_profile_flags
Definition: stringprep.h:87
@ STRINGPREP_NO_NFKC
Definition: stringprep.h:88
@ STRINGPREP_NO_UNASSIGNED
Definition: stringprep.h:90
@ STRINGPREP_UNKNOWN_PROFILE
Definition: stringprep.h:78
@ STRINGPREP_NFKC_FAILED
Definition: stringprep.h:81
@ STRINGPREP_TOO_SMALL_BUFFER
Definition: stringprep.h:75
@ STRINGPREP_ICONV_ERROR
Definition: stringprep.h:79
@ STRINGPREP_MALLOC_ERROR
Definition: stringprep.h:82
@ STRINGPREP_FLAG_ERROR
Definition: stringprep.h:77
@ STRINGPREP_OK
Definition: stringprep.h:67
@ STRINGPREP_CONTAINS_UNASSIGNED
Definition: stringprep.h:69
@ STRINGPREP_CONTAINS_PROHIBITED
Definition: stringprep.h:70
@ STRINGPREP_BIDI_CONTAINS_PROHIBITED
Definition: stringprep.h:73
@ STRINGPREP_BIDI_BOTH_L_AND_RAL
Definition: stringprep.h:71
@ STRINGPREP_BIDI_LEADTRAIL_NOT_RAL
Definition: stringprep.h:72
@ STRINGPREP_PROFILE_ERROR
Definition: stringprep.h:76
#define STRINGPREP_MAX_MAP_CHARS
Definition: stringprep.h:106
const char * name
Definition: stringprep.h:171
const Stringprep_profile * tables
Definition: stringprep.h:172
uint32_t map[STRINGPREP_MAX_MAP_CHARS]
Definition: stringprep.h:135
Stringprep_profile_steps operation
Definition: stringprep.h:150