libidn  1.42
stringprep.c
Go to the documentation of this file.
1 /* stringprep.c --- Core stringprep implementation.
2  Copyright (C) 2002-2024 Simon Josefsson
3 
4  This file is part of GNU Libidn.
5 
6  GNU Libidn is free software: you can redistribute it and/or
7  modify it under the terms of either:
8 
9  * the GNU Lesser General Public License as published by the Free
10  Software Foundation; either version 3 of the License, or (at
11  your option) any later version.
12 
13  or
14 
15  * the GNU General Public License as published by the Free
16  Software Foundation; either version 2 of the License, or (at
17  your option) any later version.
18 
19  or both in parallel, as here.
20 
21  GNU Libidn is distributed in the hope that it will be useful,
22  but WITHOUT ANY WARRANTY; without even the implied warranty of
23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24  General Public License for more details.
25 
26  You should have received copies of the GNU General Public License and
27  the GNU Lesser General Public License along with this program. If
28  not, see <https://www.gnu.org/licenses/>. */
29 
30 #ifdef HAVE_CONFIG_H
31 # include "config.h"
32 #endif
33 
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #include "stringprep.h"
38 
39 static int
40 _compare_table_element (const uint32_t *c, const Stringprep_table_element *e)
41 {
42  if (*c < e->start)
43  return -1;
44  if (*c > e->end)
45  return 1;
46  return 0;
47 }
48 
49 static ssize_t
50 stringprep_find_character_in_table (uint32_t ucs4,
51  const Stringprep_table_element *table,
52  size_t table_size)
53 {
54  /* This is where typical uses of Libidn spends very close to all CPU
55  time and causes most cache misses. One could easily do a binary
56  search instead. Before rewriting this, I want hard evidence this
57  slowness is at all relevant in typical applications. (I don't
58  dispute optimization may improve matters significantly, I'm
59  mostly interested in having someone give real-world benchmark on
60  the impact of libidn.)
61  *
62  * Answer (Tim Rühsen rockdaboot@gmx.de):
63  * Testing the fuzz corpora just once via make check takes ~54 billion CPU cycles.
64  * That is almost 20s on my Intel i3 3.1GHz !!!
65  * That even makes fuzzing almost useless, eating up CPU cycles for nothing.
66  *
67  * The bsearch() approach takes ~3 billion CPU cycles.
68  * Almost a factor of 20 faster (but still pretty slow).
69  * There are still ~2 million calls to bsearch() which make ~30% of CPU time used.
70  * Most time is spent in _g_utf8_normalize_wc().
71 
72  ssize_t i;
73 
74  for (i = 0; table[i].start || table[i].end; i++)
75  if (ucs4 >= table[i].start &&
76  ucs4 <= (table[i].end ? table[i].end : table[i].start))
77  return i;
78  */
79 
80  const Stringprep_table_element *p =
81  bsearch (&ucs4, table, table_size, sizeof (Stringprep_table_element),
82  (int (*)(const void *, const void *)) _compare_table_element);
83 
84  return p ? (p - table) : -1;
85 }
86 
87 static ssize_t
88 stringprep_find_string_in_table (uint32_t *ucs4,
89  size_t ucs4len,
90  size_t *tablepos,
91  const Stringprep_table_element *table,
92  size_t table_size)
93 {
94  size_t j;
95  ssize_t pos;
96 
97  for (j = 0; j < ucs4len; j++)
98  if ((pos =
99  stringprep_find_character_in_table (ucs4[j], table,
100  table_size)) != -1)
101  {
102  if (tablepos)
103  *tablepos = pos;
104  return j;
105  }
106 
107  return -1;
108 }
109 
110 static int
111 stringprep_apply_table_to_string (uint32_t *ucs4,
112  size_t *ucs4len,
113  size_t maxucs4len,
114  const Stringprep_table_element *table,
115  size_t table_size)
116 {
117  ssize_t pos;
118  size_t i, maplen;
119  uint32_t *src = ucs4; /* points to unprocessed data */
120  size_t srclen = *ucs4len; /* length of unprocessed data */
121 
122  while ((pos = stringprep_find_string_in_table (src, srclen,
123  &i, table,
124  table_size)) != -1)
125  {
126  for (maplen = STRINGPREP_MAX_MAP_CHARS;
127  maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
128  ;
129 
130  if (*ucs4len - 1 + maplen >= maxucs4len)
132 
133  memmove (src + pos + maplen, src + pos + 1,
134  sizeof (uint32_t) * (srclen - pos - 1));
135  memcpy (src + pos, table[i].map, sizeof (uint32_t) * maplen);
136  *ucs4len = *ucs4len - 1 + maplen;
137  src += pos + maplen;
138  srclen -= pos + 1;
139  }
140 
141  return STRINGPREP_OK;
142 }
143 
144 #define INVERTED(x) ((x) & ((~0UL) >> 1))
145 #define UNAPPLICAPLEFLAGS(flags, profileflags) \
146  ((!INVERTED(profileflags) && !(profileflags & flags) && profileflags) || \
147  ( INVERTED(profileflags) && (profileflags & flags)))
148 
180 int
181 stringprep_4i (uint32_t *ucs4, size_t *len, size_t maxucs4len,
183  const Stringprep_profile *profile)
184 {
185  size_t i, j;
186  ssize_t k;
187  size_t ucs4len = *len;
188  int rc;
189 
190  for (i = 0; profile[i].operation; i++)
191  {
192  switch (profile[i].operation)
193  {
194  case STRINGPREP_NFKC:
195  {
196  uint32_t *q = 0;
197 
198  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
199  break;
200 
201  if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
202  /* Profile requires NFKC, but callee asked for no NFKC. */
203  return STRINGPREP_FLAG_ERROR;
204 
205  q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
206  if (!q)
207  return STRINGPREP_NFKC_FAILED;
208 
209  for (ucs4len = 0; q[ucs4len]; ucs4len++)
210  ;
211 
212  if (ucs4len >= maxucs4len)
213  {
214  free (q);
216  }
217 
218  memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
219 
220  free (q);
221  }
222  break;
223 
225  k = stringprep_find_string_in_table (ucs4, ucs4len,
226  NULL, profile[i].table,
227  profile[i].table_size);
228  if (k != -1)
230  break;
231 
233  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
234  break;
235  if (flags & STRINGPREP_NO_UNASSIGNED)
236  {
237  k = stringprep_find_string_in_table
238  (ucs4, ucs4len, NULL, profile[i].table,
239  profile[i].table_size);
240  if (k != -1)
242  }
243  break;
244 
246  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
247  break;
248  rc = stringprep_apply_table_to_string
249  (ucs4, &ucs4len, maxucs4len, profile[i].table,
250  profile[i].table_size);
251  if (rc != STRINGPREP_OK)
252  return rc;
253  break;
254 
258  break;
259 
260  case STRINGPREP_BIDI:
261  {
262  int done_prohibited = 0;
263  int done_ral = 0;
264  int done_l = 0;
265  size_t contains_ral = SIZE_MAX;
266  size_t contains_l = SIZE_MAX;
267 
268  for (j = 0; profile[j].operation; j++)
269  if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
270  {
271  done_prohibited = 1;
272  k = stringprep_find_string_in_table (ucs4, ucs4len,
273  NULL,
274  profile[j].table,
275  profile[j].table_size);
276  if (k != -1)
278  }
279  else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
280  {
281  done_ral = 1;
282  if (stringprep_find_string_in_table
283  (ucs4, ucs4len, NULL, profile[j].table,
284  profile[j].table_size) != -1)
285  contains_ral = j;
286  }
287  else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
288  {
289  done_l = 1;
290  if (stringprep_find_string_in_table
291  (ucs4, ucs4len, NULL, profile[j].table,
292  profile[j].table_size) != -1)
293  contains_l = j;
294  }
295 
296  if (!done_prohibited || !done_ral || !done_l)
298 
299  if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX)
301 
302  if (contains_ral != SIZE_MAX)
303  {
304  if (!(stringprep_find_character_in_table
305  (ucs4[0], profile[contains_ral].table,
306  profile[contains_ral].table_size) != -1
307  &&
308  stringprep_find_character_in_table (ucs4[ucs4len - 1],
309  profile
310  [contains_ral].table,
311  profile
312  [contains_ral].table_size)
313  != -1))
315  }
316  }
317  break;
318 
319  default:
321  break;
322  }
323  }
324 
325  *len = ucs4len;
326 
327  return STRINGPREP_OK;
328 }
329 
330 static int
331 stringprep_4zi_1 (uint32_t *ucs4, size_t ucs4len, size_t maxucs4len,
333  const Stringprep_profile *profile)
334 {
335  int rc;
336 
337  rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
338  if (rc != STRINGPREP_OK)
339  return rc;
340 
341  if (ucs4len >= maxucs4len)
343 
344  ucs4[ucs4len] = 0;
345 
346  return STRINGPREP_OK;
347 }
348 
373 int
374 stringprep_4zi (uint32_t *ucs4, size_t maxucs4len,
376  const Stringprep_profile *profile)
377 {
378  size_t ucs4len;
379 
380  for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
381  ;
382 
383  return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
384 }
385 
413 int
414 stringprep (char *in,
415  size_t maxlen,
416  Stringprep_profile_flags flags, const Stringprep_profile *profile)
417 {
418  int rc;
419  char *utf8 = NULL;
420  uint32_t *ucs4 = NULL;
421  size_t ucs4len, maxucs4len, adducs4len = strlen (in) / 10 + 1;
422 
423  do
424  {
425  uint32_t *newp;
426 
427  free (ucs4);
428  ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
429  if (ucs4 == NULL)
430  return STRINGPREP_ICONV_ERROR;
431  maxucs4len = ucs4len + adducs4len;
432  newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
433  if (!newp)
434  {
435  free (ucs4);
437  }
438  ucs4 = newp;
439 
440  rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
441  adducs4len *= 2;
442  }
443  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
444  if (rc != STRINGPREP_OK)
445  {
446  free (ucs4);
447  return rc;
448  }
449 
450  utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
451  free (ucs4);
452  if (!utf8)
453  return STRINGPREP_ICONV_ERROR;
454 
455  if (strlen (utf8) >= maxlen)
456  {
457  free (utf8);
459  }
460 
461  strcpy (in, utf8); /* flawfinder: ignore */
462 
463  free (utf8);
464 
465  return STRINGPREP_OK;
466 }
467 
492 int
493 stringprep_profile (const char *in,
494  char **out,
495  const char *profile, Stringprep_profile_flags flags)
496 {
497  const Stringprep_profiles *p;
498  char *str = NULL;
499  size_t len = strlen (in) + 1, addlen = len / 10 + 1;
500  int rc;
501 
502  for (p = &stringprep_profiles[0]; p->name; p++)
503  if (strcmp (p->name, profile) == 0)
504  break;
505 
506  if (!p || !p->name || !p->tables)
508 
509  do
510  {
511  free (str);
512  str = (char *) malloc (len);
513  if (str == NULL)
515 
516  strcpy (str, in);
517 
518  rc = stringprep (str, len, flags, p->tables);
519  len += addlen;
520  addlen *= 2;
521  }
522  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
523 
524  if (rc == STRINGPREP_OK)
525  *out = str;
526  else
527  free (str);
528 
529  return rc;
530 }
531 
char * stringprep_ucs4_to_utf8(const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
Definition: nfkc.c:1039
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
Definition: nfkc.c:1006
uint32_t * stringprep_ucs4_nfkc_normalize(const uint32_t *str, ssize_t len)
Definition: nfkc.c:1096
const Stringprep_profiles stringprep_profiles[]
Definition: profiles.c:34
int stringprep_4zi(uint32_t *ucs4, size_t maxucs4len, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:374
int stringprep_4i(uint32_t *ucs4, size_t *len, size_t maxucs4len, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:181
int stringprep_profile(const char *in, char **out, const char *profile, Stringprep_profile_flags flags)
Definition: stringprep.c:493
#define UNAPPLICAPLEFLAGS(flags, profileflags)
Definition: stringprep.c:145
int stringprep(char *in, size_t maxlen, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:414
@ STRINGPREP_BIDI_PROHIBIT_TABLE
Definition: stringprep.h:101
@ STRINGPREP_BIDI_RAL_TABLE
Definition: stringprep.h:102
@ STRINGPREP_NFKC
Definition: stringprep.h:96
@ STRINGPREP_PROHIBIT_TABLE
Definition: stringprep.h:100
@ STRINGPREP_UNASSIGNED_TABLE
Definition: stringprep.h:99
@ STRINGPREP_BIDI
Definition: stringprep.h:97
@ STRINGPREP_MAP_TABLE
Definition: stringprep.h:98
@ STRINGPREP_BIDI_L_TABLE
Definition: stringprep.h:103
Stringprep_profile_flags
Definition: stringprep.h:87
@ STRINGPREP_NO_NFKC
Definition: stringprep.h:88
@ STRINGPREP_NO_UNASSIGNED
Definition: stringprep.h:90
@ STRINGPREP_UNKNOWN_PROFILE
Definition: stringprep.h:78
@ STRINGPREP_NFKC_FAILED
Definition: stringprep.h:81
@ STRINGPREP_TOO_SMALL_BUFFER
Definition: stringprep.h:75
@ STRINGPREP_ICONV_ERROR
Definition: stringprep.h:79
@ STRINGPREP_MALLOC_ERROR
Definition: stringprep.h:82
@ STRINGPREP_FLAG_ERROR
Definition: stringprep.h:77
@ STRINGPREP_OK
Definition: stringprep.h:67
@ STRINGPREP_CONTAINS_UNASSIGNED
Definition: stringprep.h:69
@ STRINGPREP_CONTAINS_PROHIBITED
Definition: stringprep.h:70
@ STRINGPREP_BIDI_CONTAINS_PROHIBITED
Definition: stringprep.h:73
@ STRINGPREP_BIDI_BOTH_L_AND_RAL
Definition: stringprep.h:71
@ STRINGPREP_BIDI_LEADTRAIL_NOT_RAL
Definition: stringprep.h:72
@ STRINGPREP_PROFILE_ERROR
Definition: stringprep.h:76
#define STRINGPREP_MAX_MAP_CHARS
Definition: stringprep.h:106
const char * name
Definition: stringprep.h:171
const Stringprep_profile * tables
Definition: stringprep.h:172
uint32_t map[STRINGPREP_MAX_MAP_CHARS]
Definition: stringprep.h:135
Stringprep_profile_steps operation
Definition: stringprep.h:150