libidn  1.32
stringprep.c
Go to the documentation of this file.
1 /* stringprep.c --- Core stringprep implementation.
2  Copyright (C) 2002-2015 Simon Josefsson
3 
4  This file is part of GNU Libidn.
5 
6  GNU Libidn is free software: you can redistribute it and/or
7  modify it under the terms of either:
8 
9  * the GNU Lesser General Public License as published by the Free
10  Software Foundation; either version 3 of the License, or (at
11  your option) any later version.
12 
13  or
14 
15  * the GNU General Public License as published by the Free
16  Software Foundation; either version 2 of the License, or (at
17  your option) any later version.
18 
19  or both in parallel, as here.
20 
21  GNU Libidn is distributed in the hope that it will be useful,
22  but WITHOUT ANY WARRANTY; without even the implied warranty of
23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24  General Public License for more details.
25 
26  You should have received copies of the GNU General Public License and
27  the GNU Lesser General Public License along with this program. If
28  not, see <http://www.gnu.org/licenses/>. */
29 
30 #ifdef HAVE_CONFIG_H
31 # include "config.h"
32 #endif
33 
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #include "stringprep.h"
38 
39 static ssize_t
40 stringprep_find_character_in_table (uint32_t ucs4,
41  const Stringprep_table_element * table)
42 {
43  ssize_t i;
44 
45  /* This is where typical uses of Libidn spends very close to all CPU
46  time and causes most cache misses. One could easily do a binary
47  search instead. Before rewriting this, I want hard evidence this
48  slowness is at all relevant in typical applications. (I don't
49  dispute optimization may improve matters significantly, I'm
50  mostly interested in having someone give real-world benchmark on
51  the impact of libidn.) */
52 
53  for (i = 0; table[i].start || table[i].end; i++)
54  if (ucs4 >= table[i].start &&
55  ucs4 <= (table[i].end ? table[i].end : table[i].start))
56  return i;
57 
58  return -1;
59 }
60 
61 static ssize_t
62 stringprep_find_string_in_table (uint32_t * ucs4,
63  size_t ucs4len,
64  size_t * tablepos,
65  const Stringprep_table_element * table)
66 {
67  size_t j;
68  ssize_t pos;
69 
70  for (j = 0; j < ucs4len; j++)
71  if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
72  {
73  if (tablepos)
74  *tablepos = pos;
75  return j;
76  }
77 
78  return -1;
79 }
80 
81 static int
82 stringprep_apply_table_to_string (uint32_t * ucs4,
83  size_t * ucs4len,
84  size_t maxucs4len,
85  const Stringprep_table_element * table)
86 {
87  ssize_t pos;
88  size_t i, maplen;
89 
90  while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
91  &i, table)) != -1)
92  {
93  for (maplen = STRINGPREP_MAX_MAP_CHARS;
94  maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
95  ;
96 
97  if (*ucs4len - 1 + maplen >= maxucs4len)
99 
100  memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
101  sizeof (uint32_t) * (*ucs4len - pos - 1));
102  memcpy (&ucs4[pos], table[i].map, sizeof (uint32_t) * maplen);
103  *ucs4len = *ucs4len - 1 + maplen;
104  }
105 
106  return STRINGPREP_OK;
107 }
108 
/* INVERTED(x) yields X with the most significant bit of an unsigned
   long masked off, i.e. X & (ULONG_MAX >> 1).  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* UNAPPLICAPLEFLAGS(flags, profileflags) is non-zero when a profile
   step carrying PROFILEFLAGS is to be skipped for the caller supplied
   FLAGS.  Every argument use is parenthesized so that expressions such
   as `a | b' can be passed safely.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags) \
  ((!INVERTED(profileflags) && !((profileflags) & (flags)) && (profileflags)) || \
   ( INVERTED(profileflags) && ((profileflags) & (flags))))
113 
145 int
146 stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
148  const Stringprep_profile * profile)
149 {
150  size_t i, j;
151  ssize_t k;
152  size_t ucs4len = *len;
153  int rc;
154 
155  for (i = 0; profile[i].operation; i++)
156  {
157  switch (profile[i].operation)
158  {
159  case STRINGPREP_NFKC:
160  {
161  uint32_t *q = 0;
162 
163  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
164  break;
165 
166  if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
167  /* Profile requires NFKC, but callee asked for no NFKC. */
168  return STRINGPREP_FLAG_ERROR;
169 
170  q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
171  if (!q)
172  return STRINGPREP_NFKC_FAILED;
173 
174  for (ucs4len = 0; q[ucs4len]; ucs4len++)
175  ;
176 
177  if (ucs4len >= maxucs4len)
178  {
179  free (q);
181  }
182 
183  memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
184 
185  free (q);
186  }
187  break;
188 
190  k = stringprep_find_string_in_table (ucs4, ucs4len,
191  NULL, profile[i].table);
192  if (k != -1)
194  break;
195 
197  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
198  break;
199  if (flags & STRINGPREP_NO_UNASSIGNED)
200  {
201  k = stringprep_find_string_in_table
202  (ucs4, ucs4len, NULL, profile[i].table);
203  if (k != -1)
205  }
206  break;
207 
209  if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
210  break;
211  rc = stringprep_apply_table_to_string
212  (ucs4, &ucs4len, maxucs4len, profile[i].table);
213  if (rc != STRINGPREP_OK)
214  return rc;
215  break;
216 
220  break;
221 
222  case STRINGPREP_BIDI:
223  {
224  int done_prohibited = 0;
225  int done_ral = 0;
226  int done_l = 0;
227  size_t contains_ral = SIZE_MAX;
228  size_t contains_l = SIZE_MAX;
229 
230  for (j = 0; profile[j].operation; j++)
231  if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
232  {
233  done_prohibited = 1;
234  k = stringprep_find_string_in_table (ucs4, ucs4len,
235  NULL,
236  profile[j].table);
237  if (k != -1)
239  }
240  else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
241  {
242  done_ral = 1;
243  if (stringprep_find_string_in_table
244  (ucs4, ucs4len, NULL, profile[j].table) != -1)
245  contains_ral = j;
246  }
247  else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
248  {
249  done_l = 1;
250  if (stringprep_find_string_in_table
251  (ucs4, ucs4len, NULL, profile[j].table) != -1)
252  contains_l = j;
253  }
254 
255  if (!done_prohibited || !done_ral || !done_l)
257 
258  if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX)
260 
261  if (contains_ral != SIZE_MAX)
262  {
263  if (!(stringprep_find_character_in_table
264  (ucs4[0], profile[contains_ral].table) != -1 &&
265  stringprep_find_character_in_table
266  (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
268  }
269  }
270  break;
271 
272  default:
274  break;
275  }
276  }
277 
278  *len = ucs4len;
279 
280  return STRINGPREP_OK;
281 }
282 
283 static int
284 stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
286  const Stringprep_profile * profile)
287 {
288  int rc;
289 
290  rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
291  if (rc != STRINGPREP_OK)
292  return rc;
293 
294  if (ucs4len >= maxucs4len)
296 
297  ucs4[ucs4len] = 0;
298 
299  return STRINGPREP_OK;
300 }
301 
326 int
327 stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
329  const Stringprep_profile * profile)
330 {
331  size_t ucs4len;
332 
333  for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
334  ;
335 
336  return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
337 }
338 
366 int
367 stringprep (char *in,
368  size_t maxlen,
370  const Stringprep_profile * profile)
371 {
372  int rc;
373  char *utf8 = NULL;
374  uint32_t *ucs4 = NULL;
375  size_t ucs4len, maxucs4len, adducs4len = 50;
376 
377  do
378  {
379  uint32_t *newp;
380 
381  free (ucs4);
382  ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
383  if (ucs4 == NULL)
384  return STRINGPREP_ICONV_ERROR;
385  maxucs4len = ucs4len + adducs4len;
386  newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
387  if (!newp)
388  {
389  free (ucs4);
391  }
392  ucs4 = newp;
393 
394  rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
395  adducs4len += 50;
396  }
397  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
398  if (rc != STRINGPREP_OK)
399  {
400  free (ucs4);
401  return rc;
402  }
403 
404  utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
405  free (ucs4);
406  if (!utf8)
407  return STRINGPREP_ICONV_ERROR;
408 
409  if (strlen (utf8) >= maxlen)
410  {
411  free (utf8);
413  }
414 
415  strcpy (in, utf8); /* flawfinder: ignore */
416 
417  free (utf8);
418 
419  return STRINGPREP_OK;
420 }
421 
446 int
447 stringprep_profile (const char *in,
448  char **out,
449  const char *profile, Stringprep_profile_flags flags)
450 {
451  const Stringprep_profiles *p;
452  char *str = NULL;
453  size_t len = strlen (in) + 1;
454  int rc;
455 
456  for (p = &stringprep_profiles[0]; p->name; p++)
457  if (strcmp (p->name, profile) == 0)
458  break;
459 
460  if (!p || !p->name || !p->tables)
462 
463  do
464  {
465  free (str);
466  str = (char *) malloc (len);
467  if (str == NULL)
469 
470  strcpy (str, in);
471 
472  rc = stringprep (str, len, flags, p->tables);
473  len += 50;
474  }
475  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
476 
477  if (rc == STRINGPREP_OK)
478  *out = str;
479  else
480  free (str);
481 
482  return rc;
483 }
484 
Stringprep_profile_flags
Definition: stringprep.h:78
const Stringprep_profiles stringprep_profiles[]
Definition: profiles.c:33
int stringprep_4i(uint32_t *ucs4, size_t *len, size_t maxucs4len, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:146
#define UNAPPLICAPLEFLAGS(flags, profileflags)
Definition: stringprep.c:110
const Stringprep_profile * tables
Definition: stringprep.h:119
int stringprep_profile(const char *in, char **out, const char *profile, Stringprep_profile_flags flags)
Definition: stringprep.c:447
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
Definition: nfkc.c:1024
char * stringprep_ucs4_to_utf8(const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
Definition: nfkc.c:1057
int stringprep(char *in, size_t maxlen, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:367
#define STRINGPREP_MAX_MAP_CHARS
Definition: stringprep.h:98
const char * name
Definition: stringprep.h:118
uint32_t * stringprep_ucs4_nfkc_normalize(const uint32_t *str, ssize_t len)
Definition: nfkc.c:1104
uint32_t map[STRINGPREP_MAX_MAP_CHARS]
Definition: stringprep.h:104
int stringprep_4zi(uint32_t *ucs4, size_t maxucs4len, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:327
Stringprep_profile_steps operation
Definition: stringprep.h:110