|
libidn
1.25
|
00001 /* stringprep.c --- Core stringprep implementation. 00002 Copyright (C) 2002-2012 Simon Josefsson 00003 00004 This file is part of GNU Libidn. 00005 00006 GNU Libidn is free software: you can redistribute it and/or 00007 modify it under the terms of either: 00008 00009 * the GNU Lesser General Public License as published by the Free 00010 Software Foundation; either version 3 of the License, or (at 00011 your option) any later version. 00012 00013 or 00014 00015 * the GNU General Public License as published by the Free 00016 Software Foundation; either version 2 of the License, or (at 00017 your option) any later version. 00018 00019 or both in parallel, as here. 00020 00021 GNU Libidn is distributed in the hope that it will be useful, 00022 but WITHOUT ANY WARRANTY; without even the implied warranty of 00023 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00024 General Public License for more details. 00025 00026 You should have received copies of the GNU General Public License and 00027 the GNU Lesser General Public License along with this program. If 00028 not, see <http://www.gnu.org/licenses/>. */ 00029 00030 #ifdef HAVE_CONFIG_H 00031 # include "config.h" 00032 #endif 00033 00034 #include <stdlib.h> 00035 #include <string.h> 00036 00037 #include "stringprep.h" 00038 00039 static ssize_t 00040 stringprep_find_character_in_table (uint32_t ucs4, 00041 const Stringprep_table_element * table) 00042 { 00043 ssize_t i; 00044 00045 /* This is where typical uses of Libidn spends very close to all CPU 00046 time and causes most cache misses. One could easily do a binary 00047 search instead. Before rewriting this, I want hard evidence this 00048 slowness is at all relevant in typical applications. (I don't 00049 dispute optimization may improve matters significantly, I'm 00050 mostly interested in having someone give real-world benchmark on 00051 the impact of libidn.) */ 00052 00053 for (i = 0; table[i].start || table[i].end; i++) 00054 if (ucs4 >= table[i].start && 00055 ucs4 <= (table[i].end ? table[i].end : table[i].start)) 00056 return i; 00057 00058 return -1; 00059 } 00060 00061 static ssize_t 00062 stringprep_find_string_in_table (uint32_t * ucs4, 00063 size_t ucs4len, 00064 size_t * tablepos, 00065 const Stringprep_table_element * table) 00066 { 00067 size_t j; 00068 ssize_t pos; 00069 00070 for (j = 0; j < ucs4len; j++) 00071 if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1) 00072 { 00073 if (tablepos) 00074 *tablepos = pos; 00075 return j; 00076 } 00077 00078 return -1; 00079 } 00080 00081 static int 00082 stringprep_apply_table_to_string (uint32_t * ucs4, 00083 size_t * ucs4len, 00084 size_t maxucs4len, 00085 const Stringprep_table_element * table) 00086 { 00087 ssize_t pos; 00088 size_t i, maplen; 00089 00090 while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len, 00091 &i, table)) != -1) 00092 { 00093 for (maplen = STRINGPREP_MAX_MAP_CHARS; 00094 maplen > 0 && table[i].map[maplen - 1] == 0; maplen--) 00095 ; 00096 00097 if (*ucs4len - 1 + maplen >= maxucs4len) 00098 return STRINGPREP_TOO_SMALL_BUFFER; 00099 00100 memmove (&ucs4[pos + maplen], &ucs4[pos + 1], 00101 sizeof (uint32_t) * (*ucs4len - pos - 1)); 00102 memcpy (&ucs4[pos], table[i].map, sizeof (uint32_t) * maplen); 00103 *ucs4len = *ucs4len - 1 + maplen; 00104 } 00105 00106 return STRINGPREP_OK; 00107 } 00108 00109 #define INVERTED(x) ((x) & ((~0UL) >> 1)) 00110 #define UNAPPLICAPLEFLAGS(flags, profileflags) \ 00111 ((!INVERTED(profileflags) && !(profileflags & flags) && profileflags) || \ 00112 ( INVERTED(profileflags) && (profileflags & flags))) 00113 00145 int 00146 stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len, 00147 Stringprep_profile_flags flags, 00148 const Stringprep_profile * profile) 00149 { 00150 size_t i, j; 00151 ssize_t k; 00152 size_t ucs4len = *len; 00153 int rc; 00154 00155 for (i = 0; profile[i].operation; i++) 00156 { 00157 switch (profile[i].operation) 00158 { 00159 case STRINGPREP_NFKC: 00160 { 00161 uint32_t *q = 0; 00162 00163 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags)) 00164 break; 00165 00166 if (flags & STRINGPREP_NO_NFKC && !profile[i].flags) 00167 /* Profile requires NFKC, but callee asked for no NFKC. */ 00168 return STRINGPREP_FLAG_ERROR; 00169 00170 q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len); 00171 if (!q) 00172 return STRINGPREP_NFKC_FAILED; 00173 00174 for (ucs4len = 0; q[ucs4len]; ucs4len++) 00175 ; 00176 00177 if (ucs4len >= maxucs4len) 00178 { 00179 free (q); 00180 return STRINGPREP_TOO_SMALL_BUFFER; 00181 } 00182 00183 memcpy (ucs4, q, ucs4len * sizeof (ucs4[0])); 00184 00185 free (q); 00186 } 00187 break; 00188 00189 case STRINGPREP_PROHIBIT_TABLE: 00190 k = stringprep_find_string_in_table (ucs4, ucs4len, 00191 NULL, profile[i].table); 00192 if (k != -1) 00193 return STRINGPREP_CONTAINS_PROHIBITED; 00194 break; 00195 00196 case STRINGPREP_UNASSIGNED_TABLE: 00197 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags)) 00198 break; 00199 if (flags & STRINGPREP_NO_UNASSIGNED) 00200 { 00201 k = stringprep_find_string_in_table 00202 (ucs4, ucs4len, NULL, profile[i].table); 00203 if (k != -1) 00204 return STRINGPREP_CONTAINS_UNASSIGNED; 00205 } 00206 break; 00207 00208 case STRINGPREP_MAP_TABLE: 00209 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags)) 00210 break; 00211 rc = stringprep_apply_table_to_string 00212 (ucs4, &ucs4len, maxucs4len, profile[i].table); 00213 if (rc != STRINGPREP_OK) 00214 return rc; 00215 break; 00216 00217 case STRINGPREP_BIDI_PROHIBIT_TABLE: 00218 case STRINGPREP_BIDI_RAL_TABLE: 00219 case STRINGPREP_BIDI_L_TABLE: 00220 break; 00221 00222 case STRINGPREP_BIDI: 00223 { 00224 int done_prohibited = 0; 00225 int done_ral = 0; 00226 int done_l = 0; 00227 size_t contains_ral = SIZE_MAX; 00228 size_t contains_l = SIZE_MAX; 00229 00230 for (j = 0; profile[j].operation; j++) 00231 if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE) 00232 { 00233 done_prohibited = 1; 00234 k = stringprep_find_string_in_table (ucs4, ucs4len, 00235 NULL, 00236 profile[j].table); 00237 if (k != -1) 00238 return STRINGPREP_BIDI_CONTAINS_PROHIBITED; 00239 } 00240 else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE) 00241 { 00242 done_ral = 1; 00243 if (stringprep_find_string_in_table 00244 (ucs4, ucs4len, NULL, profile[j].table) != -1) 00245 contains_ral = j; 00246 } 00247 else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE) 00248 { 00249 done_l = 1; 00250 if (stringprep_find_string_in_table 00251 (ucs4, ucs4len, NULL, profile[j].table) != -1) 00252 contains_l = j; 00253 } 00254 00255 if (!done_prohibited || !done_ral || !done_l) 00256 return STRINGPREP_PROFILE_ERROR; 00257 00258 if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX) 00259 return STRINGPREP_BIDI_BOTH_L_AND_RAL; 00260 00261 if (contains_ral != SIZE_MAX) 00262 { 00263 if (!(stringprep_find_character_in_table 00264 (ucs4[0], profile[contains_ral].table) != -1 && 00265 stringprep_find_character_in_table 00266 (ucs4[ucs4len - 1], profile[contains_ral].table) != -1)) 00267 return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL; 00268 } 00269 } 00270 break; 00271 00272 default: 00273 return STRINGPREP_PROFILE_ERROR; 00274 break; 00275 } 00276 } 00277 00278 *len = ucs4len; 00279 00280 return STRINGPREP_OK; 00281 } 00282 00283 static int 00284 stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len, 00285 Stringprep_profile_flags flags, 00286 const Stringprep_profile * profile) 00287 { 00288 int rc; 00289 00290 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile); 00291 if (rc != STRINGPREP_OK) 00292 return rc; 00293 00294 if (ucs4len >= maxucs4len) 00295 return STRINGPREP_TOO_SMALL_BUFFER; 00296 00297 ucs4[ucs4len] = 0; 00298 00299 return STRINGPREP_OK; 00300 } 00301 00326 int 00327 stringprep_4zi (uint32_t * ucs4, size_t maxucs4len, 00328 Stringprep_profile_flags flags, 00329 const Stringprep_profile * profile) 00330 { 00331 size_t ucs4len; 00332 00333 for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++) 00334 ; 00335 00336 return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile); 00337 } 00338 00366 int 00367 stringprep (char *in, 00368 size_t maxlen, 00369 Stringprep_profile_flags flags, 00370 const Stringprep_profile * profile) 00371 { 00372 int rc; 00373 char *utf8 = NULL; 00374 uint32_t *ucs4 = NULL; 00375 size_t ucs4len, maxucs4len, adducs4len = 50; 00376 00377 do 00378 { 00379 uint32_t *newp; 00380 00381 free (ucs4); 00382 ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len); 00383 maxucs4len = ucs4len + adducs4len; 00384 newp = realloc (ucs4, maxucs4len * sizeof (uint32_t)); 00385 if (!newp) 00386 { 00387 free (ucs4); 00388 return STRINGPREP_MALLOC_ERROR; 00389 } 00390 ucs4 = newp; 00391 00392 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile); 00393 adducs4len += 50; 00394 } 00395 while (rc == STRINGPREP_TOO_SMALL_BUFFER); 00396 if (rc != STRINGPREP_OK) 00397 { 00398 free (ucs4); 00399 return rc; 00400 } 00401 00402 utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0); 00403 free (ucs4); 00404 if (!utf8) 00405 return STRINGPREP_MALLOC_ERROR; 00406 00407 if (strlen (utf8) >= maxlen) 00408 { 00409 free (utf8); 00410 return STRINGPREP_TOO_SMALL_BUFFER; 00411 } 00412 00413 strcpy (in, utf8); /* flawfinder: ignore */ 00414 00415 free (utf8); 00416 00417 return STRINGPREP_OK; 00418 } 00419 00444 int 00445 stringprep_profile (const char *in, 00446 char **out, 00447 const char *profile, Stringprep_profile_flags flags) 00448 { 00449 const Stringprep_profiles *p; 00450 char *str = NULL; 00451 size_t len = strlen (in) + 1; 00452 int rc; 00453 00454 for (p = &stringprep_profiles[0]; p->name; p++) 00455 if (strcmp (p->name, profile) == 0) 00456 break; 00457 00458 if (!p || !p->name || !p->tables) 00459 return STRINGPREP_UNKNOWN_PROFILE; 00460 00461 do 00462 { 00463 free (str); 00464 str = (char *) malloc (len); 00465 if (str == NULL) 00466 return STRINGPREP_MALLOC_ERROR; 00467 00468 strcpy (str, in); 00469 00470 rc = stringprep (str, len, flags, p->tables); 00471 len += 50; 00472 } 00473 while (rc == STRINGPREP_TOO_SMALL_BUFFER); 00474 00475 if (rc == STRINGPREP_OK) 00476 *out = str; 00477 else 00478 free (str); 00479 00480 return rc; 00481 } 00482
1.7.6.1