libidn  1.25
stringprep.c
Go to the documentation of this file.
00001 /* stringprep.c --- Core stringprep implementation.
00002    Copyright (C) 2002-2012 Simon Josefsson
00003 
00004    This file is part of GNU Libidn.
00005 
00006    GNU Libidn is free software: you can redistribute it and/or
00007    modify it under the terms of either:
00008 
00009      * the GNU Lesser General Public License as published by the Free
00010        Software Foundation; either version 3 of the License, or (at
00011        your option) any later version.
00012 
00013    or
00014 
00015      * the GNU General Public License as published by the Free
00016        Software Foundation; either version 2 of the License, or (at
00017        your option) any later version.
00018 
00019    or both in parallel, as here.
00020 
00021    GNU Libidn is distributed in the hope that it will be useful,
00022    but WITHOUT ANY WARRANTY; without even the implied warranty of
00023    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00024    General Public License for more details.
00025 
00026    You should have received copies of the GNU General Public License and
00027    the GNU Lesser General Public License along with this program.  If
00028    not, see <http://www.gnu.org/licenses/>. */
00029 
00030 #ifdef HAVE_CONFIG_H
00031 # include "config.h"
00032 #endif
00033 
00034 #include <stdlib.h>
00035 #include <string.h>
00036 
00037 #include "stringprep.h"
00038 
00039 static ssize_t
00040 stringprep_find_character_in_table (uint32_t ucs4,
00041                                     const Stringprep_table_element * table)
00042 {
00043   ssize_t i;
00044 
00045   /* This is where typical uses of Libidn spends very close to all CPU
00046      time and causes most cache misses.  One could easily do a binary
00047      search instead.  Before rewriting this, I want hard evidence this
00048      slowness is at all relevant in typical applications.  (I don't
00049      dispute optimization may improve matters significantly, I'm
00050      mostly interested in having someone give real-world benchmark on
00051      the impact of libidn.) */
00052 
00053   for (i = 0; table[i].start || table[i].end; i++)
00054     if (ucs4 >= table[i].start &&
00055         ucs4 <= (table[i].end ? table[i].end : table[i].start))
00056       return i;
00057 
00058   return -1;
00059 }
00060 
00061 static ssize_t
00062 stringprep_find_string_in_table (uint32_t * ucs4,
00063                                  size_t ucs4len,
00064                                  size_t * tablepos,
00065                                  const Stringprep_table_element * table)
00066 {
00067   size_t j;
00068   ssize_t pos;
00069 
00070   for (j = 0; j < ucs4len; j++)
00071     if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
00072       {
00073         if (tablepos)
00074           *tablepos = pos;
00075         return j;
00076       }
00077 
00078   return -1;
00079 }
00080 
00081 static int
00082 stringprep_apply_table_to_string (uint32_t * ucs4,
00083                                   size_t * ucs4len,
00084                                   size_t maxucs4len,
00085                                   const Stringprep_table_element * table)
00086 {
00087   ssize_t pos;
00088   size_t i, maplen;
00089 
00090   while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
00091                                                  &i, table)) != -1)
00092     {
00093       for (maplen = STRINGPREP_MAX_MAP_CHARS;
00094            maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
00095         ;
00096 
00097       if (*ucs4len - 1 + maplen >= maxucs4len)
00098         return STRINGPREP_TOO_SMALL_BUFFER;
00099 
00100       memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
00101                sizeof (uint32_t) * (*ucs4len - pos - 1));
00102       memcpy (&ucs4[pos], table[i].map, sizeof (uint32_t) * maplen);
00103       *ucs4len = *ucs4len - 1 + maplen;
00104     }
00105 
00106   return STRINGPREP_OK;
00107 }
00108 
/* INVERTED(x) evaluates to X, converted to unsigned long, with the
   most significant bit cleared.

   NOTE(review): the name suggests a test for an inverted (~FLAG) flag
   word, but as written the result is nonzero for every value that has
   any bit set below the top bit of unsigned long.  Behavior is kept
   as is; confirm the intent before changing the mask.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* UNAPPLICAPLEFLAGS(flags, profileflags) is nonzero when a profile
   step carrying PROFILEFLAGS is to be skipped for caller FLAGS:

     - INVERTED(profileflags) is zero, PROFILEFLAGS is nonzero and has
       no bit in common with FLAGS; or
     - INVERTED(profileflags) is nonzero and PROFILEFLAGS has at least
       one bit in common with FLAGS.

   A PROFILEFLAGS of zero never makes a step unapplicable.  Both
   arguments are parenthesized so that expressions such as A | B can
   be passed safely; each argument may be evaluated more than once.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags) \
  ((!INVERTED(profileflags) && !((profileflags) & (flags)) \
    && (profileflags)) || \
   ( INVERTED(profileflags) && ((profileflags) & (flags))))
00113 
00145 int
00146 stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
00147                Stringprep_profile_flags flags,
00148                const Stringprep_profile * profile)
00149 {
00150   size_t i, j;
00151   ssize_t k;
00152   size_t ucs4len = *len;
00153   int rc;
00154 
00155   for (i = 0; profile[i].operation; i++)
00156     {
00157       switch (profile[i].operation)
00158         {
00159         case STRINGPREP_NFKC:
00160           {
00161             uint32_t *q = 0;
00162 
00163             if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00164               break;
00165 
00166             if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
00167               /* Profile requires NFKC, but callee asked for no NFKC. */
00168               return STRINGPREP_FLAG_ERROR;
00169 
00170             q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
00171             if (!q)
00172               return STRINGPREP_NFKC_FAILED;
00173 
00174             for (ucs4len = 0; q[ucs4len]; ucs4len++)
00175               ;
00176 
00177             if (ucs4len >= maxucs4len)
00178               {
00179                 free (q);
00180                 return STRINGPREP_TOO_SMALL_BUFFER;
00181               }
00182 
00183             memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
00184 
00185             free (q);
00186           }
00187           break;
00188 
00189         case STRINGPREP_PROHIBIT_TABLE:
00190           k = stringprep_find_string_in_table (ucs4, ucs4len,
00191                                                NULL, profile[i].table);
00192           if (k != -1)
00193             return STRINGPREP_CONTAINS_PROHIBITED;
00194           break;
00195 
00196         case STRINGPREP_UNASSIGNED_TABLE:
00197           if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00198             break;
00199           if (flags & STRINGPREP_NO_UNASSIGNED)
00200             {
00201               k = stringprep_find_string_in_table
00202                 (ucs4, ucs4len, NULL, profile[i].table);
00203               if (k != -1)
00204                 return STRINGPREP_CONTAINS_UNASSIGNED;
00205             }
00206           break;
00207 
00208         case STRINGPREP_MAP_TABLE:
00209           if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
00210             break;
00211           rc = stringprep_apply_table_to_string
00212             (ucs4, &ucs4len, maxucs4len, profile[i].table);
00213           if (rc != STRINGPREP_OK)
00214             return rc;
00215           break;
00216 
00217         case STRINGPREP_BIDI_PROHIBIT_TABLE:
00218         case STRINGPREP_BIDI_RAL_TABLE:
00219         case STRINGPREP_BIDI_L_TABLE:
00220           break;
00221 
00222         case STRINGPREP_BIDI:
00223           {
00224             int done_prohibited = 0;
00225             int done_ral = 0;
00226             int done_l = 0;
00227             size_t contains_ral = SIZE_MAX;
00228             size_t contains_l = SIZE_MAX;
00229 
00230             for (j = 0; profile[j].operation; j++)
00231               if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
00232                 {
00233                   done_prohibited = 1;
00234                   k = stringprep_find_string_in_table (ucs4, ucs4len,
00235                                                        NULL,
00236                                                        profile[j].table);
00237                   if (k != -1)
00238                     return STRINGPREP_BIDI_CONTAINS_PROHIBITED;
00239                 }
00240               else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
00241                 {
00242                   done_ral = 1;
00243                   if (stringprep_find_string_in_table
00244                       (ucs4, ucs4len, NULL, profile[j].table) != -1)
00245                     contains_ral = j;
00246                 }
00247               else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
00248                 {
00249                   done_l = 1;
00250                   if (stringprep_find_string_in_table
00251                       (ucs4, ucs4len, NULL, profile[j].table) != -1)
00252                     contains_l = j;
00253                 }
00254 
00255             if (!done_prohibited || !done_ral || !done_l)
00256               return STRINGPREP_PROFILE_ERROR;
00257 
00258             if (contains_ral != SIZE_MAX && contains_l != SIZE_MAX)
00259               return STRINGPREP_BIDI_BOTH_L_AND_RAL;
00260 
00261             if (contains_ral != SIZE_MAX)
00262               {
00263                 if (!(stringprep_find_character_in_table
00264                       (ucs4[0], profile[contains_ral].table) != -1 &&
00265                       stringprep_find_character_in_table
00266                       (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
00267                   return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
00268               }
00269           }
00270           break;
00271 
00272         default:
00273           return STRINGPREP_PROFILE_ERROR;
00274           break;
00275         }
00276     }
00277 
00278   *len = ucs4len;
00279 
00280   return STRINGPREP_OK;
00281 }
00282 
00283 static int
00284 stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
00285                   Stringprep_profile_flags flags,
00286                   const Stringprep_profile * profile)
00287 {
00288   int rc;
00289 
00290   rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
00291   if (rc != STRINGPREP_OK)
00292     return rc;
00293 
00294   if (ucs4len >= maxucs4len)
00295     return STRINGPREP_TOO_SMALL_BUFFER;
00296 
00297   ucs4[ucs4len] = 0;
00298 
00299   return STRINGPREP_OK;
00300 }
00301 
00326 int
00327 stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
00328                 Stringprep_profile_flags flags,
00329                 const Stringprep_profile * profile)
00330 {
00331   size_t ucs4len;
00332 
00333   for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
00334     ;
00335 
00336   return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
00337 }
00338 
00366 int
00367 stringprep (char *in,
00368             size_t maxlen,
00369             Stringprep_profile_flags flags,
00370             const Stringprep_profile * profile)
00371 {
00372   int rc;
00373   char *utf8 = NULL;
00374   uint32_t *ucs4 = NULL;
00375   size_t ucs4len, maxucs4len, adducs4len = 50;
00376 
00377   do
00378     {
00379       uint32_t *newp;
00380 
00381       free (ucs4);
00382       ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
00383       maxucs4len = ucs4len + adducs4len;
00384       newp = realloc (ucs4, maxucs4len * sizeof (uint32_t));
00385       if (!newp)
00386         {
00387           free (ucs4);
00388           return STRINGPREP_MALLOC_ERROR;
00389         }
00390       ucs4 = newp;
00391 
00392       rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
00393       adducs4len += 50;
00394     }
00395   while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00396   if (rc != STRINGPREP_OK)
00397     {
00398       free (ucs4);
00399       return rc;
00400     }
00401 
00402   utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
00403   free (ucs4);
00404   if (!utf8)
00405     return STRINGPREP_MALLOC_ERROR;
00406 
00407   if (strlen (utf8) >= maxlen)
00408     {
00409       free (utf8);
00410       return STRINGPREP_TOO_SMALL_BUFFER;
00411     }
00412 
00413   strcpy (in, utf8);            /* flawfinder: ignore */
00414 
00415   free (utf8);
00416 
00417   return STRINGPREP_OK;
00418 }
00419 
00444 int
00445 stringprep_profile (const char *in,
00446                     char **out,
00447                     const char *profile, Stringprep_profile_flags flags)
00448 {
00449   const Stringprep_profiles *p;
00450   char *str = NULL;
00451   size_t len = strlen (in) + 1;
00452   int rc;
00453 
00454   for (p = &stringprep_profiles[0]; p->name; p++)
00455     if (strcmp (p->name, profile) == 0)
00456       break;
00457 
00458   if (!p || !p->name || !p->tables)
00459     return STRINGPREP_UNKNOWN_PROFILE;
00460 
00461   do
00462     {
00463       free (str);
00464       str = (char *) malloc (len);
00465       if (str == NULL)
00466         return STRINGPREP_MALLOC_ERROR;
00467 
00468       strcpy (str, in);
00469 
00470       rc = stringprep (str, len, flags, p->tables);
00471       len += 50;
00472     }
00473   while (rc == STRINGPREP_TOO_SMALL_BUFFER);
00474 
00475   if (rc == STRINGPREP_OK)
00476     *out = str;
00477   else
00478     free (str);
00479 
00480   return rc;
00481 }
00482