libidn  1.25
tld.c
Go to the documentation of this file.
00001 /* tld.c --- Implementation of TLD restriction checking.
00002    Copyright (C) 2004-2012 Simon Josefsson.
00003    Copyright (C) 2003-2012 Free Software Foundation, Inc.
00004 
00005    Author: Thomas Jacob, Internet24.de
00006 
00007    This file is part of GNU Libidn.
00008 
00009    GNU Libidn is free software: you can redistribute it and/or
00010    modify it under the terms of either:
00011 
00012      * the GNU Lesser General Public License as published by the Free
00013        Software Foundation; either version 3 of the License, or (at
00014        your option) any later version.
00015 
00016    or
00017 
00018      * the GNU General Public License as published by the Free
00019        Software Foundation; either version 2 of the License, or (at
00020        your option) any later version.
00021 
00022    or both in parallel, as here.
00023 
00024    GNU Libidn is distributed in the hope that it will be useful,
00025    but WITHOUT ANY WARRANTY; without even the implied warranty of
00026    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00027    General Public License for more details.
00028 
00029    You should have received copies of the GNU General Public License and
00030    the GNU Lesser General Public License along with this program.  If
00031    not, see <http://www.gnu.org/licenses/>. */
00032 
00033 #include <config.h>
00034 
00035 /* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */
00036 #include <stringprep.h>
00037 
00038 /* Get strcmp(). */
00039 #include <string.h>
00040 
00041 /* Get specifications. */
00042 #include <tld.h>
00043 
00044 /* Array of built-in domain restriction structures.  See tlds.c.  */
00045 extern const Tld_table *_tld_tables[];
00046 
00059 const Tld_table *
00060 tld_get_table (const char *tld, const Tld_table ** tables)
00061 {
00062   const Tld_table **tldtable = NULL;
00063 
00064   if (!tld || !tables)
00065     return NULL;
00066 
00067   for (tldtable = tables; *tldtable; tldtable++)
00068     if (!strcmp ((*tldtable)->name, tld))
00069       return *tldtable;
00070 
00071   return NULL;
00072 }
00073 
00088 const Tld_table *
00089 tld_default_table (const char *tld, const Tld_table ** overrides)
00090 {
00091   const Tld_table *tldtable = NULL;
00092 
00093   if (!tld)
00094     return NULL;
00095 
00096   if (overrides)
00097     tldtable = tld_get_table (tld, overrides);
00098 
00099   if (!tldtable)
00100     tldtable = tld_get_table (tld, _tld_tables);
00101 
00102   return tldtable;
00103 }
00104 
/* Non-zero if code point C is one of the four dot characters this
   file treats as a label separator: U+002E (full stop), U+3002
   (ideographic full stop), U+FF0E (fullwidth full stop) or U+FF61
   (halfwidth ideographic full stop).  C is evaluated up to four
   times, so it must not have side effects.  */
#define DOTP(c)                 \
  ((c) == 0x002E                \
   || (c) == 0x3002             \
   || (c) == 0xFF0E             \
   || (c) == 0xFF61)
00107 
00121 int
00122 tld_get_4 (const uint32_t * in, size_t inlen, char **out)
00123 {
00124   const uint32_t *ipos;
00125   size_t olen;
00126 
00127   *out = NULL;
00128   if (!in || inlen == 0)
00129     return TLD_NODATA;
00130 
00131   ipos = &in[inlen - 1];
00132   olen = 0;
00133   /* Scan backwards for non(latin)letters. */
00134   while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) ||
00135                         (*ipos >= 0x61 && *ipos <= 0x7A)))
00136     ipos--, olen++;
00137 
00138   if (olen > 0 && DOTP (*ipos)) /* Found something that appears a TLD. */
00139     {
00140       char *out_s = malloc (sizeof (char) * (olen + 1));
00141       char *opos = out_s;
00142 
00143       if (!opos)
00144         return TLD_MALLOC_ERROR;
00145 
00146       ipos++;
00147       /* Transcribe to lowercase ascii string. */
00148       for (; ipos < &in[inlen]; ipos++, opos++)
00149         *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20;
00150       *opos = 0;
00151       *out = out_s;
00152       return TLD_SUCCESS;
00153     }
00154 
00155   return TLD_NO_TLD;
00156 }
00157 
00169 int
00170 tld_get_4z (const uint32_t * in, char **out)
00171 {
00172   const uint32_t *ipos = in;
00173 
00174   if (!in)
00175     return TLD_NODATA;
00176 
00177   while (*ipos)
00178     ipos++;
00179 
00180   return tld_get_4 (in, ipos - in, out);
00181 }
00182 
00195 int
00196 tld_get_z (const char *in, char **out)
00197 {
00198   uint32_t *iucs;
00199   size_t i, ilen;
00200   int rc;
00201 
00202   ilen = strlen (in);
00203   iucs = calloc (ilen, sizeof (*iucs));
00204 
00205   if (!iucs)
00206     return TLD_MALLOC_ERROR;
00207 
00208   for (i = 0; i < ilen; i++)
00209     iucs[i] = in[i];
00210 
00211   rc = tld_get_4 (iucs, ilen, out);
00212 
00213   free (iucs);
00214 
00215   return rc;
00216 }
00217 
00218 /*
00219  * tld_checkchar - verify that character is permitted
00220  * @ch: 32 bit unicode character to check.
00221  * @tld: A #Tld_table data structure to check @ch against.
00222  *
00223  * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid
00224  * character in @tld.
00225  *
00226  * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a
00227  *   valid character for the TLD @tld or if @tld is %NULL,
00228  *   %TLD_INVALID if @ch is invalid as defined by @tld.
00229  */
00230 static int
00231 _tld_checkchar (uint32_t ch, const Tld_table * tld)
00232 {
00233   const Tld_table_element *s, *e, *m;
00234 
00235   if (!tld)
00236     return TLD_SUCCESS;
00237 
00238   /* Check for [-a-z0-9.]. */
00239   if ((ch >= 0x61 && ch <= 0x7A) ||
00240       (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch))
00241     return TLD_SUCCESS;
00242 
00243   s = tld->valid;
00244   e = s + tld->nvalid;
00245   while (s < e)
00246     {
00247       m = s + ((e - s) >> 1);
00248       if (ch < m->start)
00249         e = m;
00250       else if (ch > m->end)
00251         s = m + 1;
00252       else
00253         return TLD_SUCCESS;
00254     }
00255 
00256   return TLD_INVALID;
00257 }
00258 
00278 int
00279 tld_check_4t (const uint32_t * in, size_t inlen, size_t * errpos,
00280               const Tld_table * tld)
00281 {
00282   const uint32_t *ipos;
00283   int rc;
00284 
00285   if (!tld)                     /* No data for TLD so everything is valid. */
00286     return TLD_SUCCESS;
00287 
00288   ipos = in;
00289   while (ipos < &in[inlen])
00290     {
00291       rc = _tld_checkchar (*ipos, tld);
00292       if (rc != TLD_SUCCESS)
00293         {
00294           if (errpos)
00295             *errpos = ipos - in;
00296           return rc;
00297         }
00298       ipos++;
00299     }
00300   return TLD_SUCCESS;
00301 }
00302 
00320 int
00321 tld_check_4tz (const uint32_t * in, size_t * errpos, const Tld_table * tld)
00322 {
00323   const uint32_t *ipos = in;
00324 
00325   if (!ipos)
00326     return TLD_NODATA;
00327 
00328   while (*ipos)
00329     ipos++;
00330 
00331   return tld_check_4t (in, ipos - in, errpos, tld);
00332 }
00333 
00357 int
00358 tld_check_4 (const uint32_t * in, size_t inlen, size_t * errpos,
00359              const Tld_table ** overrides)
00360 {
00361   const Tld_table *tld;
00362   char *domain;
00363   int rc;
00364 
00365   if (errpos)
00366     *errpos = 0;
00367 
00368   /* Get TLD name. */
00369   rc = tld_get_4 (in, inlen, &domain);
00370 
00371   if (rc != TLD_SUCCESS)
00372     {
00373       if (rc == TLD_NO_TLD)     /* No TLD, say OK */
00374         return TLD_SUCCESS;
00375       else
00376         return rc;
00377     }
00378 
00379   /* Retrieve appropriate data structure. */
00380   tld = tld_default_table (domain, overrides);
00381   free (domain);
00382 
00383   return tld_check_4t (in, inlen, errpos, tld);
00384 }
00385 
00407 int
00408 tld_check_4z (const uint32_t * in, size_t * errpos,
00409               const Tld_table ** overrides)
00410 {
00411   const uint32_t *ipos = in;
00412 
00413   if (!ipos)
00414     return TLD_NODATA;
00415 
00416   while (*ipos)
00417     ipos++;
00418 
00419   return tld_check_4 (in, ipos - in, errpos, overrides);
00420 }
00421 
00445 int
00446 tld_check_8z (const char *in, size_t * errpos, const Tld_table ** overrides)
00447 {
00448   uint32_t *iucs;
00449   size_t ilen;
00450   int rc;
00451 
00452   if (!in)
00453     return TLD_NODATA;
00454 
00455   iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen);
00456 
00457   if (!iucs)
00458     return TLD_MALLOC_ERROR;
00459 
00460   rc = tld_check_4 (iucs, ilen, errpos, overrides);
00461 
00462   free (iucs);
00463 
00464   return rc;
00465 }
00466 
00490 int
00491 tld_check_lz (const char *in, size_t * errpos, const Tld_table ** overrides)
00492 {
00493   char *utf8;
00494   int rc;
00495 
00496   if (!in)
00497     return TLD_NODATA;
00498 
00499   utf8 = stringprep_locale_to_utf8 (in);
00500   if (!utf8)
00501     return TLD_ICONV_ERROR;
00502 
00503 
00504   rc = tld_check_8z (utf8, errpos, overrides);
00505 
00506   free (utf8);
00507 
00508   return rc;
00509 }
00510