|
libidn
1.25
|
00001 /* tld.c --- Declarations for TLD restriction checking. 00002 Copyright (C) 2004-2012 Simon Josefsson. 00003 Copyright (C) 2003-2012 Free Software Foundation, Inc. 00004 00005 Author: Thomas Jacob, Internet24.de 00006 00007 This file is part of GNU Libidn. 00008 00009 GNU Libidn is free software: you can redistribute it and/or 00010 modify it under the terms of either: 00011 00012 * the GNU Lesser General Public License as published by the Free 00013 Software Foundation; either version 3 of the License, or (at 00014 your option) any later version. 00015 00016 or 00017 00018 * the GNU General Public License as published by the Free 00019 Software Foundation; either version 2 of the License, or (at 00020 your option) any later version. 00021 00022 or both in parallel, as here. 00023 00024 GNU Libidn is distributed in the hope that it will be useful, 00025 but WITHOUT ANY WARRANTY; without even the implied warranty of 00026 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00027 General Public License for more details. 00028 00029 You should have received copies of the GNU General Public License and 00030 the GNU Lesser General Public License along with this program. If 00031 not, see <http://www.gnu.org/licenses/>. */ 00032 00033 #include <config.h> 00034 00035 /* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */ 00036 #include <stringprep.h> 00037 00038 /* Get strcmp(). */ 00039 #include <string.h> 00040 00041 /* Get specifications. */ 00042 #include <tld.h> 00043 00044 /* Array of built-in domain restriction structures. See tlds.c. */ 00045 extern const Tld_table *_tld_tables[]; 00046 00059 const Tld_table * 00060 tld_get_table (const char *tld, const Tld_table ** tables) 00061 { 00062 const Tld_table **tldtable = NULL; 00063 00064 if (!tld || !tables) 00065 return NULL; 00066 00067 for (tldtable = tables; *tldtable; tldtable++) 00068 if (!strcmp ((*tldtable)->name, tld)) 00069 return *tldtable; 00070 00071 return NULL; 00072 } 00073 00088 const Tld_table * 00089 tld_default_table (const char *tld, const Tld_table ** overrides) 00090 { 00091 const Tld_table *tldtable = NULL; 00092 00093 if (!tld) 00094 return NULL; 00095 00096 if (overrides) 00097 tldtable = tld_get_table (tld, overrides); 00098 00099 if (!tldtable) 00100 tldtable = tld_get_table (tld, _tld_tables); 00101 00102 return tldtable; 00103 } 00104 00105 #define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \ 00106 (c) == 0xFF0E || (c) == 0xFF61) 00107 00121 int 00122 tld_get_4 (const uint32_t * in, size_t inlen, char **out) 00123 { 00124 const uint32_t *ipos; 00125 size_t olen; 00126 00127 *out = NULL; 00128 if (!in || inlen == 0) 00129 return TLD_NODATA; 00130 00131 ipos = &in[inlen - 1]; 00132 olen = 0; 00133 /* Scan backwards for non(latin)letters. */ 00134 while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) || 00135 (*ipos >= 0x61 && *ipos <= 0x7A))) 00136 ipos--, olen++; 00137 00138 if (olen > 0 && DOTP (*ipos)) /* Found something that appears a TLD. */ 00139 { 00140 char *out_s = malloc (sizeof (char) * (olen + 1)); 00141 char *opos = out_s; 00142 00143 if (!opos) 00144 return TLD_MALLOC_ERROR; 00145 00146 ipos++; 00147 /* Transcribe to lowercase ascii string. */ 00148 for (; ipos < &in[inlen]; ipos++, opos++) 00149 *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20; 00150 *opos = 0; 00151 *out = out_s; 00152 return TLD_SUCCESS; 00153 } 00154 00155 return TLD_NO_TLD; 00156 } 00157 00169 int 00170 tld_get_4z (const uint32_t * in, char **out) 00171 { 00172 const uint32_t *ipos = in; 00173 00174 if (!in) 00175 return TLD_NODATA; 00176 00177 while (*ipos) 00178 ipos++; 00179 00180 return tld_get_4 (in, ipos - in, out); 00181 } 00182 00195 int 00196 tld_get_z (const char *in, char **out) 00197 { 00198 uint32_t *iucs; 00199 size_t i, ilen; 00200 int rc; 00201 00202 ilen = strlen (in); 00203 iucs = calloc (ilen, sizeof (*iucs)); 00204 00205 if (!iucs) 00206 return TLD_MALLOC_ERROR; 00207 00208 for (i = 0; i < ilen; i++) 00209 iucs[i] = in[i]; 00210 00211 rc = tld_get_4 (iucs, ilen, out); 00212 00213 free (iucs); 00214 00215 return rc; 00216 } 00217 00218 /* 00219 * tld_checkchar - verify that character is permitted 00220 * @ch: 32 bit unicode character to check. 00221 * @tld: A #Tld_table data structure to check @ch against. 00222 * 00223 * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid 00224 * character in @tld. 00225 * 00226 * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a 00227 * valid character for the TLD @tld or if @tld is %NULL, 00228 * %TLD_INVALID if @ch is invalid as defined by @tld. 00229 */ 00230 static int 00231 _tld_checkchar (uint32_t ch, const Tld_table * tld) 00232 { 00233 const Tld_table_element *s, *e, *m; 00234 00235 if (!tld) 00236 return TLD_SUCCESS; 00237 00238 /* Check for [-a-z0-9.]. */ 00239 if ((ch >= 0x61 && ch <= 0x7A) || 00240 (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch)) 00241 return TLD_SUCCESS; 00242 00243 s = tld->valid; 00244 e = s + tld->nvalid; 00245 while (s < e) 00246 { 00247 m = s + ((e - s) >> 1); 00248 if (ch < m->start) 00249 e = m; 00250 else if (ch > m->end) 00251 s = m + 1; 00252 else 00253 return TLD_SUCCESS; 00254 } 00255 00256 return TLD_INVALID; 00257 } 00258 00278 int 00279 tld_check_4t (const uint32_t * in, size_t inlen, size_t * errpos, 00280 const Tld_table * tld) 00281 { 00282 const uint32_t *ipos; 00283 int rc; 00284 00285 if (!tld) /* No data for TLD so everything is valid. */ 00286 return TLD_SUCCESS; 00287 00288 ipos = in; 00289 while (ipos < &in[inlen]) 00290 { 00291 rc = _tld_checkchar (*ipos, tld); 00292 if (rc != TLD_SUCCESS) 00293 { 00294 if (errpos) 00295 *errpos = ipos - in; 00296 return rc; 00297 } 00298 ipos++; 00299 } 00300 return TLD_SUCCESS; 00301 } 00302 00320 int 00321 tld_check_4tz (const uint32_t * in, size_t * errpos, const Tld_table * tld) 00322 { 00323 const uint32_t *ipos = in; 00324 00325 if (!ipos) 00326 return TLD_NODATA; 00327 00328 while (*ipos) 00329 ipos++; 00330 00331 return tld_check_4t (in, ipos - in, errpos, tld); 00332 } 00333 00357 int 00358 tld_check_4 (const uint32_t * in, size_t inlen, size_t * errpos, 00359 const Tld_table ** overrides) 00360 { 00361 const Tld_table *tld; 00362 char *domain; 00363 int rc; 00364 00365 if (errpos) 00366 *errpos = 0; 00367 00368 /* Get TLD name. */ 00369 rc = tld_get_4 (in, inlen, &domain); 00370 00371 if (rc != TLD_SUCCESS) 00372 { 00373 if (rc == TLD_NO_TLD) /* No TLD, say OK */ 00374 return TLD_SUCCESS; 00375 else 00376 return rc; 00377 } 00378 00379 /* Retrieve appropriate data structure. */ 00380 tld = tld_default_table (domain, overrides); 00381 free (domain); 00382 00383 return tld_check_4t (in, inlen, errpos, tld); 00384 } 00385 00407 int 00408 tld_check_4z (const uint32_t * in, size_t * errpos, 00409 const Tld_table ** overrides) 00410 { 00411 const uint32_t *ipos = in; 00412 00413 if (!ipos) 00414 return TLD_NODATA; 00415 00416 while (*ipos) 00417 ipos++; 00418 00419 return tld_check_4 (in, ipos - in, errpos, overrides); 00420 } 00421 00445 int 00446 tld_check_8z (const char *in, size_t * errpos, const Tld_table ** overrides) 00447 { 00448 uint32_t *iucs; 00449 size_t ilen; 00450 int rc; 00451 00452 if (!in) 00453 return TLD_NODATA; 00454 00455 iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen); 00456 00457 if (!iucs) 00458 return TLD_MALLOC_ERROR; 00459 00460 rc = tld_check_4 (iucs, ilen, errpos, overrides); 00461 00462 free (iucs); 00463 00464 return rc; 00465 } 00466 00490 int 00491 tld_check_lz (const char *in, size_t * errpos, const Tld_table ** overrides) 00492 { 00493 char *utf8; 00494 int rc; 00495 00496 if (!in) 00497 return TLD_NODATA; 00498 00499 utf8 = stringprep_locale_to_utf8 (in); 00500 if (!utf8) 00501 return TLD_ICONV_ERROR; 00502 00503 00504 rc = tld_check_8z (utf8, errpos, overrides); 00505 00506 free (utf8); 00507 00508 return rc; 00509 } 00510
1.7.6.1