libidn  1.29
tld.c
Go to the documentation of this file.
1 /* tld.c --- Declarations for TLD restriction checking.
2  Copyright (C) 2004-2014 Simon Josefsson.
3  Copyright (C) 2003-2014 Free Software Foundation, Inc.
4 
5  Author: Thomas Jacob, Internet24.de
6 
7  This file is part of GNU Libidn.
8 
9  GNU Libidn is free software: you can redistribute it and/or
10  modify it under the terms of either:
11 
12  * the GNU Lesser General Public License as published by the Free
13  Software Foundation; either version 3 of the License, or (at
14  your option) any later version.
15 
16  or
17 
18  * the GNU General Public License as published by the Free
19  Software Foundation; either version 2 of the License, or (at
20  your option) any later version.
21 
22  or both in parallel, as here.
23 
24  GNU Libidn is distributed in the hope that it will be useful,
25  but WITHOUT ANY WARRANTY; without even the implied warranty of
26  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27  General Public License for more details.
28 
29  You should have received copies of the GNU General Public License and
30  the GNU Lesser General Public License along with this program. If
31  not, see <http://www.gnu.org/licenses/>. */
32 
33 #include <config.h>
34 
35 /* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */
36 #include <stringprep.h>
37 
38 /* Get strcmp(). */
39 #include <string.h>
40 
41 /* Get specifications. */
42 #include <tld.h>
43 
44 /* Array of built-in domain restriction structures. See tlds.c. */
45 extern const Tld_table *_tld_tables[];
46 
59 const Tld_table *
60 tld_get_table (const char *tld, const Tld_table ** tables)
61 {
62  const Tld_table **tldtable = NULL;
63 
64  if (!tld || !tables)
65  return NULL;
66 
67  for (tldtable = tables; *tldtable; tldtable++)
68  if (!strcmp ((*tldtable)->name, tld))
69  return *tldtable;
70 
71  return NULL;
72 }
73 
88 const Tld_table *
89 tld_default_table (const char *tld, const Tld_table ** overrides)
90 {
91  const Tld_table *tldtable = NULL;
92 
93  if (!tld)
94  return NULL;
95 
96  if (overrides)
97  tldtable = tld_get_table (tld, overrides);
98 
99  if (!tldtable)
100  tldtable = tld_get_table (tld, _tld_tables);
101 
102  return tldtable;
103 }
104 
/* Non-zero if C is one of the dot code points recognised by this
   file: U+002E (FULL STOP), U+3002 (IDEOGRAPHIC FULL STOP), U+FF0E
   (FULLWIDTH FULL STOP) or U+FF61 (HALFWIDTH IDEOGRAPHIC FULL STOP).
   The argument is evaluated more than once, so it must not have side
   effects. */
#define DOTP(c)                 \
  ((c) == 0x002E                \
   || (c) == 0x3002             \
   || (c) == 0xFF0E             \
   || (c) == 0xFF61)
107 
121 int
122 tld_get_4 (const uint32_t * in, size_t inlen, char **out)
123 {
124  const uint32_t *ipos;
125  size_t olen;
126 
127  *out = NULL;
128  if (!in || inlen == 0)
129  return TLD_NODATA;
130 
131  ipos = &in[inlen - 1];
132  olen = 0;
133  /* Scan backwards for non(latin)letters. */
134  while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) ||
135  (*ipos >= 0x61 && *ipos <= 0x7A)))
136  ipos--, olen++;
137 
138  if (olen > 0 && ipos >= in && DOTP (*ipos))
139  {
140  /* Found something that appears a TLD. */
141  char *out_s = malloc (sizeof (char) * (olen + 1));
142  char *opos = out_s;
143 
144  if (!opos)
145  return TLD_MALLOC_ERROR;
146 
147  ipos++;
148  /* Transcribe to lowercase ascii string. */
149  for (; ipos < &in[inlen]; ipos++, opos++)
150  *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20;
151  *opos = 0;
152  *out = out_s;
153  return TLD_SUCCESS;
154  }
155 
156  return TLD_NO_TLD;
157 }
158 
170 int
171 tld_get_4z (const uint32_t * in, char **out)
172 {
173  const uint32_t *ipos = in;
174 
175  if (!in)
176  return TLD_NODATA;
177 
178  while (*ipos)
179  ipos++;
180 
181  return tld_get_4 (in, ipos - in, out);
182 }
183 
196 int
197 tld_get_z (const char *in, char **out)
198 {
199  uint32_t *iucs;
200  size_t i, ilen;
201  int rc;
202 
203  ilen = strlen (in);
204  iucs = calloc (ilen, sizeof (*iucs));
205 
206  if (!iucs)
207  return TLD_MALLOC_ERROR;
208 
209  for (i = 0; i < ilen; i++)
210  iucs[i] = in[i];
211 
212  rc = tld_get_4 (iucs, ilen, out);
213 
214  free (iucs);
215 
216  return rc;
217 }
218 
219 /*
220  * tld_checkchar - verify that character is permitted
221  * @ch: 32 bit unicode character to check.
222  * @tld: A #Tld_table data structure to check @ch against.
223  *
224  * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid
225  * character in @tld.
226  *
227  * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a
228  * valid character for the TLD @tld or if @tld is %NULL,
229  * %TLD_INVALID if @ch is invalid as defined by @tld.
230  */
231 static int
232 _tld_checkchar (uint32_t ch, const Tld_table * tld)
233 {
234  const Tld_table_element *s, *e, *m;
235 
236  if (!tld)
237  return TLD_SUCCESS;
238 
239  /* Check for [-a-z0-9.]. */
240  if ((ch >= 0x61 && ch <= 0x7A) ||
241  (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch))
242  return TLD_SUCCESS;
243 
244  s = tld->valid;
245  e = s + tld->nvalid;
246  while (s < e)
247  {
248  m = s + ((e - s) >> 1);
249  if (ch < m->start)
250  e = m;
251  else if (ch > m->end)
252  s = m + 1;
253  else
254  return TLD_SUCCESS;
255  }
256 
257  return TLD_INVALID;
258 }
259 
279 int
280 tld_check_4t (const uint32_t * in, size_t inlen, size_t * errpos,
281  const Tld_table * tld)
282 {
283  const uint32_t *ipos;
284  int rc;
285 
286  if (!tld) /* No data for TLD so everything is valid. */
287  return TLD_SUCCESS;
288 
289  ipos = in;
290  while (ipos < &in[inlen])
291  {
292  rc = _tld_checkchar (*ipos, tld);
293  if (rc != TLD_SUCCESS)
294  {
295  if (errpos)
296  *errpos = ipos - in;
297  return rc;
298  }
299  ipos++;
300  }
301  return TLD_SUCCESS;
302 }
303 
321 int
322 tld_check_4tz (const uint32_t * in, size_t * errpos, const Tld_table * tld)
323 {
324  const uint32_t *ipos = in;
325 
326  if (!ipos)
327  return TLD_NODATA;
328 
329  while (*ipos)
330  ipos++;
331 
332  return tld_check_4t (in, ipos - in, errpos, tld);
333 }
334 
358 int
359 tld_check_4 (const uint32_t * in, size_t inlen, size_t * errpos,
360  const Tld_table ** overrides)
361 {
362  const Tld_table *tld;
363  char *domain;
364  int rc;
365 
366  if (errpos)
367  *errpos = 0;
368 
369  /* Get TLD name. */
370  rc = tld_get_4 (in, inlen, &domain);
371 
372  if (rc != TLD_SUCCESS)
373  {
374  if (rc == TLD_NO_TLD) /* No TLD, say OK */
375  return TLD_SUCCESS;
376  else
377  return rc;
378  }
379 
380  /* Retrieve appropriate data structure. */
381  tld = tld_default_table (domain, overrides);
382  free (domain);
383 
384  return tld_check_4t (in, inlen, errpos, tld);
385 }
386 
408 int
409 tld_check_4z (const uint32_t * in, size_t * errpos,
410  const Tld_table ** overrides)
411 {
412  const uint32_t *ipos = in;
413 
414  if (!ipos)
415  return TLD_NODATA;
416 
417  while (*ipos)
418  ipos++;
419 
420  return tld_check_4 (in, ipos - in, errpos, overrides);
421 }
422 
446 int
447 tld_check_8z (const char *in, size_t * errpos, const Tld_table ** overrides)
448 {
449  uint32_t *iucs;
450  size_t ilen;
451  int rc;
452 
453  if (!in)
454  return TLD_NODATA;
455 
456  iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen);
457 
458  if (!iucs)
459  return TLD_MALLOC_ERROR;
460 
461  rc = tld_check_4 (iucs, ilen, errpos, overrides);
462 
463  free (iucs);
464 
465  return rc;
466 }
467 
491 int
492 tld_check_lz (const char *in, size_t * errpos, const Tld_table ** overrides)
493 {
494  char *utf8;
495  int rc;
496 
497  if (!in)
498  return TLD_NODATA;
499 
500  utf8 = stringprep_locale_to_utf8 (in);
501  if (!utf8)
502  return TLD_ICONV_ERROR;
503 
504 
505  rc = tld_check_8z (utf8, errpos, overrides);
506 
507  free (utf8);
508 
509  return rc;
510 }
511