libidn  1.42
tld.c
Go to the documentation of this file.
1 /* tld.c --- Declarations for TLD restriction checking.
2  Copyright (C) 2004-2024 Simon Josefsson.
3  Copyright (C) 2003-2024 Free Software Foundation, Inc.
4 
5  Author: Thomas Jacob, Internet24.de
6 
7  This file is part of GNU Libidn.
8 
9  GNU Libidn is free software: you can redistribute it and/or
10  modify it under the terms of either:
11 
12  * the GNU Lesser General Public License as published by the Free
13  Software Foundation; either version 3 of the License, or (at
14  your option) any later version.
15 
16  or
17 
18  * the GNU General Public License as published by the Free
19  Software Foundation; either version 2 of the License, or (at
20  your option) any later version.
21 
22  or both in parallel, as here.
23 
24  GNU Libidn is distributed in the hope that it will be useful,
25  but WITHOUT ANY WARRANTY; without even the implied warranty of
26  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27  General Public License for more details.
28 
29  You should have received copies of the GNU General Public License and
30  the GNU Lesser General Public License along with this program. If
31  not, see <https://www.gnu.org/licenses/>. */
32 
33 #include <config.h>
34 
35 /* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */
36 #include <stringprep.h>
37 
38 /* Get strcmp(). */
39 #include <string.h>
40 
41 /* Get specifications. */
42 #include <tld.h>
43 
44 /* Array of built-in domain restriction structures. See tlds.c. */
45 extern const Tld_table *_tld_tables[];
46 
59 const Tld_table *
60 tld_get_table (const char *tld, const Tld_table **tables)
61 {
62  const Tld_table **tldtable = NULL;
63 
64  if (!tld || !tables)
65  return NULL;
66 
67  for (tldtable = tables; *tldtable; tldtable++)
68  if (!strcmp ((*tldtable)->name, tld))
69  return *tldtable;
70 
71  return NULL;
72 }
73 
88 const Tld_table *
89 tld_default_table (const char *tld, const Tld_table **overrides)
90 {
91  const Tld_table *tldtable = NULL;
92 
93  if (!tld)
94  return NULL;
95 
96  if (overrides)
97  tldtable = tld_get_table (tld, overrides);
98 
99  if (!tldtable)
100  tldtable = tld_get_table (tld, _tld_tables);
101 
102  return tldtable;
103 }
104 
/* Non-zero when code point C is one of the four dot characters
   accepted as a label separator here: U+002E, U+3002, U+FF0E or
   U+FF61.  Note that C is evaluated up to four times.  */
#define DOTP(c)							\
  ((c) == 0x002E || (c) == 0x3002 || (c) == 0xFF0E || (c) == 0xFF61)
107 
121 int
122 tld_get_4 (const uint32_t *in, size_t inlen, char **out)
123 {
124  const uint32_t *ipos;
125  size_t olen;
126 
127  *out = NULL;
128  if (!in || inlen == 0)
129  return TLD_NODATA;
130 
131  ipos = &in[inlen - 1];
132  olen = 0;
133  /* Scan backwards for non(latin)letters. */
134  while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) ||
135  (*ipos >= 0x61 && *ipos <= 0x7A)))
136  ipos--, olen++;
137 
138  if (olen > 0 && ipos >= in && DOTP (*ipos))
139  {
140  /* Found something that appears a TLD. */
141  char *out_s = malloc (sizeof (char) * (olen + 1));
142  char *opos = out_s;
143 
144  if (!opos)
145  return TLD_MALLOC_ERROR;
146 
147  ipos++;
148  /* Transcribe to lowercase ascii string. */
149  for (; ipos < &in[inlen]; ipos++, opos++)
150  *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20;
151  *opos = 0;
152  *out = out_s;
153  return TLD_SUCCESS;
154  }
155 
156  return TLD_NO_TLD;
157 }
158 
170 int
171 tld_get_4z (const uint32_t *in, char **out)
172 {
173  const uint32_t *ipos = in;
174 
175  if (!in)
176  return TLD_NODATA;
177 
178  while (*ipos)
179  ipos++;
180 
181  return tld_get_4 (in, ipos - in, out);
182 }
183 
196 int
197 tld_get_z (const char *in, char **out)
198 {
199  uint32_t *iucs;
200  size_t i, ilen;
201  int rc;
202 
203  ilen = strlen (in);
204  iucs = calloc (ilen, sizeof (*iucs));
205 
206  if (!iucs)
207  return TLD_MALLOC_ERROR;
208 
209  for (i = 0; i < ilen; i++)
210  iucs[i] = in[i];
211 
212  rc = tld_get_4 (iucs, ilen, out);
213 
214  free (iucs);
215 
216  return rc;
217 }
218 
219 /*
220  * tld_checkchar - verify that character is permitted
221  * @ch: 32 bit unicode character to check.
222  * @tld: A #Tld_table data structure to check @ch against.
223  *
224  * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid
225  * character in @tld.
226  *
227  * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a
228  * valid character for the TLD @tld or if @tld is %NULL,
229  * %TLD_INVALID if @ch is invalid as defined by @tld.
230  */
231 static int
232 _tld_checkchar (uint32_t ch, const Tld_table *tld)
233 {
234  const Tld_table_element *s, *e, *m;
235 
236  if (!tld)
237  return TLD_SUCCESS;
238 
239  /* Check for [-a-z0-9.]. */
240  if ((ch >= 0x61 && ch <= 0x7A) ||
241  (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch))
242  return TLD_SUCCESS;
243 
244  s = tld->valid;
245  e = s + tld->nvalid;
246  while (s < e)
247  {
248  m = s + ((e - s) >> 1);
249  if (ch < m->start)
250  e = m;
251  else if (ch > m->end)
252  s = m + 1;
253  else
254  return TLD_SUCCESS;
255  }
256 
257  return TLD_INVALID;
258 }
259 
279 int
280 tld_check_4t (const uint32_t *in, size_t inlen, size_t *errpos,
281  const Tld_table *tld)
282 {
283  const uint32_t *ipos;
284  int rc;
285 
286  if (!tld) /* No data for TLD so everything is valid. */
287  return TLD_SUCCESS;
288 
289  ipos = in;
290  while (ipos < &in[inlen])
291  {
292  rc = _tld_checkchar (*ipos, tld);
293  if (rc != TLD_SUCCESS)
294  {
295  if (errpos)
296  *errpos = ipos - in;
297  return rc;
298  }
299  ipos++;
300  }
301  return TLD_SUCCESS;
302 }
303 
321 int
322 tld_check_4tz (const uint32_t *in, size_t *errpos, const Tld_table *tld)
323 {
324  const uint32_t *ipos = in;
325 
326  if (!ipos)
327  return TLD_NODATA;
328 
329  while (*ipos)
330  ipos++;
331 
332  return tld_check_4t (in, ipos - in, errpos, tld);
333 }
334 
358 int
359 tld_check_4 (const uint32_t *in, size_t inlen, size_t *errpos,
360  const Tld_table **overrides)
361 {
362  const Tld_table *tld;
363  char *domain;
364  int rc;
365 
366  if (errpos)
367  *errpos = 0;
368 
369  /* Get TLD name. */
370  rc = tld_get_4 (in, inlen, &domain);
371 
372  if (rc != TLD_SUCCESS)
373  {
374  if (rc == TLD_NO_TLD) /* No TLD, say OK */
375  return TLD_SUCCESS;
376  else
377  return rc;
378  }
379 
380  /* Retrieve appropriate data structure. */
381  tld = tld_default_table (domain, overrides);
382  free (domain);
383 
384  return tld_check_4t (in, inlen, errpos, tld);
385 }
386 
408 int
409 tld_check_4z (const uint32_t *in, size_t *errpos, const Tld_table **overrides)
410 {
411  const uint32_t *ipos = in;
412 
413  if (!ipos)
414  return TLD_NODATA;
415 
416  while (*ipos)
417  ipos++;
418 
419  return tld_check_4 (in, ipos - in, errpos, overrides);
420 }
421 
445 int
446 tld_check_8z (const char *in, size_t *errpos, const Tld_table **overrides)
447 {
448  uint32_t *iucs;
449  size_t ilen;
450  int rc;
451 
452  if (!in)
453  return TLD_NODATA;
454 
455  iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen);
456 
457  if (!iucs)
458  return TLD_MALLOC_ERROR;
459 
460  rc = tld_check_4 (iucs, ilen, errpos, overrides);
461 
462  free (iucs);
463 
464  return rc;
465 }
466 
490 int
491 tld_check_lz (const char *in, size_t *errpos, const Tld_table **overrides)
492 {
493  char *utf8;
494  int rc;
495 
496  if (!in)
497  return TLD_NODATA;
498 
499  utf8 = stringprep_locale_to_utf8 (in);
500  if (!utf8)
501  return TLD_ICONV_ERROR;
502 
503 
504  rc = tld_check_8z (utf8, errpos, overrides);
505 
506  free (utf8);
507 
508  return rc;
509 }
510 
uint32_t * stringprep_utf8_to_ucs4(const char *str, ssize_t len, size_t *items_written)
Definition: nfkc.c:1006
IDNAPI char * stringprep_locale_to_utf8(const char *str)
Definition: toutf8.c:145
uint32_t end
Definition: tld.h:81
Definition: tld.h:95
const Tld_table_element * valid
Definition: tld.h:99
size_t nvalid
Definition: tld.h:98
int tld_check_8z(const char *in, size_t *errpos, const Tld_table **overrides)
Definition: tld.c:446
int tld_check_4z(const uint32_t *in, size_t *errpos, const Tld_table **overrides)
Definition: tld.c:409
const Tld_table * _tld_tables[]
Definition: tlds.c:60
const Tld_table * tld_get_table(const char *tld, const Tld_table **tables)
Definition: tld.c:60
#define DOTP(c)
Definition: tld.c:105
int tld_check_4(const uint32_t *in, size_t inlen, size_t *errpos, const Tld_table **overrides)
Definition: tld.c:359
int tld_check_lz(const char *in, size_t *errpos, const Tld_table **overrides)
Definition: tld.c:491
const Tld_table * tld_default_table(const char *tld, const Tld_table **overrides)
Definition: tld.c:89
int tld_check_4tz(const uint32_t *in, size_t *errpos, const Tld_table *tld)
Definition: tld.c:322
int tld_check_4t(const uint32_t *in, size_t inlen, size_t *errpos, const Tld_table *tld)
Definition: tld.c:280
int tld_get_4z(const uint32_t *in, char **out)
Definition: tld.c:171
int tld_get_4(const uint32_t *in, size_t inlen, char **out)
Definition: tld.c:122
int tld_get_z(const char *in, char **out)
Definition: tld.c:197
@ TLD_ICONV_ERROR
Definition: tld.h:111
@ TLD_MALLOC_ERROR
Definition: tld.h:110
@ TLD_SUCCESS
Definition: tld.h:107
@ TLD_NODATA
Definition: tld.h:109
@ TLD_NO_TLD
Definition: tld.h:112
@ TLD_INVALID
Definition: tld.h:108