Bug Summary

File: gentr46map.c
Location: line 499, column 7
Description: Null pointer passed as an argument to a 'nonnull' parameter

Annotated Source Code

1/* gentr46map.c - generate TR46 lookup tables
2 Copyright (C) 2016-2017 Tim Ruehsen
3
4 Libidn2 is free software: you can redistribute it and/or modify it
5 under the terms of either:
6
7 * the GNU Lesser General Public License as published by the Free
8 Software Foundation; either version 3 of the License, or (at
9 your option) any later version.
10
11 or
12
13 * the GNU General Public License as published by the Free
14 Software Foundation; either version 2 of the License, or (at
15 your option) any later version.
16
17 or both in parallel, as here.
18
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received copies of the GNU General Public License and
25 the GNU Lesser General Public License along with this program. If
26 not, see <http://www.gnu.org/licenses/>.
27*/
28
#include <config.h>

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <ctype.h>

#include <unistr.h>

#include "tr46map.h"
40
/* Number of elements of the array A.  A must be a real array, not a
   pointer: the element count is derived from sizeof. */
#define countof(a) (sizeof(a)/sizeof(*(a)))
42
/* One row of the IDNA mapping table while it is being generated.
   A row describes the codepoint range cp1..cp2. */
typedef struct
{
  uint32_t cp1;                 /* first codepoint of the range */
  uint32_t cp2;                 /* last codepoint of the range */
  unsigned nmappings:5;         /* # of codepoints stored at <offset> (5 bits) */
  unsigned offset:14;           /* 0-16383, byte offset into mapdata */
  unsigned flag_index:3;        /* 0-7, index into flag_combination[] */
  uint8_t flags;                /* TR46_FLG_* bits of this range */
} IDNAMap_gen;
51
52static IDNAMap_gen idna_map[10000];
53static size_t map_pos;
54
55static uint8_t genmapdata[16384];
56static size_t mapdata_pos;
57
58static uint8_t flag_combination[8];
59static unsigned flag_combinations;
60
61static NFCQCMap nfcqc_map[140];
62static size_t nfcqc_pos;
63
/* Cut the next field out of *LINE and return it.
 *
 * A field ends at the first ';' or '#', or at the end of the string.
 * The terminator is overwritten with NUL (the buffer is modified in
 * place) and leading/trailing whitespace is removed from the field.
 * *LINE is advanced past a ';' separator; on '#' it is left on the
 * overwritten byte, so the next call sees an empty rest.
 *
 * Returns NULL when *LINE is already empty, otherwise a pointer into
 * the caller's buffer.
 */
static char *
_nextField (char **line)
{
  char *field = *line;
  char *end;

  if (*field == '\0')
    return NULL;

  end = strpbrk (field, ";#");
  if (end)
    {
      *line = end + (*end == ';');
      *end = '\0';
    }
  else
    {
      end = field + strlen (field);
      *line = end;
    }

  /* trim leading and trailing whitespace; the <ctype.h> functions need
     an unsigned char value, a negative plain char is undefined */
  while (isspace ((unsigned char) *field))
    field++;
  while (end > field && isspace ((unsigned char) end[-1]))
    *--end = '\0';

  return field;
}
90
/* Read the text file FNAME line by line and hand every line that is
 * neither empty nor a '#' comment to SCAN.
 *
 * The line passed to SCAN has its leading whitespace skipped and its
 * trailing CR/LF removed; SCAN may modify it.  Scanning stops at the
 * first non-zero value returned by SCAN.
 *
 * Returns -1 if the file cannot be opened (a message goes to stderr),
 * otherwise the last value returned by SCAN (0 if every line was
 * accepted or no line was scanned).
 */
static int
_scan_file (const char *fname, int (*scan) (char *))
{
  FILE *fp = fopen (fname, "r");
  char *buf = NULL, *linep;
  size_t bufsize = 0;
  ssize_t buflen;
  int ret = 0;

  if (!fp)
    {
      fprintf (stderr, "Failed to open %s (%d)\n", fname, errno);
      return -1;
    }

  while ((buflen = getline (&buf, &bufsize, fp)) >= 0)
    {
      /* strip off \r\n: the last character read is buf[buflen - 1],
         buf[buflen] is always the terminating NUL */
      while (buflen > 0
             && (buf[buflen - 1] == '\n' || buf[buflen - 1] == '\r'))
        buf[--buflen] = 0;

      /* ignore leading whitespace (cast: isspace() needs an unsigned
         char value) */
      linep = buf;
      while (isspace ((unsigned char) *linep))
        linep++;

      if (!*linep || *linep == '#')
        continue;               /* skip empty lines and comments */

      if ((ret = scan (linep)))
        break;
    }

  free (buf);
  fclose (fp);

  return ret;
}
129
/* Return the number of bytes needed to store the LEN codepoints at SRC
 * in the byte stream format: 7 payload bits per byte, most significant
 * group first, bit 7 set on every byte except the last one.
 *
 *   1 byte:  0-0x7f                 -> 0xxxxxxx
 *   2 bytes: 0x80-0x3fff            -> 1xxxxxxx 0xxxxxxx
 *   3 bytes: 0x4000-0x1fffff        -> 1xxxxxxx 1xxxxxxx 0xxxxxxx
 *   4 bytes: 0x200000-0xFFFFFFF     -> 1xxxxxxx 1xxxxxxx 1xxxxxxx 0xxxxxxx
 *   5 bytes: 0x10000000-0xFFFFFFFF  -> 1xxxxxxx 1xxxxxxx 1xxxxxxx 1xxxxxxx 0xxxxxxx
 */
static size_t
_u32_stream_len (uint32_t * src, size_t len)
{
  size_t total = 0;
  unsigned idx;

  for (idx = 0; idx < len; idx++)
    {
      const uint32_t value = src[idx];
      size_t bytes = 1;

      /* one extra byte for every 7-bit group beyond the first */
      if (value > 0x7f)
        bytes++;
      if (value > 0x3fff)
        bytes++;
      if (value > 0x1fffff)
        bytes++;
      if (value > 0xFFFFFFF)
        bytes++;

      total += bytes;
    }

  return total;
}
161
/* Encode the LEN codepoints at SRC into the byte stream DST.
 *
 * Each codepoint is written as 1 to 5 bytes carrying 7 payload bits
 * each, most significant group first; bit 7 is set on all bytes but
 * the last one of a codepoint.
 *
 * Returns the number of bytes the encoding takes.  With DST == NULL
 * nothing is written and only that size is returned.  Returns 0 and
 * writes nothing if DST_SIZE is smaller than the size needed.
 */
static size_t
_u32_to_stream (uint8_t * dst, size_t dst_size, uint32_t * src, size_t len)
{
  const size_t needed = _u32_stream_len (src, len);
  uint8_t *out = dst;
  unsigned idx;

  if (!dst)
    return needed;

  if (dst_size < needed)
    return 0;

  for (idx = 0; idx < len; idx++)
    {
      const uint32_t cp = src[idx];
      int shift;

      /* bit position of the most significant 7-bit group to emit */
      if (cp <= 0x7f)
        shift = 0;
      else if (cp <= 0x3fff)
        shift = 7;
      else if (cp <= 0x1fffff)
        shift = 14;
      else if (cp <= 0xFFFFFFF)
        shift = 21;
      else
        shift = 28;

      /* leading groups carry the continuation bit ... */
      for (; shift > 0; shift -= 7)
        *out++ = 0x80 | ((cp >> shift) & 0x7F);

      /* ... the final group does not */
      *out++ = cp & 0x7F;
    }

  return needed;
}
212
/* Decode 'n' codepoints from the byte stream 'src' into 'dst'.
 * Every byte contributes its low 7 bits; a byte with bit 7 clear
 * completes the current codepoint. */
static void
_copy_from_stream (uint32_t * dst, const uint8_t * src, size_t n)
{
  uint32_t acc = 0;

  while (n > 0)
    {
      const uint8_t byte = *src++;

      acc = (acc << 7) | (byte & 0x7F);

      if (byte & 0x80)
        continue;               /* more bytes of this codepoint follow */

      *dst++ = acc;
      acc = 0;
      n--;
    }
}
230
231static int
232read_IdnaMappings (char *linep)
233{
234 IDNAMap_gen *map = &idna_map[map_pos];
235 char *flag, *codepoint, *mapping;
236 int n;
237
238 codepoint = _nextField (&linep);
239 flag = _nextField (&linep);
240 mapping = _nextField (&linep);
241
242 if ((n = sscanf (codepoint, "%X..%X", &map->cp1, &map->cp2)) == 1)
243 {
244 map->cp2 = map->cp1;
245 }
246 else if (n != 2)
247 {
248 printf ("Failed to scan mapping codepoint '%s'\n", codepoint);
249 return -1;
250 }
251
252 if (map->cp1 > map->cp2)
253 {
254 printf ("Invalid codepoint range '%s'\n", codepoint);
255 return -1;
256 }
257
258 if (map_pos && map->cp1 <= idna_map[map_pos - 1].cp2)
259 {
260 printf ("Mapping codepoints out of order '%s'\n", codepoint);
261 return -1;
262 }
263
264 if (!strcmp (flag, "valid")__extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p
(flag) && __builtin_constant_p ("valid") && (
__s1_len = __builtin_strlen (flag), __s2_len = __builtin_strlen
("valid"), (!((size_t)(const void *)((flag) + 1) - (size_t)(
const void *)(flag) == 1) || __s1_len >= 4) && (!(
(size_t)(const void *)(("valid") + 1) - (size_t)(const void *
)("valid") == 1) || __s2_len >= 4)) ? __builtin_strcmp (flag
, "valid") : (__builtin_constant_p (flag) && ((size_t
)(const void *)((flag) + 1) - (size_t)(const void *)(flag) ==
1) && (__s1_len = __builtin_strlen (flag), __s1_len <
4) ? (__builtin_constant_p ("valid") && ((size_t)(const
void *)(("valid") + 1) - (size_t)(const void *)("valid") == 1
) ? __builtin_strcmp (flag, "valid") : (__extension__ ({ const
unsigned char *__s2 = (const unsigned char *) (const char *)
("valid"); int __result = (((const unsigned char *) (const char
*) (flag))[0] - __s2[0]); if (__s1_len > 0 && __result
== 0) { __result = (((const unsigned char *) (const char *) (
flag))[1] - __s2[1]); if (__s1_len > 1 && __result
== 0) { __result = (((const unsigned char *) (const char *) (
flag))[2] - __s2[2]); if (__s1_len > 2 && __result
== 0) __result = (((const unsigned char *) (const char *) (flag
))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (
"valid") && ((size_t)(const void *)(("valid") + 1) - (
size_t)(const void *)("valid") == 1) && (__s2_len = __builtin_strlen
("valid"), __s2_len < 4) ? (__builtin_constant_p (flag) &&
((size_t)(const void *)((flag) + 1) - (size_t)(const void *)
(flag) == 1) ? __builtin_strcmp (flag, "valid") : (- (__extension__
({ const unsigned char *__s2 = (const unsigned char *) (const
char *) (flag); int __result = (((const unsigned char *) (const
char *) ("valid"))[0] - __s2[0]); if (__s2_len > 0 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) ("valid"))[1] - __s2[1]); if (__s2_len > 1 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) ("valid"))[2] - __s2[2]); if (__s2_len > 2 &&
__result == 0) __result = (((const unsigned char *) (const char
*) ("valid"))[3] - __s2[3]); } } __result; })))) : __builtin_strcmp
(flag, "valid")))); })
)
265 map->flags |= TR46_FLG_VALID1;
266 else if (!strcmp (flag, "mapped")__extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p
(flag) && __builtin_constant_p ("mapped") &&
(__s1_len = __builtin_strlen (flag), __s2_len = __builtin_strlen
("mapped"), (!((size_t)(const void *)((flag) + 1) - (size_t)
(const void *)(flag) == 1) || __s1_len >= 4) && (!
((size_t)(const void *)(("mapped") + 1) - (size_t)(const void
*)("mapped") == 1) || __s2_len >= 4)) ? __builtin_strcmp (
flag, "mapped") : (__builtin_constant_p (flag) && ((size_t
)(const void *)((flag) + 1) - (size_t)(const void *)(flag) ==
1) && (__s1_len = __builtin_strlen (flag), __s1_len <
4) ? (__builtin_constant_p ("mapped") && ((size_t)(const
void *)(("mapped") + 1) - (size_t)(const void *)("mapped") ==
1) ? __builtin_strcmp (flag, "mapped") : (__extension__ ({ const
unsigned char *__s2 = (const unsigned char *) (const char *)
("mapped"); int __result = (((const unsigned char *) (const char
*) (flag))[0] - __s2[0]); if (__s1_len > 0 && __result
== 0) { __result = (((const unsigned char *) (const char *) (
flag))[1] - __s2[1]); if (__s1_len > 1 && __result
== 0) { __result = (((const unsigned char *) (const char *) (
flag))[2] - __s2[2]); if (__s1_len > 2 && __result
== 0) __result = (((const unsigned char *) (const char *) (flag
))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (
"mapped") && ((size_t)(const void *)(("mapped") + 1) -
(size_t)(const void *)("mapped") == 1) && (__s2_len =
__builtin_strlen ("mapped"), __s2_len < 4) ? (__builtin_constant_p
(flag) && ((size_t)(const void *)((flag) + 1) - (size_t
)(const void *)(flag) == 1) ? __builtin_strcmp (flag, "mapped"
) : (- (__extension__ ({ const unsigned char *__s2 = (const unsigned
char *) (const char *) (flag); int __result = (((const unsigned
char *) (const char *) ("mapped"))[0] - __s2[0]); if (__s2_len
> 0 && __result == 0) { __result = (((const unsigned
char *) (const char *) ("mapped"))[1] - __s2[1]); if (__s2_len
> 1 && __result == 0) { __result = (((const unsigned
char *) (const char *) ("mapped"))[2] - __s2[2]); if (__s2_len
> 2 && __result == 0) __result = (((const unsigned
char *) (const char *) ("mapped"))[3] - __s2[3]); } } __result
; })))) : __builtin_strcmp (flag, "mapped")))); })
)
267 map->flags |= TR46_FLG_MAPPED2;
268 else if (!strcmp (flag, "disallowed")__extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p
(flag) && __builtin_constant_p ("disallowed") &&
(__s1_len = __builtin_strlen (flag), __s2_len = __builtin_strlen
("disallowed"), (!((size_t)(const void *)((flag) + 1) - (size_t
)(const void *)(flag) == 1) || __s1_len >= 4) && (
!((size_t)(const void *)(("disallowed") + 1) - (size_t)(const
void *)("disallowed") == 1) || __s2_len >= 4)) ? __builtin_strcmp
(flag, "disallowed") : (__builtin_constant_p (flag) &&
((size_t)(const void *)((flag) + 1) - (size_t)(const void *)
(flag) == 1) && (__s1_len = __builtin_strlen (flag), __s1_len
< 4) ? (__builtin_constant_p ("disallowed") && ((
size_t)(const void *)(("disallowed") + 1) - (size_t)(const void
*)("disallowed") == 1) ? __builtin_strcmp (flag, "disallowed"
) : (__extension__ ({ const unsigned char *__s2 = (const unsigned
char *) (const char *) ("disallowed"); int __result = (((const
unsigned char *) (const char *) (flag))[0] - __s2[0]); if (__s1_len
> 0 && __result == 0) { __result = (((const unsigned
char *) (const char *) (flag))[1] - __s2[1]); if (__s1_len >
1 && __result == 0) { __result = (((const unsigned char
*) (const char *) (flag))[2] - __s2[2]); if (__s1_len > 2
&& __result == 0) __result = (((const unsigned char *
) (const char *) (flag))[3] - __s2[3]); } } __result; }))) : (
__builtin_constant_p ("disallowed") && ((size_t)(const
void *)(("disallowed") + 1) - (size_t)(const void *)("disallowed"
) == 1) && (__s2_len = __builtin_strlen ("disallowed"
), __s2_len < 4) ? (__builtin_constant_p (flag) &&
((size_t)(const void *)((flag) + 1) - (size_t)(const void *)
(flag) == 1) ? __builtin_strcmp (flag, "disallowed") : (- (__extension__
({ const unsigned char *__s2 = (const unsigned char *) (const
char *) (flag); int __result = (((const unsigned char *) (const
char *) ("disallowed"))[0] - __s2[0]); if (__s2_len > 0 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) ("disallowed"))[1] - __s2[1]); if (__s2_len > 1 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) ("disallowed"))[2] - __s2[2]); if (__s2_len > 2 &&
__result == 0) __result = (((const unsigned char *) (const char
*) ("disallowed"))[3] - __s2[3]); } } __result; })))) : __builtin_strcmp
(flag, "disallowed")))); })
)
269 map->flags |= TR46_FLG_DISALLOWED16;
270 else if (!strcmp (flag, "ignored")__extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p
(flag) && __builtin_constant_p ("ignored") &&
(__s1_len = __builtin_strlen (flag), __s2_len = __builtin_strlen
("ignored"), (!((size_t)(const void *)((flag) + 1) - (size_t
)(const void *)(flag) == 1) || __s1_len >= 4) && (
!((size_t)(const void *)(("ignored") + 1) - (size_t)(const void
*)("ignored") == 1) || __s2_len >= 4)) ? __builtin_strcmp
(flag, "ignored") : (__builtin_constant_p (flag) && (
(size_t)(const void *)((flag) + 1) - (size_t)(const void *)(flag
) == 1) && (__s1_len = __builtin_strlen (flag), __s1_len
< 4) ? (__builtin_constant_p ("ignored") && ((size_t
)(const void *)(("ignored") + 1) - (size_t)(const void *)("ignored"
) == 1) ? __builtin_strcmp (flag, "ignored") : (__extension__
({ const unsigned char *__s2 = (const unsigned char *) (const
char *) ("ignored"); int __result = (((const unsigned char *
) (const char *) (flag))[0] - __s2[0]); if (__s1_len > 0 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) (flag))[1] - __s2[1]); if (__s1_len > 1 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) (flag))[2] - __s2[2]); if (__s1_len > 2 &&
__result == 0) __result = (((const unsigned char *) (const char
*) (flag))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p
("ignored") && ((size_t)(const void *)(("ignored") +
1) - (size_t)(const void *)("ignored") == 1) && (__s2_len
= __builtin_strlen ("ignored"), __s2_len < 4) ? (__builtin_constant_p
(flag) && ((size_t)(const void *)((flag) + 1) - (size_t
)(const void *)(flag) == 1) ? __builtin_strcmp (flag, "ignored"
) : (- (__extension__ ({ const unsigned char *__s2 = (const unsigned
char *) (const char *) (flag); int __result = (((const unsigned
char *) (const char *) ("ignored"))[0] - __s2[0]); if (__s2_len
> 0 && __result == 0) { __result = (((const unsigned
char *) (const char *) ("ignored"))[1] - __s2[1]); if (__s2_len
> 1 && __result == 0) { __result = (((const unsigned
char *) (const char *) ("ignored"))[2] - __s2[2]); if (__s2_len
> 2 && __result == 0) __result = (((const unsigned
char *) (const char *) ("ignored"))[3] - __s2[3]); } } __result
; })))) : __builtin_strcmp (flag, "ignored")))); })
)
271 map->flags |= TR46_FLG_IGNORED4;
272 else if (!strcmp (flag, "deviation")__extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p
(flag) && __builtin_constant_p ("deviation") &&
(__s1_len = __builtin_strlen (flag), __s2_len = __builtin_strlen
("deviation"), (!((size_t)(const void *)((flag) + 1) - (size_t
)(const void *)(flag) == 1) || __s1_len >= 4) && (
!((size_t)(const void *)(("deviation") + 1) - (size_t)(const void
*)("deviation") == 1) || __s2_len >= 4)) ? __builtin_strcmp
(flag, "deviation") : (__builtin_constant_p (flag) &&
((size_t)(const void *)((flag) + 1) - (size_t)(const void *)
(flag) == 1) && (__s1_len = __builtin_strlen (flag), __s1_len
< 4) ? (__builtin_constant_p ("deviation") && ((size_t
)(const void *)(("deviation") + 1) - (size_t)(const void *)("deviation"
) == 1) ? __builtin_strcmp (flag, "deviation") : (__extension__
({ const unsigned char *__s2 = (const unsigned char *) (const
char *) ("deviation"); int __result = (((const unsigned char
*) (const char *) (flag))[0] - __s2[0]); if (__s1_len > 0
&& __result == 0) { __result = (((const unsigned char
*) (const char *) (flag))[1] - __s2[1]); if (__s1_len > 1
&& __result == 0) { __result = (((const unsigned char
*) (const char *) (flag))[2] - __s2[2]); if (__s1_len > 2
&& __result == 0) __result = (((const unsigned char *
) (const char *) (flag))[3] - __s2[3]); } } __result; }))) : (
__builtin_constant_p ("deviation") && ((size_t)(const
void *)(("deviation") + 1) - (size_t)(const void *)("deviation"
) == 1) && (__s2_len = __builtin_strlen ("deviation")
, __s2_len < 4) ? (__builtin_constant_p (flag) && (
(size_t)(const void *)((flag) + 1) - (size_t)(const void *)(flag
) == 1) ? __builtin_strcmp (flag, "deviation") : (- (__extension__
({ const unsigned char *__s2 = (const unsigned char *) (const
char *) (flag); int __result = (((const unsigned char *) (const
char *) ("deviation"))[0] - __s2[0]); if (__s2_len > 0 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) ("deviation"))[1] - __s2[1]); if (__s2_len > 1 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) ("deviation"))[2] - __s2[2]); if (__s2_len > 2 &&
__result == 0) __result = (((const unsigned char *) (const char
*) ("deviation"))[3] - __s2[3]); } } __result; })))) : __builtin_strcmp
(flag, "deviation")))); })
)
273 map->flags |= TR46_FLG_DEVIATION8;
274 else if (!strcmp (flag, "disallowed_STD3_mapped")__extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p
(flag) && __builtin_constant_p ("disallowed_STD3_mapped"
) && (__s1_len = __builtin_strlen (flag), __s2_len = __builtin_strlen
("disallowed_STD3_mapped"), (!((size_t)(const void *)((flag)
+ 1) - (size_t)(const void *)(flag) == 1) || __s1_len >= 4
) && (!((size_t)(const void *)(("disallowed_STD3_mapped"
) + 1) - (size_t)(const void *)("disallowed_STD3_mapped") == 1
) || __s2_len >= 4)) ? __builtin_strcmp (flag, "disallowed_STD3_mapped"
) : (__builtin_constant_p (flag) && ((size_t)(const void
*)((flag) + 1) - (size_t)(const void *)(flag) == 1) &&
(__s1_len = __builtin_strlen (flag), __s1_len < 4) ? (__builtin_constant_p
("disallowed_STD3_mapped") && ((size_t)(const void *
)(("disallowed_STD3_mapped") + 1) - (size_t)(const void *)("disallowed_STD3_mapped"
) == 1) ? __builtin_strcmp (flag, "disallowed_STD3_mapped") :
(__extension__ ({ const unsigned char *__s2 = (const unsigned
char *) (const char *) ("disallowed_STD3_mapped"); int __result
= (((const unsigned char *) (const char *) (flag))[0] - __s2
[0]); if (__s1_len > 0 && __result == 0) { __result
= (((const unsigned char *) (const char *) (flag))[1] - __s2
[1]); if (__s1_len > 1 && __result == 0) { __result
= (((const unsigned char *) (const char *) (flag))[2] - __s2
[2]); if (__s1_len > 2 && __result == 0) __result =
(((const unsigned char *) (const char *) (flag))[3] - __s2[3
]); } } __result; }))) : (__builtin_constant_p ("disallowed_STD3_mapped"
) && ((size_t)(const void *)(("disallowed_STD3_mapped"
) + 1) - (size_t)(const void *)("disallowed_STD3_mapped") == 1
) && (__s2_len = __builtin_strlen ("disallowed_STD3_mapped"
), __s2_len < 4) ? (__builtin_constant_p (flag) &&
((size_t)(const void *)((flag) + 1) - (size_t)(const void *)
(flag) == 1) ? __builtin_strcmp (flag, "disallowed_STD3_mapped"
) : (- (__extension__ ({ const unsigned char *__s2 = (const unsigned
char *) (const char *) (flag); int __result = (((const unsigned
char *) (const char *) ("disallowed_STD3_mapped"))[0] - __s2
[0]); if (__s2_len > 0 && __result == 0) { __result
= (((const unsigned char *) (const char *) ("disallowed_STD3_mapped"
))[1] - __s2[1]); if (__s2_len > 1 && __result == 0
) { __result = (((const unsigned char *) (const char *) ("disallowed_STD3_mapped"
))[2] - __s2[2]); if (__s2_len > 2 && __result == 0
) __result = (((const unsigned char *) (const char *) ("disallowed_STD3_mapped"
))[3] - __s2[3]); } } __result; })))) : __builtin_strcmp (flag
, "disallowed_STD3_mapped")))); })
)
275 map->flags |= TR46_FLG_DISALLOWED_STD3_MAPPED32;
276 else if (!strcmp (flag, "disallowed_STD3_valid")__extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p
(flag) && __builtin_constant_p ("disallowed_STD3_valid"
) && (__s1_len = __builtin_strlen (flag), __s2_len = __builtin_strlen
("disallowed_STD3_valid"), (!((size_t)(const void *)((flag) +
1) - (size_t)(const void *)(flag) == 1) || __s1_len >= 4)
&& (!((size_t)(const void *)(("disallowed_STD3_valid"
) + 1) - (size_t)(const void *)("disallowed_STD3_valid") == 1
) || __s2_len >= 4)) ? __builtin_strcmp (flag, "disallowed_STD3_valid"
) : (__builtin_constant_p (flag) && ((size_t)(const void
*)((flag) + 1) - (size_t)(const void *)(flag) == 1) &&
(__s1_len = __builtin_strlen (flag), __s1_len < 4) ? (__builtin_constant_p
("disallowed_STD3_valid") && ((size_t)(const void *)
(("disallowed_STD3_valid") + 1) - (size_t)(const void *)("disallowed_STD3_valid"
) == 1) ? __builtin_strcmp (flag, "disallowed_STD3_valid") : (
__extension__ ({ const unsigned char *__s2 = (const unsigned char
*) (const char *) ("disallowed_STD3_valid"); int __result = (
((const unsigned char *) (const char *) (flag))[0] - __s2[0])
; if (__s1_len > 0 && __result == 0) { __result = (
((const unsigned char *) (const char *) (flag))[1] - __s2[1])
; if (__s1_len > 1 && __result == 0) { __result = (
((const unsigned char *) (const char *) (flag))[2] - __s2[2])
; if (__s1_len > 2 && __result == 0) __result = ((
(const unsigned char *) (const char *) (flag))[3] - __s2[3]);
} } __result; }))) : (__builtin_constant_p ("disallowed_STD3_valid"
) && ((size_t)(const void *)(("disallowed_STD3_valid"
) + 1) - (size_t)(const void *)("disallowed_STD3_valid") == 1
) && (__s2_len = __builtin_strlen ("disallowed_STD3_valid"
), __s2_len < 4) ? (__builtin_constant_p (flag) &&
((size_t)(const void *)((flag) + 1) - (size_t)(const void *)
(flag) == 1) ? __builtin_strcmp (flag, "disallowed_STD3_valid"
) : (- (__extension__ ({ const unsigned char *__s2 = (const unsigned
char *) (const char *) (flag); int __result = (((const unsigned
char *) (const char *) ("disallowed_STD3_valid"))[0] - __s2[
0]); if (__s2_len > 0 && __result == 0) { __result
= (((const unsigned char *) (const char *) ("disallowed_STD3_valid"
))[1] - __s2[1]); if (__s2_len > 1 && __result == 0
) { __result = (((const unsigned char *) (const char *) ("disallowed_STD3_valid"
))[2] - __s2[2]); if (__s2_len > 2 && __result == 0
) __result = (((const unsigned char *) (const char *) ("disallowed_STD3_valid"
))[3] - __s2[3]); } } __result; })))) : __builtin_strcmp (flag
, "disallowed_STD3_valid")))); })
)
277 map->flags |= TR46_FLG_DISALLOWED_STD3_VALID64;
278 else
279 {
280 printf ("Unknown flag '%s'\n", flag);
281 return -1;
282 }
283
284 if (mapping && *mapping)
285 {
286 uint32_t cp, tmp[20], tmp2[20];
287 int pos;
288
289 while ((n = sscanf (mapping, " %X%n", &cp, &pos)) == 1)
290 {
291 if (mapdata_pos >= countof (genmapdata)(sizeof(genmapdata)/sizeof(*(genmapdata))))
292 {
293 printf ("genmapdata too small - increase and retry\n");
294 break;
295 }
296
297 if (map->nmappings == 0)
298 {
299 map->offset = mapdata_pos;
300 if (map->offset != mapdata_pos)
301 printf ("offset overflow (%zu)\n", mapdata_pos);
302 }
303
304 tmp[map->nmappings] = cp;
305 mapdata_pos += _u32_to_stream (genmapdata + mapdata_pos, 5, &cp, 1);
306 map->nmappings++;
307 mapping += pos;
308 }
309
310 /* selftest */
311 _copy_from_stream (tmp2, genmapdata + map->offset, map->nmappings);
312 for (pos = 0; pos < map->nmappings; pos++)
313 if (tmp[pos] != tmp2[pos])
314 abort ();
315 }
316 else if (map->flags &
317 (TR46_FLG_MAPPED2 | TR46_FLG_DISALLOWED_STD3_MAPPED32 |
318 TR46_FLG_DEVIATION8))
319 {
320 if (map->cp1 != 0x200C && map->cp1 != 0x200D) /* ZWNJ and ZWJ */
321 printf ("Missing mapping for '%s'\n", codepoint);
322 }
323
324 if (map_pos && map->nmappings == 0)
325 {
326 /* merge with previous if possible */
327 IDNAMap_gen *prev = &idna_map[map_pos - 1];
328 if (prev->cp2 + 1 == map->cp1
329 && prev->nmappings == 0 && prev->flags == map->flags)
330 {
331 prev->cp2 = map->cp2;
332 memset (map, 0, sizeof (*map)); /* clean up */
333 return 0;
334 }
335 }
336
337 if (++map_pos >= countof (idna_map)(sizeof(idna_map)/sizeof(*(idna_map))))
338 {
339 printf ("Internal map size too small\n");
340 return -1;
341 }
342
343 return 0;
344}
345
346static int
347_compare_map (IDNAMap_gen * m1, IDNAMap_gen * m2)
348{
349 if (m1->cp1 < m2->cp1)
350 return -1;
351 if (m1->cp1 > m2->cp2)
352 return 1;
353 return 0;
354}
355
356static int
357read_NFCQC (char *linep)
358{
359 NFCQCMap *map = &nfcqc_map[nfcqc_pos];
360 char *codepoint, *type, *check;
361 int n;
362
363 codepoint = _nextField (&linep);
364 type = _nextField (&linep);
365 check = _nextField (&linep);
366
367 if (!type || strcmp (type, "NFC_QC")__extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p
(type) && __builtin_constant_p ("NFC_QC") &&
(__s1_len = __builtin_strlen (type), __s2_len = __builtin_strlen
("NFC_QC"), (!((size_t)(const void *)((type) + 1) - (size_t)
(const void *)(type) == 1) || __s1_len >= 4) && (!
((size_t)(const void *)(("NFC_QC") + 1) - (size_t)(const void
*)("NFC_QC") == 1) || __s2_len >= 4)) ? __builtin_strcmp (
type, "NFC_QC") : (__builtin_constant_p (type) && ((size_t
)(const void *)((type) + 1) - (size_t)(const void *)(type) ==
1) && (__s1_len = __builtin_strlen (type), __s1_len <
4) ? (__builtin_constant_p ("NFC_QC") && ((size_t)(const
void *)(("NFC_QC") + 1) - (size_t)(const void *)("NFC_QC") ==
1) ? __builtin_strcmp (type, "NFC_QC") : (__extension__ ({ const
unsigned char *__s2 = (const unsigned char *) (const char *)
("NFC_QC"); int __result = (((const unsigned char *) (const char
*) (type))[0] - __s2[0]); if (__s1_len > 0 && __result
== 0) { __result = (((const unsigned char *) (const char *) (
type))[1] - __s2[1]); if (__s1_len > 1 && __result
== 0) { __result = (((const unsigned char *) (const char *) (
type))[2] - __s2[2]); if (__s1_len > 2 && __result
== 0) __result = (((const unsigned char *) (const char *) (type
))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (
"NFC_QC") && ((size_t)(const void *)(("NFC_QC") + 1) -
(size_t)(const void *)("NFC_QC") == 1) && (__s2_len =
__builtin_strlen ("NFC_QC"), __s2_len < 4) ? (__builtin_constant_p
(type) && ((size_t)(const void *)((type) + 1) - (size_t
)(const void *)(type) == 1) ? __builtin_strcmp (type, "NFC_QC"
) : (- (__extension__ ({ const unsigned char *__s2 = (const unsigned
char *) (const char *) (type); int __result = (((const unsigned
char *) (const char *) ("NFC_QC"))[0] - __s2[0]); if (__s2_len
> 0 && __result == 0) { __result = (((const unsigned
char *) (const char *) ("NFC_QC"))[1] - __s2[1]); if (__s2_len
> 1 && __result == 0) { __result = (((const unsigned
char *) (const char *) ("NFC_QC"))[2] - __s2[2]); if (__s2_len
> 2 && __result == 0) __result = (((const unsigned
char *) (const char *) ("NFC_QC"))[3] - __s2[3]); } } __result
; })))) : __builtin_strcmp (type, "NFC_QC")))); })
)
368 return 0;
369
370 if ((n = sscanf (codepoint, "%X..%X", &map->cp1, &map->cp2)) == 1)
371 {
372 map->cp2 = map->cp1;
373 }
374 else if (n != 2)
375 {
376 printf ("Failed to scan mapping codepoint '%s'\n", codepoint);
377 return -1;
378 }
379
380 if (map->cp1 > map->cp2)
381 {
382 printf ("Invalid codepoint range '%s'\n", codepoint);
383 return -1;
384 }
385
386 if (*check == 'N')
387 map->check = 1;
388 else if (*check == 'M')
389 map->check = 2;
390 else
391 {
392 printf ("NFQQC: Unknown value '%s'\n", check);
393 return -1;
394 }
395
396 if (++nfcqc_pos >= countof (nfcqc_map)(sizeof(nfcqc_map)/sizeof(*(nfcqc_map))))
397 {
398 printf ("Internal NFCQC map size too small\n");
399 return -1;
400 }
401
402 return 0;
403}
404
405static int
406_compare_map_by_maplen (IDNAMap_gen * m1, IDNAMap_gen * m2)
407{
408 if (m1->nmappings != m2->nmappings)
409 return m2->nmappings - m1->nmappings;
410 if (m1->cp1 < m2->cp1)
411 return -1;
412 if (m1->cp1 > m2->cp2)
413 return 1;
414 return 0;
415}
416
417/*
418static uint32_t *
419_u32_memmem(uint32_t *haystack, size_t hlen, uint32_t *needle, size_t nlen)
420{
421 uint32_t *p;
422
423 if (nlen == 0)
424 return haystack;
425
426 for (p = haystack; hlen >= nlen; p++, hlen--)
427 {
428 if (*p == *needle && (nlen == 1 || u32_cmp(p, needle, nlen) == 0))
429 return p;
430 }
431
432 return NULL;
433}
434*/
435
/* Find the first occurrence of the NLEN bytes at NEEDLE within the
 * HLEN bytes at HAYSTACK.  Returns a pointer to the match, HAYSTACK
 * itself for an empty needle, or NULL if there is no match. */
static uint8_t *
_u8_memmem (uint8_t * haystack, size_t hlen, uint8_t * needle, size_t nlen)
{
  uint8_t *cursor = haystack;
  size_t remaining = hlen;

  if (!nlen)
    return haystack;

  while (remaining >= nlen)
    {
      /* compare the first byte before paying for a full memcmp() */
      if (cursor[0] == needle[0])
        {
          if (nlen == 1 || memcmp (cursor, needle, nlen) == 0)
            return cursor;
        }

      cursor++;
      remaining--;
    }

  return NULL;
}
452
/* Return the number of bytes that the first NCP codepoints occupy in
 * the byte stream STREAM.  A byte with bit 7 clear ends a codepoint. */
static size_t
_u32_cp_stream_len (const uint8_t * stream, size_t ncp)
{
  const uint8_t *cursor = stream;

  while (ncp > 0)
    {
      if (!(*cursor & 0x80))
        ncp--;
      cursor++;
    }

  return cursor - stream;
}
466
467/* Remove doubled mappings. With Unicode 6.3.0 the mapping data shrinks
468 * from 7272 to 4322 entries of uint32_t (29088 to 17288 bytes).
469 * Converting those 4322 uin32_t values to a uint8_t stream, we decrease mapping
470 * table size from 17288 to 9153 bytes.
471 */
472static void
473_compact_idna_map (void)
474{
475 unsigned it;
476
477 /* sort into 'longest mappings first' */
478 qsort (idna_map, map_pos, sizeof (IDNAMap_gen),
479 (int (*)(const void *, const void *)) _compare_map_by_maplen);
480
481 uint8_t *data = calloc (sizeof (uint8_t), mapdata_pos), *p;
482 size_t ndata = 0, slen;
483
484 for (it = 0; it < map_pos; it++)
3
Assuming 'it' is < 'map_pos'
4
Loop condition is true. Entering loop body
485 {
486 IDNAMap_gen *map = idna_map + it;
487
488 if (!map->nmappings)
5
Taking false branch
489 continue;
490
491 slen = _u32_cp_stream_len (genmapdata + map->offset, map->nmappings);
492
493 if ((p = _u8_memmem (data, ndata, genmapdata + map->offset, slen)))
6
Assuming 'p' is null
7
Taking false branch
494 {
495 map->offset = p - data;
496 continue;
497 }
498
499 memcpy (data + ndata, genmapdata + map->offset, slen);
8
Null pointer passed as an argument to a 'nonnull' parameter
500 map->offset = ndata;
501 ndata += slen;
502 }
503
504 memcpy (genmapdata, data, ndata);
505 mapdata_pos = ndata;
506 free (data);
507
508 /* sort into 'lowest codepoint first' */
509 qsort (idna_map, map_pos, sizeof (IDNAMap_gen),
510 (int (*)(const void *, const void *)) _compare_map);
511}
512
513static void
514_combine_idna_flags (void)
515{
516 unsigned it, it2;
517
518 /* There are not many different combinations of flags */
519 for (it = 0; it < map_pos; it++)
520 {
521 IDNAMap_gen *map = idna_map + it;
522 int found = 0;
523
524 for (it2 = 0; it2 < flag_combinations && !found; it2++)
525 {
526 if (flag_combination[it2] == map->flags)
527 {
528 map->flag_index = it2;
529 found = 1;
530 }
531 }
532
533 if (!found)
534 {
535 if (flag_combinations >= countof (flag_combination)(sizeof(flag_combination)/sizeof(*(flag_combination))))
536 {
537 fprintf (stderrstderr,
538 "flag_combination[] too small - increase and retry\n");
539 exit (EXIT_FAILURE1);
540 }
541 map->flag_index = flag_combinations++;
542 flag_combination[map->flag_index] = map->flags;
543 }
544 }
545 for (it = 0; it < map_pos; it++)
546 {
547 IDNAMap_gen *map = idna_map + it;
548
549 if (map->flags != flag_combination[map->flag_index])
550 {
551 fprintf (stderrstderr, "Flags do not for 0x%X-0x%X)\n", map->cp1,
552 map->cp2);
553 exit (EXIT_FAILURE1);
554 }
555 }
556}
557
558static int
559_print_tr46_map (uint32_t min, uint32_t max, int do_print)
560{
561 unsigned it;
562 int it2, entries = 0;
563
564 for (it = 0; it < map_pos; it++)
565 {
566 const IDNAMap_gen *map = idna_map + it;
567 uint32_t cp2, cp1 = map->cp1, value, range;
568 int n;
569
570 if (cp1 < min)
571 continue;
572
573 if (cp1 > max)
574 break;
575
576 n = (map->cp2 - cp1) / 0x10000;
577
578 for (it2 = 0; it2 <= n; it2++, cp1 = cp2 + 1)
579 {
580 entries++;
581
582 if (it2 == n)
583 cp2 = map->cp2;
584 else
585 cp2 = cp1 + 0xFFFF;
586
587 if (!do_print)
588 continue;
589
590 range = cp2 - cp1;
591 value =
592 (((map->nmappings << 14) | map->offset) << 3) | map->flag_index;
593
594 if (max == 0xFF)
595 printf ("0x%X,0x%X,", cp1 & 0xFF, range & 0xFF);
596 else if (max == 0xFFFF)
597 printf ("0x%X,0x%X,0x%X,0x%X,",
598 (cp1 >> 8) & 0xFF, cp1 & 0xFF,
599 (range >> 8) & 0xFF, range & 0xFF);
600 else if (max == 0xFFFFFF)
601 printf ("0x%X,0x%X,0x%X,0x%X,0x%X,",
602 (cp1 >> 16) & 0xFF, (cp1 >> 8) & 0xFF, cp1 & 0xFF,
603 (range >> 8) & 0xFF, range & 0xFF);
604
605 printf ("0x%X,0x%X,0x%X,\n",
606 (value >> 16) & 0xFF, (value >> 8) & 0xFF, value & 0xFF);
607 }
608 }
609
610 if (max == 0xFF)
611 return entries * 5;
612 if (max == 0xFFFF)
613 return entries * 7;
614 if (max == 0xFFFFFF)
615 return entries * 8;
616
617 return 0;
618}
619
620int
621main (void)
622{
623 unsigned it;
624
625 // read IDNA mappings
626 if (_scan_file (SRCDIR"." "/IdnaMappingTable.txt", read_IdnaMappings))
1
Taking false branch
627 return 1;
628
629 _compact_idna_map ();
2
Calling '_compact_idna_map'
630 _combine_idna_flags ();
631
632 // read NFC QuickCheck table
633 if (_scan_file (SRCDIR"." "/DerivedNormalizationProps.txt", read_NFCQC))
634 return 1;
635
636 qsort (nfcqc_map, nfcqc_pos, sizeof (NFCQCMap),
637 (int (*)(const void *, const void *)) _compare_map);
638
639 printf ("/* This file is automatically generated. DO NOT EDIT! */\n\n");
640 printf ("#include <stdint.h>\n");
641 printf ("#include \"tr46map.h\"\n\n");
642
643 printf ("static const uint8_t idna_flags[%u] =\n{", flag_combinations);
644 for (it = 0; it < flag_combinations; it++)
645 {
646 printf ("0x%X,", flag_combination[it]);
647 }
648 printf ("};\n\n");
649
650 printf ("static const uint8_t idna_map_8[%d] = {\n",
651 _print_tr46_map (0x0, 0xFF, 0));
652 _print_tr46_map (0x0, 0xFF, 1);
653 printf ("};\n\n");
654
655 printf ("static const uint8_t idna_map_16[%d] = {\n",
656 _print_tr46_map (0x100, 0xFFFF, 0));
657 _print_tr46_map (0x100, 0xFFFF, 1);
658 printf ("};\n\n");
659
660 printf ("static const uint8_t idna_map_24[%d] = {\n",
661 _print_tr46_map (0x10000, 0xFFFFFF, 0));
662 _print_tr46_map (0x10000, 0xFFFFFF, 1);
663 printf ("};\n\n");
664
665 printf ("static const uint8_t mapdata[%zu] = {\n", mapdata_pos);
666 for (it = 0; it < mapdata_pos; it++)
667 {
668 printf ("0x%02X,%s", genmapdata[it], it % 16 == 15 ? "\n" : "");
669 }
670 printf ("};\n\n");
671
672 printf ("static const NFCQCMap nfcqc_map[%zu] = {\n", nfcqc_pos);
673 for (it = 0; it < nfcqc_pos; it++)
674 {
675 NFCQCMap *map = nfcqc_map + it;
676 printf ("{0x%X,0x%X,%d},\n", map->cp1, map->cp2, map->check);
677 }
678 printf ("};\n");
679
680 return 0;
681}