Line data Source code
1 : /* Look at first character in UTF-8 string, returning an error code.
2 : Copyright (C) 1999-2002, 2006-2007, 2009-2020 Free Software Foundation, Inc.
3 : Written by Bruno Haible <bruno@clisp.org>, 2001.
4 :
5 : This program is free software: you can redistribute it and/or modify it
6 : under the terms of the GNU Lesser General Public License as published
7 : by the Free Software Foundation; either version 2.1 of the License, or
8 : (at your option) any later version.
9 :
10 : This program is distributed in the hope that it will be useful,
11 : but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 : Lesser General Public License for more details.
14 :
15 : You should have received a copy of the GNU Lesser General Public License
16 : along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 :
18 : #include <config.h>
19 :
20 : /* Specification. */
21 : #include "unistr.h"
22 :
/* Look at the first character of the UTF-8 string S, of which N bytes
   are available, and store its code point in *PUC.

   Return value:
     1..4  the character is complete and valid; the value is its length
           in bytes.
     -1    the bytes at S cannot start a valid UTF-8 character.
     -2    the bytes at S are a valid but truncated start of a character;
           more than N bytes are needed to decode it.
   On -1 and -2, *PUC is set to U+FFFD.

   S[0] is read without consulting N, so the caller must pass N >= 1.  */
int
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t c = *s;

  if (c < 0x80)
    {
      /* ASCII: a single byte.  */
      *puc = c;
      return 1;
    }
  else if (c >= 0xc2)
    {
      /* Bytes 0x80..0xc1 never get here: they fall through to the
         "invalid" exit at the bottom.  A trail byte T is tested with
         (T ^ 0x80) < 0x40, which holds exactly for 0x80 <= T <= 0xbf.  */
      if (c < 0xe0)
        {
          /* Two-byte sequence, lead byte 0xc2..0xdf.  */
          if (n >= 2)
            {
              if ((s[1] ^ 0x80) < 0x40)
                {
                  *puc = ((unsigned int) (c & 0x1f) << 6)
                         | (unsigned int) (s[1] ^ 0x80);
                  return 2;
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
      else if (c < 0xf0)
        {
          /* Three-byte sequence, lead byte 0xe0..0xef.  */
          if (n >= 2)
            {
              if ((s[1] ^ 0x80) < 0x40
                  /* Reject overlong forms: 0xe0 needs s[1] >= 0xa0.  */
                  && (c >= 0xe1 || s[1] >= 0xa0)
                  /* Reject surrogates: 0xed needs s[1] < 0xa0.  */
                  && (c != 0xed || s[1] < 0xa0))
                {
                  if (n >= 3)
                    {
                      if ((s[2] ^ 0x80) < 0x40)
                        {
                          *puc = ((unsigned int) (c & 0x0f) << 12)
                                 | ((unsigned int) (s[1] ^ 0x80) << 6)
                                 | (unsigned int) (s[2] ^ 0x80);
                          return 3;
                        }
                      /* invalid multibyte character */
                    }
                  else
                    {
                      /* incomplete multibyte character */
                      *puc = 0xfffd;
                      return -2;
                    }
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
      else if (c <= 0xf4)
        {
          /* Four-byte sequence, lead byte 0xf0..0xf4.  Lead bytes above
             0xf4 would encode values beyond U+10FFFF, so they are
             invalid regardless of how many bytes follow and must not be
             reported as merely incomplete.  */
          if (n >= 2)
            {
              if ((s[1] ^ 0x80) < 0x40
                  /* Reject overlong forms: 0xf0 needs s[1] >= 0x90.  */
                  && (c >= 0xf1 || s[1] >= 0x90)
                  /* Reject values above U+10FFFF: 0xf4 needs
                     s[1] < 0x90.  */
                  && (c < 0xf4 || s[1] < 0x90))
                {
                  if (n >= 3)
                    {
                      if ((s[2] ^ 0x80) < 0x40)
                        {
                          if (n >= 4)
                            {
                              if ((s[3] ^ 0x80) < 0x40)
                                {
                                  *puc = ((unsigned int) (c & 0x07) << 18)
                                         | ((unsigned int) (s[1] ^ 0x80) << 12)
                                         | ((unsigned int) (s[2] ^ 0x80) << 6)
                                         | (unsigned int) (s[3] ^ 0x80);
                                  return 4;
                                }
                              /* invalid multibyte character */
                            }
                          else
                            {
                              /* incomplete multibyte character */
                              *puc = 0xfffd;
                              return -2;
                            }
                        }
                      /* invalid multibyte character */
                    }
                  else
                    {
                      /* incomplete multibyte character */
                      *puc = 0xfffd;
                      return -2;
                    }
                }
              /* invalid multibyte character */
            }
          else
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return -2;
            }
        }
    }
  /* invalid multibyte character */
  *puc = 0xfffd;
  return -1;
}
|