Branch data Line data Source code
1 : : /* toutf8.c --- Convert strings from system locale into UTF-8.
2 : : Copyright (C) 2002-2013 Simon Josefsson
3 : :
4 : : This file is part of GNU Libidn.
5 : :
6 : : GNU Libidn is free software: you can redistribute it and/or
7 : : modify it under the terms of either:
8 : :
9 : : * the GNU Lesser General Public License as published by the Free
10 : : Software Foundation; either version 3 of the License, or (at
11 : : your option) any later version.
12 : :
13 : : or
14 : :
15 : : * the GNU General Public License as published by the Free
16 : : Software Foundation; either version 2 of the License, or (at
17 : : your option) any later version.
18 : :
19 : : or both in parallel, as here.
20 : :
21 : : GNU Libidn is distributed in the hope that it will be useful,
22 : : but WITHOUT ANY WARRANTY; without even the implied warranty of
23 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 : : General Public License for more details.
25 : :
26 : : You should have received copies of the GNU General Public License and
27 : : the GNU Lesser General Public License along with this program. If
28 : : not, see <http://www.gnu.org/licenses/>. */
29 : :
30 : : #ifdef HAVE_CONFIG_H
31 : : # include "config.h"
32 : : #endif
33 : :
34 : : /* Get prototypes. */
35 : : #include "stringprep.h"
36 : :
37 : : /* Get fprintf. */
38 : : #include <stdio.h>
39 : :
40 : : /* Get getenv. */
41 : : #include <stdlib.h>
42 : :
43 : : /* Get strlen. */
44 : : #include <string.h>
45 : :
46 : : /* Get iconv_string. */
47 : : #include "striconv.h"
48 : :
49 : : #ifdef _LIBC
50 : : # define HAVE_ICONV 1
51 : : # define HAVE_LOCALE_H 1
52 : : # define HAVE_LANGINFO_CODESET 1
53 : : #endif
54 : :
55 : : #include <locale.h>
56 : :
57 : : #ifdef HAVE_LANGINFO_CODESET
58 : : # include <langinfo.h>
59 : : #endif
60 : :
61 : : #ifdef _LIBC
62 : : # define stringprep_locale_charset() nl_langinfo (CODESET)
63 : : #else
/**
 * stringprep_locale_charset:
 *
 * Find out the current locale's charset.  A non-empty CHARSET
 * environment variable takes precedence.  Otherwise, when
 * nl_langinfo(CODESET) is supported and returns a non-empty name,
 * that name is used.  If neither source yields a name, the function
 * falls back on "ASCII".
 *
 * Note that this function returns the preferred charset of the
 * application's locale (or of the thread's locale, if your system
 * supports thread-specific locales).  It does not return what the
 * system may be using.  Thus, if you receive data from external
 * sources, you cannot in general use this function to guess what
 * charset the data is encoded in.  Use stringprep_convert() to
 * convert from the external representation into the charset
 * returned by this function, in order to have the data in the
 * locale's encoding.
 *
 * Return value: Returns the character set used by the current locale.
 *   It never returns NULL, but uses "ASCII" as a fallback.
 **/
const char *
stringprep_locale_charset (void)
{
  /* An explicit CHARSET setting overrides whatever the locale reports.  */
  const char *name = getenv ("CHARSET");	/* flawfinder: ignore */

# ifdef HAVE_LANGINFO_CODESET
  /* Only consult the locale when the override is absent or empty.  */
  if (name == NULL || *name == '\0')
    name = nl_langinfo (CODESET);
# endif

  /* Last resort, so that callers always get a usable charset name.  */
  if (name == NULL || *name == '\0')
    name = "ASCII";

  return name;
}
101 : : #endif
102 : :
/**
 * stringprep_convert:
 * @str: input zero-terminated string.
 * @to_codeset: name of destination character set.
 * @from_codeset: name of origin character set, as used by @str.
 *
 * Convert the string from one character set to another using the
 * system's iconv() function.  When the library is built without
 * iconv support, a warning is printed on stderr and an unconverted
 * copy of @str is returned instead.
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into to_codeset.  In the build without iconv
 *   support, NULL is returned if memory for the copy cannot be
 *   allocated.
 **/
char *
stringprep_convert (const char *str,
		    const char *to_codeset, const char *from_codeset)
{
#if HAVE_ICONV
  /* Mind the argument order: str_iconv is passed the origin codeset
     before the destination codeset.  */
  return str_iconv (str, from_codeset, to_codeset);
#else
  size_t size;
  char *copy;

  fprintf (stderr, "libidn: warning: libiconv not installed, cannot "
	   "convert data to UTF-8\n");

  /* No conversion is possible; hand back a plain duplicate, including
     the terminating zero byte.  */
  size = strlen (str) + 1;
  copy = malloc (size);
  if (copy != NULL)
    memcpy (copy, str, size);

  return copy;
#endif
}
131 : :
/**
 * stringprep_locale_to_utf8:
 * @str: input zero terminated string.
 *
 * Convert a string encoded in the locale's character set, as
 * reported by stringprep_locale_charset(), into UTF-8 by using
 * stringprep_convert().
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into UTF-8.
 **/
char *
stringprep_locale_to_utf8 (const char *str)
{
  /* The locale's charset is the origin; UTF-8 is the destination.  */
  const char *locale_codeset = stringprep_locale_charset ();

  return stringprep_convert (str, "UTF-8", locale_codeset);
}
147 : :
/**
 * stringprep_utf8_to_locale:
 * @str: input zero terminated string.
 *
 * Convert a string encoded in UTF-8 into the locale's character set,
 * as reported by stringprep_locale_charset(), by using
 * stringprep_convert().
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into the locale's character set.
 **/
char *
stringprep_utf8_to_locale (const char *str)
{
  /* UTF-8 is the origin; the locale's charset is the destination.  */
  const char *locale_codeset = stringprep_locale_charset ();

  return stringprep_convert (str, locale_codeset, "UTF-8");
}
|