libidn  1.42
GNU Internationalized Domain Name Library

Introduction

GNU Libidn is an implementation of the Stringprep, Punycode and IDNA specifications defined by the IETF Internationalized Domain Names (IDN) working group, used for internationalized domain names. The package is available under the GNU Lesser General Public License.

The library contains a generic Stringprep implementation that does Unicode 3.2 NFKC normalization, mapping and prohibition of characters, and bidirectional character handling. Profiles for Nameprep, iSCSI, SASL and XMPP are included. Punycode and ASCII Compatible Encoding (ACE) via IDNA are supported. A mechanism to define Top-Level Domain (TLD) specific validation tables, and to compare strings against those tables, is included. Default tables for some TLDs are also included.

The Stringprep API consists of two main functions, one for converting data from the system's native representation into UTF-8, and one function to perform the Stringprep processing. Adding a new Stringprep profile for your application within the API is straightforward. The Punycode API consists of one encoding function and one decoding function. The IDNA API consists of the ToASCII and ToUnicode functions, as well as a high-level interface for converting entire domain names to and from the ACE encoded form. The TLD API consists of one set of functions to extract the TLD name from a domain string, one set of functions to locate the proper TLD table to use based on the TLD name, and core functions to validate a string against a TLD table, and some utility wrappers to perform all the steps in one call.

The library is used by, e.g., GNU SASL and Shishi to process user names and passwords. Libidn can be built into GNU Libc to enable a new system-wide getaddrinfo() flag for IDN processing.

Libidn is developed for the GNU/Linux system, but runs on over 20 Unix platforms (including Solaris, IRIX, AIX, and Tru64) and Windows. Libidn is written in C and (parts of) the API is accessible from C, C++, Emacs Lisp, Python and Java.

The project web page:
https://www.gnu.org/software/libidn/

The software archive:
ftp://alpha.gnu.org/pub/gnu/libidn/

For more information see:
http://www.ietf.org/html.charters/idn-charter.html
http://www.ietf.org/rfc/rfc3454.txt (stringprep specification)
http://www.ietf.org/rfc/rfc3490.txt (idna specification)
http://www.ietf.org/rfc/rfc3491.txt (nameprep specification)
http://www.ietf.org/rfc/rfc3492.txt (punycode specification)
http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-04.txt
http://www.ietf.org/internet-drafts/draft-ietf-krb-wg-utf8-profile-01.txt
http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt
http://www.ietf.org/internet-drafts/draft-ietf-sasl-saslprep-00.txt
http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt
http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt
Further information and paid contract development:
Simon Josefsson <simon@josefsson.org>

Examples

/* example.c --- Example code showing how to use stringprep().
* Copyright (C) 2002-2024 Simon Josefsson
*
* This file is part of GNU Libidn.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h> /* setlocale() */
#include <stringprep.h>
/*
* Compiling using libtool and pkg-config is recommended:
*
* $ libtool cc -o example example.c `pkg-config --cflags --libs libidn`
* $ ./example
* Input string encoded as `ISO-8859-1': ª
* Before locale2utf8 (length 2): aa 0a
* Before stringprep (length 3): c2 aa 0a
* After stringprep (length 2): 61 0a
* $
*
*/
/* Read one line from stdin, convert it from the locale's charset to
 * UTF-8, run it through the Nameprep stringprep profile, and hex-dump
 * the bytes before and after each step.
 *
 * Returns EXIT_FAILURE when no input could be read or when the UTF-8
 * form does not fit in the working buffer.  Returns 0 otherwise; a
 * stringprep failure is reported on stdout but does not change the
 * exit status.  */
int
main (void)
{
  char buf[BUFSIZ];
  char *p;
  int rc;
  size_t i, len;

  /* Adopt the locale configured in the environment.  */
  setlocale (LC_ALL, "");

  printf ("Input string encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);

  if (!fgets (buf, BUFSIZ, stdin))
    {
      /* buf is indeterminate after a failed read, so stop here.  */
      perror ("fgets");
      return EXIT_FAILURE;
    }
  /* Drop the trailing newline if present.  strcspn() is safe for an
     empty string and for a last line that has no newline, where
     indexing with strlen (buf) - 1 would underflow or eat a real
     character.  */
  buf[strcspn (buf, "\n")] = '\0';

  len = strlen (buf);
  printf ("Before locale2utf8 (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", (unsigned) buf[i] & 0xFF);
  printf ("\n");

  /* Convert to UTF-8.  The result is heap-allocated and owned by us;
     NULL means the conversion failed.  */
  p = stringprep_locale_to_utf8 (buf);
  if (p)
    {
      /* The UTF-8 form can be longer than the input, so check that it
         fits before copying it back.  */
      if (strlen (p) >= sizeof buf)
        {
          fprintf (stderr, "UTF-8 form of input does not fit in buffer\n");
          free (p);
          return EXIT_FAILURE;
        }
      strcpy (buf, p);
      free (p);
    }
  else
    printf ("Could not convert string to UTF-8, continuing anyway...\n");

  len = strlen (buf);
  printf ("Before stringprep (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", (unsigned) buf[i] & 0xFF);
  printf ("\n");

  /* stringprep() rewrites buf in place.  */
  rc = stringprep (buf, BUFSIZ, 0, stringprep_nameprep);
  if (rc != STRINGPREP_OK)
    printf ("Stringprep failed (%d): %s\n", rc, stringprep_strerror (rc));
  else
    {
      len = strlen (buf);
      printf ("After stringprep (length %ld): ", (long int) len);
      for (i = 0; i < len; i++)
        printf ("%02x ", (unsigned) buf[i] & 0xFF);
      printf ("\n");
    }

  return 0;
}
const char * stringprep_strerror(Stringprep_rc rc)
int stringprep(char *in, size_t maxlen, Stringprep_profile_flags flags, const Stringprep_profile *profile)
Definition: stringprep.c:414
IDNAPI const char * stringprep_locale_charset(void)
Definition: toutf8.c:85
IDNAPI char * stringprep_locale_to_utf8(const char *str)
Definition: toutf8.c:145
@ STRINGPREP_OK
Definition: stringprep.h:67
#define stringprep_nameprep(in, maxlen)
Definition: stringprep.h:202
/* example3.c --- Example ToASCII() code showing how to use Libidn.
* Copyright (C) 2002-2024 Simon Josefsson
*
* This file is part of GNU Libidn.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h> /* setlocale() */
#include <stringprep.h> /* stringprep_locale_charset() */
#include <idna.h> /* idna_to_ascii_lz() */
/*
* Compiling using libtool and pkg-config is recommended:
*
* $ libtool cc -o example3 example3.c `pkg-config --cflags --libs libidn`
* $ ./example3
* Input domain encoded as `ISO-8859-1': www.räksmörgåsª.example
* Read string (length 23): 77 77 77 2e 72 e4 6b 73 6d f6 72 67 e5 73 aa 2e 65 78 61 6d 70 6c 65
* ACE label (length 33): 'www.xn--rksmrgsa-0zap8p.example'
* 77 77 77 2e 78 6e 2d 2d 72 6b 73 6d 72 67 73 61 2d 30 7a 61 70 38 70 2e 65 78 61 6d 70 6c 65
* $
*
*/
/* Read one domain name from stdin in the locale's charset, hex-dump
 * it, convert it with idna_to_ascii_lz(), and print the result as
 * text and as hex.
 *
 * Returns EXIT_FAILURE when no input could be read or the conversion
 * fails, 0 on success.  */
int
main (void)
{
  char buf[BUFSIZ];
  char *p;
  int rc;
  size_t i, len;

  /* Adopt the locale configured in the environment.  */
  setlocale (LC_ALL, "");

  printf ("Input domain encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);

  if (!fgets (buf, BUFSIZ, stdin))
    {
      /* buf is indeterminate after a failed read, so stop here.  */
      perror ("fgets");
      return EXIT_FAILURE;
    }
  /* Drop the trailing newline if present; safe for empty input and
     for a final line without a newline.  */
  buf[strcspn (buf, "\n")] = '\0';

  len = strlen (buf);
  printf ("Read string (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", (unsigned) buf[i] & 0xFF);
  printf ("\n");

  /* On success p receives the converted string, which we free below.  */
  rc = idna_to_ascii_lz (buf, &p, 0);
  if (rc != IDNA_SUCCESS)
    {
      printf ("ToASCII() failed (%d): %s\n", rc, idna_strerror (rc));
      return EXIT_FAILURE;
    }

  len = strlen (p);
  printf ("ACE label (length %ld): '%s'\n", (long int) len, p);
  for (i = 0; i < len; i++)
    printf ("%02x ", (unsigned) p[i] & 0xFF);
  printf ("\n");

  free (p);

  return 0;
}
int idna_to_ascii_lz(const char *input, char **output, int flags)
Definition: idna.c:609
@ IDNA_SUCCESS
Definition: idna.h:74
IDNAPI const char * idna_strerror(Idna_rc rc)
Definition: strerror-idna.c:73
/* example4.c --- Example ToUnicode() code showing how to use Libidn.
* Copyright (C) 2002-2024 Simon Josefsson
*
* This file is part of GNU Libidn.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h> /* setlocale() */
#include <stringprep.h> /* stringprep_locale_charset() */
#include <idna.h> /* idna_to_unicode_lzlz() */
/*
* Compiling using libtool and pkg-config is recommended:
*
* $ libtool cc -o example4 example4.c `pkg-config --cflags --libs libidn`
* $ ./example4
* Input domain encoded as `ISO-8859-1': www.xn--rksmrgsa-0zap8p.example
* Read string (length 33): 77 77 77 2e 78 6e 2d 2d 72 6b 73 6d 72 67 73 61 2d 30 7a 61 70 38 70 2e 65 78 61 6d 70 6c 65
* ACE label (length 23): 'www.räksmörgåsa.example'
* 77 77 77 2e 72 e4 6b 73 6d f6 72 67 e5 73 61 2e 65 78 61 6d 70 6c 65
* $
*
*/
/* Read one domain name from stdin, hex-dump it, convert it with
 * idna_to_unicode_lzlz(), and print the result as text and as hex.
 *
 * Returns EXIT_FAILURE when no input could be read or the conversion
 * fails, 0 on success.  */
int
main (void)
{
  char buf[BUFSIZ];
  char *p;
  int rc;
  size_t i, len;

  /* Adopt the locale configured in the environment.  */
  setlocale (LC_ALL, "");

  printf ("Input domain encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);

  if (!fgets (buf, BUFSIZ, stdin))
    {
      /* buf is indeterminate after a failed read, so stop here.  */
      perror ("fgets");
      return EXIT_FAILURE;
    }
  /* Drop the trailing newline if present; safe for empty input and
     for a final line without a newline.  */
  buf[strcspn (buf, "\n")] = '\0';

  len = strlen (buf);
  printf ("Read string (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", (unsigned) buf[i] & 0xFF);
  printf ("\n");

  /* On success p receives the converted string, which we free below.  */
  rc = idna_to_unicode_lzlz (buf, &p, 0);
  if (rc != IDNA_SUCCESS)
    {
      printf ("ToUnicode() failed (%d): %s\n", rc, idna_strerror (rc));
      return EXIT_FAILURE;
    }

  /* The "ACE label" wording is kept as-is so the output still matches
     the sample session shown in the file's header comment.  */
  len = strlen (p);
  printf ("ACE label (length %ld): '%s'\n", (long int) len, p);
  for (i = 0; i < len; i++)
    printf ("%02x ", (unsigned) p[i] & 0xFF);
  printf ("\n");

  free (p);

  return 0;
}
int idna_to_unicode_lzlz(const char *input, char **output, int flags)
Definition: idna.c:819
/* example5.c --- Example TLD checking.
* Copyright (C) 2004-2024 Simon Josefsson
*
* This file is part of GNU Libidn.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Get stringprep_locale_charset, etc. */
#include <stringprep.h>
/* Get idna_to_ascii_8z, etc. */
#include <idna.h>
/* Get tld_check_4z. */
#include <tld.h>
/*
* Compiling using libtool and pkg-config is recommended:
*
* $ libtool cc -o example5 example5.c `pkg-config --cflags --libs libidn`
* $ ./example5
* Input domain encoded as `UTF-8': fooß.no
* Read string (length 8): 66 6f 6f c3 9f 2e 6e 6f
* ToASCII string (length 8): fooss.no
* ToUnicode string: U+0066 U+006f U+006f U+0073 U+0073 U+002e U+006e U+006f
* Domain accepted by TLD check
*
* $ ./example5
* Input domain encoded as `UTF-8': gr€€n.no
* Read string (length 12): 67 72 e2 82 ac e2 82 ac 6e 2e 6e 6f
* ToASCII string (length 16): xn--grn-l50aa.no
* ToUnicode string: U+0067 U+0072 U+20ac U+20ac U+006e U+002e U+006e U+006f
* Domain rejected by TLD check, Unicode position 2
*
*/
/* Read one domain name from stdin, convert it to UTF-8, run it through
 * idna_to_ascii_8z() and idna_to_unicode_8z4z(), and check the
 * resulting code points with tld_check_4z().
 *
 * Exit status:
 *   0  domain accepted by the TLD check
 *   1  domain rejected by the TLD check (TLD_INVALID)
 *   2  input could not be read, did not fit in the buffer, or a
 *      library call failed  */
int
main (void)
{
  char buf[BUFSIZ];
  char *p;
  uint32_t *r;
  int rc;
  size_t errpos, i, len;

  printf ("Input domain encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);

  if (!fgets (buf, BUFSIZ, stdin))
    {
      /* buf is indeterminate after a failed read, so stop here.  */
      perror ("fgets");
      return 2;
    }
  /* Drop the trailing newline if present; safe for empty input and
     for a final line without a newline.  */
  buf[strcspn (buf, "\n")] = '\0';

  len = strlen (buf);
  printf ("Read string (length %ld): ", (long int) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", (unsigned) buf[i] & 0xFF);
  printf ("\n");

  /* Convert to UTF-8.  The result is heap-allocated and owned by us;
     NULL means the conversion failed.  */
  p = stringprep_locale_to_utf8 (buf);
  if (p)
    {
      /* The UTF-8 form can be longer than the input, so check that it
         fits before copying it back.  */
      if (strlen (p) >= sizeof buf)
        {
          fprintf (stderr, "UTF-8 form of input does not fit in buffer\n");
          free (p);
          return 2;
        }
      strcpy (buf, p);
      free (p);
    }
  else
    printf ("Could not convert string to UTF-8, continuing anyway...\n");

  rc = idna_to_ascii_8z (buf, &p, 0);
  if (rc != IDNA_SUCCESS)
    {
      printf ("idna_to_ascii_8z failed (%d): %s\n", rc, idna_strerror (rc));
      return 2;
    }

  printf ("ToASCII string (length %ld): %s\n", (long int) strlen (p), p);

  rc = idna_to_unicode_8z4z (p, &r, 0);
  free (p);
  if (rc != IDNA_SUCCESS)
    {
      printf ("idna_to_unicode_8z4z failed (%d): %s\n",
              rc, idna_strerror (rc));
      return 2;
    }

  /* r is walked up to its zero terminator.  The cast makes the
     argument type match the conversion specifier on every platform.  */
  printf ("ToUnicode string: ");
  for (i = 0; r[i]; i++)
    printf ("U+%04lx ", (unsigned long) r[i]);
  printf ("\n");

  rc = tld_check_4z (r, &errpos, NULL);
  free (r);
  if (rc == TLD_INVALID)
    {
      printf ("Domain rejected by TLD check, Unicode position %ld\n",
              (long int) errpos);
      return 1;
    }
  else if (rc != TLD_SUCCESS)
    {
      printf ("tld_check_4z() failed (%d): %s\n", rc, tld_strerror (rc));
      return 2;
    }

  printf ("Domain accepted by TLD check\n");

  return 0;
}
int idna_to_ascii_8z(const char *input, char **output, int flags)
Definition: idna.c:576
int idna_to_unicode_8z4z(const char *input, uint32_t **output, int flags)
Definition: idna.c:719
const char * tld_strerror(Tld_rc rc)
Definition: strerror-tld.c:59
int tld_check_4z(const uint32_t *in, size_t *errpos, const Tld_table **overrides)
Definition: tld.c:409
@ TLD_SUCCESS
Definition: tld.h:107
@ TLD_INVALID
Definition: tld.h:108