libidn  1.29
Macros | Enumerations | Functions
nfkc.c File Reference
#include <stdlib.h>
#include <string.h>
#include "stringprep.h"
#include "gunidecomp.h"
#include "gunicomp.h"

Go to the source code of this file.

Macros

#define gboolean   int
#define gchar   char
#define guchar   unsigned char
#define glong   long
#define gint   int
#define guint   unsigned int
#define gushort   unsigned short
#define gint16   int16_t
#define guint16   uint16_t
#define gunichar   uint32_t
#define gsize   size_t
#define gssize   ssize_t
#define g_malloc   malloc
#define g_free   free
#define g_return_val_if_fail(expr, val)
#define FALSE   (0)
#define TRUE   (!FALSE)
#define G_N_ELEMENTS(arr)   (sizeof (arr) / sizeof ((arr)[0]))
#define G_UNLIKELY(expr)   (expr)
#define g_utf8_next_char(p)   ((p) + g_utf8_skip[*(const guchar *)(p)])
#define UTF8_COMPUTE(Char, Mask, Len)
#define UTF8_LENGTH(Char)
#define UTF8_GET(Result, Chars, Count, Mask, Len)
#define CC_PART1(Page, Char)
#define CC_PART2(Page, Char)
#define COMBINING_CLASS(Char)
#define SBase   0xAC00
#define LBase   0x1100
#define VBase   0x1161
#define TBase   0x11A7
#define LCount   19
#define VCount   21
#define TCount   28
#define NCount   (VCount * TCount)
#define SCount   (LCount * NCount)
#define CI(Page, Char)
#define COMPOSE_INDEX(Char)   (((Char >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))

Enumerations

enum  GNormalizeMode {
  G_NORMALIZE_DEFAULT, G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT, G_NORMALIZE_DEFAULT_COMPOSE, G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,
  G_NORMALIZE_ALL, G_NORMALIZE_NFKD = G_NORMALIZE_ALL, G_NORMALIZE_ALL_COMPOSE, G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
}

Functions

uint32_t stringprep_utf8_to_unichar (const char *p)
int stringprep_unichar_to_utf8 (uint32_t c, char *outbuf)
uint32_t * stringprep_utf8_to_ucs4 (const char *str, ssize_t len, size_t *items_written)
char * stringprep_ucs4_to_utf8 (const uint32_t *str, ssize_t len, size_t *items_read, size_t *items_written)
char * stringprep_utf8_nfkc_normalize (const char *str, ssize_t len)
uint32_t * stringprep_ucs4_nfkc_normalize (const uint32_t *str, ssize_t len)

Macro Definition Documentation

#define CC_PART1 (   Page,
  Char 
)
Value:
((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (cclass_data[combining_class_table_part1[Page]][Char]))

Definition at line 554 of file nfkc.c.

#define CC_PART2 (   Page,
  Char 
)
Value:
((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (cclass_data[combining_class_table_part2[Page]][Char]))

Definition at line 559 of file nfkc.c.

#define CI (   Page,
  Char 
)
Value:
((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
: (compose_data[compose_table[Page]][Char]))

Definition at line 723 of file nfkc.c.

#define COMBINING_CLASS (   Char)
Value:
(((Char) <= G_UNICODE_LAST_CHAR_PART1) \
? CC_PART1 ((Char) >> 8, (Char) & 0xff) \
: (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \
? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \
: 0))

Definition at line 564 of file nfkc.c.

#define COMPOSE_INDEX (   Char)    (((Char >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))

Definition at line 728 of file nfkc.c.

#define FALSE   (0)

Definition at line 81 of file nfkc.c.

#define g_free   free

Definition at line 53 of file nfkc.c.

#define g_malloc   malloc

Definition at line 52 of file nfkc.c.

#define G_N_ELEMENTS (   arr)    (sizeof (arr) / sizeof ((arr)[0]))

Definition at line 88 of file nfkc.c.

#define g_return_val_if_fail (   expr,
  val 
)
Value:
{ \
if (!(expr)) \
return (val); \
}

Definition at line 54 of file nfkc.c.

#define G_UNLIKELY (   expr)    (expr)

Definition at line 90 of file nfkc.c.

#define g_utf8_next_char (   p)    ((p) + g_utf8_skip[*(const guchar *)(p)])

Definition at line 128 of file nfkc.c.

#define gboolean   int

Definition at line 40 of file nfkc.c.

#define gchar   char

Definition at line 41 of file nfkc.c.

#define gint   int

Definition at line 44 of file nfkc.c.

#define gint16   int16_t

Definition at line 47 of file nfkc.c.

#define glong   long

Definition at line 43 of file nfkc.c.

#define gsize   size_t

Definition at line 50 of file nfkc.c.

#define gssize   ssize_t

Definition at line 51 of file nfkc.c.

#define guchar   unsigned char

Definition at line 42 of file nfkc.c.

#define guint   unsigned int

Definition at line 45 of file nfkc.c.

#define guint16   uint16_t

Definition at line 48 of file nfkc.c.

#define gunichar   uint32_t

Definition at line 49 of file nfkc.c.

#define gushort   unsigned short

Definition at line 46 of file nfkc.c.

#define LBase   0x1100

Definition at line 573 of file nfkc.c.

#define LCount   19

Definition at line 576 of file nfkc.c.

#define NCount   (VCount * TCount)

Definition at line 579 of file nfkc.c.

#define SBase   0xAC00

Definition at line 572 of file nfkc.c.

#define SCount   (LCount * NCount)

Definition at line 580 of file nfkc.c.

#define TBase   0x11A7

Definition at line 575 of file nfkc.c.

#define TCount   28

Definition at line 578 of file nfkc.c.

#define TRUE   (!FALSE)

Definition at line 85 of file nfkc.c.

#define UTF8_COMPUTE (   Char,
  Mask,
  Len 
)

Definition at line 153 of file nfkc.c.

#define UTF8_GET (   Result,
  Chars,
  Count,
  Mask,
  Len 
)
Value:
(Result) = (Chars)[0] & (Mask); \
for ((Count) = 1; (Count) < (Len); ++(Count)) \
{ \
if (((Chars)[(Count)] & 0xc0) != 0x80) \
{ \
(Result) = -1; \
break; \
} \
(Result) <<= 6; \
(Result) |= ((Chars)[(Count)] & 0x3f); \
}

Definition at line 194 of file nfkc.c.

#define UTF8_LENGTH (   Char)
Value:
((Char) < 0x80 ? 1 : \
((Char) < 0x800 ? 2 : \
((Char) < 0x10000 ? 3 : \
((Char) < 0x200000 ? 4 : \
((Char) < 0x4000000 ? 5 : 6)))))

Definition at line 187 of file nfkc.c.

#define VBase   0x1161

Definition at line 574 of file nfkc.c.

#define VCount   21

Definition at line 577 of file nfkc.c.

Enumeration Type Documentation

enum GNormalizeMode

Enumerator:
G_NORMALIZE_DEFAULT 
G_NORMALIZE_NFD 
G_NORMALIZE_DEFAULT_COMPOSE 
G_NORMALIZE_NFC 
G_NORMALIZE_ALL 
G_NORMALIZE_NFKD 
G_NORMALIZE_ALL_COMPOSE 
G_NORMALIZE_NFKC 

Definition at line 115 of file nfkc.c.

Function Documentation

uint32_t* stringprep_ucs4_nfkc_normalize ( const uint32_t *  str,
ssize_t  len 
)

stringprep_ucs4_nfkc_normalize:

Parameters
str: a Unicode string.
len: length of the str array, or -1 if str is nul-terminated.

Converts a UCS4 string into canonical form, see stringprep_utf8_nfkc_normalize() for more information.

Return value: a newly allocated Unicode string, that is the NFKC normalized form of str.

Definition at line 1091 of file nfkc.c.

char* stringprep_ucs4_to_utf8 ( const uint32_t *  str,
ssize_t  len,
size_t *  items_read,
size_t *  items_written 
)

stringprep_ucs4_to_utf8:

Parameters
str: a UCS-4 encoded string
len: the maximum length of str to use. If len < 0, then the string is terminated with a 0 character.
items_read: location to store the number of characters read, or NULL.
items_written: location to store the number of bytes written, or NULL. The value stored here does not include the trailing 0 byte.

Convert a string from a 32-bit fixed width representation as UCS-4 to UTF-8. The result will be terminated with a 0 byte.

Return value: a pointer to a newly allocated UTF-8 string. This value must be deallocated by the caller. If an error occurs, NULL will be returned.

Definition at line 1044 of file nfkc.c.

int stringprep_unichar_to_utf8 ( uint32_t  c,
char *  outbuf 
)

stringprep_unichar_to_utf8:

Parameters
c: an ISO10646 character code
outbuf: output buffer, must have at least 6 bytes of space. If NULL, the length will be computed and returned and nothing will be written to outbuf.

Converts a single character to UTF-8.

Return value: number of bytes written.

Definition at line 1000 of file nfkc.c.

char* stringprep_utf8_nfkc_normalize ( const char *  str,
ssize_t  len 
)

stringprep_utf8_nfkc_normalize:

Parameters
str: a UTF-8 encoded string.
len: length of str, in bytes, or -1 if str is nul-terminated.

Converts a string into canonical form, standardizing such issues as whether a character with an accent is represented as a base character and combining accent or as a single precomposed character.

The normalization mode is NFKC (ALL COMPOSE). It standardizes differences that do not affect the text content, such as the above-mentioned accent representation. It standardizes the "compatibility" characters in Unicode, such as SUPERSCRIPT THREE to the standard forms (in this case DIGIT THREE). Formatting information may be lost but for most text operations such characters should be considered the same. It returns a result with composed forms rather than a maximally decomposed form.

Return value: a newly allocated string, that is the NFKC normalized form of str.

Definition at line 1074 of file nfkc.c.

uint32_t* stringprep_utf8_to_ucs4 ( const char *  str,
ssize_t  len,
size_t *  items_written 
)

stringprep_utf8_to_ucs4:

Parameters
str: a UTF-8 encoded string
len: the maximum length of str to use. If len < 0, then the string is nul-terminated.
items_written: location to store the number of characters in the result, or NULL.

Convert a string from UTF-8 to a 32-bit fixed width representation as UCS-4, assuming valid UTF-8 input. This function does no error checking on the input.

Return value: a pointer to a newly allocated UCS-4 string. This value must be deallocated by the caller.

Definition at line 1021 of file nfkc.c.

uint32_t stringprep_utf8_to_unichar ( const char *  p)

stringprep_utf8_to_unichar:

Parameters
p: a pointer to a Unicode character encoded as UTF-8

Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If p does not point to a valid UTF-8 encoded character, results are undefined.

Return value: the resulting character.

Definition at line 983 of file nfkc.c.