BACK to addon.html#char-mapper

 #if 0
 ##  process this file with bash to test the classification result
 ##  /usr/include/inttypes.h must exist
 ##
 ##  The C preprocessor skips this whole region (it sits inside an if-0
 ##  group), while bash treats the directive lines as comments and runs
 ##  the commands in between.
 ##
 ##  Scratch names are made unique per shell process via the process id.
 base=test_cmap-$$
 cfil=${base}.c
 ##  Stop at the first failing command and remove scratch files on exit.
 set -e
 trap "rm -f ${base}*" EXIT
 ##  Write a small C file that turns on the table definition and the test
 ##  main and then includes test-cmap.h.
 cat > ${cfil} <<\_EOCode_
 #define TEST_TEST_CMAP         1
 #define DEFINE_TEST_CMAP_TABLE 1
 #define _GNU_SOURCE 1
 #include "test-cmap.h"
 _EOCode_
 
 ##  Compile with CC from the environment (default: cc), run the result,
 ##  then clean up and exit before bash reaches the C text below.
 ${CC:-cc} -o ${base} ${base}.c
 ./${base}
 rm -f ${base}*
 exit 0
 #endif
 /*
  *  11 bits for 12 character classifications
  *  generated by char-mapper on 09/08/13 at 12:45:30
  *
  *  Copyright (C) 2000, 2001 Free Software Foundation, Inc.
  *
  *  This file is part of some library.
  */
 #ifndef TEST_CMAP_H_GUARD
 #define TEST_CMAP_H_GUARD 1
 
 /*
  *  Obtain the fixed-width integer types used by this header.
  *
  *  With a config.h driven build (HAVE_CONFIG_H) the choice follows the
  *  configure results: <inttypes.h> first, otherwise <stdint.h>, and as a
  *  last resort a hand-written uint16_t typedef unless HAVE_UINT16_T says
  *  the type already exists.  Without HAVE_CONFIG_H, <inttypes.h> is
  *  included unconditionally.
  *
  *  NOTE(review): the last-resort path declares only uint16_t, yet the
  *  spn_test_cmap_chars() and brk_test_cmap_chars() helpers in this header
  *  also use uintptr_t -- confirm that platforms taking this path get
  *  uintptr_t from somewhere else.
  */
 #ifdef HAVE_CONFIG_H
 # if defined(HAVE_INTTYPES_H)
 #   include <inttypes.h>
 
 # elif defined(HAVE_STDINT_H)
 #   include <stdint.h>
 
 # elif !defined(HAVE_UINT16_T)
     typedef unsigned short  uint16_t;
 # endif /* HAVE_*INT*_H header */
 
 #else /* not HAVE_CONFIG_H -- */
 # include <inttypes.h>
 #endif /* HAVE_CONFIG_H */
 
 #if 0 /* mapping specification source (from test-cmap.map) */
 // %file test-cmap.h
 // %test
 // %comment -- see above
 // %
 // 
 // # Basic types:
 // #
 // cntrl       "\x00-\x1F\x7F"
 // digit       "0-9"
 // lower       "a-z"
 // horiz-space " \t"
 // line-end    "\r\n"
 //         vert-space  "\f\v"
 // upper       "A-Z"
 // 
 // # Compound char types.  Only these may have subtracted names.
 // #
 // xdigit      "a-fA-F"    +digit
 // token-end   "\x00"      +horiz-space +line-end
 // name-start  "_"         +upper +lower
 // punctuation "\x20-\x7E" -upper -lower -horiz-space
 // 
 // # pure combined char types:
 // #
 // space       +horiz-space +line-end +vert-space
 // 
 // %emit code -- see below
 // %
 //
 #endif /* 0 -- mapping spec. source */
 
 
 /* Bit set selecting one or more character classifications. */
 typedef uint16_t test_cmap_mask_t;
 
 /*
  *  Classification macros, three per character class:
  *
  *    IS_<class>_CHAR(c)    1 when character c belongs to the class, else 0.
  *                          The argument is converted to char first.
  *    SPN_<class>_CHARS(s)  Skip the leading run of class members in string
  *                          s; yields a char * to the first character that
  *                          is not in the class, or to the terminating NUL.
  *    BRK_<class>_CHARS(s)  Skip the leading run of non-members in string s;
  *                          yields a char * to the first character that is
  *                          in the class, or to the terminating NUL.
  *
  *  The hex constant is the set of table bits to test; a character matches
  *  when ANY of those bits is set in its table entry.  Classes built from
  *  other classes (XDIGIT, TOKEN_END, NAME_START, SPACE) therefore carry
  *  the OR of the bits of the classes they include.
  */
 #define  IS_CNTRL_CHAR( _c)         is_test_cmap_char((char)(_c), 0x001)
 #define SPN_CNTRL_CHARS(_s)        spn_test_cmap_chars(_s, 0x001)
 #define BRK_CNTRL_CHARS(_s)        brk_test_cmap_chars(_s, 0x001)
 #define  IS_DIGIT_CHAR( _c)         is_test_cmap_char((char)(_c), 0x002)
 #define SPN_DIGIT_CHARS(_s)        spn_test_cmap_chars(_s, 0x002)
 #define BRK_DIGIT_CHARS(_s)        brk_test_cmap_chars(_s, 0x002)
 #define  IS_LOWER_CHAR( _c)         is_test_cmap_char((char)(_c), 0x004)
 #define SPN_LOWER_CHARS(_s)        spn_test_cmap_chars(_s, 0x004)
 #define BRK_LOWER_CHARS(_s)        brk_test_cmap_chars(_s, 0x004)
 #define  IS_HORIZ_SPACE_CHAR( _c)   is_test_cmap_char((char)(_c), 0x008)
 #define SPN_HORIZ_SPACE_CHARS(_s)  spn_test_cmap_chars(_s, 0x008)
 #define BRK_HORIZ_SPACE_CHARS(_s)  brk_test_cmap_chars(_s, 0x008)
 #define  IS_LINE_END_CHAR( _c)      is_test_cmap_char((char)(_c), 0x010)
 #define SPN_LINE_END_CHARS(_s)     spn_test_cmap_chars(_s, 0x010)
 #define BRK_LINE_END_CHARS(_s)     brk_test_cmap_chars(_s, 0x010)
 #define  IS_VERT_SPACE_CHAR( _c)    is_test_cmap_char((char)(_c), 0x020)
 #define SPN_VERT_SPACE_CHARS(_s)   spn_test_cmap_chars(_s, 0x020)
 #define BRK_VERT_SPACE_CHARS(_s)   brk_test_cmap_chars(_s, 0x020)
 #define  IS_UPPER_CHAR( _c)         is_test_cmap_char((char)(_c), 0x040)
 #define SPN_UPPER_CHARS(_s)        spn_test_cmap_chars(_s, 0x040)
 #define BRK_UPPER_CHARS(_s)        brk_test_cmap_chars(_s, 0x040)
 #define  IS_XDIGIT_CHAR( _c)        is_test_cmap_char((char)(_c), 0x082)
 #define SPN_XDIGIT_CHARS(_s)       spn_test_cmap_chars(_s, 0x082)
 #define BRK_XDIGIT_CHARS(_s)       brk_test_cmap_chars(_s, 0x082)
 #define  IS_TOKEN_END_CHAR( _c)     is_test_cmap_char((char)(_c), 0x118)
 #define SPN_TOKEN_END_CHARS(_s)    spn_test_cmap_chars(_s, 0x118)
 #define BRK_TOKEN_END_CHARS(_s)    brk_test_cmap_chars(_s, 0x118)
 #define  IS_NAME_START_CHAR( _c)    is_test_cmap_char((char)(_c), 0x244)
 #define SPN_NAME_START_CHARS(_s)   spn_test_cmap_chars(_s, 0x244)
 #define BRK_NAME_START_CHARS(_s)   brk_test_cmap_chars(_s, 0x244)
 #define  IS_PUNCTUATION_CHAR( _c)   is_test_cmap_char((char)(_c), 0x400)
 #define SPN_PUNCTUATION_CHARS(_s)  spn_test_cmap_chars(_s, 0x400)
 #define BRK_PUNCTUATION_CHARS(_s)  brk_test_cmap_chars(_s, 0x400)
 #define  IS_SPACE_CHAR( _c)         is_test_cmap_char((char)(_c), 0x038)
 #define SPN_SPACE_CHARS(_s)        spn_test_cmap_chars(_s, 0x038)
 #define BRK_SPACE_CHARS(_s)        brk_test_cmap_chars(_s, 0x038)
 
 /*
  *  Classification table, one mask per character code 0..127 (entries are
  *  labelled in ASCII order).  The table is defined only when the includer
  *  sets DEFINE_TEST_CMAP_TABLE; the extern declaration after the
  *  conditional is seen by every includer.
  *
  *  Bit assignments, matching the masks in the IS_/SPN_/BRK_ macros:
  *    0x001 cntrl          0x002 digit        0x004 lower
  *    0x008 horiz-space    0x010 line-end     0x020 vert-space
  *    0x040 upper          0x080 hex letters a-f and A-F (xdigit)
  *    0x100 NUL (token-end)  0x200 underscore (name-start)
  *    0x400 punctuation
  */
 #ifdef DEFINE_TEST_CMAP_TABLE
 test_cmap_mask_t const test_cmap_table[128] = {
   /*NUL*/ 0x101, /*x01*/ 0x001, /*x02*/ 0x001, /*x03*/ 0x001,
   /*x04*/ 0x001, /*x05*/ 0x001, /*x06*/ 0x001, /*BEL*/ 0x001,
   /* BS*/ 0x001, /* HT*/ 0x009, /* NL*/ 0x011, /* VT*/ 0x021,
   /* FF*/ 0x021, /* CR*/ 0x011, /*x0E*/ 0x001, /*x0F*/ 0x001,
   /*x10*/ 0x001, /*x11*/ 0x001, /*x12*/ 0x001, /*x13*/ 0x001,
   /*x14*/ 0x001, /*x15*/ 0x001, /*x16*/ 0x001, /*x17*/ 0x001,
   /*x18*/ 0x001, /*x19*/ 0x001, /*x1A*/ 0x001, /*ESC*/ 0x001,
   /*x1C*/ 0x001, /*x1D*/ 0x001, /*x1E*/ 0x001, /*x1F*/ 0x001,
   /*   */ 0x008, /* ! */ 0x400, /* " */ 0x400, /* # */ 0x400,
   /* $ */ 0x400, /* % */ 0x400, /* & */ 0x400, /* ' */ 0x400,
   /* ( */ 0x400, /* ) */ 0x400, /* * */ 0x400, /* + */ 0x400,
   /* , */ 0x400, /* - */ 0x400, /* . */ 0x400, /* / */ 0x400,
   /* 0 */ 0x402, /* 1 */ 0x402, /* 2 */ 0x402, /* 3 */ 0x402,
   /* 4 */ 0x402, /* 5 */ 0x402, /* 6 */ 0x402, /* 7 */ 0x402,
   /* 8 */ 0x402, /* 9 */ 0x402, /* : */ 0x400, /* ; */ 0x400,
   /* < */ 0x400, /* = */ 0x400, /* > */ 0x400, /* ? */ 0x400,
   /* @ */ 0x400, /* A */ 0x0C0, /* B */ 0x0C0, /* C */ 0x0C0,
   /* D */ 0x0C0, /* E */ 0x0C0, /* F */ 0x0C0, /* G */ 0x040,
   /* H */ 0x040, /* I */ 0x040, /* J */ 0x040, /* K */ 0x040,
   /* L */ 0x040, /* M */ 0x040, /* N */ 0x040, /* O */ 0x040,
   /* P */ 0x040, /* Q */ 0x040, /* R */ 0x040, /* S */ 0x040,
   /* T */ 0x040, /* U */ 0x040, /* V */ 0x040, /* W */ 0x040,
   /* X */ 0x040, /* Y */ 0x040, /* Z */ 0x040, /* [ */ 0x400,
   /* \ */ 0x400, /* ] */ 0x400, /* ^ */ 0x400, /* _ */ 0x600,
   /* ` */ 0x400, /* a */ 0x084, /* b */ 0x084, /* c */ 0x084,
   /* d */ 0x084, /* e */ 0x084, /* f */ 0x084, /* g */ 0x004,
   /* h */ 0x004, /* i */ 0x004, /* j */ 0x004, /* k */ 0x004,
   /* l */ 0x004, /* m */ 0x004, /* n */ 0x004, /* o */ 0x004,
   /* p */ 0x004, /* q */ 0x004, /* r */ 0x004, /* s */ 0x004,
   /* t */ 0x004, /* u */ 0x004, /* v */ 0x004, /* w */ 0x004,
   /* x */ 0x004, /* y */ 0x004, /* z */ 0x004, /* { */ 0x400,
   /* | */ 0x400, /* } */ 0x400, /* ~ */ 0x400, /*x7F*/ 0x001
 };
 #endif /* DEFINE_TEST_CMAP_TABLE */
 extern test_cmap_mask_t const test_cmap_table[128];
 /*
  *  Report whether a character belongs to any classification in a mask.
  *
  *  ch    the character to classify; it is viewed as an unsigned byte so
  *        that a negative char value cannot index before the table.
  *  mask  classification bits to test.
  *
  *  Returns 1 when the byte is below 128 and its table entry shares at
  *  least one bit with mask, otherwise 0.
  */
 static inline int
 is_test_cmap_char(char ch, test_cmap_mask_t mask)
 {
     unsigned char const byte = (unsigned char)ch;

     /* The table covers only codes 0..127; anything above matches nothing. */
     if (byte >= 128)
         return 0;

     return (test_cmap_table[byte] & mask) ? 1 : 0;
 }
 
 /*
  *  Skip the leading run of characters that match a classification mask.
  *
  *  p     NUL-terminated string to scan.
  *  mask  classification bits to test.
  *
  *  Returns a pointer to the first character that matches none of the mask
  *  bits, or to the terminating NUL when every character matches.
  */
 static inline char *
 spn_test_cmap_chars(char const * p, test_cmap_mask_t mask)
 {
     for (;;) {
         char const cur = *p;

         /*
          *  The NUL test comes first: the table entry for NUL has
          *  classification bits set, so without it the scan could run
          *  past the end of the string.
          */
         if ((cur == '\0') || (! is_test_cmap_char(cur, mask)))
             break;
         p++;
     }

     /* Going through uintptr_t drops the const qualifier for the caller. */
     return (char *)(uintptr_t)p;
 }
 
 /*
  *  Skip the leading run of characters that do NOT match a mask.
  *
  *  p     NUL-terminated string to scan.
  *  mask  classification bits to test.
  *
  *  Returns a pointer to the first character that matches at least one of
  *  the mask bits, or to the terminating NUL when no character matches.
  */
 static inline char *
 brk_test_cmap_chars(char const * p, test_cmap_mask_t mask)
 {
     for (; *p != '\0'; p++) {
         if (is_test_cmap_char(*p, mask))
             break;
     }

     /* Going through uintptr_t drops the const qualifier for the caller. */
     return (char *)(uintptr_t)p;
 }
 
 /* emit text from map file: */
 
 /*
  *  Compile-time identification of the host character set.  A handful of
  *  character constants are compared against their ASCII code points and
  *  then against their EBCDIC code points; HOST_CHARSET is set to the
  *  first family that matches on all six, or to HOST_CHARSET_UNKNOWN.
  */
 #define HOST_CHARSET_UNKNOWN 0
 #define HOST_CHARSET_ASCII   1
 #define HOST_CHARSET_EBCDIC  2

 #if    ('\n' == 0x0A) && (' ' == 0x20) && ('0' == 0x30) \
     && ('A'  == 0x41) && ('a' == 0x61) && ('!' == 0x21)
 #  define HOST_CHARSET HOST_CHARSET_ASCII

 #elif  ('\n' == 0x15) && (' ' == 0x40) && ('0' == 0xF0) \
     && ('A'  == 0xC1) && ('a' == 0x81) && ('!' == 0x5A)
 #  define HOST_CHARSET HOST_CHARSET_EBCDIC

 #else
 #  define HOST_CHARSET HOST_CHARSET_UNKNOWN
 #endif
 
 
 #ifdef TEST_TEST_CMAP
 /*
  *  The test driver uses fwrite, printf and putchar.  The rest of this
  *  header only pulls in the integer-type headers, so <stdio.h> has to be
  *  included here; without it the calls rely on implicit declarations,
  *  which C99 and later do not allow.
  */
 # include <stdio.h>

 /*
  *  Self-test driver: print a legend of the twelve classifications and
  *  then one line per character code 0..127 with an 'X' in each column
  *  whose classification the character belongs to and a '.' otherwise.
  *
  *  argc, argv  unused.
  *  Returns 0.
  */
 int main (int argc, char ** argv) {
     static char const header[] =
         "00 == CNTRL\n"
         "01 == DIGIT\n"
         "02 == LOWER\n"
         "03 == HORIZ_SPACE\n"
         "04 == LINE_END\n"
         "05 == VERT_SPACE\n"
         "06 == UPPER\n"
         "07 == XDIGIT\n"
         "08 == TOKEN_END\n"
         "09 == NAME_START\n"
         "0A == PUNCTUATION\n"
         "0B == SPACE\n"
         "char is:  00 01 02 03 04 05 06 07 08 09 0A 0B\n";

     /* One mask per output column, in the order listed in the legend. */
     static test_cmap_mask_t const masks[] = {
         0x001, 0x002, 0x004, 0x008, 0x010, 0x020,
         0x040, 0x082, 0x118, 0x244, 0x400, 0x038
     };
     size_t const mask_ct = sizeof(masks) / sizeof(masks[0]);
     int ix;

     (void)argc;
     (void)argv;

     fwrite(header, sizeof(header)-1, 1, stdout);

     for (ix = 0; ix < 128; ix++) {
         /* Show printable codes (0x20..0x7E) as themselves, others as '?'. */
         char ch = ((ix >= 0x20) && (ix < 0x7F)) ? (char)ix : '?';
         size_t col;

         printf("0x%02X (%c) ", (unsigned int)ix, ch);
         for (col = 0; col < mask_ct; col++) {
             putchar(' '); putchar(' ');
             putchar(is_test_cmap_char((char)ix, masks[col]) ? 'X' : '.');
         }
         putchar('\n');
     }
     return 0;
 }
 #endif /* TEST_TEST_CMAP */
 #endif /* TEST_CMAP_H_GUARD */