/*============================================================================= Copyright (c) 2001-2011 Hartmut Kaiser Copyright (c) 2001-2011 Joel de Guzman Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) =============================================================================*/ #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM) #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM #if defined(_MSC_VER) #pragma once #endif #include #include #include /////////////////////////////////////////////////////////////////////////////// // constants used to classify the single characters /////////////////////////////////////////////////////////////////////////////// #define BOOST_CC_DIGIT 0x0001 #define BOOST_CC_XDIGIT 0x0002 #define BOOST_CC_ALPHA 0x0004 #define BOOST_CC_CTRL 0x0008 #define BOOST_CC_LOWER 0x0010 #define BOOST_CC_UPPER 0x0020 #define BOOST_CC_SPACE 0x0040 #define BOOST_CC_PUNCT 0x0080 namespace boost { namespace spirit { namespace char_encoding { // The detection of isgraph(), isprint() and isblank() is done programmatically // to keep the character type table small. Additionally, these functions are // rather seldom used and the programmatic detection is very simple. /////////////////////////////////////////////////////////////////////////// // ASCII character classification table /////////////////////////////////////////////////////////////////////////// const unsigned char ascii_char_types[] = { /* NUL 0 0 */ BOOST_CC_CTRL, /* SOH 1 1 */ BOOST_CC_CTRL, /* STX 2 2 */ BOOST_CC_CTRL, /* ETX 3 3 */ BOOST_CC_CTRL, /* EOT 4 4 */ BOOST_CC_CTRL, /* ENQ 5 5 */ BOOST_CC_CTRL, /* ACK 6 6 */ BOOST_CC_CTRL, /* BEL 7 7 */ BOOST_CC_CTRL, /* BS 8 8 */ BOOST_CC_CTRL, /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE, /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE, /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE, /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE, /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE, /* SO 14 e */ BOOST_CC_CTRL, /* SI 15 f */ BOOST_CC_CTRL, /* DLE 16 10 */ BOOST_CC_CTRL, /* DC1 17 11 */ BOOST_CC_CTRL, /* DC2 18 12 */ BOOST_CC_CTRL, /* DC3 19 13 */ BOOST_CC_CTRL, /* DC4 20 14 */ BOOST_CC_CTRL, /* NAK 21 15 */ BOOST_CC_CTRL, /* SYN 22 16 */ BOOST_CC_CTRL, /* ETB 23 17 */ BOOST_CC_CTRL, /* CAN 24 18 */ BOOST_CC_CTRL, /* EM 25 19 */ BOOST_CC_CTRL, /* SUB 26 1a */ BOOST_CC_CTRL, /* ESC 27 1b */ BOOST_CC_CTRL, /* FS 28 1c */ BOOST_CC_CTRL, /* GS 29 1d */ BOOST_CC_CTRL, /* RS 30 1e */ BOOST_CC_CTRL, /* US 31 1f */ BOOST_CC_CTRL, /* SP 32 20 */ BOOST_CC_SPACE, /* ! 33 21 */ BOOST_CC_PUNCT, /* " 34 22 */ BOOST_CC_PUNCT, /* # 35 23 */ BOOST_CC_PUNCT, /* $ 36 24 */ BOOST_CC_PUNCT, /* % 37 25 */ BOOST_CC_PUNCT, /* & 38 26 */ BOOST_CC_PUNCT, /* ' 39 27 */ BOOST_CC_PUNCT, /* ( 40 28 */ BOOST_CC_PUNCT, /* ) 41 29 */ BOOST_CC_PUNCT, /* * 42 2a */ BOOST_CC_PUNCT, /* + 43 2b */ BOOST_CC_PUNCT, /* , 44 2c */ BOOST_CC_PUNCT, /* - 45 2d */ BOOST_CC_PUNCT, /* . 46 2e */ BOOST_CC_PUNCT, /* / 47 2f */ BOOST_CC_PUNCT, /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, /* : 58 3a */ BOOST_CC_PUNCT, /* ; 59 3b */ BOOST_CC_PUNCT, /* < 60 3c */ BOOST_CC_PUNCT, /* = 61 3d */ BOOST_CC_PUNCT, /* > 62 3e */ BOOST_CC_PUNCT, /* ? 63 3f */ BOOST_CC_PUNCT, /* @ 64 40 */ BOOST_CC_PUNCT, /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, /* [ 91 5b */ BOOST_CC_PUNCT, /* \ 92 5c */ BOOST_CC_PUNCT, /* ] 93 5d */ BOOST_CC_PUNCT, /* ^ 94 5e */ BOOST_CC_PUNCT, /* _ 95 5f */ BOOST_CC_PUNCT, /* ` 96 60 */ BOOST_CC_PUNCT, /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, /* { 123 7b */ BOOST_CC_PUNCT, /* | 124 7c */ BOOST_CC_PUNCT, /* } 125 7d */ BOOST_CC_PUNCT, /* ~ 126 7e */ BOOST_CC_PUNCT, /* DEL 127 7f */ BOOST_CC_CTRL, }; /////////////////////////////////////////////////////////////////////////// // Test characters for specified conditions (using ASCII) /////////////////////////////////////////////////////////////////////////// struct ascii { typedef char char_type; static bool isascii_(int ch) { return 0 == (ch & ~0x7f); } static bool ischar(int ch) { return isascii_(ch); } static int isalnum(int ch) { BOOST_ASSERT(isascii_(ch)); return (ascii_char_types[ch] & BOOST_CC_ALPHA) || (ascii_char_types[ch] & BOOST_CC_DIGIT); } static int isalpha(int ch) { BOOST_ASSERT(isascii_(ch)); return (ascii_char_types[ch] & BOOST_CC_ALPHA); } static int isdigit(int ch) { BOOST_ASSERT(isascii_(ch)); return (ascii_char_types[ch] & BOOST_CC_DIGIT); } static int isxdigit(int ch) { BOOST_ASSERT(isascii_(ch)); return (ascii_char_types[ch] & BOOST_CC_XDIGIT); } static int iscntrl(int ch) { BOOST_ASSERT(isascii_(ch)); return (ascii_char_types[ch] & BOOST_CC_CTRL); } static int isgraph(int ch) { return ('\x21' <= ch && ch <= '\x7e'); } static int islower(int ch) { BOOST_ASSERT(isascii_(ch)); return (ascii_char_types[ch] & BOOST_CC_LOWER); } static int isprint(int ch) { return ('\x20' <= ch && ch <= '\x7e'); } static int ispunct(int ch) { BOOST_ASSERT(isascii_(ch)); return (ascii_char_types[ch] & BOOST_CC_PUNCT); } static int isspace(int ch) { BOOST_ASSERT(isascii_(ch)); return (ascii_char_types[ch] & BOOST_CC_SPACE); } static int isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch) { return ('\x09' == ch || '\x20' == ch); } static int isupper(int ch) { BOOST_ASSERT(isascii_(ch)); return (ascii_char_types[ch] & BOOST_CC_UPPER); } /////////////////////////////////////////////////////////////////////// // Simple character conversions /////////////////////////////////////////////////////////////////////// static int tolower(int ch) { BOOST_ASSERT(isascii_(ch)); return isupper(ch) ? (ch - 'A' + 'a') : ch; } static int toupper(int ch) { BOOST_ASSERT(isascii_(ch)); return islower(ch) ? (ch - 'a' + 'A') : ch; } static ::boost::uint32_t toucs4(int ch) { return ch; } }; }}} /////////////////////////////////////////////////////////////////////////////// // undefine macros /////////////////////////////////////////////////////////////////////////////// #undef BOOST_CC_DIGIT #undef BOOST_CC_XDIGIT #undef BOOST_CC_ALPHA #undef BOOST_CC_CTRL #undef BOOST_CC_LOWER #undef BOOST_CC_UPPER #undef BOOST_CC_PUNCT #undef BOOST_CC_SPACE #endif