| 1 | /*============================================================================= |
| 2 | Copyright (c) 2001-2011 Hartmut Kaiser |
| 3 | Copyright (c) 2001-2011 Joel de Guzman |
| 4 | |
| 5 | Distributed under the Boost Software License, Version 1.0. (See accompanying |
| 6 | file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| 7 | =============================================================================*/ |
| 8 | #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM) |
| 9 | #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM |
| 10 | |
| 11 | #if defined(_MSC_VER) |
| 12 | #pragma once |
| 13 | #endif |
| 14 | |
| 15 | #include <climits> |
| 16 | #include <boost/assert.hpp> |
| 17 | #include <boost/cstdint.hpp> |
| 18 | |
| 19 | /////////////////////////////////////////////////////////////////////////////// |
| 20 | // constants used to classify the single characters |
| 21 | /////////////////////////////////////////////////////////////////////////////// |
// Character-class bit flags. Each flag occupies its own bit, so one table
// entry can record several classes at once by OR-ing flags together
// (for example a hexadecimal letter is ALPHA, XDIGIT and UPPER or LOWER).
// All eight flags fit in a single unsigned char.
#define BOOST_CC_DIGIT  (1 << 0)    // decimal digit
#define BOOST_CC_XDIGIT (1 << 1)    // hexadecimal digit
#define BOOST_CC_ALPHA  (1 << 2)    // letter
#define BOOST_CC_CTRL   (1 << 3)    // control character
#define BOOST_CC_LOWER  (1 << 4)    // lower-case letter
#define BOOST_CC_UPPER  (1 << 5)    // upper-case letter
#define BOOST_CC_SPACE  (1 << 6)    // white space
#define BOOST_CC_PUNCT  (1 << 7)    // punctuation
| 30 | |
| 31 | namespace boost { namespace spirit { namespace char_encoding |
| 32 | { |
| 33 | // The detection of isgraph(), isprint() and isblank() is done programmatically |
| 34 | // to keep the character type table small. Additionally, these functions are |
| 35 | // rather seldom used and the programmatic detection is very simple. |
| 36 | |
| 37 | /////////////////////////////////////////////////////////////////////////// |
| 38 | // ASCII character classification table |
| 39 | /////////////////////////////////////////////////////////////////////////// |
| 40 | const unsigned char ascii_char_types[] = |
| 41 | { |
| 42 | /* NUL 0 0 */ BOOST_CC_CTRL, |
| 43 | /* SOH 1 1 */ BOOST_CC_CTRL, |
| 44 | /* STX 2 2 */ BOOST_CC_CTRL, |
| 45 | /* ETX 3 3 */ BOOST_CC_CTRL, |
| 46 | /* EOT 4 4 */ BOOST_CC_CTRL, |
| 47 | /* ENQ 5 5 */ BOOST_CC_CTRL, |
| 48 | /* ACK 6 6 */ BOOST_CC_CTRL, |
| 49 | /* BEL 7 7 */ BOOST_CC_CTRL, |
| 50 | /* BS 8 8 */ BOOST_CC_CTRL, |
| 51 | /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE, |
| 52 | /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE, |
| 53 | /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE, |
| 54 | /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE, |
| 55 | /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE, |
| 56 | /* SO 14 e */ BOOST_CC_CTRL, |
| 57 | /* SI 15 f */ BOOST_CC_CTRL, |
| 58 | /* DLE 16 10 */ BOOST_CC_CTRL, |
| 59 | /* DC1 17 11 */ BOOST_CC_CTRL, |
| 60 | /* DC2 18 12 */ BOOST_CC_CTRL, |
| 61 | /* DC3 19 13 */ BOOST_CC_CTRL, |
| 62 | /* DC4 20 14 */ BOOST_CC_CTRL, |
| 63 | /* NAK 21 15 */ BOOST_CC_CTRL, |
| 64 | /* SYN 22 16 */ BOOST_CC_CTRL, |
| 65 | /* ETB 23 17 */ BOOST_CC_CTRL, |
| 66 | /* CAN 24 18 */ BOOST_CC_CTRL, |
| 67 | /* EM 25 19 */ BOOST_CC_CTRL, |
| 68 | /* SUB 26 1a */ BOOST_CC_CTRL, |
| 69 | /* ESC 27 1b */ BOOST_CC_CTRL, |
| 70 | /* FS 28 1c */ BOOST_CC_CTRL, |
| 71 | /* GS 29 1d */ BOOST_CC_CTRL, |
| 72 | /* RS 30 1e */ BOOST_CC_CTRL, |
| 73 | /* US 31 1f */ BOOST_CC_CTRL, |
| 74 | /* SP 32 20 */ BOOST_CC_SPACE, |
| 75 | /* ! 33 21 */ BOOST_CC_PUNCT, |
| 76 | /* " 34 22 */ BOOST_CC_PUNCT, |
| 77 | /* # 35 23 */ BOOST_CC_PUNCT, |
| 78 | /* $ 36 24 */ BOOST_CC_PUNCT, |
| 79 | /* % 37 25 */ BOOST_CC_PUNCT, |
| 80 | /* & 38 26 */ BOOST_CC_PUNCT, |
| 81 | /* ' 39 27 */ BOOST_CC_PUNCT, |
| 82 | /* ( 40 28 */ BOOST_CC_PUNCT, |
| 83 | /* ) 41 29 */ BOOST_CC_PUNCT, |
| 84 | /* * 42 2a */ BOOST_CC_PUNCT, |
| 85 | /* + 43 2b */ BOOST_CC_PUNCT, |
| 86 | /* , 44 2c */ BOOST_CC_PUNCT, |
| 87 | /* - 45 2d */ BOOST_CC_PUNCT, |
| 88 | /* . 46 2e */ BOOST_CC_PUNCT, |
| 89 | /* / 47 2f */ BOOST_CC_PUNCT, |
| 90 | /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, |
| 91 | /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, |
| 92 | /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, |
| 93 | /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, |
| 94 | /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, |
| 95 | /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, |
| 96 | /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, |
| 97 | /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, |
| 98 | /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, |
| 99 | /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT, |
| 100 | /* : 58 3a */ BOOST_CC_PUNCT, |
| 101 | /* ; 59 3b */ BOOST_CC_PUNCT, |
| 102 | /* < 60 3c */ BOOST_CC_PUNCT, |
| 103 | /* = 61 3d */ BOOST_CC_PUNCT, |
| 104 | /* > 62 3e */ BOOST_CC_PUNCT, |
| 105 | /* ? 63 3f */ BOOST_CC_PUNCT, |
| 106 | /* @ 64 40 */ BOOST_CC_PUNCT, |
| 107 | /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, |
| 108 | /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, |
| 109 | /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, |
| 110 | /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, |
| 111 | /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, |
| 112 | /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER, |
| 113 | /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 114 | /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 115 | /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 116 | /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 117 | /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 118 | /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 119 | /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 120 | /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 121 | /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 122 | /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 123 | /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 124 | /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 125 | /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 126 | /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 127 | /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 128 | /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 129 | /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 130 | /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 131 | /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 132 | /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER, |
| 133 | /* [ 91 5b */ BOOST_CC_PUNCT, |
| 134 | /* \ 92 5c */ BOOST_CC_PUNCT, |
| 135 | /* ] 93 5d */ BOOST_CC_PUNCT, |
| 136 | /* ^ 94 5e */ BOOST_CC_PUNCT, |
| 137 | /* _ 95 5f */ BOOST_CC_PUNCT, |
| 138 | /* ` 96 60 */ BOOST_CC_PUNCT, |
| 139 | /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, |
| 140 | /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, |
| 141 | /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, |
| 142 | /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, |
| 143 | /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, |
| 144 | /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER, |
| 145 | /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 146 | /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 147 | /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 148 | /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 149 | /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 150 | /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 151 | /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 152 | /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 153 | /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 154 | /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 155 | /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 156 | /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 157 | /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 158 | /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 159 | /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 160 | /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 161 | /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 162 | /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 163 | /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 164 | /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER, |
| 165 | /* { 123 7b */ BOOST_CC_PUNCT, |
| 166 | /* | 124 7c */ BOOST_CC_PUNCT, |
| 167 | /* } 125 7d */ BOOST_CC_PUNCT, |
| 168 | /* ~ 126 7e */ BOOST_CC_PUNCT, |
| 169 | /* DEL 127 7f */ BOOST_CC_CTRL, |
| 170 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 171 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 172 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 173 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 174 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 175 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 176 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 177 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 178 | }; |
| 179 | |
| 180 | /////////////////////////////////////////////////////////////////////////// |
| 181 | // Test characters for specified conditions (using ASCII) |
| 182 | /////////////////////////////////////////////////////////////////////////// |
| 183 | struct ascii |
| 184 | { |
| 185 | typedef char char_type; |
| 186 | typedef unsigned char classify_type; |
| 187 | |
| 188 | static bool |
| 189 | isascii_(int ch) |
| 190 | { |
| 191 | return 0 == (ch & ~0x7f); |
| 192 | } |
| 193 | |
| 194 | static bool |
| 195 | ischar(int ch) |
| 196 | { |
| 197 | return isascii_(ch); |
| 198 | } |
| 199 | |
| 200 | // *** Note on assertions: The precondition is that the calls to |
| 201 | // these functions do not violate the required range of ch (type int) |
| 202 | // which is that strict_ischar(ch) should be true. It is the |
| 203 | // responsibility of the caller to make sure this precondition is not |
| 204 | // violated. |
| 205 | |
| 206 | static bool |
| 207 | strict_ischar(int ch) |
| 208 | { |
| 209 | return ch >= 0 && ch <= 127; |
| 210 | } |
| 211 | |
| 212 | static bool |
| 213 | isalnum(int ch) |
| 214 | { |
| 215 | BOOST_ASSERT(strict_ischar(ch)); |
| 216 | return (ascii_char_types[ch] & BOOST_CC_ALPHA) |
| 217 | || (ascii_char_types[ch] & BOOST_CC_DIGIT); |
| 218 | } |
| 219 | |
| 220 | static bool |
| 221 | isalpha(int ch) |
| 222 | { |
| 223 | BOOST_ASSERT(strict_ischar(ch)); |
| 224 | return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false; |
| 225 | } |
| 226 | |
| 227 | static bool |
| 228 | isdigit(int ch) |
| 229 | { |
| 230 | BOOST_ASSERT(strict_ischar(ch)); |
| 231 | return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false; |
| 232 | } |
| 233 | |
| 234 | static bool |
| 235 | isxdigit(int ch) |
| 236 | { |
| 237 | BOOST_ASSERT(strict_ischar(ch)); |
| 238 | return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false; |
| 239 | } |
| 240 | |
| 241 | static bool |
| 242 | iscntrl(int ch) |
| 243 | { |
| 244 | BOOST_ASSERT(strict_ischar(ch)); |
| 245 | return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false; |
| 246 | } |
| 247 | |
| 248 | static bool |
| 249 | isgraph(int ch) |
| 250 | { |
| 251 | BOOST_ASSERT(strict_ischar(ch)); |
| 252 | return ('\x21' <= ch && ch <= '\x7e'); |
| 253 | } |
| 254 | |
| 255 | static bool |
| 256 | islower(int ch) |
| 257 | { |
| 258 | BOOST_ASSERT(strict_ischar(ch)); |
| 259 | return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false; |
| 260 | } |
| 261 | |
| 262 | static bool |
| 263 | isprint(int ch) |
| 264 | { |
| 265 | BOOST_ASSERT(strict_ischar(ch)); |
| 266 | return ('\x20' <= ch && ch <= '\x7e'); |
| 267 | } |
| 268 | |
| 269 | static bool |
| 270 | ispunct(int ch) |
| 271 | { |
| 272 | BOOST_ASSERT(strict_ischar(ch)); |
| 273 | return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false; |
| 274 | } |
| 275 | |
| 276 | static bool |
| 277 | isspace(int ch) |
| 278 | { |
| 279 | BOOST_ASSERT(strict_ischar(ch)); |
| 280 | return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false; |
| 281 | } |
| 282 | |
| 283 | static bool |
| 284 | isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch) |
| 285 | { |
| 286 | BOOST_ASSERT(strict_ischar(ch)); |
| 287 | return ('\x09' == ch || '\x20' == ch); |
| 288 | } |
| 289 | |
| 290 | static bool |
| 291 | isupper(int ch) |
| 292 | { |
| 293 | BOOST_ASSERT(strict_ischar(ch)); |
| 294 | return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false; |
| 295 | } |
| 296 | |
| 297 | /////////////////////////////////////////////////////////////////////// |
| 298 | // Simple character conversions |
| 299 | /////////////////////////////////////////////////////////////////////// |
| 300 | |
| 301 | static int |
| 302 | tolower(int ch) |
| 303 | { |
| 304 | BOOST_ASSERT(strict_ischar(ch)); |
| 305 | return isupper(ch) ? (ch - 'A' + 'a') : ch; |
| 306 | } |
| 307 | |
| 308 | static int |
| 309 | toupper(int ch) |
| 310 | { |
| 311 | BOOST_ASSERT(strict_ischar(ch)); |
| 312 | return islower(ch) ? (ch - 'a' + 'A') : ch; |
| 313 | } |
| 314 | |
| 315 | static ::boost::uint32_t |
| 316 | toucs4(int ch) |
| 317 | { |
| 318 | BOOST_ASSERT(strict_ischar(ch)); |
| 319 | return ch; |
| 320 | } |
| 321 | }; |
| 322 | |
| 323 | }}} |
| 324 | |
| 325 | /////////////////////////////////////////////////////////////////////////////// |
| 326 | // undefine macros |
| 327 | /////////////////////////////////////////////////////////////////////////////// |
// Release every classification flag macro, in the order they were defined,
// so none of them stays defined past the end of this header.
#undef BOOST_CC_DIGIT
#undef BOOST_CC_XDIGIT
#undef BOOST_CC_ALPHA
#undef BOOST_CC_CTRL
#undef BOOST_CC_LOWER
#undef BOOST_CC_UPPER
#undef BOOST_CC_SPACE
#undef BOOST_CC_PUNCT
| 336 | |
| 337 | #endif |
| 338 | |