/*============================================================================= Copyright (c) 2001-2011 Joel de Guzman Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) Autogenerated by MultiStageTable.py (Unicode multi-stage table builder) (c) Peter Kankowski, 2008 ==============================================================================*/ #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010) #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010 #include # include "category_table.hpp" # include "script_table.hpp" # include "lowercase_table.hpp" # include "uppercase_table.hpp" namespace boost { namespace spirit { namespace ucd { // This header provides Basic (Level 1) Unicode Support // See http://unicode.org/reports/tr18/ for details struct properties { // bit pattern: xxMMMCCC // MMM: major_category // CCC: category enum major_category { letter, mark, number, separator, other, punctuation, symbol }; enum category { uppercase_letter = 0, // [Lu] an uppercase letter lowercase_letter, // [Ll] a lowercase letter titlecase_letter, // [Lt] a digraphic character, with first part uppercase modifier_letter, // [Lm] a modifier letter other_letter, // [Lo] other letters, including syllables and ideographs nonspacing_mark = 8, // [Mn] a nonspacing combining mark (zero advance width) enclosing_mark, // [Me] an enclosing combining mark spacing_mark, // [Mc] a spacing combining mark (positive advance width) decimal_number = 16, // [Nd] a decimal digit letter_number, // [Nl] a letterlike numeric character other_number, // [No] a numeric character of other type space_separator = 24, // [Zs] a space character (of various non-zero widths) line_separator, // [Zl] U+2028 LINE SEPARATOR only paragraph_separator, // [Zp] U+2029 PARAGRAPH SEPARATOR only control = 32, // [Cc] a C0 or C1 control code format, // [Cf] a format control character private_use, // [Co] a private-use character surrogate, // [Cs] a surrogate code point unassigned, // [Cn] a reserved unassigned code point or a noncharacter dash_punctuation = 40, // [Pd] a dash or hyphen punctuation mark open_punctuation, // [Ps] an opening punctuation mark (of a pair) close_punctuation, // [Pe] a closing punctuation mark (of a pair) connector_punctuation, // [Pc] a connecting punctuation mark, like a tie other_punctuation, // [Po] a punctuation mark of other type initial_punctuation, // [Pi] an initial quotation mark final_punctuation, // [Pf] a final quotation mark math_symbol = 48, // [Sm] a symbol of primarily mathematical use currency_symbol, // [Sc] a currency sign modifier_symbol, // [Sk] a non-letterlike modifier symbol other_symbol // [So] a symbol of other type }; enum derived_properties { alphabetic = 64, uppercase = 128, lowercase = 256, white_space = 512, hex_digit = 1024, noncharacter_code_point = 2048, default_ignorable_code_point = 4096 }; enum script { arabic = 0, imperial_aramaic = 1, armenian = 2, avestan = 3, balinese = 4, bamum = 5, bengali = 6, bopomofo = 7, braille = 8, buginese = 9, buhid = 10, canadian_aboriginal = 11, carian = 12, cham = 13, cherokee = 14, coptic = 15, cypriot = 16, cyrillic = 17, devanagari = 18, deseret = 19, egyptian_hieroglyphs = 20, ethiopic = 21, georgian = 22, glagolitic = 23, gothic = 24, greek = 25, gujarati = 26, gurmukhi = 27, hangul = 28, han = 29, hanunoo = 30, hebrew = 31, hiragana = 32, katakana_or_hiragana = 33, old_italic = 34, javanese = 35, kayah_li = 36, katakana = 37, kharoshthi = 38, khmer = 39, kannada = 40, kaithi = 41, tai_tham = 42, lao = 43, latin = 44, lepcha = 45, limbu = 46, linear_b = 47, lisu = 48, lycian = 49, lydian = 50, malayalam = 51, mongolian = 52, meetei_mayek = 53, myanmar = 54, nko = 55, ogham = 56, ol_chiki = 57, old_turkic = 58, oriya = 59, osmanya = 60, phags_pa = 61, inscriptional_pahlavi = 62, phoenician = 63, inscriptional_parthian = 64, rejang = 65, runic = 66, samaritan = 67, old_south_arabian = 68, saurashtra = 69, shavian = 70, sinhala = 71, sundanese = 72, syloti_nagri = 73, syriac = 74, tagbanwa = 75, tai_le = 76, new_tai_lue = 77, tamil = 78, tai_viet = 79, telugu = 80, tifinagh = 81, tagalog = 82, thaana = 83, thai = 84, tibetan = 85, ugaritic = 86, vai = 87, old_persian = 88, cuneiform = 89, yi = 90, inherited = 91, common = 92, unknown = 93 }; }; inline properties::category get_category(::boost::uint32_t ch) { return static_cast(detail::category_lookup(ch) & 0x3F); } inline properties::major_category get_major_category(::boost::uint32_t ch) { return static_cast(get_category(ch) >> 3); } inline bool is_punctuation(::boost::uint32_t ch) { return get_major_category(ch) == properties::punctuation; } inline bool is_decimal_number(::boost::uint32_t ch) { return get_category(ch) == properties::decimal_number; } inline bool is_hex_digit(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::hex_digit) != 0; } inline bool is_control(::boost::uint32_t ch) { return get_category(ch) == properties::control; } inline bool is_alphabetic(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::alphabetic) != 0; } inline bool is_alphanumeric(::boost::uint32_t ch) { return is_decimal_number(ch) || is_alphabetic(ch); } inline bool is_uppercase(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::uppercase) != 0; } inline bool is_lowercase(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::lowercase) != 0; } inline bool is_white_space(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::white_space) != 0; } inline bool is_blank(::boost::uint32_t ch) { switch (ch) { case '\n': case '\v': case '\f': case '\r': return false; default: return is_white_space(ch) && !( get_category(ch) == properties::line_separator || get_category(ch) == properties::paragraph_separator ); } } inline bool is_graph(::boost::uint32_t ch) { return !( is_white_space(ch) || get_category(ch) == properties::control || get_category(ch) == properties::surrogate || get_category(ch) == properties::unassigned ); } inline bool is_print(::boost::uint32_t ch) { return (is_graph(ch) || is_blank(ch)) && !is_control(ch); } inline bool is_noncharacter_code_point(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0; } inline bool is_default_ignorable_code_point(::boost::uint32_t ch) { return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0; } inline properties::script get_script(::boost::uint32_t ch) { return static_cast(detail::script_lookup(ch) & 0x3F); } inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch) { // The table returns 0 to signal that this code maps to itself ::boost::uint32_t r = detail::lowercase_lookup(ch); return (r == 0)? ch : r; } inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch) { // The table returns 0 to signal that this code maps to itself ::boost::uint32_t r = detail::uppercase_lookup(ch); return (r == 0)? ch : r; } }}} #endif