diff options
Diffstat (limited to '3rdParty/Boost/src/boost/spirit/home/support/char_encoding/unicode/query.hpp')
-rw-r--r-- | 3rdParty/Boost/src/boost/spirit/home/support/char_encoding/unicode/query.hpp | 305 |
1 files changed, 305 insertions, 0 deletions
diff --git a/3rdParty/Boost/src/boost/spirit/home/support/char_encoding/unicode/query.hpp b/3rdParty/Boost/src/boost/spirit/home/support/char_encoding/unicode/query.hpp new file mode 100644 index 0000000..3a0526c --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/char_encoding/unicode/query.hpp @@ -0,0 +1,305 @@ +/*============================================================================= + Copyright (c) 2001-2011 Joel de Guzman + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Autogenerated by MultiStageTable.py (Unicode multi-stage + table builder) (c) Peter Kankowski, 2008 +==============================================================================*/ +#if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010) +#define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010 + +#include <boost/cstdint.hpp> + +# include "category_table.hpp" +# include "script_table.hpp" +# include "lowercase_table.hpp" +# include "uppercase_table.hpp" + +namespace boost { namespace spirit { namespace ucd +{ + // This header provides Basic (Level 1) Unicode Support + // See http://unicode.org/reports/tr18/ for details + + struct properties + { + // bit pattern: xxMMMCCC + // MMM: major_category + // CCC: category + + enum major_category + { + letter, + mark, + number, + separator, + other, + punctuation, + symbol + }; + + enum category + { + uppercase_letter = 0, // [Lu] an uppercase letter + lowercase_letter, // [Ll] a lowercase letter + titlecase_letter, // [Lt] a digraphic character, with first part uppercase + modifier_letter, // [Lm] a modifier letter + other_letter, // [Lo] other letters, including syllables and ideographs + + nonspacing_mark = 8, // [Mn] a nonspacing combining mark (zero advance width) + enclosing_mark, // [Me] an enclosing combining mark + spacing_mark, // [Mc] a spacing combining mark (positive advance width) + + decimal_number = 16, // [Nd] a decimal digit + letter_number, // [Nl] a letterlike numeric character + other_number, // [No] a numeric character of other type + + space_separator = 24, // [Zs] a space character (of various non-zero widths) + line_separator, // [Zl] U+2028 LINE SEPARATOR only + paragraph_separator, // [Zp] U+2029 PARAGRAPH SEPARATOR only + + control = 32, // [Cc] a C0 or C1 control code + format, // [Cf] a format control character + private_use, // [Co] a private-use character + surrogate, // [Cs] a surrogate code point + unassigned, // [Cn] a reserved unassigned code point or a noncharacter + + dash_punctuation = 40, // [Pd] a dash or hyphen punctuation mark + open_punctuation, // [Ps] an opening punctuation mark (of a pair) + close_punctuation, // [Pe] a closing punctuation mark (of a pair) + connector_punctuation, // [Pc] a connecting punctuation mark, like a tie + other_punctuation, // [Po] a punctuation mark of other type + initial_punctuation, // [Pi] an initial quotation mark + final_punctuation, // [Pf] a final quotation mark + + math_symbol = 48, // [Sm] a symbol of primarily mathematical use + currency_symbol, // [Sc] a currency sign + modifier_symbol, // [Sk] a non-letterlike modifier symbol + other_symbol // [So] a symbol of other type + }; + + enum derived_properties + { + alphabetic = 64, + uppercase = 128, + lowercase = 256, + white_space = 512, + hex_digit = 1024, + noncharacter_code_point = 2048, + default_ignorable_code_point = 4096 + }; + + enum script + { + arabic = 0, + imperial_aramaic = 1, + armenian = 2, + avestan = 3, + balinese = 4, + bamum = 5, + bengali = 6, + bopomofo = 7, + braille = 8, + buginese = 9, + buhid = 10, + canadian_aboriginal = 11, + carian = 12, + cham = 13, + cherokee = 14, + coptic = 15, + cypriot = 16, + cyrillic = 17, + devanagari = 18, + deseret = 19, + egyptian_hieroglyphs = 20, + ethiopic = 21, + georgian = 22, + glagolitic = 23, + gothic = 24, + greek = 25, + gujarati = 26, + gurmukhi = 27, + hangul = 28, + han = 29, + hanunoo = 30, + hebrew = 31, + hiragana = 32, + katakana_or_hiragana = 33, + old_italic = 34, + javanese = 35, + kayah_li = 36, + katakana = 37, + kharoshthi = 38, + khmer = 39, + kannada = 40, + kaithi = 41, + tai_tham = 42, + lao = 43, + latin = 44, + lepcha = 45, + limbu = 46, + linear_b = 47, + lisu = 48, + lycian = 49, + lydian = 50, + malayalam = 51, + mongolian = 52, + meetei_mayek = 53, + myanmar = 54, + nko = 55, + ogham = 56, + ol_chiki = 57, + old_turkic = 58, + oriya = 59, + osmanya = 60, + phags_pa = 61, + inscriptional_pahlavi = 62, + phoenician = 63, + inscriptional_parthian = 64, + rejang = 65, + runic = 66, + samaritan = 67, + old_south_arabian = 68, + saurashtra = 69, + shavian = 70, + sinhala = 71, + sundanese = 72, + syloti_nagri = 73, + syriac = 74, + tagbanwa = 75, + tai_le = 76, + new_tai_lue = 77, + tamil = 78, + tai_viet = 79, + telugu = 80, + tifinagh = 81, + tagalog = 82, + thaana = 83, + thai = 84, + tibetan = 85, + ugaritic = 86, + vai = 87, + old_persian = 88, + cuneiform = 89, + yi = 90, + inherited = 91, + common = 92, + unknown = 93 + }; + }; + + inline properties::category get_category(::boost::uint32_t ch) + { + return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F); + } + + inline properties::major_category get_major_category(::boost::uint32_t ch) + { + return static_cast<properties::major_category>(get_category(ch) >> 3); + } + + inline bool is_punctuation(::boost::uint32_t ch) + { + return get_major_category(ch) == properties::punctuation; + } + + inline bool is_decimal_number(::boost::uint32_t ch) + { + return get_category(ch) == properties::decimal_number; + } + + inline bool is_hex_digit(::boost::uint32_t ch) + { + return (detail::category_lookup(ch) & properties::hex_digit) != 0; + } + + inline bool is_control(::boost::uint32_t ch) + { + return get_category(ch) == properties::control; + } + + inline bool is_alphabetic(::boost::uint32_t ch) + { + return (detail::category_lookup(ch) & properties::alphabetic) != 0; + } + + inline bool is_alphanumeric(::boost::uint32_t ch) + { + return is_decimal_number(ch) || is_alphabetic(ch); + } + + inline bool is_uppercase(::boost::uint32_t ch) + { + return (detail::category_lookup(ch) & properties::uppercase) != 0; + } + + inline bool is_lowercase(::boost::uint32_t ch) + { + return (detail::category_lookup(ch) & properties::lowercase) != 0; + } + + inline bool is_white_space(::boost::uint32_t ch) + { + return (detail::category_lookup(ch) & properties::white_space) != 0; + } + + inline bool is_blank(::boost::uint32_t ch) + { + switch (ch) + { + case '\n': case '\v': case '\f': case '\r': + return false; + default: + return is_white_space(ch) + && !( get_category(ch) == properties::line_separator + || get_category(ch) == properties::paragraph_separator + ); + } + } + + inline bool is_graph(::boost::uint32_t ch) + { + return !( is_white_space(ch) + || get_category(ch) == properties::control + || get_category(ch) == properties::surrogate + || get_category(ch) == properties::unassigned + ); + } + + inline bool is_print(::boost::uint32_t ch) + { + return (is_graph(ch) || is_blank(ch)) && !is_control(ch); + } + + inline bool is_noncharacter_code_point(::boost::uint32_t ch) + { + return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0; + } + + inline bool is_default_ignorable_code_point(::boost::uint32_t ch) + { + return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0; + } + + inline properties::script get_script(::boost::uint32_t ch) + { + return static_cast<properties::script>(detail::script_lookup(ch) & 0x3F); + } + + inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch) + { + // The table returns 0 to signal that this code maps to itself + ::boost::uint32_t r = detail::lowercase_lookup(ch); + return (r == 0)? ch : r; + } + + inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch) + { + // The table returns 0 to signal that this code maps to itself + ::boost::uint32_t r = detail::uppercase_lookup(ch); + return (r == 0)? ch : r; + } +}}} + +#endif |