diff options
Diffstat (limited to '3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser')
4 files changed, 1367 insertions, 0 deletions
diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp new file mode 100644 index 0000000..dc1c6bd --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp @@ -0,0 +1,146 @@ +// num_token.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_NUM_TOKEN_HPP +#define BOOST_LEXER_NUM_TOKEN_HPP + +#include <boost/config.hpp> +#include "../../consts.hpp" // null_token +#include "../../size_t.hpp" +#include <boost/detail/workaround.hpp> + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT> +struct basic_num_token +{ + enum type {BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT, + DUP, OR, CHARSET, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT, + ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN, + END}; + + type _type; + std::size_t _id; + std::size_t _min; + bool _comma; + std::size_t _max; + CharT _macro[max_macro_len + 1]; + static const char _precedence_table[END + 1][END + 1]; + static const char *_precedence_strings[END + 1]; + + basic_num_token (const type type_ = BEGIN, + const std::size_t id_ = null_token) : + _type (type_), + _id (id_), + _min (0), + _comma (false), + _max (0) + { + *_macro = 0; + } + + basic_num_token &operator = (const basic_num_token &rhs_) + { + _type = rhs_._type; + _id = rhs_._id; + _min = rhs_._min; + _comma = rhs_._comma; + _max = rhs_._max; + + if (_type == MACRO) + { + const CharT *read_ = rhs_._macro; + CharT *write_ = _macro; + + while (*read_) + { + *write_++ = *read_++; + } + + *write_ = 0; + } + + return *this; + } + + void set (const type type_) + { + _type = type_; + _id = null_token; + } + + void set (const type type_, const std::size_t id_) + { + _type = type_; + _id = id_; + } + + void min_max (const std::size_t min_, const bool comma_, + const std::size_t max_) + { + _min = min_; + _comma = comma_; + _max = max_; + } + + char precedence (const type type_) const + { + return _precedence_table[_type][type_]; + } + + const char *precedence_string () const + { + return _precedence_strings[_type]; + } +}; + +template<typename CharT> +const char basic_num_token<CharT>::_precedence_table[END + 1][END + 1] = { +// BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP, | , CHR, MCR, ( , ) , ? , ?? , * , *? , + , +?, {n}?, {n}, END +/*BEGIN*/{' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/*REGEX*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/*OREXP*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* SEQ */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* SUB */{' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/*EXPRE*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* RPT */{' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>'}, +/*DUPLI*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* | */{' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '}, +/*CHARA*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'}, +/*MACRO*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'}, +/* ( */{' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '}, +/* ) */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'}, +/* ? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* ?? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* * */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* *? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* + */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* +? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/*{n,m}*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/*{nm}?*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* END */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '} +}; + +template<typename CharT> +const char *basic_num_token<CharT>::_precedence_strings[END + 1] = +#if BOOST_WORKAROUND(BOOST_INTEL_CXX_VERSION, BOOST_TESTED_AT(910)) +{{"BEGIN"}, {"REGEX"}, {"OREXP"}, {"SEQUENCE"}, {"SUB"}, {"EXPRESSION"}, + {"REPEAT"}, {"DUPLICATE"}, {"|"}, {"CHARSET"}, {"MACRO"}, + {"("}, {")"}, {"?"}, {"??"}, {"*"}, {"*?"}, {"+"}, {"+?"}, {"{n[,[m]]}"}, + {"{n[,[m]]}?"}, {"END"}}; +#else +{"BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION", "REPEAT", + "DUPLICATE", "|", "CHARSET", "MACRO", "(", ")", "?", "??", "*", "*?", + "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"}; +#endif +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp new file mode 100644 index 0000000..7bdeb80 --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp @@ -0,0 +1,574 @@ +// tokeniser.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_RE_TOKENISER_HPP +#define BOOST_LEXER_RE_TOKENISER_HPP + +// memcpy() +#include <cstring> +#include <map> +#include "num_token.hpp" +#include "../../runtime_error.hpp" +#include "../../size_t.hpp" +#include <sstream> +#include "../../string_token.hpp" +#include "re_tokeniser_helper.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT> +class basic_re_tokeniser +{ +public: + typedef basic_num_token<CharT> num_token; + typedef basic_re_tokeniser_state<CharT> state; + typedef basic_string_token<CharT> string_token; + typedef typename string_token::string string; + typedef std::map<string_token, std::size_t> token_map; + typedef std::pair<string_token, std::size_t> token_pair; + + static void next (state &state_, token_map &map_, num_token &token_) + { + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + token_.min_max (0, false, 0); + + while (!eos_ && ch_ == '"') + { + state_._in_string ^= 1; + eos_ = state_.next (ch_); + } + + if (eos_) + { + if (state_._in_string) + { + throw runtime_error ("Unexpected end of regex " + "(missing '\"')."); + } + + if (state_._paren_count) + { + throw runtime_error ("Unexpected end of regex " + "(missing ')')."); + } + + token_.set (num_token::END, null_token); + } + else + { + if (ch_ == '\\') + { + // Even if we are in a string, respect escape sequences... + escape (state_, map_, token_); + } + else if (state_._in_string) + { + // All other meta characters lose their special meaning + // inside a string. + create_charset_token (string (1, ch_), false, map_, token_); + } + else + { + // Not an escape sequence and not inside a string, so + // check for meta characters. + switch (ch_) + { + case '(': + token_.set (num_token::OPENPAREN, null_token); + ++state_._paren_count; + read_options (state_); + break; + case ')': + --state_._paren_count; + + if (state_._paren_count < 0) + { + std::ostringstream ss_; + + ss_ << "Number of open parenthesis < 0 at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + token_.set (num_token::CLOSEPAREN, null_token); + + if (!state_._flags_stack.empty ()) + { + state_._flags = state_._flags_stack.top (); + state_._flags_stack.pop (); + } + break; + case '?': + if (!state_.eos () && *state_._curr == '?') + { + token_.set (num_token::AOPT, null_token); + state_.increment (); + } + else + { + token_.set (num_token::OPT, null_token); + } + + break; + case '*': + if (!state_.eos () && *state_._curr == '?') + { + token_.set (num_token::AZEROORMORE, null_token); + state_.increment (); + } + else + { + token_.set (num_token::ZEROORMORE, null_token); + } + + break; + case '+': + if (!state_.eos () && *state_._curr == '?') + { + token_.set (num_token::AONEORMORE, null_token); + state_.increment (); + } + else + { + token_.set (num_token::ONEORMORE, null_token); + } + + break; + case '{': + open_curly (state_, token_); + break; + case '|': + token_.set (num_token::OR, null_token); + break; + case '^': + if (state_._curr - 1 == state_._start) + { + token_.set (num_token::CHARSET, bol_token); + state_._seen_BOL_assertion = true; + } + else + { + create_charset_token (string (1, ch_), false, + map_, token_); + } + + break; + case '$': + if (state_._curr == state_._end) + { + token_.set (num_token::CHARSET, eol_token); + state_._seen_EOL_assertion = true; + } + else + { + create_charset_token (string (1, ch_), false, + map_, token_); + } + + break; + case '.': + { + string dot_; + + if (state_._flags & dot_not_newline) + { + dot_ = '\n'; + } + + create_charset_token (dot_, true, map_, token_); + break; + } + case '[': + { + charset (state_, map_, token_); + break; + } + case '/': + throw runtime_error("Lookahead ('/') is not supported yet."); + break; + default: + if ((state_._flags & icase) && + (std::isupper (ch_, state_._locale) || + std::islower (ch_, state_._locale))) + { + CharT upper_ = std::toupper (ch_, state_._locale); + CharT lower_ = std::tolower (ch_, state_._locale); + + string str_ (1, upper_); + + str_ += lower_; + create_charset_token (str_, false, map_, token_); + } + else + { + create_charset_token (string (1, ch_), false, + map_, token_); + } + + break; + } + } + } + } + +private: + typedef basic_re_tokeniser_helper<CharT> tokeniser_helper; + + static void read_options (state &state_) + { + if (!state_.eos () && *state_._curr == '?') + { + CharT ch_ = 0; + bool eos_ = false; + bool negate_ = false; + + state_.increment (); + eos_ = state_.next (ch_); + state_._flags_stack.push (state_._flags); + + while (!eos_ && ch_ != ':') + { + switch (ch_) + { + case '-': + negate_ ^= 1; + break; + case 'i': + if (negate_) + { + state_._flags = static_cast<regex_flags> + (state_._flags & ~icase); + } + else + { + state_._flags = static_cast<regex_flags> + (state_._flags | icase); + } + + negate_ = false; + break; + case 's': + if (negate_) + { + state_._flags = static_cast<regex_flags> + (state_._flags | dot_not_newline); + } + else + { + state_._flags = static_cast<regex_flags> + (state_._flags & ~dot_not_newline); + } + + negate_ = false; + break; + default: + { + std::ostringstream ss_; + + ss_ << "Unknown option at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + } + + eos_ = state_.next (ch_); + } + + // End of string handler will handle early termination + } + else if (!state_._flags_stack.empty ()) + { + state_._flags_stack.push (state_._flags); + } + } + + static void escape (state &state_, token_map &map_, num_token &token_) + { + CharT ch_ = 0; + std::size_t str_len_ = 0; + const CharT *str_ = tokeniser_helper::escape_sequence (state_, + ch_, str_len_); + + if (str_) + { + state state2_ (str_ + 1, str_ + str_len_, state_._flags, + state_._locale); + + charset (state2_, map_, token_); + } + else + { + create_charset_token (string (1, ch_), false, map_, token_); + } + } + + static void charset (state &state_, token_map &map_, num_token &token_) + { + string chars_; + bool negated_ = false; + + tokeniser_helper::charset (state_, chars_, negated_); + create_charset_token (chars_, negated_, map_, token_); + } + + static void create_charset_token (const string &charset_, + const bool negated_, token_map &map_, num_token &token_) + { + std::size_t id_ = null_token; + string_token stok_ (negated_, charset_); + + stok_.remove_duplicates (); + stok_.normalise (); + + typename token_map::const_iterator iter_ = map_.find (stok_); + + if (iter_ == map_.end ()) + { + id_ = map_.size (); + map_.insert (token_pair (stok_, id_)); + } + else + { + id_ = iter_->second; + } + + token_.set (num_token::CHARSET, id_); + } + + static void open_curly (state &state_, num_token &token_) + { + if (state_.eos ()) + { + throw runtime_error ("Unexpected end of regex " + "(missing '}')."); + } + else if (*state_._curr >= '0' && *state_._curr <= '9') + { + repeat_n (state_, token_); + + if (!state_.eos () && *state_._curr == '?') + { + token_._type = num_token::AREPEATN; + state_.increment (); + } + } + else + { + macro (state_, token_); + } + } + + // SYNTAX: + // {n[,[n]]} + // SEMANTIC RULES: + // {0} - INVALID (throw exception) + // {0,} = * + // {0,0} - INVALID (throw exception) + // {0,1} = ? + // {1,} = + + // {min,max} where min == max - {min} + // {min,max} where max < min - INVALID (throw exception) + static void repeat_n (state &state_, num_token &token_) + { + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + while (!eos_ && ch_ >= '0' && ch_ <= '9') + { + token_._min *= 10; + token_._min += ch_ - '0'; + eos_ = state_.next (ch_); + } + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "(missing '}')."); + } + + bool min_max_ = false; + bool repeatn_ = true; + + token_._comma = ch_ == ','; + + if (token_._comma) + { + eos_ = state_.next (ch_); + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "(missing '}')."); + } + + if (ch_ == '}') + { + // Small optimisation: Check for '*' equivalency. + if (token_._min == 0) + { + token_.set (num_token::ZEROORMORE, null_token); + repeatn_ = false; + } + // Small optimisation: Check for '+' equivalency. + else if (token_._min == 1) + { + token_.set (num_token::ONEORMORE, null_token); + repeatn_ = false; + } + } + else + { + if (ch_ < '0' || ch_ > '9') + { + std::ostringstream ss_; + + ss_ << "Missing '}' at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + min_max_ = true; + + do + { + token_._max *= 10; + token_._max += ch_ - '0'; + eos_ = state_.next (ch_); + } while (!eos_ && ch_ >= '0' && ch_ <= '9'); + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "(missing '}')."); + } + + // Small optimisation: Check for '?' equivalency. + if (token_._min == 0 && token_._max == 1) + { + token_.set (num_token::OPT, null_token); + repeatn_ = false; + } + // Small optimisation: if min == max, then min. + else if (token_._min == token_._max) + { + token_._comma = false; + min_max_ = false; + token_._max = 0; + } + } + } + + if (ch_ != '}') + { + std::ostringstream ss_; + + ss_ << "Missing '}' at index " << state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + if (repeatn_) + { + // SEMANTIC VALIDATION follows: + // NOTE: {0,} has already become * + // therefore we don't check for a comma. + if (token_._min == 0 && token_._max == 0) + { + std::ostringstream ss_; + + ss_ << "Cannot have exactly zero repeats preceding index " << + state_.index () << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + if (min_max_ && token_._max < token_._min) + { + std::ostringstream ss_; + + ss_ << "Max less than min preceding index " << + state_.index () << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + token_.set (num_token::REPEATN, null_token); + } + } + + static void macro (state &state_, num_token &token_) + { + CharT ch_ = 0; + bool eos_ = false; + const CharT *start_ = state_._curr; + + state_.next (ch_); + + if (ch_ != '_' && !(ch_ >= 'A' && ch_ <= 'Z') && + !(ch_ >= 'a' && ch_ <= 'z')) + { + std::ostringstream ss_; + + ss_ << "Invalid MACRO name at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + do + { + eos_ = state_.next (ch_); + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "(missing '}')."); + } + } while (ch_ == '_' || ch_ == '-' || (ch_ >= 'A' && ch_ <= 'Z') || + (ch_ >= 'a' && ch_ <= 'z') || (ch_ >= '0' && ch_ <= '9')); + + if (ch_ != '}') + { + std::ostringstream ss_; + + ss_ << "Missing '}' at index " << state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + std::size_t len_ = state_._curr - 1 - start_; + + if (len_ > max_macro_len) + { + std::basic_stringstream<CharT> ss_; + std::ostringstream os_; + + os_ << "MACRO name '"; + + while (len_) + { + os_ << ss_.narrow (*start_++, ' '); + --len_; + } + + os_ << "' too long."; + throw runtime_error (os_.str ()); + } + + token_.set (num_token::MACRO, null_token); + + // Some systems have memcpy in namespace std. + using namespace std; + + memcpy (token_._macro, start_, len_ * sizeof (CharT)); + token_._macro[len_] = 0; + } +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp new file mode 100644 index 0000000..6e0791e --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp @@ -0,0 +1,549 @@ +// tokeniser_helper.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H +#define BOOST_LEXER_RE_TOKENISER_HELPER_H + +#include "../../char_traits.hpp" +// strlen() +#include <cstring> +#include "../../size_t.hpp" +#include "re_tokeniser_state.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT, typename Traits = char_traits<CharT> > +class basic_re_tokeniser_helper +{ +public: + typedef basic_re_tokeniser_state<CharT> state; + typedef std::basic_string<CharT> string; + + static const CharT *escape_sequence (state &state_, CharT &ch_, + std::size_t &str_len_) + { + bool eos_ = state_.eos (); + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "following '\\'."); + } + + const CharT *str_ = charset_shortcut (*state_._curr, str_len_); + + if (str_) + { + state_.increment (); + } + else + { + ch_ = chr (state_); + } + + return str_; + } + + // This function can call itself. + static void charset (state &state_, string &chars_, bool &negated_) + { + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "following '['."); + } + + negated_ = ch_ == '^'; + + if (negated_) + { + eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "following '^'."); + } + } + + bool chset_ = false; + CharT prev_ = 0; + + while (ch_ != ']') + { + if (ch_ == '\\') + { + std::size_t str_len_ = 0; + const CharT *str_ = escape_sequence (state_, prev_, str_len_); + + chset_ = str_ != 0; + + if (chset_) + { + state temp_state_ (str_ + 1, str_ + str_len_, + state_._flags, state_._locale); + string temp_chars_; + bool temp_negated_ = false; + + charset (temp_state_, temp_chars_, temp_negated_); + + if (negated_ != temp_negated_) + { + std::ostringstream ss_; + + ss_ << "Mismatch in charset negation preceding " + "index " << state_.index () << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + chars_ += temp_chars_; + } + } +/* + else if (ch_ == '[' && !state_.eos () && *state_._curr == ':') + { + // TODO: POSIX charsets + } +*/ + else + { + chset_ = false; + prev_ = ch_; + } + + eos_ = state_.next (ch_); + + // Covers preceding if, else if and else + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "(missing ']')."); + } + + if (ch_ == '-') + { + charset_range (chset_, state_, eos_, ch_, prev_, chars_); + } + else if (!chset_) + { + if ((state_._flags & icase) && + (std::isupper (prev_, state_._locale) || + std::islower (prev_, state_._locale))) + { + CharT upper_ = std::toupper (prev_, state_._locale); + CharT lower_ = std::tolower (prev_, state_._locale); + + chars_ += upper_; + chars_ += lower_; + } + else + { + chars_ += prev_; + } + } + } + + if (!negated_ && chars_.empty ()) + { + throw runtime_error ("Empty charsets not allowed."); + } + } + + static CharT chr (state &state_) + { + CharT ch_ = 0; + + // eos_ has already been checked for. + switch (*state_._curr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + ch_ = decode_octal (state_); + break; + case 'a': + ch_ = '\a'; + state_.increment (); + break; + case 'b': + ch_ = '\b'; + state_.increment (); + break; + case 'c': + ch_ = decode_control_char (state_); + break; + case 'e': + ch_ = 27; // '\e' not recognised by compiler + state_.increment (); + break; + case 'f': + ch_ = '\f'; + state_.increment (); + break; + case 'n': + ch_ = '\n'; + state_.increment (); + break; + case 'r': + ch_ = '\r'; + state_.increment (); + break; + case 't': + ch_ = '\t'; + state_.increment (); + break; + case 'v': + ch_ = '\v'; + state_.increment (); + break; + case 'x': + ch_ = decode_hex (state_); + break; + default: + ch_ = *state_._curr; + state_.increment (); + break; + } + + return ch_; + } + +private: + static const char *charset_shortcut (const char ch_, + std::size_t &str_len_) + { + const char *str_ = 0; + + switch (ch_) + { + case 'd': + str_ = "[0-9]"; + break; + case 'D': + str_ = "[^0-9]"; + break; + case 's': + str_ = "[ \t\n\r\f\v]"; + break; + case 'S': + str_ = "[^ \t\n\r\f\v]"; + break; + case 'w': + str_ = "[_0-9A-Za-z]"; + break; + case 'W': + str_ = "[^_0-9A-Za-z]"; + break; + } + + if (str_) + { + // Some systems have strlen in namespace std. + using namespace std; + + str_len_ = strlen (str_); + } + else + { + str_len_ = 0; + } + + return str_; + } + + static const wchar_t *charset_shortcut (const wchar_t ch_, + std::size_t &str_len_) + { + const wchar_t *str_ = 0; + + switch (ch_) + { + case 'd': + str_ = L"[0-9]"; + break; + case 'D': + str_ = L"[^0-9]"; + break; + case 's': + str_ = L"[ \t\n\r\f\v]"; + break; + case 'S': + str_ = L"[^ \t\n\r\f\v]"; + break; + case 'w': + str_ = L"[_0-9A-Za-z]"; + break; + case 'W': + str_ = L"[^_0-9A-Za-z]"; + break; + } + + if (str_) + { + // Some systems have wcslen in namespace std. + using namespace std; + + str_len_ = wcslen (str_); + } + else + { + str_len_ = 0; + } + + return str_; + } + + static CharT decode_octal (state &state_) + { + std::size_t accumulator_ = 0; + CharT ch_ = *state_._curr; + unsigned short count_ = 3; + bool eos_ = false; + + for (;;) + { + accumulator_ *= 8; + accumulator_ += ch_ - '0'; + --count_; + state_.increment (); + eos_ = state_.eos (); + + if (!count_ || eos_) break; + + ch_ = *state_._curr; + + // Don't consume invalid chars! + if (ch_ < '0' || ch_ > '7') + { + break; + } + } + + return static_cast<CharT> (accumulator_); + } + + static CharT decode_control_char (state &state_) + { + // Skip over 'c' + state_.increment (); + + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex following \\c."); + } + else + { + if (ch_ >= 'a' && ch_ <= 'z') + { + ch_ -= 'a' - 1; + } + else if (ch_ >= 'A' && ch_ <= 'Z') + { + ch_ -= 'A' - 1; + } + else if (ch_ == '@') + { + // Apparently... + ch_ = 0; + } + else + { + std::ostringstream ss_; + + ss_ << "Invalid control char at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + } + + return ch_; + } + + static CharT decode_hex (state &state_) + { + // Skip over 'x' + state_.increment (); + + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex following \\x."); + } + + if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') || + (ch_ >= 'A' && ch_ <= 'F'))) + { + std::ostringstream ss_; + + ss_ << "Illegal char following \\x at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + std::size_t hex_ = 0; + + do + { + hex_ *= 16; + + if (ch_ >= '0' && ch_ <= '9') + { + hex_ += ch_ - '0'; + } + else if (ch_ >= 'a' && ch_ <= 'f') + { + hex_ += 10 + (ch_ - 'a'); + } + else + { + hex_ += 10 + (ch_ - 'A'); + } + + eos_ = state_.eos (); + + if (!eos_) + { + ch_ = *state_._curr; + + // Don't consume invalid chars! + if (((ch_ >= '0' && ch_ <= '9') || + (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F'))) + { + state_.increment (); + } + else + { + eos_ = true; + } + } + } while (!eos_); + + return static_cast<CharT> (hex_); + } + + static void charset_range (const bool chset_, state &state_, bool &eos_, + CharT &ch_, const CharT prev_, string &chars_) + { + if (chset_) + { + std::ostringstream ss_; + + ss_ << "Charset cannot form start of range preceding " + "index " << state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "following '-'."); + } + + CharT curr_ = 0; + + if (ch_ == '\\') + { + std::size_t str_len_ = 0; + + if (escape_sequence (state_, curr_, str_len_)) + { + std::ostringstream ss_; + + ss_ << "Charset cannot form end of range preceding index " + << state_.index () << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + } +/* + else if (ch_ == '[' && !state_.eos () && *state_._curr == ':') + { + std::ostringstream ss_; + + ss_ << "POSIX char class cannot form end of range at " + "index " << state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } +*/ + else + { + curr_ = ch_; + } + + eos_ = state_.next (ch_); + + // Covers preceding if and else + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "(missing ']')."); + } + + std::size_t start_ = static_cast<typename Traits::index_type> (prev_); + std::size_t end_ = static_cast<typename Traits::index_type> (curr_); + + // Semanic check + if (end_ < start_) + { + std::ostringstream ss_; + + ss_ << "Invalid range in charset preceding index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + chars_.reserve (chars_.size () + (end_ + 1 - start_)); + + for (; start_ <= end_; ++start_) + { + CharT ch_ = static_cast<CharT> (start_); + + if ((state_._flags & icase) && + (std::isupper (ch_, state_._locale) || + std::islower (ch_, state_._locale))) + { + CharT upper_ = std::toupper (ch_, state_._locale); + CharT lower_ = std::tolower (ch_, state_._locale); + + chars_ += (upper_); + chars_ += (lower_); + } + else + { + chars_ += (ch_); + } + } + } +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp new file mode 100644 index 0000000..35995ad --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp @@ -0,0 +1,98 @@ +// tokeniser_state.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_RE_TOKENISER_STATE_HPP +#define BOOST_LEXER_RE_TOKENISER_STATE_HPP + +#include "../../consts.hpp" +#include <locale> +#include "../../size_t.hpp" +#include <stack> + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT> +struct basic_re_tokeniser_state +{ + const CharT * const _start; + const CharT * const _end; + const CharT *_curr; + regex_flags _flags; + std::stack<regex_flags> _flags_stack; + std::locale _locale; + long _paren_count; + bool _in_string; + bool _seen_BOL_assertion; + bool _seen_EOL_assertion; + + basic_re_tokeniser_state (const CharT *start_, const CharT * const end_, + const regex_flags flags_, const std::locale locale_) : + _start (start_), + _end (end_), + _curr (start_), + _flags (flags_), + _locale (locale_), + _paren_count (0), + _in_string (false), + _seen_BOL_assertion (false), + _seen_EOL_assertion (false) + { + } + + // prevent VC++ 7.1 warning: + const basic_re_tokeniser_state &operator = + (const basic_re_tokeniser_state &rhs_) + { + _start = rhs_._start; + _end = rhs_._end; + _curr = rhs_._curr; + _flags = rhs_._flags; + _locale = rhs_._locale; + _paren_count = rhs_._paren_count; + _in_string = rhs_._in_string; + _seen_BOL_assertion = rhs_._seen_BOL_assertion; + _seen_EOL_assertion = rhs_._seen_EOL_assertion; + return this; + } + + inline bool next (CharT &ch_) + { + if (_curr >= _end) + { + ch_ = 0; + return true; + } + else + { + ch_ = *_curr; + increment (); + return false; + } + } + + inline void increment () + { + ++_curr; + } + + inline std::size_t index () + { + return _curr - _start; + } + + inline bool eos () + { + return _curr >= _end; + } +}; +} +} +} + +#endif |