diff options
Diffstat (limited to '3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser')
11 files changed, 2559 insertions, 0 deletions
diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp new file mode 100644 index 0000000..5c3c650 --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp @@ -0,0 +1,511 @@ +// parser.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_PARSER_HPP +#define BOOST_LEXER_PARSER_HPP + +#include <boost/assert.hpp> +#include "tree/end_node.hpp" +#include "tree/iteration_node.hpp" +#include "tree/leaf_node.hpp" +#include "../runtime_error.hpp" +#include "tree/selection_node.hpp" +#include "tree/sequence_node.hpp" +#include "../size_t.hpp" +#include "tokeniser/re_tokeniser.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT> +class basic_parser +{ +public: + typedef basic_re_tokeniser<CharT> tokeniser; + typedef typename tokeniser::string string; + typedef std::map<string, const node *> macro_map; + typedef node::node_ptr_vector node_ptr_vector; + typedef typename tokeniser::num_token token; + +/* + General principles of regex parsing: + - Every regex is a sequence of sub-regexes. + - Regexes consist of operands and operators + - All operators decompose to sequence, selection ('|') and iteration ('*') + - Regex tokens are stored on the stack. + - When a complete sequence of regex tokens is on the stack it is processed. + +Grammar: + +<REGEX> -> <OREXP> +<OREXP> -> <SEQUENCE> | <OREXP>'|'<SEQUENCE> +<SEQUENCE> -> <SUB> +<SUB> -> <EXPRESSION> | <SUB><EXPRESSION> +<EXPRESSION> -> <REPEAT> +<REPEAT> -> charset | macro | '('<REGEX>')' | <REPEAT><DUPLICATE> +<DUPLICATE> -> '?' | '*' | '+' | '{n[,[m]]}' +*/ + static node *parse (const CharT *start_, const CharT * const end_, + const std::size_t id_, const std::size_t unique_id_, + const std::size_t dfa_state_, const regex_flags flags_, + const std::locale &locale_, node_ptr_vector &node_ptr_vector_, + const macro_map ¯omap_, typename tokeniser::token_map &map_, + bool &seen_BOL_assertion_, bool &seen_EOL_assertion_) + { + node *root_ = 0; + state state_ (start_, end_, flags_, locale_); + token lhs_token_; + token rhs_token_; + token_stack token_stack_; + tree_node_stack tree_node_stack_; + char action_ = 0; + + token_stack_.push (rhs_token_); + tokeniser::next (state_, map_, rhs_token_); + + do + { + lhs_token_ = token_stack_.top (); + action_ = lhs_token_.precedence (rhs_token_._type); + + switch (action_) + { + case '<': + case '=': + token_stack_.push (rhs_token_); + tokeniser::next (state_, map_, rhs_token_); + break; + case '>': + reduce (token_stack_, macromap_, node_ptr_vector_, + tree_node_stack_); + break; + default: + std::ostringstream ss_; + + ss_ << "A syntax error occurred: '" << + lhs_token_.precedence_string () << + "' against '" << rhs_token_.precedence_string () << + "' at index " << state_.index () << "."; + throw runtime_error (ss_.str ().c_str ()); + break; + } + } while (!token_stack_.empty ()); + + if (tree_node_stack_.empty ()) + { + throw runtime_error ("Empty rules are not allowed."); + } + + BOOST_ASSERT(tree_node_stack_.size () == 1); + + node *lhs_node_ = tree_node_stack_.top (); + + tree_node_stack_.pop (); + + if (id_ == 0) + { + // Macros have no end state... + root_ = lhs_node_; + } + else + { + node_ptr_vector_->push_back (static_cast<end_node *>(0)); + + node *rhs_node_ = new end_node (id_, unique_id_, dfa_state_); + + node_ptr_vector_->back () = rhs_node_; + node_ptr_vector_->push_back (static_cast<sequence_node *>(0)); + node_ptr_vector_->back () = new sequence_node + (lhs_node_, rhs_node_); + root_ = node_ptr_vector_->back (); + } + + // Done this way as bug in VC++ 6 prevents |= operator working + // properly! + if (state_._seen_BOL_assertion) seen_BOL_assertion_ = true; + + if (state_._seen_EOL_assertion) seen_EOL_assertion_ = true; + + return root_; + } + +private: + typedef typename tokeniser::state state; + typedef std::stack<token> token_stack; + typedef node::node_stack tree_node_stack; + + static void reduce (token_stack &token_stack_, + const macro_map ¯omap_, node_ptr_vector &node_vector_ptr_, + tree_node_stack &tree_node_stack_) + { + typename tokeniser::num_token lhs_; + typename tokeniser::num_token rhs_; + token_stack handle_; + char action_ = 0; + + do + { + rhs_ = token_stack_.top (); + token_stack_.pop (); + handle_.push (rhs_); + + if (!token_stack_.empty ()) + { + lhs_ = token_stack_.top (); + action_ = lhs_.precedence (rhs_._type); + } + } while (!token_stack_.empty () && action_ == '='); + + BOOST_ASSERT(token_stack_.empty () || action_ == '<'); + + switch (rhs_._type) + { + case token::BEGIN: + // finished processing so exit + break; + case token::REGEX: + // finished parsing, nothing to do + break; + case token::OREXP: + orexp (handle_, token_stack_, node_vector_ptr_, tree_node_stack_); + break; + case token::SEQUENCE: + token_stack_.push (token::OREXP); + break; + case token::SUB: + sub (handle_, token_stack_, node_vector_ptr_, tree_node_stack_); + break; + case token::EXPRESSION: + token_stack_.push (token::SUB); + break; + case token::REPEAT: + repeat (handle_, token_stack_); + break; + case token::CHARSET: + charset (handle_, token_stack_, node_vector_ptr_, + tree_node_stack_); + break; + case token::MACRO: + macro (handle_, token_stack_, macromap_, node_vector_ptr_, + tree_node_stack_); + break; + case token::OPENPAREN: + openparen (handle_, token_stack_); + break; + case token::OPT: + case token::AOPT: + optional (rhs_._type == token::OPT, node_vector_ptr_, + tree_node_stack_); + token_stack_.push (token::DUP); + break; + case token::ZEROORMORE: + case token::AZEROORMORE: + zero_or_more (rhs_._type == token::ZEROORMORE, node_vector_ptr_, + tree_node_stack_); + token_stack_.push (token::DUP); + break; + case token::ONEORMORE: + case token::AONEORMORE: + one_or_more (rhs_._type == token::ONEORMORE, node_vector_ptr_, + tree_node_stack_); + token_stack_.push (token::DUP); + break; + case token::REPEATN: + case token::AREPEATN: + repeatn (rhs_._type == token::REPEATN, handle_.top (), + node_vector_ptr_, tree_node_stack_); + token_stack_.push (token::DUP); + break; + default: + throw runtime_error + ("Internal error regex_parser::reduce"); + break; + } + } + + static void orexp (token_stack &handle_, token_stack &token_stack_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + BOOST_ASSERT(handle_.top ()._type == token::OREXP && + (handle_.size () == 1 || handle_.size () == 3)); + + if (handle_.size () == 1) + { + token_stack_.push (token::REGEX); + } + else + { + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::OR); + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::SEQUENCE); + perform_or (node_ptr_vector_, tree_node_stack_); + token_stack_.push (token::OREXP); + } + } + + static void sub (token_stack &handle_, token_stack &token_stack_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + BOOST_ASSERT(handle_.top ()._type == token::SUB && + (handle_.size () == 1 || handle_.size () == 2)); + + if (handle_.size () == 1) + { + token_stack_.push (token::SEQUENCE); + } + else + { + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::EXPRESSION); + // perform join + sequence (node_ptr_vector_, tree_node_stack_); + token_stack_.push (token::SUB); + } + } + + static void repeat (token_stack &handle_, token_stack &token_stack_) + { + BOOST_ASSERT(handle_.top ()._type == token::REPEAT && + handle_.size () >= 1 && handle_.size () <= 3); + + if (handle_.size () == 1) + { + token_stack_.push (token::EXPRESSION); + } + else + { + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::DUP); + token_stack_.push (token::REPEAT); + } + } + + static void charset (token_stack &handle_, token_stack &token_stack_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + BOOST_ASSERT(handle_.top ()._type == token::CHARSET && + handle_.size () == 1); + // store charset + node_ptr_vector_->push_back (static_cast<leaf_node *>(0)); + + const size_t id_ = handle_.top ()._id; + + node_ptr_vector_->back () = new leaf_node (id_, true); + tree_node_stack_.push (node_ptr_vector_->back ()); + token_stack_.push (token::REPEAT); + } + + static void macro (token_stack &handle_, token_stack &token_stack_, + const macro_map ¯omap_, node_ptr_vector &node_ptr_vector_, + tree_node_stack &tree_node_stack_) + { + token &top_ = handle_.top (); + + BOOST_ASSERT(top_._type == token::MACRO && handle_.size () == 1); + + typename macro_map::const_iterator iter_ = + macromap_.find (top_._macro); + + if (iter_ == macromap_.end ()) + { + const CharT *name_ = top_._macro; + std::basic_stringstream<CharT> ss_; + std::ostringstream os_; + + os_ << "Unknown MACRO name '"; + + while (*name_) + { + os_ << ss_.narrow (*name_++, ' '); + } + + os_ << "'."; + throw runtime_error (os_.str ()); + } + + tree_node_stack_.push (iter_->second->copy (node_ptr_vector_)); + token_stack_.push (token::REPEAT); + } + + static void openparen (token_stack &handle_, token_stack &token_stack_) + { + BOOST_ASSERT(handle_.top ()._type == token::OPENPAREN && + handle_.size () == 3); + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::REGEX); + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::CLOSEPAREN); + token_stack_.push (token::REPEAT); + } + + static void perform_or (node_ptr_vector &node_ptr_vector_, + tree_node_stack &tree_node_stack_) + { + // perform or + node *rhs_ = tree_node_stack_.top (); + + tree_node_stack_.pop (); + + node *lhs_ = tree_node_stack_.top (); + + node_ptr_vector_->push_back (static_cast<selection_node *>(0)); + node_ptr_vector_->back () = new selection_node (lhs_, rhs_); + tree_node_stack_.top () = node_ptr_vector_->back (); + } + + static void sequence (node_ptr_vector &node_ptr_vector_, + tree_node_stack &tree_node_stack_) + { + node *rhs_ = tree_node_stack_.top (); + + tree_node_stack_.pop (); + + node *lhs_ = tree_node_stack_.top (); + + node_ptr_vector_->push_back (static_cast<sequence_node *>(0)); + node_ptr_vector_->back () = new sequence_node (lhs_, rhs_); + tree_node_stack_.top () = node_ptr_vector_->back (); + } + + static void optional (const bool greedy_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + // perform ? + node *lhs_ = tree_node_stack_.top (); + // You don't know if lhs_ is a leaf_node, so get firstpos. + node::node_vector &firstpos_ = lhs_->firstpos (); + + for (node::node_vector::iterator iter_ = firstpos_.begin (), + end_ = firstpos_.end (); iter_ != end_; ++iter_) + { + // These are leaf_nodes! + (*iter_)->greedy (greedy_); + } + + node_ptr_vector_->push_back (static_cast<leaf_node *>(0)); + + node *rhs_ = new leaf_node (null_token, greedy_); + + node_ptr_vector_->back () = rhs_; + node_ptr_vector_->push_back (static_cast<selection_node *>(0)); + node_ptr_vector_->back () = new selection_node (lhs_, rhs_); + tree_node_stack_.top () = node_ptr_vector_->back (); + } + + static void zero_or_more (const bool greedy_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + // perform * + node *ptr_ = tree_node_stack_.top (); + + node_ptr_vector_->push_back (static_cast<iteration_node *>(0)); + node_ptr_vector_->back () = new iteration_node (ptr_, greedy_); + tree_node_stack_.top () = node_ptr_vector_->back (); + } + + static void one_or_more (const bool greedy_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + // perform + + node *lhs_ = tree_node_stack_.top (); + node *copy_ = lhs_->copy (node_ptr_vector_); + + node_ptr_vector_->push_back (static_cast<iteration_node *>(0)); + + node *rhs_ = new iteration_node (copy_, greedy_); + + node_ptr_vector_->back () = rhs_; + node_ptr_vector_->push_back (static_cast<sequence_node *>(0)); + node_ptr_vector_->back () = new sequence_node (lhs_, rhs_); + tree_node_stack_.top () = node_ptr_vector_->back (); + } + + // This is one of the most mind bending routines in this code... + static void repeatn (const bool greedy_, const token &token_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + // perform {n[,[m]]} + // Semantic checks have already been performed. + // {0,} = * + // {0,1} = ? + // {1,} = + + // therefore we do not check for these cases. + if (!(token_._min == 1 && !token_._comma)) + { + const std::size_t top_ = token_._min > 0 ? + token_._min : token_._max; + + if (token_._min == 0) + { + optional (greedy_, node_ptr_vector_, tree_node_stack_); + } + + node *prev_ = tree_node_stack_.top ()->copy (node_ptr_vector_); + node *curr_ = 0; + + for (std::size_t i_ = 2; i_ < top_; ++i_) + { + curr_ = prev_->copy (node_ptr_vector_); + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + sequence (node_ptr_vector_, tree_node_stack_); + prev_ = curr_; + } + + if (token_._comma && token_._min > 0) + { + if (token_._min > 1) + { + curr_ = prev_->copy (node_ptr_vector_); + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + sequence (node_ptr_vector_, tree_node_stack_); + prev_ = curr_; + } + + if (token_._comma && token_._max) + { + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + optional (greedy_, node_ptr_vector_, tree_node_stack_); + prev_ = tree_node_stack_.top (); + tree_node_stack_.pop (); + + const std::size_t count_ = token_._max - token_._min; + + for (std::size_t i_ = 1; i_ < count_; ++i_) + { + curr_ = prev_->copy (node_ptr_vector_); + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + sequence (node_ptr_vector_, tree_node_stack_); + prev_ = curr_; + } + } + else + { + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + zero_or_more (greedy_, node_ptr_vector_, tree_node_stack_); + prev_ = tree_node_stack_.top (); + tree_node_stack_.pop (); + } + } + + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + sequence (node_ptr_vector_, tree_node_stack_); + } + } +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp new file mode 100644 index 0000000..dc1c6bd --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/num_token.hpp @@ -0,0 +1,146 @@ +// num_token.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_NUM_TOKEN_HPP +#define BOOST_LEXER_NUM_TOKEN_HPP + +#include <boost/config.hpp> +#include "../../consts.hpp" // null_token +#include "../../size_t.hpp" +#include <boost/detail/workaround.hpp> + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT> +struct basic_num_token +{ + enum type {BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT, + DUP, OR, CHARSET, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT, + ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN, + END}; + + type _type; + std::size_t _id; + std::size_t _min; + bool _comma; + std::size_t _max; + CharT _macro[max_macro_len + 1]; + static const char _precedence_table[END + 1][END + 1]; + static const char *_precedence_strings[END + 1]; + + basic_num_token (const type type_ = BEGIN, + const std::size_t id_ = null_token) : + _type (type_), + _id (id_), + _min (0), + _comma (false), + _max (0) + { + *_macro = 0; + } + + basic_num_token &operator = (const basic_num_token &rhs_) + { + _type = rhs_._type; + _id = rhs_._id; + _min = rhs_._min; + _comma = rhs_._comma; + _max = rhs_._max; + + if (_type == MACRO) + { + const CharT *read_ = rhs_._macro; + CharT *write_ = _macro; + + while (*read_) + { + *write_++ = *read_++; + } + + *write_ = 0; + } + + return *this; + } + + void set (const type type_) + { + _type = type_; + _id = null_token; + } + + void set (const type type_, const std::size_t id_) + { + _type = type_; + _id = id_; + } + + void min_max (const std::size_t min_, const bool comma_, + const std::size_t max_) + { + _min = min_; + _comma = comma_; + _max = max_; + } + + char precedence (const type type_) const + { + return _precedence_table[_type][type_]; + } + + const char *precedence_string () const + { + return _precedence_strings[_type]; + } +}; + +template<typename CharT> +const char basic_num_token<CharT>::_precedence_table[END + 1][END + 1] = { +// BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP, | , CHR, MCR, ( , ) , ? , ?? , * , *? , + , +?, {n}?, {n}, END +/*BEGIN*/{' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/*REGEX*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/*OREXP*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* SEQ */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* SUB */{' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/*EXPRE*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* RPT */{' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>'}, +/*DUPLI*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* | */{' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '}, +/*CHARA*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'}, +/*MACRO*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'}, +/* ( */{' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '}, +/* ) */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>'}, +/* ? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* ?? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* * */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* *? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* + */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* +? */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/*{n,m}*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/*{nm}?*/{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>'}, +/* END */{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '} +}; + +template<typename CharT> +const char *basic_num_token<CharT>::_precedence_strings[END + 1] = +#if BOOST_WORKAROUND(BOOST_INTEL_CXX_VERSION, BOOST_TESTED_AT(910)) +{{"BEGIN"}, {"REGEX"}, {"OREXP"}, {"SEQUENCE"}, {"SUB"}, {"EXPRESSION"}, + {"REPEAT"}, {"DUPLICATE"}, {"|"}, {"CHARSET"}, {"MACRO"}, + {"("}, {")"}, {"?"}, {"??"}, {"*"}, {"*?"}, {"+"}, {"+?"}, {"{n[,[m]]}"}, + {"{n[,[m]]}?"}, {"END"}}; +#else +{"BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION", "REPEAT", + "DUPLICATE", "|", "CHARSET", "MACRO", "(", ")", "?", "??", "*", "*?", + "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"}; +#endif +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp new file mode 100644 index 0000000..7bdeb80 --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp @@ -0,0 +1,574 @@ +// tokeniser.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_RE_TOKENISER_HPP +#define BOOST_LEXER_RE_TOKENISER_HPP + +// memcpy() +#include <cstring> +#include <map> +#include "num_token.hpp" +#include "../../runtime_error.hpp" +#include "../../size_t.hpp" +#include <sstream> +#include "../../string_token.hpp" +#include "re_tokeniser_helper.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT> +class basic_re_tokeniser +{ +public: + typedef basic_num_token<CharT> num_token; + typedef basic_re_tokeniser_state<CharT> state; + typedef basic_string_token<CharT> string_token; + typedef typename string_token::string string; + typedef std::map<string_token, std::size_t> token_map; + typedef std::pair<string_token, std::size_t> token_pair; + + static void next (state &state_, token_map &map_, num_token &token_) + { + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + token_.min_max (0, false, 0); + + while (!eos_ && ch_ == '"') + { + state_._in_string ^= 1; + eos_ = state_.next (ch_); + } + + if (eos_) + { + if (state_._in_string) + { + throw runtime_error ("Unexpected end of regex " + "(missing '\"')."); + } + + if (state_._paren_count) + { + throw runtime_error ("Unexpected end of regex " + "(missing ')')."); + } + + token_.set (num_token::END, null_token); + } + else + { + if (ch_ == '\\') + { + // Even if we are in a string, respect escape sequences... + escape (state_, map_, token_); + } + else if (state_._in_string) + { + // All other meta characters lose their special meaning + // inside a string. + create_charset_token (string (1, ch_), false, map_, token_); + } + else + { + // Not an escape sequence and not inside a string, so + // check for meta characters. + switch (ch_) + { + case '(': + token_.set (num_token::OPENPAREN, null_token); + ++state_._paren_count; + read_options (state_); + break; + case ')': + --state_._paren_count; + + if (state_._paren_count < 0) + { + std::ostringstream ss_; + + ss_ << "Number of open parenthesis < 0 at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + token_.set (num_token::CLOSEPAREN, null_token); + + if (!state_._flags_stack.empty ()) + { + state_._flags = state_._flags_stack.top (); + state_._flags_stack.pop (); + } + break; + case '?': + if (!state_.eos () && *state_._curr == '?') + { + token_.set (num_token::AOPT, null_token); + state_.increment (); + } + else + { + token_.set (num_token::OPT, null_token); + } + + break; + case '*': + if (!state_.eos () && *state_._curr == '?') + { + token_.set (num_token::AZEROORMORE, null_token); + state_.increment (); + } + else + { + token_.set (num_token::ZEROORMORE, null_token); + } + + break; + case '+': + if (!state_.eos () && *state_._curr == '?') + { + token_.set (num_token::AONEORMORE, null_token); + state_.increment (); + } + else + { + token_.set (num_token::ONEORMORE, null_token); + } + + break; + case '{': + open_curly (state_, token_); + break; + case '|': + token_.set (num_token::OR, null_token); + break; + case '^': + if (state_._curr - 1 == state_._start) + { + token_.set (num_token::CHARSET, bol_token); + state_._seen_BOL_assertion = true; + } + else + { + create_charset_token (string (1, ch_), false, + map_, token_); + } + + break; + case '$': + if (state_._curr == state_._end) + { + token_.set (num_token::CHARSET, eol_token); + state_._seen_EOL_assertion = true; + } + else + { + create_charset_token (string (1, ch_), false, + map_, token_); + } + + break; + case '.': + { + string dot_; + + if (state_._flags & dot_not_newline) + { + dot_ = '\n'; + } + + create_charset_token (dot_, true, map_, token_); + break; + } + case '[': + { + charset (state_, map_, token_); + break; + } + case '/': + throw runtime_error("Lookahead ('/') is not supported yet."); + break; + default: + if ((state_._flags & icase) && + (std::isupper (ch_, state_._locale) || + std::islower (ch_, state_._locale))) + { + CharT upper_ = std::toupper (ch_, state_._locale); + CharT lower_ = std::tolower (ch_, state_._locale); + + string str_ (1, upper_); + + str_ += lower_; + create_charset_token (str_, false, map_, token_); + } + else + { + create_charset_token (string (1, ch_), false, + map_, token_); + } + + break; + } + } + } + } + +private: + typedef basic_re_tokeniser_helper<CharT> tokeniser_helper; + + static void read_options (state &state_) + { + if (!state_.eos () && *state_._curr == '?') + { + CharT ch_ = 0; + bool eos_ = false; + bool negate_ = false; + + state_.increment (); + eos_ = state_.next (ch_); + state_._flags_stack.push (state_._flags); + + while (!eos_ && ch_ != ':') + { + switch (ch_) + { + case '-': + negate_ ^= 1; + break; + case 'i': + if (negate_) + { + state_._flags = static_cast<regex_flags> + (state_._flags & ~icase); + } + else + { + state_._flags = static_cast<regex_flags> + (state_._flags | icase); + } + + negate_ = false; + break; + case 's': + if (negate_) + { + state_._flags = static_cast<regex_flags> + (state_._flags | dot_not_newline); + } + else + { + state_._flags = static_cast<regex_flags> + (state_._flags & ~dot_not_newline); + } + + negate_ = false; + break; + default: + { + std::ostringstream ss_; + + ss_ << "Unknown option at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + } + + eos_ = state_.next (ch_); + } + + // End of string handler will handle early termination + } + else if (!state_._flags_stack.empty ()) + { + state_._flags_stack.push (state_._flags); + } + } + + static void escape (state &state_, token_map &map_, num_token &token_) + { + CharT ch_ = 0; + std::size_t str_len_ = 0; + const CharT *str_ = tokeniser_helper::escape_sequence (state_, + ch_, str_len_); + + if (str_) + { + state state2_ (str_ + 1, str_ + str_len_, state_._flags, + state_._locale); + + charset (state2_, map_, token_); + } + else + { + create_charset_token (string (1, ch_), false, map_, token_); + } + } + + static void charset (state &state_, token_map &map_, num_token &token_) + { + string chars_; + bool negated_ = false; + + tokeniser_helper::charset (state_, chars_, negated_); + create_charset_token (chars_, negated_, map_, token_); + } + + static void create_charset_token (const string &charset_, + const bool negated_, token_map &map_, num_token &token_) + { + std::size_t id_ = null_token; + string_token stok_ (negated_, charset_); + + stok_.remove_duplicates (); + stok_.normalise (); + + typename token_map::const_iterator iter_ = map_.find (stok_); + + if (iter_ == map_.end ()) + { + id_ = map_.size (); + map_.insert (token_pair (stok_, id_)); + } + else + { + id_ = iter_->second; + } + + token_.set (num_token::CHARSET, id_); + } + + static void open_curly (state &state_, num_token &token_) + { + if (state_.eos ()) + { + throw runtime_error ("Unexpected end of regex " + "(missing '}')."); + } + else if (*state_._curr >= '0' && *state_._curr <= '9') + { + repeat_n (state_, token_); + + if (!state_.eos () && *state_._curr == '?') + { + token_._type = num_token::AREPEATN; + state_.increment (); + } + } + else + { + macro (state_, token_); + } + } + + // SYNTAX: + // {n[,[n]]} + // SEMANTIC RULES: + // {0} - INVALID (throw exception) + // {0,} = * + // {0,0} - INVALID (throw exception) + // {0,1} = ? + // {1,} = + + // {min,max} where min == max - {min} + // {min,max} where max < min - INVALID (throw exception) + static void repeat_n (state &state_, num_token &token_) + { + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + while (!eos_ && ch_ >= '0' && ch_ <= '9') + { + token_._min *= 10; + token_._min += ch_ - '0'; + eos_ = state_.next (ch_); + } + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "(missing '}')."); + } + + bool min_max_ = false; + bool repeatn_ = true; + + token_._comma = ch_ == ','; + + if (token_._comma) + { + eos_ = state_.next (ch_); + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "(missing '}')."); + } + + if (ch_ == '}') + { + // Small optimisation: Check for '*' equivalency. + if (token_._min == 0) + { + token_.set (num_token::ZEROORMORE, null_token); + repeatn_ = false; + } + // Small optimisation: Check for '+' equivalency. + else if (token_._min == 1) + { + token_.set (num_token::ONEORMORE, null_token); + repeatn_ = false; + } + } + else + { + if (ch_ < '0' || ch_ > '9') + { + std::ostringstream ss_; + + ss_ << "Missing '}' at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + min_max_ = true; + + do + { + token_._max *= 10; + token_._max += ch_ - '0'; + eos_ = state_.next (ch_); + } while (!eos_ && ch_ >= '0' && ch_ <= '9'); + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "(missing '}')."); + } + + // Small optimisation: Check for '?' equivalency. + if (token_._min == 0 && token_._max == 1) + { + token_.set (num_token::OPT, null_token); + repeatn_ = false; + } + // Small optimisation: if min == max, then min. + else if (token_._min == token_._max) + { + token_._comma = false; + min_max_ = false; + token_._max = 0; + } + } + } + + if (ch_ != '}') + { + std::ostringstream ss_; + + ss_ << "Missing '}' at index " << state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + if (repeatn_) + { + // SEMANTIC VALIDATION follows: + // NOTE: {0,} has already become * + // therefore we don't check for a comma. + if (token_._min == 0 && token_._max == 0) + { + std::ostringstream ss_; + + ss_ << "Cannot have exactly zero repeats preceding index " << + state_.index () << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + if (min_max_ && token_._max < token_._min) + { + std::ostringstream ss_; + + ss_ << "Max less than min preceding index " << + state_.index () << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + token_.set (num_token::REPEATN, null_token); + } + } + + static void macro (state &state_, num_token &token_) + { + CharT ch_ = 0; + bool eos_ = false; + const CharT *start_ = state_._curr; + + state_.next (ch_); + + if (ch_ != '_' && !(ch_ >= 'A' && ch_ <= 'Z') && + !(ch_ >= 'a' && ch_ <= 'z')) + { + std::ostringstream ss_; + + ss_ << "Invalid MACRO name at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + do + { + eos_ = state_.next (ch_); + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "(missing '}')."); + } + } while (ch_ == '_' || ch_ == '-' || (ch_ >= 'A' && ch_ <= 'Z') || + (ch_ >= 'a' && ch_ <= 'z') || (ch_ >= '0' && ch_ <= '9')); + + if (ch_ != '}') + { + std::ostringstream ss_; + + ss_ << "Missing '}' at index " << state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + std::size_t len_ = state_._curr - 1 - start_; + + if (len_ > max_macro_len) + { + std::basic_stringstream<CharT> ss_; + std::ostringstream os_; + + os_ << "MACRO name '"; + + while (len_) + { + os_ << ss_.narrow (*start_++, ' '); + --len_; + } + + os_ << "' too long."; + throw runtime_error (os_.str ()); + } + + token_.set (num_token::MACRO, null_token); + + // Some systems have memcpy in namespace std. + using namespace std; + + memcpy (token_._macro, start_, len_ * sizeof (CharT)); + token_._macro[len_] = 0; + } +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp new file mode 100644 index 0000000..6e0791e --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp @@ -0,0 +1,549 @@ +// tokeniser_helper.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H +#define BOOST_LEXER_RE_TOKENISER_HELPER_H + +#include "../../char_traits.hpp" +// strlen() +#include <cstring> +#include "../../size_t.hpp" +#include "re_tokeniser_state.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT, typename Traits = char_traits<CharT> > +class basic_re_tokeniser_helper +{ +public: + typedef basic_re_tokeniser_state<CharT> state; + typedef std::basic_string<CharT> string; + + static const CharT *escape_sequence (state &state_, CharT &ch_, + std::size_t &str_len_) + { + bool eos_ = state_.eos (); + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "following '\\'."); + } + + const CharT *str_ = charset_shortcut (*state_._curr, str_len_); + + if (str_) + { + state_.increment (); + } + else + { + ch_ = chr (state_); + } + + return str_; + } + + // This function can call itself. + static void charset (state &state_, string &chars_, bool &negated_) + { + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "following '['."); + } + + negated_ = ch_ == '^'; + + if (negated_) + { + eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "following '^'."); + } + } + + bool chset_ = false; + CharT prev_ = 0; + + while (ch_ != ']') + { + if (ch_ == '\\') + { + std::size_t str_len_ = 0; + const CharT *str_ = escape_sequence (state_, prev_, str_len_); + + chset_ = str_ != 0; + + if (chset_) + { + state temp_state_ (str_ + 1, str_ + str_len_, + state_._flags, state_._locale); + string temp_chars_; + bool temp_negated_ = false; + + charset (temp_state_, temp_chars_, temp_negated_); + + if (negated_ != temp_negated_) + { + std::ostringstream ss_; + + ss_ << "Mismatch in charset negation preceding " + "index " << state_.index () << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + chars_ += temp_chars_; + } + } +/* + else if (ch_ == '[' && !state_.eos () && *state_._curr == ':') + { + // TODO: POSIX charsets + } +*/ + else + { + chset_ = false; + prev_ = ch_; + } + + eos_ = state_.next (ch_); + + // Covers preceding if, else if and else + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "(missing ']')."); + } + + if (ch_ == '-') + { + charset_range (chset_, state_, eos_, ch_, prev_, chars_); + } + else if (!chset_) + { + if ((state_._flags & icase) && + (std::isupper (prev_, state_._locale) || + std::islower (prev_, state_._locale))) + { + CharT upper_ = std::toupper (prev_, state_._locale); + CharT lower_ = std::tolower (prev_, state_._locale); + + chars_ += upper_; + chars_ += lower_; + } + else + { + chars_ += prev_; + } + } + } + + if (!negated_ && chars_.empty ()) + { + throw runtime_error ("Empty charsets not allowed."); + } + } + + static CharT chr (state &state_) + { + CharT ch_ = 0; + + // eos_ has already been checked for. + switch (*state_._curr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + ch_ = decode_octal (state_); + break; + case 'a': + ch_ = '\a'; + state_.increment (); + break; + case 'b': + ch_ = '\b'; + state_.increment (); + break; + case 'c': + ch_ = decode_control_char (state_); + break; + case 'e': + ch_ = 27; // '\e' not recognised by compiler + state_.increment (); + break; + case 'f': + ch_ = '\f'; + state_.increment (); + break; + case 'n': + ch_ = '\n'; + state_.increment (); + break; + case 'r': + ch_ = '\r'; + state_.increment (); + break; + case 't': + ch_ = '\t'; + state_.increment (); + break; + case 'v': + ch_ = '\v'; + state_.increment (); + break; + case 'x': + ch_ = decode_hex (state_); + break; + default: + ch_ = *state_._curr; + state_.increment (); + break; + } + + return ch_; + } + +private: + static const char *charset_shortcut (const char ch_, + std::size_t &str_len_) + { + const char *str_ = 0; + + switch (ch_) + { + case 'd': + str_ = "[0-9]"; + break; + case 'D': + str_ = "[^0-9]"; + break; + case 's': + str_ = "[ \t\n\r\f\v]"; + break; + case 'S': + str_ = "[^ \t\n\r\f\v]"; + break; + case 'w': + str_ = "[_0-9A-Za-z]"; + break; + case 'W': + str_ = "[^_0-9A-Za-z]"; + break; + } + + if (str_) + { + // Some systems have strlen in namespace std. + using namespace std; + + str_len_ = strlen (str_); + } + else + { + str_len_ = 0; + } + + return str_; + } + + static const wchar_t *charset_shortcut (const wchar_t ch_, + std::size_t &str_len_) + { + const wchar_t *str_ = 0; + + switch (ch_) + { + case 'd': + str_ = L"[0-9]"; + break; + case 'D': + str_ = L"[^0-9]"; + break; + case 's': + str_ = L"[ \t\n\r\f\v]"; + break; + case 'S': + str_ = L"[^ \t\n\r\f\v]"; + break; + case 'w': + str_ = L"[_0-9A-Za-z]"; + break; + case 'W': + str_ = L"[^_0-9A-Za-z]"; + break; + } + + if (str_) + { + // Some systems have wcslen in namespace std. + using namespace std; + + str_len_ = wcslen (str_); + } + else + { + str_len_ = 0; + } + + return str_; + } + + static CharT decode_octal (state &state_) + { + std::size_t accumulator_ = 0; + CharT ch_ = *state_._curr; + unsigned short count_ = 3; + bool eos_ = false; + + for (;;) + { + accumulator_ *= 8; + accumulator_ += ch_ - '0'; + --count_; + state_.increment (); + eos_ = state_.eos (); + + if (!count_ || eos_) break; + + ch_ = *state_._curr; + + // Don't consume invalid chars! + if (ch_ < '0' || ch_ > '7') + { + break; + } + } + + return static_cast<CharT> (accumulator_); + } + + static CharT decode_control_char (state &state_) + { + // Skip over 'c' + state_.increment (); + + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex following \\c."); + } + else + { + if (ch_ >= 'a' && ch_ <= 'z') + { + ch_ -= 'a' - 1; + } + else if (ch_ >= 'A' && ch_ <= 'Z') + { + ch_ -= 'A' - 1; + } + else if (ch_ == '@') + { + // Apparently... + ch_ = 0; + } + else + { + std::ostringstream ss_; + + ss_ << "Invalid control char at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + } + + return ch_; + } + + static CharT decode_hex (state &state_) + { + // Skip over 'x' + state_.increment (); + + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex following \\x."); + } + + if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') || + (ch_ >= 'A' && ch_ <= 'F'))) + { + std::ostringstream ss_; + + ss_ << "Illegal char following \\x at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + std::size_t hex_ = 0; + + do + { + hex_ *= 16; + + if (ch_ >= '0' && ch_ <= '9') + { + hex_ += ch_ - '0'; + } + else if (ch_ >= 'a' && ch_ <= 'f') + { + hex_ += 10 + (ch_ - 'a'); + } + else + { + hex_ += 10 + (ch_ - 'A'); + } + + eos_ = state_.eos (); + + if (!eos_) + { + ch_ = *state_._curr; + + // Don't consume invalid chars! + if (((ch_ >= '0' && ch_ <= '9') || + (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F'))) + { + state_.increment (); + } + else + { + eos_ = true; + } + } + } while (!eos_); + + return static_cast<CharT> (hex_); + } + + static void charset_range (const bool chset_, state &state_, bool &eos_, + CharT &ch_, const CharT prev_, string &chars_) + { + if (chset_) + { + std::ostringstream ss_; + + ss_ << "Charset cannot form start of range preceding " + "index " << state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "following '-'."); + } + + CharT curr_ = 0; + + if (ch_ == '\\') + { + std::size_t str_len_ = 0; + + if (escape_sequence (state_, curr_, str_len_)) + { + std::ostringstream ss_; + + ss_ << "Charset cannot form end of range preceding index " + << state_.index () << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + } +/* + else if (ch_ == '[' && !state_.eos () && *state_._curr == ':') + { + std::ostringstream ss_; + + ss_ << "POSIX char class cannot form end of range at " + "index " << state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } +*/ + else + { + curr_ = ch_; + } + + eos_ = state_.next (ch_); + + // Covers preceding if and else + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "(missing ']')."); + } + + std::size_t start_ = static_cast<typename Traits::index_type> (prev_); + std::size_t end_ = static_cast<typename Traits::index_type> (curr_); + + // Semanic check + if (end_ < start_) + { + std::ostringstream ss_; + + ss_ << "Invalid range in charset preceding index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + chars_.reserve (chars_.size () + (end_ + 1 - start_)); + + for (; start_ <= end_; ++start_) + { + CharT ch_ = static_cast<CharT> (start_); + + if ((state_._flags & icase) && + (std::isupper (ch_, state_._locale) || + std::islower (ch_, state_._locale))) + { + CharT upper_ = std::toupper (ch_, state_._locale); + CharT lower_ = std::tolower (ch_, state_._locale); + + chars_ += (upper_); + chars_ += (lower_); + } + else + { + chars_ += (ch_); + } + } + } +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp new file mode 100644 index 0000000..35995ad --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_state.hpp @@ -0,0 +1,98 @@ +// tokeniser_state.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_RE_TOKENISER_STATE_HPP +#define BOOST_LEXER_RE_TOKENISER_STATE_HPP + +#include "../../consts.hpp" +#include <locale> +#include "../../size_t.hpp" +#include <stack> + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT> +struct basic_re_tokeniser_state +{ + const CharT * const _start; + const CharT * const _end; + const CharT *_curr; + regex_flags _flags; + std::stack<regex_flags> _flags_stack; + std::locale _locale; + long _paren_count; + bool _in_string; + bool _seen_BOL_assertion; + bool _seen_EOL_assertion; + + basic_re_tokeniser_state (const CharT *start_, const CharT * const end_, + const regex_flags flags_, const std::locale locale_) : + _start (start_), + _end (end_), + _curr (start_), + _flags (flags_), + _locale (locale_), + _paren_count (0), + _in_string (false), + _seen_BOL_assertion (false), + _seen_EOL_assertion (false) + { + } + + // prevent VC++ 7.1 warning: + const basic_re_tokeniser_state &operator = + (const basic_re_tokeniser_state &rhs_) + { + _start = rhs_._start; + _end = rhs_._end; + _curr = rhs_._curr; + _flags = rhs_._flags; + _locale = rhs_._locale; + _paren_count = rhs_._paren_count; + _in_string = rhs_._in_string; + _seen_BOL_assertion = rhs_._seen_BOL_assertion; + _seen_EOL_assertion = rhs_._seen_EOL_assertion; + return this; + } + + inline bool next (CharT &ch_) + { + if (_curr >= _end) + { + ch_ = 0; + return true; + } + else + { + ch_ = *_curr; + increment (); + return false; + } + } + + inline void increment () + { + ++_curr; + } + + inline std::size_t index () + { + return _curr - _start; + } + + inline bool eos () + { + return _curr >= _end; + } +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp new file mode 100644 index 0000000..c613e6a --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/end_node.hpp @@ -0,0 +1,90 @@ +// end_node.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_END_NODE_HPP +#define BOOST_LEXER_END_NODE_HPP + +#include "node.hpp" +#include "../../size_t.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +class end_node : public node +{ +public: + end_node (const std::size_t id_, const std::size_t unique_id_, + const std::size_t lexer_state_) : + node (false), + _id (id_), + _unique_id (unique_id_), + _lexer_state (lexer_state_) + { + node::_firstpos.push_back (this); + node::_lastpos.push_back (this); + } + + virtual ~end_node () + { + } + + virtual type what_type () const + { + return END; + } + + virtual bool traverse (const_node_stack &/*node_stack_*/, + bool_stack &/*perform_op_stack_*/) const + { + return false; + } + + virtual const node_vector &followpos () const + { + // _followpos is always empty..! + return _followpos; + } + + virtual bool end_state () const + { + return true; + } + + virtual std::size_t id () const + { + return _id; + } + + virtual std::size_t unique_id () const + { + return _unique_id; + } + + virtual std::size_t lexer_state () const + { + return _lexer_state; + } + +private: + std::size_t _id; + std::size_t _unique_id; + std::size_t _lexer_state; + node_vector _followpos; + + virtual void copy_node (node_ptr_vector &/*node_ptr_vector_*/, + node_stack &/*new_node_stack_*/, bool_stack &/*perform_op_stack_*/, + bool &/*down_*/) const + { + // Nothing to do, as end_nodes are not copied. + } +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/iteration_node.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/iteration_node.hpp new file mode 100644 index 0000000..6b39462 --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/iteration_node.hpp @@ -0,0 +1,90 @@ +// iteration_node.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_ITERATION_NODE_HPP +#define BOOST_LEXER_ITERATION_NODE_HPP + +#include "node.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +class iteration_node : public node +{ +public: + iteration_node (node *next_, const bool greedy_) : + node (true), + _next (next_), + _greedy (greedy_) + { + node_vector::iterator iter_; + node_vector::iterator end_; + + _next->append_firstpos (_firstpos); + _next->append_lastpos (_lastpos); + + for (iter_ = _lastpos.begin (), end_ = _lastpos.end (); + iter_ != end_; ++iter_) + { + (*iter_)->append_followpos (_firstpos); + } + + for (iter_ = _firstpos.begin (), end_ = _firstpos.end (); + iter_ != end_; ++iter_) + { + (*iter_)->greedy (greedy_); + } + } + + virtual ~iteration_node () + { + } + + virtual type what_type () const + { + return ITERATION; + } + + virtual bool traverse (const_node_stack &node_stack_, + bool_stack &perform_op_stack_) const + { + perform_op_stack_.push (true); + node_stack_.push (_next); + return true; + } + +private: + // Not owner of this pointer... + node *_next; + bool _greedy; + + virtual void copy_node (node_ptr_vector &node_ptr_vector_, + node_stack &new_node_stack_, bool_stack &perform_op_stack_, + bool &down_) const + { + if (perform_op_stack_.top ()) + { + node *ptr_ = new_node_stack_.top (); + + node_ptr_vector_->push_back (static_cast<iteration_node *>(0)); + node_ptr_vector_->back () = new iteration_node (ptr_, _greedy); + new_node_stack_.top () = node_ptr_vector_->back (); + } + else + { + down_ = true; + } + + perform_op_stack_.pop (); + } +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/leaf_node.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/leaf_node.hpp new file mode 100644 index 0000000..39ed98d --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/leaf_node.hpp @@ -0,0 +1,107 @@ +// leaf_node.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_LEAF_NODE_HPP +#define BOOST_LEXER_LEAF_NODE_HPP + +#include "../../consts.hpp" // null_token +#include "node.hpp" +#include "../../size_t.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +class leaf_node : public node +{ +public: + leaf_node (const std::size_t token_, const bool greedy_) : + node (token_ == null_token), + _token (token_), + _set_greedy (!greedy_), + _greedy (greedy_) + { + if (!_nullable) + { + _firstpos.push_back (this); + _lastpos.push_back (this); + } + } + + virtual ~leaf_node () + { + } + + virtual void append_followpos (const node_vector &followpos_) + { + for (node_vector::const_iterator iter_ = followpos_.begin (), + end_ = followpos_.end (); iter_ != end_; ++iter_) + { + _followpos.push_back (*iter_); + } + } + + virtual type what_type () const + { + return LEAF; + } + + virtual bool traverse (const_node_stack &/*node_stack_*/, + bool_stack &/*perform_op_stack_*/) const + { + return false; + } + + virtual std::size_t token () const + { + return _token; + } + + virtual void greedy (const bool greedy_) + { + if (!_set_greedy) + { + _greedy = greedy_; + _set_greedy = true; + } + } + + virtual bool greedy () const + { + return _greedy; + } + + virtual const node_vector &followpos () const + { + return _followpos; + } + + virtual node_vector &followpos () + { + return _followpos; + } + +private: + std::size_t _token; + bool _set_greedy; + bool _greedy; + node_vector _followpos; + + virtual void copy_node (node_ptr_vector &node_ptr_vector_, + node_stack &new_node_stack_, bool_stack &/*perform_op_stack_*/, + bool &/*down_*/) const + { + node_ptr_vector_->push_back (static_cast<leaf_node *>(0)); + node_ptr_vector_->back () = new leaf_node (_token, _greedy); + new_node_stack_.push (node_ptr_vector_->back ()); + } +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp new file mode 100644 index 0000000..1e36ccb --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/node.hpp @@ -0,0 +1,188 @@ +// node.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_NODE_HPP +#define BOOST_LEXER_NODE_HPP + +#include <boost/assert.hpp> +#include "../../containers/ptr_vector.hpp" +#include "../../runtime_error.hpp" +#include "../../size_t.hpp" +#include <stack> +#include <vector> + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +class node +{ +public: + enum type {LEAF, SEQUENCE, SELECTION, ITERATION, END}; + + typedef std::stack<bool> bool_stack; + typedef std::stack<node *> node_stack; + // stack and vector not owner of node pointers + typedef std::stack<const node *> const_node_stack; + typedef std::vector<node *> node_vector; + typedef ptr_vector<node> node_ptr_vector; + + node () : + _nullable (false) + { + } + + node (const bool nullable_) : + _nullable (nullable_) + { + } + + virtual ~node () + { + } + + bool nullable () const + { + return _nullable; + } + + void append_firstpos (node_vector &firstpos_) const + { + firstpos_.insert (firstpos_.end (), + _firstpos.begin (), _firstpos.end ()); + } + + void append_lastpos (node_vector &lastpos_) const + { + lastpos_.insert (lastpos_.end (), + _lastpos.begin (), _lastpos.end ()); + } + + virtual void append_followpos (const node_vector &/*followpos_*/) + { + throw runtime_error ("Internal error node::append_followpos()"); + } + + node *copy (node_ptr_vector &node_ptr_vector_) const + { + node *new_root_ = 0; + const_node_stack node_stack_; + bool_stack perform_op_stack_; + bool down_ = true; + node_stack new_node_stack_; + + node_stack_.push (this); + + while (!node_stack_.empty ()) + { + while (down_) + { + down_ = node_stack_.top ()->traverse (node_stack_, + perform_op_stack_); + } + + while (!down_ && !node_stack_.empty ()) + { + const node *top_ = node_stack_.top (); + + top_->copy_node (node_ptr_vector_, new_node_stack_, + perform_op_stack_, down_); + + if (!down_) node_stack_.pop (); + } + } + + BOOST_ASSERT(new_node_stack_.size () == 1); + new_root_ = new_node_stack_.top (); + new_node_stack_.pop (); + return new_root_; + } + + virtual type what_type () const = 0; + + virtual bool traverse (const_node_stack &node_stack_, + bool_stack &perform_op_stack_) const = 0; + + node_vector &firstpos () + { + return _firstpos; + } + + const node_vector &firstpos () const + { + return _firstpos; + } + + // _lastpos modified externally, so not const & + node_vector &lastpos () + { + return _lastpos; + } + + virtual bool end_state () const + { + return false; + } + + virtual std::size_t id () const + { + throw runtime_error ("Internal error node::id()"); + } + + virtual std::size_t unique_id () const + { + throw runtime_error ("Internal error node::unique_id()"); + } + + virtual std::size_t lexer_state () const + { + throw runtime_error ("Internal error node::state()"); + } + + virtual std::size_t token () const + { + throw runtime_error ("Internal error node::token()"); + } + + virtual void greedy (const bool /*greedy_*/) + { + throw runtime_error ("Internal error node::token(bool)"); + } + + virtual bool greedy () const + { + throw runtime_error ("Internal error node::token()"); + } + + virtual const node_vector &followpos () const + { + throw runtime_error ("Internal error node::followpos()"); + } + + virtual node_vector &followpos () + { + throw runtime_error ("Internal error node::followpos()"); + } + +protected: + const bool _nullable; + node_vector _firstpos; + node_vector _lastpos; + + virtual void copy_node (node_ptr_vector &node_ptr_vector_, + node_stack &new_node_stack_, bool_stack &perform_op_stack_, + bool &down_) const = 0; + +private: + node (node const &); // No copy construction. + node &operator = (node const &); // No assignment. +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/selection_node.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/selection_node.hpp new file mode 100644 index 0000000..67e5ea0 --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/selection_node.hpp @@ -0,0 +1,94 @@ +// selection_node.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_SELECTION_NODE_HPP +#define BOOST_LEXER_SELECTION_NODE_HPP + +#include "node.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +class selection_node : public node +{ +public: + selection_node (node *left_, node *right_) : + node (left_->nullable () || right_->nullable ()), + _left (left_), + _right (right_) + { + _left->append_firstpos (_firstpos); + _right->append_firstpos (_firstpos); + _left->append_lastpos (_lastpos); + _right->append_lastpos (_lastpos); + } + + virtual ~selection_node () + { + } + + virtual type what_type () const + { + return SELECTION; + } + + virtual bool traverse (const_node_stack &node_stack_, + bool_stack &perform_op_stack_) const + { + perform_op_stack_.push (true); + + switch (_right->what_type ()) + { + case SEQUENCE: + case SELECTION: + case ITERATION: + perform_op_stack_.push (false); + break; + default: + break; + } + + node_stack_.push (_right); + node_stack_.push (_left); + return true; + } + +private: + // Not owner of these pointers... + node *_left; + node *_right; + + virtual void copy_node (node_ptr_vector &node_ptr_vector_, + node_stack &new_node_stack_, bool_stack &perform_op_stack_, + bool &down_) const + { + if (perform_op_stack_.top ()) + { + node *rhs_ = new_node_stack_.top (); + + new_node_stack_.pop (); + + node *lhs_ = new_node_stack_.top (); + + node_ptr_vector_->push_back (static_cast<selection_node *>(0)); + node_ptr_vector_->back () = new selection_node (lhs_, rhs_); + new_node_stack_.top () = node_ptr_vector_->back (); + } + else + { + down_ = true; + } + + perform_op_stack_.pop (); + } +}; +} +} +} + +#endif diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/sequence_node.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/sequence_node.hpp new file mode 100644 index 0000000..471fa68 --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tree/sequence_node.hpp @@ -0,0 +1,112 @@ +// sequence_node.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_SEQUENCE_NODE_HPP +#define BOOST_LEXER_SEQUENCE_NODE_HPP + +#include "node.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +class sequence_node : public node +{ +public: + sequence_node (node *left_, node *right_) : + node (left_->nullable () && right_->nullable ()), + _left (left_), + _right (right_) + { + _left->append_firstpos (_firstpos); + + if (_left->nullable ()) + { + _right->append_firstpos (_firstpos); + } + + if (_right->nullable ()) + { + _left->append_lastpos (_lastpos); + } + + _right->append_lastpos (_lastpos); + + node_vector &lastpos_ = _left->lastpos (); + const node_vector &firstpos_ = _right->firstpos (); + + for (node_vector::iterator iter_ = lastpos_.begin (), + end_ = lastpos_.end (); iter_ != end_; ++iter_) + { + (*iter_)->append_followpos (firstpos_); + } + } + + virtual ~sequence_node () + { + } + + virtual type what_type () const + { + return SEQUENCE; + } + + virtual bool traverse (const_node_stack &node_stack_, + bool_stack &perform_op_stack_) const + { + perform_op_stack_.push (true); + + switch (_right->what_type ()) + { + case SEQUENCE: + case SELECTION: + case ITERATION: + perform_op_stack_.push (false); + break; + default: + break; + } + + node_stack_.push (_right); + node_stack_.push (_left); + return true; + } + +private: + // Not owner of these pointers... + node *_left; + node *_right; + + virtual void copy_node (node_ptr_vector &node_ptr_vector_, + node_stack &new_node_stack_, bool_stack &perform_op_stack_, + bool &down_) const + { + if (perform_op_stack_.top ()) + { + node *rhs_ = new_node_stack_.top (); + + new_node_stack_.pop (); + + node *lhs_ = new_node_stack_.top (); + + node_ptr_vector_->push_back (static_cast<sequence_node *>(0)); + node_ptr_vector_->back () = new sequence_node (lhs_, rhs_); + new_node_stack_.top () = node_ptr_vector_->back (); + } + else + { + down_ = true; + } + + perform_op_stack_.pop (); + } +}; +} +} +} + +#endif |