diff options
Diffstat (limited to '3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp')
-rw-r--r-- | 3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp | 511 |
1 files changed, 511 insertions, 0 deletions
diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp new file mode 100644 index 0000000..9000e5e --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp @@ -0,0 +1,511 @@ +// parser.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_PARSER_HPP +#define BOOST_LEXER_PARSER_HPP + +#include <boost/assert.hpp> +#include "tree/end_node.hpp" +#include "tree/iteration_node.hpp" +#include "tree/leaf_node.hpp" +#include "../runtime_error.hpp" +#include "tree/selection_node.hpp" +#include "tree/sequence_node.hpp" +#include "../size_t.hpp" +#include "tokeniser/re_tokeniser.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT> +class basic_parser +{ +public: + typedef basic_re_tokeniser<CharT> tokeniser; + typedef typename tokeniser::string string; + typedef std::map<string, const node *> macro_map; + typedef node::node_ptr_vector node_ptr_vector; + typedef typename tokeniser::num_token token; + +/* + General principles of regex parsing: + - Every regex is a sequence of sub-regexes. + - Regexes consist of operands and operators + - All operators decompose to sequence, selection ('|') and iteration ('*') + - Regex tokens are stored on the stack. + - When a complete sequence of regex tokens is on the stack it is processed. + +Grammar: + +<REGEX> -> <OREXP> +<OREXP> -> <SEQUENCE> | <OREXP>'|'<SEQUENCE> +<SEQUENCE> -> <SUB> +<SUB> -> <EXPRESSION> | <SUB><EXPRESSION> +<EXPRESSION> -> <REPEAT> +<REPEAT> -> charset | macro | '('<REGEX>')' | <REPEAT><DUPLICATE> +<DUPLICATE> -> '?' | '*' | '+' | '{n[,[m]]}' +*/ + static node *parse (const CharT *start_, const CharT * const end_, + const std::size_t id_, const std::size_t unique_id_, + const std::size_t dfa_state_, const regex_flags flags_, + const std::locale &locale_, node_ptr_vector &node_ptr_vector_, + const macro_map ¯omap_, typename tokeniser::token_map &map_, + bool &seen_BOL_assertion_, bool &seen_EOL_assertion_) + { + node *root_ = 0; + state state_ (start_, end_, flags_, locale_); + token lhs_token_; + token rhs_token_; + token_stack token_stack_; + tree_node_stack tree_node_stack_; + char action_ = 0; + + token_stack_.push (rhs_token_); + tokeniser::next (state_, map_, rhs_token_); + + do + { + lhs_token_ = token_stack_.top (); + action_ = lhs_token_.precedence (rhs_token_._type); + + switch (action_) + { + case '<': + case '=': + token_stack_.push (rhs_token_); + tokeniser::next (state_, map_, rhs_token_); + break; + case '>': + reduce (token_stack_, macromap_, node_ptr_vector_, + tree_node_stack_); + break; + default: + std::ostringstream ss_; + + ss_ << "A syntax error occured: '" << + lhs_token_.precedence_string () << + "' against '" << rhs_token_.precedence_string () << + "' at index " << state_.index () << "."; + throw runtime_error (ss_.str ().c_str ()); + break; + } + } while (!token_stack_.empty ()); + + if (tree_node_stack_.empty ()) + { + throw runtime_error ("Empty rules are not allowed."); + } + + BOOST_ASSERT(tree_node_stack_.size () == 1); + + node *lhs_node_ = tree_node_stack_.top (); + + tree_node_stack_.pop (); + + if (id_ == 0) + { + // Macros have no end state... + root_ = lhs_node_; + } + else + { + node_ptr_vector_->push_back (static_cast<end_node *>(0)); + + node *rhs_node_ = new end_node (id_, unique_id_, dfa_state_); + + node_ptr_vector_->back () = rhs_node_; + node_ptr_vector_->push_back (static_cast<sequence_node *>(0)); + node_ptr_vector_->back () = new sequence_node + (lhs_node_, rhs_node_); + root_ = node_ptr_vector_->back (); + } + + // Done this way as bug in VC++ 6 prevents |= operator working + // properly! + if (state_._seen_BOL_assertion) seen_BOL_assertion_ = true; + + if (state_._seen_EOL_assertion) seen_EOL_assertion_ = true; + + return root_; + } + +private: + typedef typename tokeniser::state state; + typedef std::stack<token> token_stack; + typedef node::node_stack tree_node_stack; + + static void reduce (token_stack &token_stack_, + const macro_map ¯omap_, node_ptr_vector &node_vector_ptr_, + tree_node_stack &tree_node_stack_) + { + typename tokeniser::num_token lhs_; + typename tokeniser::num_token rhs_; + token_stack handle_; + char action_ = 0; + + do + { + rhs_ = token_stack_.top (); + token_stack_.pop (); + handle_.push (rhs_); + + if (!token_stack_.empty ()) + { + lhs_ = token_stack_.top (); + action_ = lhs_.precedence (rhs_._type); + } + } while (!token_stack_.empty () && action_ == '='); + + BOOST_ASSERT(token_stack_.empty () || action_ == '<'); + + switch (rhs_._type) + { + case token::BEGIN: + // finished processing so exit + break; + case token::REGEX: + // finished parsing, nothing to do + break; + case token::OREXP: + orexp (handle_, token_stack_, node_vector_ptr_, tree_node_stack_); + break; + case token::SEQUENCE: + token_stack_.push (token::OREXP); + break; + case token::SUB: + sub (handle_, token_stack_, node_vector_ptr_, tree_node_stack_); + break; + case token::EXPRESSION: + token_stack_.push (token::SUB); + break; + case token::REPEAT: + repeat (handle_, token_stack_); + break; + case token::CHARSET: + charset (handle_, token_stack_, node_vector_ptr_, + tree_node_stack_); + break; + case token::MACRO: + macro (handle_, token_stack_, macromap_, node_vector_ptr_, + tree_node_stack_); + break; + case token::OPENPAREN: + openparen (handle_, token_stack_); + break; + case token::OPT: + case token::AOPT: + optional (rhs_._type == token::OPT, node_vector_ptr_, + tree_node_stack_); + token_stack_.push (token::DUP); + break; + case token::ZEROORMORE: + case token::AZEROORMORE: + zero_or_more (rhs_._type == token::ZEROORMORE, node_vector_ptr_, + tree_node_stack_); + token_stack_.push (token::DUP); + break; + case token::ONEORMORE: + case token::AONEORMORE: + one_or_more (rhs_._type == token::ONEORMORE, node_vector_ptr_, + tree_node_stack_); + token_stack_.push (token::DUP); + break; + case token::REPEATN: + case token::AREPEATN: + repeatn (rhs_._type == token::REPEATN, handle_.top (), + node_vector_ptr_, tree_node_stack_); + token_stack_.push (token::DUP); + break; + default: + throw runtime_error + ("Internal error regex_parser::reduce"); + break; + } + } + + static void orexp (token_stack &handle_, token_stack &token_stack_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + BOOST_ASSERT(handle_.top ()._type == token::OREXP && + (handle_.size () == 1 || handle_.size () == 3)); + + if (handle_.size () == 1) + { + token_stack_.push (token::REGEX); + } + else + { + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::OR); + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::SEQUENCE); + perform_or (node_ptr_vector_, tree_node_stack_); + token_stack_.push (token::OREXP); + } + } + + static void sub (token_stack &handle_, token_stack &token_stack_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + BOOST_ASSERT(handle_.top ()._type == token::SUB && + (handle_.size () == 1 || handle_.size () == 2)); + + if (handle_.size () == 1) + { + token_stack_.push (token::SEQUENCE); + } + else + { + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::EXPRESSION); + // perform join + sequence (node_ptr_vector_, tree_node_stack_); + token_stack_.push (token::SUB); + } + } + + static void repeat (token_stack &handle_, token_stack &token_stack_) + { + BOOST_ASSERT(handle_.top ()._type == token::REPEAT && + handle_.size () >= 1 && handle_.size () <= 3); + + if (handle_.size () == 1) + { + token_stack_.push (token::EXPRESSION); + } + else + { + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::DUP); + token_stack_.push (token::REPEAT); + } + } + + static void charset (token_stack &handle_, token_stack &token_stack_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + BOOST_ASSERT(handle_.top ()._type == token::CHARSET && + handle_.size () == 1); + // store charset + node_ptr_vector_->push_back (static_cast<leaf_node *>(0)); + + const size_t id_ = handle_.top ()._id; + + node_ptr_vector_->back () = new leaf_node (id_, true); + tree_node_stack_.push (node_ptr_vector_->back ()); + token_stack_.push (token::REPEAT); + } + + static void macro (token_stack &handle_, token_stack &token_stack_, + const macro_map ¯omap_, node_ptr_vector &node_ptr_vector_, + tree_node_stack &tree_node_stack_) + { + token &top_ = handle_.top (); + + BOOST_ASSERT(top_._type == token::MACRO && handle_.size () == 1); + + typename macro_map::const_iterator iter_ = + macromap_.find (top_._macro); + + if (iter_ == macromap_.end ()) + { + const CharT *name_ = top_._macro; + std::basic_stringstream<CharT> ss_; + std::ostringstream os_; + + os_ << "Unknown MACRO name '"; + + while (*name_) + { + os_ << ss_.narrow (*name_++, ' '); + } + + os_ << "'."; + throw runtime_error (os_.str ()); + } + + tree_node_stack_.push (iter_->second->copy (node_ptr_vector_)); + token_stack_.push (token::REPEAT); + } + + static void openparen (token_stack &handle_, token_stack &token_stack_) + { + BOOST_ASSERT(handle_.top ()._type == token::OPENPAREN && + handle_.size () == 3); + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::REGEX); + handle_.pop (); + BOOST_ASSERT(handle_.top ()._type == token::CLOSEPAREN); + token_stack_.push (token::REPEAT); + } + + static void perform_or (node_ptr_vector &node_ptr_vector_, + tree_node_stack &tree_node_stack_) + { + // perform or + node *rhs_ = tree_node_stack_.top (); + + tree_node_stack_.pop (); + + node *lhs_ = tree_node_stack_.top (); + + node_ptr_vector_->push_back (static_cast<selection_node *>(0)); + node_ptr_vector_->back () = new selection_node (lhs_, rhs_); + tree_node_stack_.top () = node_ptr_vector_->back (); + } + + static void sequence (node_ptr_vector &node_ptr_vector_, + tree_node_stack &tree_node_stack_) + { + node *rhs_ = tree_node_stack_.top (); + + tree_node_stack_.pop (); + + node *lhs_ = tree_node_stack_.top (); + + node_ptr_vector_->push_back (static_cast<sequence_node *>(0)); + node_ptr_vector_->back () = new sequence_node (lhs_, rhs_); + tree_node_stack_.top () = node_ptr_vector_->back (); + } + + static void optional (const bool greedy_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + // perform ? + node *lhs_ = tree_node_stack_.top (); + // You don't know if lhs_ is a leaf_node, so get firstpos. + node::node_vector &firstpos_ = lhs_->firstpos (); + + for (node::node_vector::iterator iter_ = firstpos_.begin (), + end_ = firstpos_.end (); iter_ != end_; ++iter_) + { + // These are leaf_nodes! + (*iter_)->greedy (greedy_); + } + + node_ptr_vector_->push_back (static_cast<leaf_node *>(0)); + + node *rhs_ = new leaf_node (null_token, greedy_); + + node_ptr_vector_->back () = rhs_; + node_ptr_vector_->push_back (static_cast<selection_node *>(0)); + node_ptr_vector_->back () = new selection_node (lhs_, rhs_); + tree_node_stack_.top () = node_ptr_vector_->back (); + } + + static void zero_or_more (const bool greedy_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + // perform * + node *ptr_ = tree_node_stack_.top (); + + node_ptr_vector_->push_back (static_cast<iteration_node *>(0)); + node_ptr_vector_->back () = new iteration_node (ptr_, greedy_); + tree_node_stack_.top () = node_ptr_vector_->back (); + } + + static void one_or_more (const bool greedy_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + // perform + + node *lhs_ = tree_node_stack_.top (); + node *copy_ = lhs_->copy (node_ptr_vector_); + + node_ptr_vector_->push_back (static_cast<iteration_node *>(0)); + + node *rhs_ = new iteration_node (copy_, greedy_); + + node_ptr_vector_->back () = rhs_; + node_ptr_vector_->push_back (static_cast<sequence_node *>(0)); + node_ptr_vector_->back () = new sequence_node (lhs_, rhs_); + tree_node_stack_.top () = node_ptr_vector_->back (); + } + + // This is one of the most mind bending routines in this code... + static void repeatn (const bool greedy_, const token &token_, + node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_) + { + // perform {n[,[m]]} + // Semantic checks have already been performed. + // {0,} = * + // {0,1} = ? + // {1,} = + + // therefore we do not check for these cases. + if (!(token_._min == 1 && !token_._comma)) + { + const std::size_t top_ = token_._min > 0 ? + token_._min : token_._max; + + if (token_._min == 0) + { + optional (greedy_, node_ptr_vector_, tree_node_stack_); + } + + node *prev_ = tree_node_stack_.top ()->copy (node_ptr_vector_); + node *curr_ = 0; + + for (std::size_t i_ = 2; i_ < top_; ++i_) + { + curr_ = prev_->copy (node_ptr_vector_); + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + sequence (node_ptr_vector_, tree_node_stack_); + prev_ = curr_; + } + + if (token_._comma && token_._min > 0) + { + if (token_._min > 1) + { + curr_ = prev_->copy (node_ptr_vector_); + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + sequence (node_ptr_vector_, tree_node_stack_); + prev_ = curr_; + } + + if (token_._comma && token_._max) + { + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + optional (greedy_, node_ptr_vector_, tree_node_stack_); + prev_ = tree_node_stack_.top (); + tree_node_stack_.pop (); + + const std::size_t count_ = token_._max - token_._min; + + for (std::size_t i_ = 1; i_ < count_; ++i_) + { + curr_ = prev_->copy (node_ptr_vector_); + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + sequence (node_ptr_vector_, tree_node_stack_); + prev_ = curr_; + } + } + else + { + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + zero_or_more (greedy_, node_ptr_vector_, tree_node_stack_); + prev_ = tree_node_stack_.top (); + tree_node_stack_.pop (); + } + } + + tree_node_stack_.push (static_cast<node *>(0)); + tree_node_stack_.top () = prev_; + sequence (node_ptr_vector_, tree_node_stack_); + } + } +}; +} +} +} + +#endif |