summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp')
-rw-r--r--3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp511
1 files changed, 511 insertions, 0 deletions
diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp
new file mode 100644
index 0000000..9000e5e
--- /dev/null
+++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/parser.hpp
@@ -0,0 +1,511 @@
+// parser.hpp
+// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_PARSER_HPP
+#define BOOST_LEXER_PARSER_HPP
+
+#include <boost/assert.hpp>
+#include "tree/end_node.hpp"
+#include "tree/iteration_node.hpp"
+#include "tree/leaf_node.hpp"
+#include "../runtime_error.hpp"
+#include "tree/selection_node.hpp"
+#include "tree/sequence_node.hpp"
+#include "../size_t.hpp"
+#include "tokeniser/re_tokeniser.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+template<typename CharT>
+class basic_parser
+{
+public:
+ typedef basic_re_tokeniser<CharT> tokeniser;
+ typedef typename tokeniser::string string;
+ typedef std::map<string, const node *> macro_map;
+ typedef node::node_ptr_vector node_ptr_vector;
+ typedef typename tokeniser::num_token token;
+
+/*
+ General principles of regex parsing:
+ - Every regex is a sequence of sub-regexes.
+ - Regexes consist of operands and operators
+ - All operators decompose to sequence, selection ('|') and iteration ('*')
+ - Regex tokens are stored on the stack.
+ - When a complete sequence of regex tokens is on the stack it is processed.
+
+Grammar:
+
+<REGEX> -> <OREXP>
+<OREXP> -> <SEQUENCE> | <OREXP>'|'<SEQUENCE>
+<SEQUENCE> -> <SUB>
+<SUB> -> <EXPRESSION> | <SUB><EXPRESSION>
+<EXPRESSION> -> <REPEAT>
+<REPEAT> -> charset | macro | '('<REGEX>')' | <REPEAT><DUPLICATE>
+<DUPLICATE> -> '?' | '*' | '+' | '{n[,[m]]}'
+*/
+ static node *parse (const CharT *start_, const CharT * const end_,
+ const std::size_t id_, const std::size_t unique_id_,
+ const std::size_t dfa_state_, const regex_flags flags_,
+ const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
+ const macro_map &macromap_, typename tokeniser::token_map &map_,
+ bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
+ {
+ node *root_ = 0;
+ state state_ (start_, end_, flags_, locale_);
+ token lhs_token_;
+ token rhs_token_;
+ token_stack token_stack_;
+ tree_node_stack tree_node_stack_;
+ char action_ = 0;
+
+ token_stack_.push (rhs_token_);
+ tokeniser::next (state_, map_, rhs_token_);
+
+ do
+ {
+ lhs_token_ = token_stack_.top ();
+ action_ = lhs_token_.precedence (rhs_token_._type);
+
+ switch (action_)
+ {
+ case '<':
+ case '=':
+ token_stack_.push (rhs_token_);
+ tokeniser::next (state_, map_, rhs_token_);
+ break;
+ case '>':
+ reduce (token_stack_, macromap_, node_ptr_vector_,
+ tree_node_stack_);
+ break;
+ default:
+ std::ostringstream ss_;
+
+ ss_ << "A syntax error occured: '" <<
+ lhs_token_.precedence_string () <<
+ "' against '" << rhs_token_.precedence_string () <<
+ "' at index " << state_.index () << ".";
+ throw runtime_error (ss_.str ().c_str ());
+ break;
+ }
+ } while (!token_stack_.empty ());
+
+ if (tree_node_stack_.empty ())
+ {
+ throw runtime_error ("Empty rules are not allowed.");
+ }
+
+ BOOST_ASSERT(tree_node_stack_.size () == 1);
+
+ node *lhs_node_ = tree_node_stack_.top ();
+
+ tree_node_stack_.pop ();
+
+ if (id_ == 0)
+ {
+ // Macros have no end state...
+ root_ = lhs_node_;
+ }
+ else
+ {
+ node_ptr_vector_->push_back (static_cast<end_node *>(0));
+
+ node *rhs_node_ = new end_node (id_, unique_id_, dfa_state_);
+
+ node_ptr_vector_->back () = rhs_node_;
+ node_ptr_vector_->push_back (static_cast<sequence_node *>(0));
+ node_ptr_vector_->back () = new sequence_node
+ (lhs_node_, rhs_node_);
+ root_ = node_ptr_vector_->back ();
+ }
+
+ // Done this way as bug in VC++ 6 prevents |= operator working
+ // properly!
+ if (state_._seen_BOL_assertion) seen_BOL_assertion_ = true;
+
+ if (state_._seen_EOL_assertion) seen_EOL_assertion_ = true;
+
+ return root_;
+ }
+
+private:
+ typedef typename tokeniser::state state;
+ typedef std::stack<token> token_stack;
+ typedef node::node_stack tree_node_stack;
+
+ static void reduce (token_stack &token_stack_,
+ const macro_map &macromap_, node_ptr_vector &node_vector_ptr_,
+ tree_node_stack &tree_node_stack_)
+ {
+ typename tokeniser::num_token lhs_;
+ typename tokeniser::num_token rhs_;
+ token_stack handle_;
+ char action_ = 0;
+
+ do
+ {
+ rhs_ = token_stack_.top ();
+ token_stack_.pop ();
+ handle_.push (rhs_);
+
+ if (!token_stack_.empty ())
+ {
+ lhs_ = token_stack_.top ();
+ action_ = lhs_.precedence (rhs_._type);
+ }
+ } while (!token_stack_.empty () && action_ == '=');
+
+ BOOST_ASSERT(token_stack_.empty () || action_ == '<');
+
+ switch (rhs_._type)
+ {
+ case token::BEGIN:
+ // finished processing so exit
+ break;
+ case token::REGEX:
+ // finished parsing, nothing to do
+ break;
+ case token::OREXP:
+ orexp (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
+ break;
+ case token::SEQUENCE:
+ token_stack_.push (token::OREXP);
+ break;
+ case token::SUB:
+ sub (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
+ break;
+ case token::EXPRESSION:
+ token_stack_.push (token::SUB);
+ break;
+ case token::REPEAT:
+ repeat (handle_, token_stack_);
+ break;
+ case token::CHARSET:
+ charset (handle_, token_stack_, node_vector_ptr_,
+ tree_node_stack_);
+ break;
+ case token::MACRO:
+ macro (handle_, token_stack_, macromap_, node_vector_ptr_,
+ tree_node_stack_);
+ break;
+ case token::OPENPAREN:
+ openparen (handle_, token_stack_);
+ break;
+ case token::OPT:
+ case token::AOPT:
+ optional (rhs_._type == token::OPT, node_vector_ptr_,
+ tree_node_stack_);
+ token_stack_.push (token::DUP);
+ break;
+ case token::ZEROORMORE:
+ case token::AZEROORMORE:
+ zero_or_more (rhs_._type == token::ZEROORMORE, node_vector_ptr_,
+ tree_node_stack_);
+ token_stack_.push (token::DUP);
+ break;
+ case token::ONEORMORE:
+ case token::AONEORMORE:
+ one_or_more (rhs_._type == token::ONEORMORE, node_vector_ptr_,
+ tree_node_stack_);
+ token_stack_.push (token::DUP);
+ break;
+ case token::REPEATN:
+ case token::AREPEATN:
+ repeatn (rhs_._type == token::REPEATN, handle_.top (),
+ node_vector_ptr_, tree_node_stack_);
+ token_stack_.push (token::DUP);
+ break;
+ default:
+ throw runtime_error
+ ("Internal error regex_parser::reduce");
+ break;
+ }
+ }
+
+ static void orexp (token_stack &handle_, token_stack &token_stack_,
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ BOOST_ASSERT(handle_.top ()._type == token::OREXP &&
+ (handle_.size () == 1 || handle_.size () == 3));
+
+ if (handle_.size () == 1)
+ {
+ token_stack_.push (token::REGEX);
+ }
+ else
+ {
+ handle_.pop ();
+ BOOST_ASSERT(handle_.top ()._type == token::OR);
+ handle_.pop ();
+ BOOST_ASSERT(handle_.top ()._type == token::SEQUENCE);
+ perform_or (node_ptr_vector_, tree_node_stack_);
+ token_stack_.push (token::OREXP);
+ }
+ }
+
+ static void sub (token_stack &handle_, token_stack &token_stack_,
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ BOOST_ASSERT(handle_.top ()._type == token::SUB &&
+ (handle_.size () == 1 || handle_.size () == 2));
+
+ if (handle_.size () == 1)
+ {
+ token_stack_.push (token::SEQUENCE);
+ }
+ else
+ {
+ handle_.pop ();
+ BOOST_ASSERT(handle_.top ()._type == token::EXPRESSION);
+ // perform join
+ sequence (node_ptr_vector_, tree_node_stack_);
+ token_stack_.push (token::SUB);
+ }
+ }
+
+ static void repeat (token_stack &handle_, token_stack &token_stack_)
+ {
+ BOOST_ASSERT(handle_.top ()._type == token::REPEAT &&
+ handle_.size () >= 1 && handle_.size () <= 3);
+
+ if (handle_.size () == 1)
+ {
+ token_stack_.push (token::EXPRESSION);
+ }
+ else
+ {
+ handle_.pop ();
+ BOOST_ASSERT(handle_.top ()._type == token::DUP);
+ token_stack_.push (token::REPEAT);
+ }
+ }
+
+ static void charset (token_stack &handle_, token_stack &token_stack_,
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ BOOST_ASSERT(handle_.top ()._type == token::CHARSET &&
+ handle_.size () == 1);
+ // store charset
+ node_ptr_vector_->push_back (static_cast<leaf_node *>(0));
+
+ const size_t id_ = handle_.top ()._id;
+
+ node_ptr_vector_->back () = new leaf_node (id_, true);
+ tree_node_stack_.push (node_ptr_vector_->back ());
+ token_stack_.push (token::REPEAT);
+ }
+
+ static void macro (token_stack &handle_, token_stack &token_stack_,
+ const macro_map &macromap_, node_ptr_vector &node_ptr_vector_,
+ tree_node_stack &tree_node_stack_)
+ {
+ token &top_ = handle_.top ();
+
+ BOOST_ASSERT(top_._type == token::MACRO && handle_.size () == 1);
+
+ typename macro_map::const_iterator iter_ =
+ macromap_.find (top_._macro);
+
+ if (iter_ == macromap_.end ())
+ {
+ const CharT *name_ = top_._macro;
+ std::basic_stringstream<CharT> ss_;
+ std::ostringstream os_;
+
+ os_ << "Unknown MACRO name '";
+
+ while (*name_)
+ {
+ os_ << ss_.narrow (*name_++, ' ');
+ }
+
+ os_ << "'.";
+ throw runtime_error (os_.str ());
+ }
+
+ tree_node_stack_.push (iter_->second->copy (node_ptr_vector_));
+ token_stack_.push (token::REPEAT);
+ }
+
+ static void openparen (token_stack &handle_, token_stack &token_stack_)
+ {
+ BOOST_ASSERT(handle_.top ()._type == token::OPENPAREN &&
+ handle_.size () == 3);
+ handle_.pop ();
+ BOOST_ASSERT(handle_.top ()._type == token::REGEX);
+ handle_.pop ();
+ BOOST_ASSERT(handle_.top ()._type == token::CLOSEPAREN);
+ token_stack_.push (token::REPEAT);
+ }
+
+ static void perform_or (node_ptr_vector &node_ptr_vector_,
+ tree_node_stack &tree_node_stack_)
+ {
+ // perform or
+ node *rhs_ = tree_node_stack_.top ();
+
+ tree_node_stack_.pop ();
+
+ node *lhs_ = tree_node_stack_.top ();
+
+ node_ptr_vector_->push_back (static_cast<selection_node *>(0));
+ node_ptr_vector_->back () = new selection_node (lhs_, rhs_);
+ tree_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ static void sequence (node_ptr_vector &node_ptr_vector_,
+ tree_node_stack &tree_node_stack_)
+ {
+ node *rhs_ = tree_node_stack_.top ();
+
+ tree_node_stack_.pop ();
+
+ node *lhs_ = tree_node_stack_.top ();
+
+ node_ptr_vector_->push_back (static_cast<sequence_node *>(0));
+ node_ptr_vector_->back () = new sequence_node (lhs_, rhs_);
+ tree_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ static void optional (const bool greedy_,
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ // perform ?
+ node *lhs_ = tree_node_stack_.top ();
+ // You don't know if lhs_ is a leaf_node, so get firstpos.
+ node::node_vector &firstpos_ = lhs_->firstpos ();
+
+ for (node::node_vector::iterator iter_ = firstpos_.begin (),
+ end_ = firstpos_.end (); iter_ != end_; ++iter_)
+ {
+ // These are leaf_nodes!
+ (*iter_)->greedy (greedy_);
+ }
+
+ node_ptr_vector_->push_back (static_cast<leaf_node *>(0));
+
+ node *rhs_ = new leaf_node (null_token, greedy_);
+
+ node_ptr_vector_->back () = rhs_;
+ node_ptr_vector_->push_back (static_cast<selection_node *>(0));
+ node_ptr_vector_->back () = new selection_node (lhs_, rhs_);
+ tree_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ static void zero_or_more (const bool greedy_,
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ // perform *
+ node *ptr_ = tree_node_stack_.top ();
+
+ node_ptr_vector_->push_back (static_cast<iteration_node *>(0));
+ node_ptr_vector_->back () = new iteration_node (ptr_, greedy_);
+ tree_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ static void one_or_more (const bool greedy_,
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ // perform +
+ node *lhs_ = tree_node_stack_.top ();
+ node *copy_ = lhs_->copy (node_ptr_vector_);
+
+ node_ptr_vector_->push_back (static_cast<iteration_node *>(0));
+
+ node *rhs_ = new iteration_node (copy_, greedy_);
+
+ node_ptr_vector_->back () = rhs_;
+ node_ptr_vector_->push_back (static_cast<sequence_node *>(0));
+ node_ptr_vector_->back () = new sequence_node (lhs_, rhs_);
+ tree_node_stack_.top () = node_ptr_vector_->back ();
+ }
+
+ // This is one of the most mind bending routines in this code...
+ static void repeatn (const bool greedy_, const token &token_,
+ node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
+ {
+ // perform {n[,[m]]}
+ // Semantic checks have already been performed.
+ // {0,} = *
+ // {0,1} = ?
+ // {1,} = +
+ // therefore we do not check for these cases.
+ if (!(token_._min == 1 && !token_._comma))
+ {
+ const std::size_t top_ = token_._min > 0 ?
+ token_._min : token_._max;
+
+ if (token_._min == 0)
+ {
+ optional (greedy_, node_ptr_vector_, tree_node_stack_);
+ }
+
+ node *prev_ = tree_node_stack_.top ()->copy (node_ptr_vector_);
+ node *curr_ = 0;
+
+ for (std::size_t i_ = 2; i_ < top_; ++i_)
+ {
+ curr_ = prev_->copy (node_ptr_vector_);
+ tree_node_stack_.push (static_cast<node *>(0));
+ tree_node_stack_.top () = prev_;
+ sequence (node_ptr_vector_, tree_node_stack_);
+ prev_ = curr_;
+ }
+
+ if (token_._comma && token_._min > 0)
+ {
+ if (token_._min > 1)
+ {
+ curr_ = prev_->copy (node_ptr_vector_);
+ tree_node_stack_.push (static_cast<node *>(0));
+ tree_node_stack_.top () = prev_;
+ sequence (node_ptr_vector_, tree_node_stack_);
+ prev_ = curr_;
+ }
+
+ if (token_._comma && token_._max)
+ {
+ tree_node_stack_.push (static_cast<node *>(0));
+ tree_node_stack_.top () = prev_;
+ optional (greedy_, node_ptr_vector_, tree_node_stack_);
+ prev_ = tree_node_stack_.top ();
+ tree_node_stack_.pop ();
+
+ const std::size_t count_ = token_._max - token_._min;
+
+ for (std::size_t i_ = 1; i_ < count_; ++i_)
+ {
+ curr_ = prev_->copy (node_ptr_vector_);
+ tree_node_stack_.push (static_cast<node *>(0));
+ tree_node_stack_.top () = prev_;
+ sequence (node_ptr_vector_, tree_node_stack_);
+ prev_ = curr_;
+ }
+ }
+ else
+ {
+ tree_node_stack_.push (static_cast<node *>(0));
+ tree_node_stack_.top () = prev_;
+ zero_or_more (greedy_, node_ptr_vector_, tree_node_stack_);
+ prev_ = tree_node_stack_.top ();
+ tree_node_stack_.pop ();
+ }
+ }
+
+ tree_node_stack_.push (static_cast<node *>(0));
+ tree_node_stack_.top () = prev_;
+ sequence (node_ptr_vector_, tree_node_stack_);
+ }
+ }
+};
+}
+}
+}
+
+#endif