diff options
Diffstat (limited to '3rdParty/Boost/src/boost/spirit/home/lex/lexer/lexer.hpp')
-rw-r--r-- | 3rdParty/Boost/src/boost/spirit/home/lex/lexer/lexer.hpp | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/3rdParty/Boost/src/boost/spirit/home/lex/lexer/lexer.hpp b/3rdParty/Boost/src/boost/spirit/home/lex/lexer/lexer.hpp new file mode 100644 index 0000000..f69cd23 --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/lex/lexer/lexer.hpp @@ -0,0 +1,381 @@ +// Copyright (c) 2001-2011 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM) +#define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM + +#if defined(_MSC_VER) +#pragma once +#endif + +#include <boost/spirit/home/support/info.hpp> +#include <boost/spirit/home/qi/skip_over.hpp> +#include <boost/spirit/home/qi/parser.hpp> +#include <boost/spirit/home/qi/detail/assign_to.hpp> +#include <boost/spirit/home/lex/reference.hpp> +#include <boost/spirit/home/lex/meta_compiler.hpp> +#include <boost/spirit/home/lex/lexer_type.hpp> +#include <boost/spirit/home/lex/lexer/token_def.hpp> +#include <boost/assert.hpp> +#include <boost/noncopyable.hpp> +#include <boost/detail/iterator.hpp> +#include <boost/fusion/include/vector.hpp> +#include <boost/mpl/assert.hpp> +#include <boost/range/iterator_range.hpp> +#include <string> + +namespace boost { namespace spirit { namespace lex +{ + /////////////////////////////////////////////////////////////////////////// + namespace detail + { + /////////////////////////////////////////////////////////////////////// + template <typename LexerDef> + struct lexer_def_ + : proto::extends< + typename proto::terminal< + lex::reference<lexer_def_<LexerDef> const> + >::type + , lexer_def_<LexerDef> > + , qi::parser<lexer_def_<LexerDef> > + , lex::lexer_type<lexer_def_<LexerDef> > + { + private: + // avoid warnings about using 'this' in constructor + lexer_def_& this_() { return *this; } + + typedef typename LexerDef::char_type char_type; + typedef typename LexerDef::string_type string_type; + typedef typename LexerDef::id_type id_type; + + typedef lex::reference<lexer_def_ const> reference_; + typedef typename proto::terminal<reference_>::type terminal_type; + typedef proto::extends<terminal_type, lexer_def_> proto_base_type; + + reference_ alias() const + { + return reference_(*this); + } + + public: + // Qi interface: metafunction calculating parser attribute type + template <typename Context, typename Iterator> + struct attribute + { + // the return value of a token set contains the matched token + // id, and the corresponding pair of iterators + typedef typename Iterator::base_iterator_type iterator_type; + typedef + fusion::vector2<id_type, iterator_range<iterator_type> > + type; + }; + + // Qi interface: parse functionality + template <typename Iterator, typename Context + , typename Skipper, typename Attribute> + bool parse(Iterator& first, Iterator const& last + , Context& /*context*/, Skipper const& skipper + , Attribute& attr) const + { + qi::skip_over(first, last, skipper); // always do a pre-skip + + if (first != last) { + typedef typename + boost::detail::iterator_traits<Iterator>::value_type + token_type; + + token_type const& t = *first; + if (token_is_valid(t) && t.state() == first.get_state()) { + // any of the token definitions matched + spirit::traits::assign_to(t, attr); + ++first; + return true; + } + } + return false; + } + + // Qi interface: 'what' functionality + template <typename Context> + info what(Context& /*context*/) const + { + return info("lexer"); + } + + private: + // allow to use the lexer.self.add("regex1", id1)("regex2", id2); + // syntax + struct adder + { + adder(lexer_def_& def_) + : def(def_) {} + + // Add a token definition based on a single character as given + // by the first parameter, the second parameter allows to + // specify the token id to use for the new token. If no token + // id is given the character code is used. + adder const& operator()(char_type c + , id_type token_id = id_type()) const + { + if (id_type() == token_id) + token_id = static_cast<id_type>(c); + def.def.add_token (def.state.c_str(), c, token_id + , def.targetstate.empty() ? 0 : def.targetstate.c_str()); + return *this; + } + + // Add a token definition based on a character sequence as + // given by the first parameter, the second parameter allows to + // specify the token id to use for the new token. If no token + // id is given this function will generate a unique id to be + // used as the token's id. + adder const& operator()(string_type const& s + , id_type token_id = id_type()) const + { + if (id_type() == token_id) + token_id = def.def.get_next_id(); + def.def.add_token (def.state.c_str(), s, token_id + , def.targetstate.empty() ? 0 : def.targetstate.c_str()); + return *this; + } + + template <typename Attribute> + adder const& operator()( + token_def<Attribute, char_type, id_type>& tokdef + , id_type token_id = id_type()) const + { + // make sure we have a token id + if (id_type() == token_id) { + if (id_type() == tokdef.id()) { + token_id = def.def.get_next_id(); + tokdef.id(token_id); + } + else { + token_id = tokdef.id(); + } + } + else { + // the following assertion makes sure that the token_def + // instance has not been assigned a different id earlier + BOOST_ASSERT(id_type() == tokdef.id() + || token_id == tokdef.id()); + tokdef.id(token_id); + } + + def.define(tokdef); + return *this; + } + +// template <typename F> +// adder const& operator()(char_type c, id_type token_id, F act) const +// { +// if (id_type() == token_id) +// token_id = def.def.get_next_id(); +// std::size_t unique_id = +// def.def.add_token (def.state.c_str(), s, token_id); +// def.def.add_action(unique_id, def.state.c_str(), act); +// return *this; +// } + + lexer_def_& def; + + private: + // silence MSVC warning C4512: assignment operator could not be generated + adder& operator= (adder const&); + }; + friend struct adder; + + // allow to use lexer.self.add_pattern("pattern1", "regex1")(...); + // syntax + struct pattern_adder + { + pattern_adder(lexer_def_& def_) + : def(def_) {} + + pattern_adder const& operator()(string_type const& p + , string_type const& s) const + { + def.def.add_pattern (def.state.c_str(), p, s); + return *this; + } + + lexer_def_& def; + + private: + // silence MSVC warning C4512: assignment operator could not be generated + pattern_adder& operator= (pattern_adder const&); + }; + friend struct pattern_adder; + + private: + // Helper function to invoke the necessary 2 step compilation + // process on token definition expressions + template <typename TokenExpr> + void compile2pass(TokenExpr const& expr) + { + expr.collect(def, state, targetstate); + expr.add_actions(def); + } + + public: + /////////////////////////////////////////////////////////////////// + template <typename Expr> + void define(Expr const& expr) + { + compile2pass(compile<lex::domain>(expr)); + } + + lexer_def_(LexerDef& def_, string_type const& state_ + , string_type const& targetstate_ = string_type()) + : proto_base_type(terminal_type::make(alias())) + , add(this_()), add_pattern(this_()), def(def_) + , state(state_), targetstate(targetstate_) + {} + + // allow to switch states + lexer_def_ operator()(char_type const* state) const + { + return lexer_def_(def, state); + } + lexer_def_ operator()(char_type const* state + , char_type const* targetstate) const + { + return lexer_def_(def, state, targetstate); + } + lexer_def_ operator()(string_type const& state + , string_type const& targetstate = string_type()) const + { + return lexer_def_(def, state, targetstate); + } + + // allow to assign a token definition expression + template <typename Expr> + lexer_def_& operator= (Expr const& xpr) + { + // Report invalid expression error as early as possible. + // If you got an error_invalid_expression error message here, + // then the expression (expr) is not a valid spirit lex + // expression. + BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); + + def.clear(state.c_str()); + define(xpr); + return *this; + } + + adder add; + pattern_adder add_pattern; + + private: + LexerDef& def; + string_type state; + string_type targetstate; + + private: + // silence MSVC warning C4512: assignment operator could not be generated + lexer_def_& operator= (lexer_def_ const&); + }; + + // allow to assign a token definition expression + template <typename LexerDef, typename Expr> + inline lexer_def_<LexerDef>& + operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr) + { + // Report invalid expression error as early as possible. + // If you got an error_invalid_expression error message here, + // then the expression (expr) is not a valid spirit lex + // expression. + BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); + + lexdef.define(xpr); + return lexdef; + } + + template <typename LexerDef, typename Expr> + inline lexer_def_<LexerDef>& + operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr) + { + // Report invalid expression error as early as possible. + // If you got an error_invalid_expression error message here, + // then the expression (expr) is not a valid spirit lex + // expression. + BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); + + lexdef.define(xpr); + return lexdef; + } + } + + /////////////////////////////////////////////////////////////////////////// + // The match_flags flags are used to influence different matching + // modes of the lexer + struct match_flags + { + enum enum_type + { + match_default = 0, // no flags + match_not_dot_newline = 1, // the regex '.' doesn't match newlines + match_icase = 2 // all matching operations are case insensitive + }; + }; + + /////////////////////////////////////////////////////////////////////////// + // This represents a lexer object + /////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////// + // This is the first token id automatically assigned by the library + // if needed + enum tokenids + { + min_token_id = 0x10000 + }; + + template <typename Lexer> + class lexer : public Lexer + { + private: + // avoid warnings about using 'this' in constructor + lexer& this_() { return *this; } + + std::size_t next_token_id; // has to be an integral type + + public: + typedef Lexer lexer_type; + typedef typename Lexer::id_type id_type; + typedef typename Lexer::char_type char_type; + typedef typename Lexer::iterator_type iterator_type; + typedef lexer base_type; + + typedef detail::lexer_def_<lexer> lexer_def; + typedef std::basic_string<char_type> string_type; + + lexer(unsigned int flags = match_flags::match_default + , id_type first_id = id_type(min_token_id)) + : lexer_type(flags) + , next_token_id(first_id) + , self(this_(), lexer_type::initial_state()) + {} + + // access iterator interface + template <typename Iterator> + iterator_type begin(Iterator& first, Iterator const& last + , char_type const* initial_state = 0) const + { return this->lexer_type::begin(first, last, initial_state); } + iterator_type end() const + { return this->lexer_type::end(); } + + std::size_t map_state(char_type const* state) + { return this->lexer_type::add_state(state); } + + // create a unique token id + id_type get_next_id() { return id_type(next_token_id++); } + + lexer_def self; // allow for easy token definition + }; + +}}} + +#endif |