// Copyright (c) 2001-2011 Hartmut Kaiser // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM) #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM #if defined(_MSC_VER) #pragma once #endif #include #include #include #include #include #include #include #include #include namespace boost { namespace spirit { namespace lex { namespace lexertl { namespace detail { /////////////////////////////////////////////////////////////////////// template class data; // no default specialization /////////////////////////////////////////////////////////////////////// // neither supports state, nor actors template class data { protected: typedef typename boost::detail::iterator_traits::value_type char_type; public: typedef Iterator base_iterator_type; typedef iterator_range token_value_type; typedef token_value_type get_value_type; typedef std::size_t state_type; typedef char_type const* state_name_type; typedef unused_type semantic_actions_type; typedef detail::wrap_action wrap_action_type; typedef unused_type next_token_functor; typedef unused_type get_state_name_type; // initialize the shared data template data (IterData const& data_, Iterator& first, Iterator const& last) : first_(first), last_(last) , state_machine_(data_.state_machine_) , rules_(data_.rules_) , bol_(data_.state_machine_.data()._seen_BOL_assertion) {} // The following functions are used by the implementation of the // placeholder '_state'. template void set_state_name (Char const*) { // some (random) versions of gcc instantiate this function even if it's not // needed leading to false static asserts #if !defined(__GNUC__) // If you see a compile time assertion below you're probably // using a token type not supporting lexer states (the 3rd // template parameter of the token is mpl::false_), but your // code uses state changes anyways. BOOST_STATIC_ASSERT(false); #endif } char_type const* get_state_name() const { return rules_.initial(); } std::size_t get_state_id (char_type const*) const { return 0; } // The function get_eoi() is used by the implementation of the // placeholder '_eoi'. Iterator const& get_eoi() const { return last_; } // The function less() is used by the implementation of the support // function lex::less(). Its functionality is equivalent to flex' // function yyless(): it returns an iterator positioned to the // nth input character beyond the current start iterator (i.e. by // assigning the return value to the placeholder '_end' it is // possible to return all but the first n characters of the current // token back to the input stream. // // This function does nothing as long as no semantic actions are // used. Iterator const& less(Iterator const& it, int) { // The following assertion fires most likely because you are // using lexer semantic actions without using the actor_lexer // as the base class for your token definition class. BOOST_ASSERT(false && "Are you using lexer semantic actions without using the " "actor_lexer base?"); return it; } // The function more() is used by the implementation of the support // function lex::more(). Its functionality is equivalent to flex' // function yymore(): it tells the lexer that the next time it // matches a rule, the corresponding token should be appended onto // the current token value rather than replacing it. // // These functions do nothing as long as no semantic actions are // used. void more() { // The following assertion fires most likely because you are // using lexer semantic actions without using the actor_lexer // as the base class for your token definition class. BOOST_ASSERT(false && "Are you using lexer semantic actions without using the " "actor_lexer base?"); } bool adjust_start() { return false; } void revert_adjust_start() {} // The function lookahead() is used by the implementation of the // support function lex::lookahead. It can be used to implement // lookahead for lexer engines not supporting constructs like flex' // a/b (match a, but only when followed by b): // // This function does nothing as long as no semantic actions are // used. bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0)) { // The following assertion fires most likely because you are // using lexer semantic actions without using the actor_lexer // as the base class for your token definition class. BOOST_ASSERT(false && "Are you using lexer semantic actions without using the " "actor_lexer base?"); return false; } // the functions next, invoke_actions, and get_state are used by // the functor implementation below // The function next() tries to match the next token from the // underlying input sequence. std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) { prev_bol = bol_; typedef basic_iterator_tokeniser tokenizer; return tokenizer::next(state_machine_, bol_, end, last_ , unique_id); } // nothing to invoke, so this is empty BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t , std::size_t, std::size_t, Iterator const&) { return pass_flags::pass_normal; // always accept } std::size_t get_state() const { return 0; } void set_state(std::size_t) {} void set_end(Iterator const& it) {} Iterator& get_first() { return first_; } Iterator const& get_first() const { return first_; } Iterator const& get_last() const { return last_; } iterator_range get_value() const { return iterator_range(first_, last_); } bool has_value() const { return false; } void reset_value() {} void reset_bol(bool bol) { bol_ = bol; } protected: Iterator& first_; Iterator last_; boost::lexer::basic_state_machine const& state_machine_; boost::lexer::basic_rules const& rules_; bool bol_; // helper storing whether last character was \n private: // silence MSVC warning C4512: assignment operator could not be generated data& operator= (data const&); }; /////////////////////////////////////////////////////////////////////// // doesn't support lexer semantic actions, but supports state template class data : public data { protected: typedef data base_type; typedef typename base_type::char_type char_type; public: typedef Iterator base_iterator_type; typedef iterator_range token_value_type; typedef token_value_type get_value_type; typedef typename base_type::state_type state_type; typedef typename base_type::state_name_type state_name_type; typedef typename base_type::semantic_actions_type semantic_actions_type; // initialize the shared data template data (IterData const& data_, Iterator& first, Iterator const& last) : base_type(data_, first, last) , state_(0) {} // The following functions are used by the implementation of the // placeholder '_state'. void set_state_name (char_type const* new_state) { std::size_t state_id = this->rules_.state(new_state); // If the following assertion fires you've probably been using // a lexer state name which was not defined in your token // definition. BOOST_ASSERT(state_id != boost::lexer::npos); if (state_id != boost::lexer::npos) state_ = state_id; } char_type const* get_state_name() const { return this->rules_.state(state_); } std::size_t get_state_id (char_type const* state) const { return this->rules_.state(state); } // the functions next() and get_state() are used by the functor // implementation below // The function next() tries to match the next token from the // underlying input sequence. std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) { prev_bol = this->bol_; typedef basic_iterator_tokeniser tokenizer; return tokenizer::next(this->state_machine_, state_, this->bol_, end, this->get_eoi(), unique_id); } std::size_t& get_state() { return state_; } void set_state(std::size_t state) { state_ = state; } protected: std::size_t state_; private: // silence MSVC warning C4512: assignment operator could not be generated data& operator= (data const&); }; /////////////////////////////////////////////////////////////////////// // does support lexer semantic actions, may support state template class data : public data { public: typedef semantic_actions semantic_actions_type; protected: typedef data base_type; typedef typename base_type::char_type char_type; typedef typename semantic_actions_type::functor_wrapper_type functor_wrapper_type; public: typedef Iterator base_iterator_type; typedef TokenValue token_value_type; typedef TokenValue const& get_value_type; typedef typename base_type::state_type state_type; typedef typename base_type::state_name_type state_name_type; typedef detail::wrap_action wrap_action_type; template data (IterData const& data_, Iterator& first, Iterator const& last) : base_type(data_, first, last) , actions_(data_.actions_), hold_() , value_(iterator_range(first, last)) , has_value_(false), has_hold_(false) {} // invoke attached semantic actions, if defined BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state , std::size_t& id, std::size_t unique_id, Iterator& end) { return actions_.invoke_actions(state, id, unique_id, end, *this); } // The function less() is used by the implementation of the support // function lex::less(). Its functionality is equivalent to flex' // function yyless(): it returns an iterator positioned to the // nth input character beyond the current start iterator (i.e. by // assigning the return value to the placeholder '_end' it is // possible to return all but the first n characters of the current // token back to the input stream). Iterator const& less(Iterator& it, int n) { it = this->get_first(); std::advance(it, n); return it; } // The function more() is used by the implementation of the support // function lex::more(). Its functionality is equivalent to flex' // function yymore(): it tells the lexer that the next time it // matches a rule, the corresponding token should be appended onto // the current token value rather than replacing it. void more() { hold_ = this->get_first(); has_hold_ = true; } // The function lookahead() is used by the implementation of the // support function lex::lookahead. It can be used to implement // lookahead for lexer engines not supporting constructs like flex' // a/b (match a, but only when followed by b) bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) { Iterator end = end_; std::size_t unique_id = boost::lexer::npos; bool bol = this->bol_; if (std::size_t(~0) == state) state = this->state_; typedef basic_iterator_tokeniser tokenizer; return id == tokenizer::next(this->state_machine_, state, bol, end, this->get_eoi(), unique_id); } // The adjust_start() and revert_adjust_start() are helper // functions needed to implement the functionality required for // lex::more(). It is called from the functor body below. bool adjust_start() { if (!has_hold_) return false; std::swap(this->get_first(), hold_); has_hold_ = false; return true; } void revert_adjust_start() { // this will be called only if adjust_start above returned true std::swap(this->get_first(), hold_); has_hold_ = true; } TokenValue const& get_value() const { if (!has_value_) { value_ = iterator_range(this->get_first(), end_); has_value_ = true; } return value_; } template void set_value(Value const& val) { value_ = val; has_value_ = true; } void set_end(Iterator const& it) { end_ = it; } bool has_value() const { return has_value_; } void reset_value() { has_value_ = false; } protected: semantic_actions_type const& actions_; Iterator hold_; // iterator needed to support lex::more() Iterator end_; // iterator pointing to end of matched token mutable TokenValue value_; // token value to use mutable bool has_value_; // 'true' if value_ is valid bool has_hold_; // 'true' if hold_ is valid private: // silence MSVC warning C4512: assignment operator could not be generated data& operator= (data const&); }; /////////////////////////////////////////////////////////////////////// // does support lexer semantic actions, may support state, is used for // position_token exposing exactly one type template class data > : public data { public: typedef semantic_actions semantic_actions_type; protected: typedef data base_type; typedef typename base_type::char_type char_type; typedef typename semantic_actions_type::functor_wrapper_type functor_wrapper_type; public: typedef Iterator base_iterator_type; typedef boost::optional token_value_type; typedef boost::optional const& get_value_type; typedef typename base_type::state_type state_type; typedef typename base_type::state_name_type state_name_type; typedef detail::wrap_action wrap_action_type; template data (IterData const& data_, Iterator& first, Iterator const& last) : base_type(data_, first, last) , actions_(data_.actions_), hold_() , has_value_(false), has_hold_(false) { spirit::traits::assign_to(first, last, value_); has_value_ = true; } // invoke attached semantic actions, if defined BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state , std::size_t& id, std::size_t unique_id, Iterator& end) { return actions_.invoke_actions(state, id, unique_id, end, *this); } // The function less() is used by the implementation of the support // function lex::less(). Its functionality is equivalent to flex' // function yyless(): it returns an iterator positioned to the // nth input character beyond the current start iterator (i.e. by // assigning the return value to the placeholder '_end' it is // possible to return all but the first n characters of the current // token back to the input stream). Iterator const& less(Iterator& it, int n) { it = this->get_first(); std::advance(it, n); return it; } // The function more() is used by the implementation of the support // function lex::more(). Its functionality is equivalent to flex' // function yymore(): it tells the lexer that the next time it // matches a rule, the corresponding token should be appended onto // the current token value rather than replacing it. void more() { hold_ = this->get_first(); has_hold_ = true; } // The function lookahead() is used by the implementation of the // support function lex::lookahead. It can be used to implement // lookahead for lexer engines not supporting constructs like flex' // a/b (match a, but only when followed by b) bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) { Iterator end = end_; std::size_t unique_id = boost::lexer::npos; bool bol = this->bol_; if (std::size_t(~0) == state) state = this->state_; typedef basic_iterator_tokeniser tokenizer; return id == tokenizer::next(this->state_machine_, state, bol, end, this->get_eoi(), unique_id); } // The adjust_start() and revert_adjust_start() are helper // functions needed to implement the functionality required for // lex::more(). It is called from the functor body below. bool adjust_start() { if (!has_hold_) return false; std::swap(this->get_first(), hold_); has_hold_ = false; return true; } void revert_adjust_start() { // this will be called only if adjust_start above returned true std::swap(this->get_first(), hold_); has_hold_ = true; } token_value_type const& get_value() const { if (!has_value_) { spirit::traits::assign_to(this->get_first(), end_, value_); has_value_ = true; } return value_; } template void set_value(Value const& val) { value_ = val; has_value_ = true; } void set_end(Iterator const& it) { end_ = it; } bool has_value() const { return has_value_; } void reset_value() { has_value_ = false; } protected: semantic_actions_type const& actions_; Iterator hold_; // iterator needed to support lex::more() Iterator end_; // iterator pointing to end of matched token mutable token_value_type value_; // token value to use mutable bool has_value_; // 'true' if value_ is valid bool has_hold_; // 'true' if hold_ is valid private: // silence MSVC warning C4512: assignment operator could not be generated data& operator= (data const&); }; } }}}} #endif