// Copyright (c) 2001-2011 Hartmut Kaiser // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM) #define BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM #if defined(_MSC_VER) #pragma once #endif #include #include #include #include #include #include #include #include #include #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) #include #endif #include namespace boost { namespace spirit { namespace lex { namespace lexertl { /////////////////////////////////////////////////////////////////////////// namespace detail { /////////////////////////////////////////////////////////////////////// // The must_escape function checks if the given character value needs // to be preceded by a backslash character to disable its special // meaning in the context of a regular expression /////////////////////////////////////////////////////////////////////// template inline bool must_escape(Char c) { // FIXME: more needed? switch (c) { case '+': case '/': case '*': case '?': case '|': case '(': case ')': case '[': case ']': case '{': case '}': case '.': case '^': case '$': case '\\': case '"': return true; default: break; } return false; } /////////////////////////////////////////////////////////////////////// // The escape function returns the string representation of the given // character value, possibly escaped with a backslash character, to // allow it being safely used in a regular expression definition. /////////////////////////////////////////////////////////////////////// template inline std::basic_string escape(Char ch) { std::basic_string result(1, ch); if (detail::must_escape(ch)) { typedef typename std::basic_string::size_type size_type; result.insert((size_type)0, 1, '\\'); } return result; } /////////////////////////////////////////////////////////////////////// // /////////////////////////////////////////////////////////////////////// inline boost::lexer::regex_flags map_flags(unsigned int flags) { unsigned int retval = boost::lexer::none; if (flags & match_flags::match_not_dot_newline) retval |= boost::lexer::dot_not_newline; if (flags & match_flags::match_icase) retval |= boost::lexer::icase; return boost::lexer::regex_flags(retval); } } /////////////////////////////////////////////////////////////////////////// template bool generate_static(Lexer const&, std::ostream&, char const*, F); /////////////////////////////////////////////////////////////////////////// // // Every lexer type to be used as a lexer for Spirit has to conform to // the following public interface: // // typedefs: // iterator_type The type of the iterator exposed by this lexer. // token_type The type of the tokens returned from the exposed // iterators. // // functions: // default constructor // Since lexers are instantiated as base classes // only it might be a good idea to make this // constructor protected. // begin, end Return a pair of iterators, when dereferenced // returning the sequence of tokens recognized in // the input stream given as the parameters to the // begin() function. // add_token Should add the definition of a token to be // recognized by this lexer. // clear Should delete all current token definitions // associated with the given state of this lexer // object. // // template parameters: // Iterator The type of the iterator used to access the // underlying character stream. // Token The type of the tokens to be returned from the // exposed token iterator. // Functor The type of the InputPolicy to use to instantiate // the multi_pass iterator type to be used as the // token iterator (returned from begin()/end()). // /////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////// // // The lexer class is a implementation of a Spirit.Lex lexer on // top of Ben Hanson's lexertl library as outlined above (For more // information about lexertl go here: http://www.benhanson.net/lexertl.html). // // This class is supposed to be used as the first and only template // parameter while instantiating instances of a lex::lexer class. // /////////////////////////////////////////////////////////////////////////// template , typename Iterator = typename Token::iterator_type , typename Functor = functor > class lexer { private: struct dummy { void true_() {} }; typedef void (dummy::*safe_bool)(); static std::size_t const all_states_id = static_cast(-2); public: operator safe_bool() const { return initialized_dfa_ ? &dummy::true_ : 0; } typedef typename boost::detail::iterator_traits::value_type char_type; typedef std::basic_string string_type; typedef boost::lexer::basic_rules basic_rules_type; // Every lexer type to be used as a lexer for Spirit has to conform to // a public interface . typedef Token token_type; typedef typename Token::id_type id_type; typedef iterator iterator_type; private: // this type is purely used for the iterator_type construction below struct iterator_data_type { typedef typename Functor::semantic_actions_type semantic_actions_type; iterator_data_type( boost::lexer::basic_state_machine const& sm , boost::lexer::basic_rules const& rules , semantic_actions_type const& actions) : state_machine_(sm), rules_(rules), actions_(actions) {} boost::lexer::basic_state_machine const& state_machine_; boost::lexer::basic_rules const& rules_; semantic_actions_type const& actions_; private: // silence MSVC warning C4512: assignment operator could not be generated iterator_data_type& operator= (iterator_data_type const&); }; public: // Return the start iterator usable for iterating over the generated // tokens. iterator_type begin(Iterator& first, Iterator const& last , char_type const* initial_state = 0) const { if (!init_dfa()) // never minimize DFA for dynamic lexers return iterator_type(); iterator_data_type iterator_data(state_machine_, rules_, actions_); return iterator_type(iterator_data, first, last, initial_state); } // Return the end iterator usable to stop iterating over the generated // tokens. iterator_type end() const { return iterator_type(); } protected: // Lexer instances can be created by means of a derived class only. lexer(unsigned int flags) : flags_(detail::map_flags(flags)) , rules_(flags_) , initialized_dfa_(false) {} public: // interface for token definition management std::size_t add_token(char_type const* state, char_type tokendef, std::size_t token_id, char_type const* targetstate) { add_state(state); initialized_dfa_ = false; if (state == all_states()) return rules_.add(state, detail::escape(tokendef), token_id, rules_.dot()); if (0 == targetstate) targetstate = state; else add_state(targetstate); return rules_.add(state, detail::escape(tokendef), token_id, targetstate); } std::size_t add_token(char_type const* state, string_type const& tokendef, std::size_t token_id, char_type const* targetstate) { add_state(state); initialized_dfa_ = false; if (state == all_states()) return rules_.add(state, tokendef, token_id, rules_.dot()); if (0 == targetstate) targetstate = state; else add_state(targetstate); return rules_.add(state, tokendef, token_id, targetstate); } // interface for pattern definition management void add_pattern (char_type const* state, string_type const& name, string_type const& patterndef) { add_state(state); rules_.add_macro(name.c_str(), patterndef); initialized_dfa_ = false; } boost::lexer::rules const& get_rules() const { return rules_; } void clear(char_type const* state) { std::size_t s = rules_.state(state); if (boost::lexer::npos != s) rules_.clear(state); initialized_dfa_ = false; } std::size_t add_state(char_type const* state) { if (state == all_states()) return all_states_id; std::size_t stateid = rules_.state(state); if (boost::lexer::npos == stateid) { stateid = rules_.add_state(state); initialized_dfa_ = false; } return stateid; } string_type initial_state() const { return string_type(rules_.initial()); } string_type all_states() const { return string_type(rules_.all_states()); } // Register a semantic action with the given id template void add_action(std::size_t unique_id, std::size_t state, F act) { // If you see an error here stating add_action is not a member of // fusion::unused_type then you are probably having semantic actions // attached to at least one token in the lexer definition without // using the lex::lexertl::actor_lexer<> as its base class. typedef typename Functor::wrap_action_type wrapper_type; if (state == all_states_id) { // add the action to all known states typedef typename basic_rules_type::string_size_t_map::value_type state_type; BOOST_FOREACH(state_type const& s, rules_.statemap()) actions_.add_action(unique_id, s.second, wrapper_type::call(act)); } else { actions_.add_action(unique_id, state, wrapper_type::call(act)); } } // template // void add_action(std::size_t unique_id, char_type const* state, F act) // { // typedef typename Functor::wrap_action_type wrapper_type; // actions_.add_action(unique_id, add_state(state), wrapper_type::call(act)); // } // We do not minimize the state machine by default anymore because // Ben said: "If you can afford to generate a lexer at runtime, there // is little point in calling minimise." // Go figure. bool init_dfa(bool minimize = false) const { if (!initialized_dfa_) { state_machine_.clear(); typedef boost::lexer::basic_generator generator; generator::build (rules_, state_machine_); if (minimize) generator::minimise (state_machine_); #if defined(BOOST_SPIRIT_LEXERTL_DEBUG) boost::lexer::debug::dump(state_machine_, std::cerr); #endif initialized_dfa_ = true; } return true; } private: // lexertl specific data mutable boost::lexer::basic_state_machine state_machine_; boost::lexer::regex_flags flags_; basic_rules_type rules_; typename Functor::semantic_actions_type actions_; mutable bool initialized_dfa_; // generator functions must be able to access members directly template friend bool generate_static(Lexer const&, std::ostream&, char const*, F); }; /////////////////////////////////////////////////////////////////////////// // // The actor_lexer class is another implementation of a Spirit.Lex // lexer on top of Ben Hanson's lexertl library as outlined above (For // more information about lexertl go here: // http://www.benhanson.net/lexertl.html). // // The only difference to the lexer class above is that token_def // definitions may have semantic (lexer) actions attached while being // defined: // // int w; // token_def word = "[^ \t\n]+"; // self = word[++ref(w)]; // see example: word_count_lexer // // This class is supposed to be used as the first and only template // parameter while instantiating instances of a lex::lexer class. // /////////////////////////////////////////////////////////////////////////// template , typename Iterator = typename Token::iterator_type , typename Functor = functor > class actor_lexer : public lexer { protected: // Lexer instances can be created by means of a derived class only. actor_lexer(unsigned int flags) : lexer(flags) {} }; }}}} #endif