// state_machine.hpp // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef BOOST_LEXER_STATE_MACHINE_HPP #define BOOST_LEXER_STATE_MACHINE_HPP #include #include "conversion/char_state_machine.hpp" #include "consts.hpp" #include #include "internals.hpp" #include #include "containers/ptr_vector.hpp" #include "size_t.hpp" #include namespace boost { namespace lexer { template class basic_state_machine { public: typedef CharT char_type; class iterator { public: friend class basic_state_machine; struct data { // Current iterator info std::size_t dfa; std::size_t states; std::size_t state; std::size_t transitions; std::size_t transition; // Current state info bool end_state; std::size_t id; std::size_t unique_id; std::size_t goto_dfa; std::size_t bol_index; std::size_t eol_index; // Current transition info basic_string_token token; std::size_t goto_state; data () : dfa (npos), states (0), state (npos), transitions (0), transition (npos), end_state (false), id (npos), unique_id (npos), goto_dfa (npos), bol_index (npos), eol_index (npos), goto_state (npos) { } bool operator == (const data &rhs_) const { return dfa == rhs_.dfa && states == rhs_.states && state == rhs_.state && transitions == rhs_.transitions && transition == rhs_.transition && end_state == rhs_.end_state && id == rhs_.id && unique_id == rhs_.unique_id && goto_dfa == rhs_.goto_dfa && bol_index == rhs_.bol_index && eol_index == rhs_.eol_index && token == rhs_.token && transition == rhs_.transition; } }; iterator () : _sm (0), _dfas (0), _dfa (npos), _states (0), _state (npos), _transitions (0), _transition (npos) { } bool operator == (const iterator &rhs_) const { return _dfas == rhs_._dfas && _dfa == rhs_._dfa && _states == rhs_._states && _state == rhs_._state && _transitions == rhs_._transitions && _transition == rhs_._transition; } bool operator != (const iterator &rhs_) const { return !(*this == rhs_); } data &operator * () { return _data; } data *operator -> () { return &_data; } // Let compiler generate operator = (). // prefix version iterator &operator ++ () { next (); return *this; } // postfix version iterator operator ++ (int) { iterator iter_ = *this; next (); return iter_; } void clear () { _dfas = _states = _transitions = 0; _dfa = _state = _transition = npos; } private: basic_state_machine *_sm; data _data; std::size_t _dfas; std::size_t _dfa; std::size_t _states; std::size_t _state; std::size_t _transitions; std::size_t _transition; typename detail::basic_char_state_machine::state:: size_t_string_token_map::const_iterator _token_iter; typename detail::basic_char_state_machine::state:: size_t_string_token_map::const_iterator _token_end; void next () { bool reset_state_ = false; if (_transition >= _transitions) { _transition = _data.transition = 0; _data.state = ++_state; reset_state_ = true; if (_state >= _states) { ++_dfa; if (_dfa >= _dfas) { clear (); reset_state_ = false; } else { _states = _data.states = _sm->_csm._sm_vector[_dfa].size (); _state = _data.state = 0; } } } else { _data.transition = _transition; } if (reset_state_) { const typename detail::basic_char_state_machine:: state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state]; _transitions = _data.transitions = ptr_->_transitions.size (); _data.end_state = ptr_->_end_state; _data.id = ptr_->_id; _data.unique_id = ptr_->_unique_id; _data.goto_dfa = ptr_->_state; _data.bol_index = ptr_->_bol_index; _data.eol_index = ptr_->_eol_index; _token_iter = ptr_->_transitions.begin (); _token_end = ptr_->_transitions.end (); } if (_token_iter != _token_end) { _data.token = _token_iter->second; _data.goto_state = _token_iter->first; ++_token_iter; ++_transition; } else { _data.token.clear (); _data.goto_state = npos; } } }; friend class iterator; basic_state_machine () { } void clear () { _internals.clear (); _csm.clear (); } bool empty () const { // Don't include _csm in this test, as irrelevant to state. return _internals._lookup->empty () && _internals._dfa_alphabet.empty () && _internals._dfa->empty (); } std::size_t size () const { return _internals._dfa->size (); } bool operator == (const basic_state_machine &rhs_) const { // Don't include _csm in this test, as irrelevant to state. return _internals._lookup == rhs_._internals._lookup && _internals._dfa_alphabet == rhs_._internals._dfa_alphabet && _internals._dfa == rhs_._internals._dfa && _internals._seen_BOL_assertion == rhs_._internals._seen_BOL_assertion && _internals._seen_EOL_assertion == rhs_._internals._seen_EOL_assertion; } iterator begin () const { iterator iter_; iter_._sm = const_cast(this); check_for_csm (); if (!_csm.empty ()) { const typename detail::basic_char_state_machine:: state_vector *ptr_ = &_csm._sm_vector.front (); iter_._dfas = _csm._sm_vector.size (); iter_._states = iter_._data.states = ptr_->size (); iter_._transitions = iter_._data.transitions = ptr_->front ()._transitions.size (); iter_._dfa = iter_._data.dfa = 0; iter_._state = iter_._data.state = 0; iter_._transition = 0; iter_._data.end_state = ptr_->front ()._end_state; iter_._data.id = ptr_->front ()._id; iter_._data.unique_id = ptr_->front ()._unique_id; iter_._data.goto_dfa = ptr_->front ()._state; iter_._data.bol_index = ptr_->front ()._bol_index; iter_._data.eol_index = ptr_->front ()._eol_index; iter_._token_iter = ptr_->front ()._transitions.begin (); iter_._token_end = ptr_->front ()._transitions.end (); // Deal with case where there is only a bol or eol // but no other transitions. if (iter_._transitions) { ++iter_; } } return iter_; } iterator end () const { iterator iter_; iter_._sm = const_cast(this); return iter_; } void swap (basic_state_machine &sm_) { _internals.swap (sm_._internals); _csm.swap (sm_._csm); } const detail::internals &data () const { return _internals; } private: detail::internals _internals; mutable detail::basic_char_state_machine _csm; void check_for_csm () const { if (_csm.empty ()) { human_readable (_csm); } } void human_readable (detail::basic_char_state_machine &sm_) const { const std::size_t max_ = sizeof (CharT) == 1 ? num_chars : num_wchar_ts; const std::size_t start_states_ = _internals._dfa->size (); sm_.clear (); sm_._sm_vector.resize (start_states_); for (std::size_t start_state_index_ = 0; start_state_index_ < start_states_; ++start_state_index_) { const detail::internals::size_t_vector *lu_ = _internals._lookup[start_state_index_]; const std::size_t alphabet_ = _internals._dfa_alphabet[start_state_index_] - dfa_offset; std::vector > chars_ (alphabet_); const std::size_t states_ = _internals._dfa[start_state_index_]-> size () / (alphabet_ + dfa_offset); const std::size_t *read_ptr_ = &_internals. _dfa[start_state_index_]->front () + alphabet_ + dfa_offset; sm_._sm_vector[start_state_index_].resize (states_ - 1); for (std::size_t alpha_index_ = 0; alpha_index_ < max_; ++alpha_index_) { const std::size_t col_ = lu_->at (alpha_index_); if (col_ != static_cast(dead_state_index)) { chars_[col_ - dfa_offset] += static_cast (alpha_index_); } } for (std::size_t state_index_ = 1; state_index_ < states_; ++state_index_) { typename detail::basic_char_state_machine::state *state_ = &sm_._sm_vector[start_state_index_] [state_index_ - 1]; state_->_end_state = *read_ptr_ != 0; state_->_id = *(read_ptr_ + id_index); state_->_unique_id = *(read_ptr_ + unique_id_index); state_->_state = *(read_ptr_ + state_index); state_->_bol_index = *(read_ptr_ + bol_index) - 1; state_->_eol_index = *(read_ptr_ + eol_index) - 1; read_ptr_ += dfa_offset; for (std::size_t col_index_ = 0; col_index_ < alphabet_; ++col_index_, ++read_ptr_) { const std::size_t transition_ = *read_ptr_; if (transition_ != 0) { const std::size_t i_ = transition_ - 1; typename detail::basic_char_state_machine:: state::size_t_string_token_map::iterator iter_ = state_->_transitions.find (i_); if (iter_ == state_->_transitions.end ()) { basic_string_token token_ (false, chars_[col_index_]); typename detail::basic_char_state_machine:: state::size_t_string_token_pair pair_ (i_, token_); state_->_transitions.insert (pair_); } else { iter_->second._charset += chars_[col_index_]; } } } for (typename detail::basic_char_state_machine::state:: size_t_string_token_map::iterator iter_ = state_->_transitions.begin (), end_ = state_->_transitions.end (); iter_ != end_; ++iter_) { std::sort (iter_->second._charset.begin (), iter_->second._charset.end ()); iter_->second.normalise (); } } } } }; typedef basic_state_machine state_machine; typedef basic_state_machine wstate_machine; } } #endif