// Copyright (c) 2001-2011 Hartmut Kaiser // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #if !defined(BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM) #define BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM #if defined(_MSC_VER) #pragma once #endif #include #include #include #include #include #include namespace boost { namespace spirit { namespace lex { namespace lexertl { /////////////////////////////////////////////////////////////////////////// template class basic_iterator_tokeniser { public: typedef std::vector size_t_vector; typedef typename boost::detail::iterator_traits::value_type char_type; static std::size_t next ( boost::lexer::basic_state_machine const& state_machine_ , std::size_t &dfa_state_, bool& bol_, Iterator &start_token_ , Iterator const& end_, std::size_t& unique_id_) { if (start_token_ == end_) { unique_id_ = boost::lexer::npos; return 0; } bool bol = bol_; boost::lexer::detail::internals const& internals_ = state_machine_.data(); again: std::size_t const* lookup_ = &internals_._lookup[dfa_state_]-> front (); std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_state_]; std::size_t const* dfa_ = &internals_._dfa[dfa_state_]->front (); std::size_t const* ptr_ = dfa_ + dfa_alphabet_; Iterator curr_ = start_token_; bool end_state_ = *ptr_ != 0; std::size_t id_ = *(ptr_ + boost::lexer::id_index); std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index); std::size_t end_start_state_ = dfa_state_; bool end_bol_ = bol_; Iterator end_token_ = start_token_; while (curr_ != end_) { std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index]; std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; if (BOL_state_ && bol) { ptr_ = &dfa_[BOL_state_ * dfa_alphabet_]; } else if (EOL_state_ && *curr_ == '\n') { ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; } else { typedef typename boost::detail::iterator_traits::value_type value_type; typedef typename boost::lexer::char_traits::index_type index_type; index_type index = boost::lexer::char_traits::call(*curr_++); bol = (index == '\n') ? true : false; std::size_t const state_ = ptr_[ lookup_[static_cast(index)]]; if (state_ == 0) { break; } ptr_ = &dfa_[state_ * dfa_alphabet_]; } if (*ptr_) { end_state_ = true; id_ = *(ptr_ + boost::lexer::id_index); uid_ = *(ptr_ + boost::lexer::unique_id_index); end_start_state_ = *(ptr_ + boost::lexer::state_index); end_bol_ = bol; end_token_ = curr_; } } std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; if (EOL_state_ && curr_ == end_) { ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; if (*ptr_) { end_state_ = true; id_ = *(ptr_ + boost::lexer::id_index); uid_ = *(ptr_ + boost::lexer::unique_id_index); end_start_state_ = *(ptr_ + boost::lexer::state_index); end_bol_ = bol; end_token_ = curr_; } } if (end_state_) { // return longest match dfa_state_ = end_start_state_; start_token_ = end_token_; if (id_ == 0) { bol = end_bol_; goto again; } else { bol_ = end_bol_; } } else { bol_ = (*start_token_ == '\n') ? true : false; id_ = boost::lexer::npos; uid_ = boost::lexer::npos; } unique_id_ = uid_; return id_; } /////////////////////////////////////////////////////////////////////// static std::size_t next ( boost::lexer::basic_state_machine const& state_machine_ , bool& bol_, Iterator &start_token_, Iterator const& end_ , std::size_t& unique_id_) { if (start_token_ == end_) { unique_id_ = boost::lexer::npos; return 0; } bool bol = bol_; std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front(); std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0]; std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front (); std::size_t const* ptr_ = dfa_ + dfa_alphabet_; Iterator curr_ = start_token_; bool end_state_ = *ptr_ != 0; std::size_t id_ = *(ptr_ + boost::lexer::id_index); std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index); bool end_bol_ = bol_; Iterator end_token_ = start_token_; while (curr_ != end_) { std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index]; std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; if (BOL_state_ && bol) { ptr_ = &dfa_[BOL_state_ * dfa_alphabet_]; } else if (EOL_state_ && *curr_ == '\n') { ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; } else { typedef typename boost::detail::iterator_traits::value_type value_type; typedef typename boost::lexer::char_traits::index_type index_type; index_type index = boost::lexer::char_traits::call(*curr_++); bol = (index == '\n') ? true : false; std::size_t const state_ = ptr_[ lookup_[static_cast(index)]]; if (state_ == 0) { break; } ptr_ = &dfa_[state_ * dfa_alphabet_]; } if (*ptr_) { end_state_ = true; id_ = *(ptr_ + boost::lexer::id_index); uid_ = *(ptr_ + boost::lexer::unique_id_index); end_bol_ = bol; end_token_ = curr_; } } std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; if (EOL_state_ && curr_ == end_) { ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; if (*ptr_) { end_state_ = true; id_ = *(ptr_ + boost::lexer::id_index); uid_ = *(ptr_ + boost::lexer::unique_id_index); end_bol_ = bol; end_token_ = curr_; } } if (end_state_) { // return longest match bol_ = end_bol_; start_token_ = end_token_; } else { bol_ = *start_token_ == '\n'; id_ = boost::lexer::npos; uid_ = boost::lexer::npos; } unique_id_ = uid_; return id_; } }; }}}} #endif