/*
 * Copyright (c) 2011-2013 Vlad Voicu
 * Licensed under the Simplified BSD license.
 * See Documentation/Licenses/BSD-simplified.txt for more information.
 */

#include <SwifTools/SpellParser.h>

#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
#include <boost/numeric/conversion/cast.hpp>

#include <string>

namespace lex = boost::spirit::lex;

namespace Swift {

/**
 * Token definitions used to split a text fragment into URL-like runs,
 * words and single characters.
 *
 * The ID_* token identifiers are not defined in this file; they are
 * presumably provided by SwifTools/SpellParser.h.
 */
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
	word_count_tokens()
	{
		// Define tokens (regular expressions) to match strings.
		// Order is important: keep the URL rules ahead of the generic
		// word rule and the single-character fallback last.
		this->self.add
			// "www." followed by at least one non-space character. The dot
			// is escaped so that only a literal '.' after "www" qualifies.
			("w{3}\\.[^ ]+", ID_WWW)
			// "http://" or "https://" followed by non-space characters.
			("https?:\\/\\/[^ ]+", ID_HTTP)
			// Two or more word characters, optionally joined by a single
			// apostrophe or hyphen (e.g. "don't", "well-known").
			("\\w{1,}['\\-]?\\w{1,}", ID_WORD)
			// Fallback: one character not consumed by the rules above.
			(".", ID_CHAR);
	}
};

/**
 * Callback invoked by the lexer for every matched token. It tracks the
 * running offset into the fragment and records the [begin, end) offsets
 * of each word token.
 */
struct counter
{
	// Required so that boost::bind can deduce the functor's return type.
	typedef bool result_type;

	/**
	 * @param t             The token that was just matched.
	 * @param wordPositions Receives a (begin, end) pair for every ID_WORD token.
	 * @param position      Offset of the start of this token; advanced past
	 *                      the token before returning.
	 * @return Always true, so that tokenization continues.
	 */
	template <typename Token>
	bool operator()(Token const& t, PositionPairList& wordPositions, std::size_t& position) const
	{
		switch (t.id()) {
			case ID_WWW:
			case ID_HTTP:
				// URL-like runs are skipped: advance without recording them.
				position += boost::numeric_cast<size_t>(t.value().size());
				break;
			case ID_WORD: {
				// Matched a word: record its half-open range, then advance.
				const std::size_t length = boost::numeric_cast<size_t>(t.value().size());
				wordPositions.push_back(boost::tuples::make_tuple(position, position + length));
				position += length;
				break;
			}
			case ID_CHAR:
				// Matched a single character.
				++position;
				break;
		}
		return true; // always continue to tokenize
	}
};

/**
 * Tokenizes the given fragment and appends the (begin, end) offsets of
 * every word found to wordPositions. Offsets are counted in chars from
 * the start of the fragment; end is one past the last char of the word.
 *
 * Entries already present in wordPositions are left untouched.
 *
 * @param fragment      The text to scan.
 * @param wordPositions Output list that receives one pair per word.
 */
void SpellParser::check(const std::string& fragment, PositionPairList& wordPositions)
{
	std::size_t position = 0;

	// Create the token definition instance needed to invoke the lexical analyzer.
	word_count_tokens<lex::lexertl::lexer<> > word_count_functor;

	char const* first = fragment.c_str();
	char const* last = &first[fragment.size()];

	// wordPositions and position are bound by reference so the callback
	// can update them across tokens.
	lex::tokenize(first, last, word_count_functor,
		boost::bind(counter(), _1, boost::ref(wordPositions), boost::ref(position)));
}

}