diff options
Diffstat (limited to 'SwifTools/SpellParser.cpp')
-rw-r--r-- | SwifTools/SpellParser.cpp | 90 |
1 files changed, 48 insertions, 42 deletions
diff --git a/SwifTools/SpellParser.cpp b/SwifTools/SpellParser.cpp index 8cb42e4..5bafa6e 100644 --- a/SwifTools/SpellParser.cpp +++ b/SwifTools/SpellParser.cpp @@ -4,14 +4,20 @@ * See Documentation/Licenses/BSD-simplified.txt for more information. */ +/* + * Copyright (c) 2016 Isode Limited. + * All rights reserved. + * See the COPYING file for more information. + */ + #include <SwifTools/SpellParser.h> -#include <boost/spirit/include/lex_lexertl.hpp> +#include <string> + #include <boost/bind.hpp> -#include <boost/ref.hpp> #include <boost/numeric/conversion/cast.hpp> - -#include <string> +#include <boost/ref.hpp> +#include <boost/spirit/include/lex_lexertl.hpp> namespace lex = boost::spirit::lex; @@ -20,51 +26,51 @@ namespace Swift { template <typename Lexer> struct word_count_tokens : lex::lexer<Lexer> { - word_count_tokens() - { - // define tokens (regular expresions) to match strings - // order is important - this->self.add - ("w{3}.[^ ]+", ID_WWW) - ("http:\\/\\/[^ ]+", ID_HTTP) - ("\\w{1,}['?|\\-?]?\\w{1,}", ID_WORD) - (".", ID_CHAR); - } + word_count_tokens() + { + // define tokens (regular expresions) to match strings + // order is important + this->self.add + ("w{3}.[^ ]+", ID_WWW) + ("http:\\/\\/[^ ]+", ID_HTTP) + ("\\w{1,}['?|\\-?]?\\w{1,}", ID_WORD) + (".", ID_CHAR); + } }; struct counter { - typedef bool result_type; - // the function operator gets called for each of the matched tokens - template <typename Token> - bool operator()(Token const& t, PositionPairList& wordPositions, std::size_t& position) const - { - switch (t.id()) { - case ID_WWW: - position += boost::numeric_cast<size_t>(t.value().size()); - break; - case ID_HTTP: - position += boost::numeric_cast<size_t>(t.value().size()); - break; - case ID_WORD: // matched a word - wordPositions.push_back(boost::tuples::make_tuple(position, position + boost::numeric_cast<size_t>(t.value().size()))); - position += boost::numeric_cast<size_t>(t.value().size()); - break; - case ID_CHAR: // match a simple char - ++position; - break; - } - return true; // always continue to tokenize - } + typedef bool result_type; + // the function operator gets called for each of the matched tokens + template <typename Token> + bool operator()(Token const& t, PositionPairList& wordPositions, std::size_t& position) const + { + switch (t.id()) { + case ID_WWW: + position += boost::numeric_cast<size_t>(t.value().size()); + break; + case ID_HTTP: + position += boost::numeric_cast<size_t>(t.value().size()); + break; + case ID_WORD: // matched a word + wordPositions.push_back(boost::tuples::make_tuple(position, position + boost::numeric_cast<size_t>(t.value().size()))); + position += boost::numeric_cast<size_t>(t.value().size()); + break; + case ID_CHAR: // match a simple char + ++position; + break; + } + return true; // always continue to tokenize + } }; void SpellParser::check(const std::string& fragment, PositionPairList& wordPositions) { - std::size_t position = 0; - // create the token definition instance needed to invoke the lexical analyzer - word_count_tokens<lex::lexertl::lexer<> > word_count_functor; - char const* first = fragment.c_str(); - char const* last = &first[fragment.size()]; - lex::tokenize(first, last, word_count_functor, boost::bind(counter(), _1, boost::ref(wordPositions), boost::ref(position))); + std::size_t position = 0; + // create the token definition instance needed to invoke the lexical analyzer + word_count_tokens<lex::lexertl::lexer<> > word_count_functor; + char const* first = fragment.c_str(); + char const* last = &first[fragment.size()]; + lex::tokenize(first, last, word_count_functor, boost::bind(counter(), _1, boost::ref(wordPositions), boost::ref(position))); } } |