diff options
author | Vlad Voicu <vladv@rosedu.org> | 2012-01-19 21:49:08 (GMT) |
---|---|---|
committer | vlad <vlad@tyrion.(none)> | 2012-10-13 13:55:44 (GMT) |
commit | eca3a097403adbd85fe3b0cf366f29ecc37cacc6 (patch) | |
tree | dbc86052c3c5e8f98eee56af06b20d79261d27f0 /SwifTools/SpellParser.cpp | |
parent | 66d6e33635a22bfdfdd82ffab1b1693aa77f6181 (diff) | |
download | swift-contrib-eca3a097403adbd85fe3b0cf366f29ecc37cacc6.zip swift-contrib-eca3a097403adbd85fe3b0cf366f29ecc37cacc6.tar.bz2 |
Big spell checker chunk
Diffstat (limited to 'SwifTools/SpellParser.cpp')
-rw-r--r-- | SwifTools/SpellParser.cpp | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/SwifTools/SpellParser.cpp b/SwifTools/SpellParser.cpp new file mode 100644 index 0000000..8f5120b --- /dev/null +++ b/SwifTools/SpellParser.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 Vlad Voicu + * Licensed under the Simplified BSD license. + * See Documentation/Licenses/BSD-simplified.txt for more information. + */ + +#include <SwifTools/SpellParser.h> + +#include <boost/spirit/include/lex_lexertl.hpp> +#include <boost/bind.hpp> +#include <boost/ref.hpp> + +#include <string> + +namespace lex = boost::spirit::lex; + +namespace Swift { + +template <typename Lexer> +struct word_count_tokens : lex::lexer<Lexer> +{ + word_count_tokens() + { + // define tokens (regular expresions) to match strings + // order is important + this->self.add + ("w{3}.[^ ]+", ID_WWW) + ("http:\\/\\/[^ ]+", ID_HTTP) + ("\\w{1,}['?|\\-?]?\\w{1,}", ID_WORD) + (".", ID_CHAR); + } +}; + +struct counter +{ + typedef bool result_type; + // the function operator gets called for each of the matched tokens + template <typename Token> + bool operator()(Token const& t, PositionPairVector& wordPositions, std::size_t& position) const + { + switch (t.id()) { + case ID_WWW: + position += t.value().size(); + break; + case ID_HTTP: + position += t.value().size(); + break; + case ID_WORD: // matched a word + wordPositions.push_back(boost::tuples::make_tuple(position, position + t.value().size())); + position += t.value().size(); + break; + case ID_CHAR: // match a simple char + ++position; + break; + } + return true; // always continue to tokenize + } +}; + +void SpellParser::check(const std::string& fragment, PositionPairVector& wordPositions) { + std::size_t position = 0; + // create the token definition instance needed to invoke the lexical analyzer + word_count_tokens<lex::lexertl::lexer<> > word_count_functor; + char const* first = fragment.c_str(); + char const* last = &first[fragment.size()]; + lex::tokenize(first, last, word_count_functor, boost::bind(counter(), _1, boost::ref(wordPositions), boost::ref(position))); +} + +} |