| author | Vlad Voicu <vladvoic@gmail.com> | 2011-11-28 16:37:32 (GMT) |
|---|---|---|
| committer | Vlad Voicu <vladvoic@gmail.com> | 2013-03-15 09:21:52 (GMT) |
| commit | 2061b06eccca67595c50edd81c44c5b961bf108b (patch) | |
| tree | 7fdc9e4cc80a9d8ddbe5364a531ef3449f72ab2b /SwifTools/SpellParser.cpp | |
| parent | a069a0df0f51a948a86e34d99f952a33eecd97ba (diff) | |
| download | swift-contrib-2061b06eccca67595c50edd81c44c5b961bf108b.zip, swift-contrib-2061b06eccca67595c50edd81c44c5b961bf108b.tar.bz2 | |
Spell checker implementation using Hunspell
Change-Id: Ia15b6532edf6eef7c45bdfb273e77f65ce998f13
License: This patch is BSD-licensed, see Documentation/Licenses/BSD-simplified.txt for details
Diffstat (limited to 'SwifTools/SpellParser.cpp')
| -rw-r--r-- | SwifTools/SpellParser.cpp | 69 | 
1 file changed, 69 insertions, 0 deletions
```diff
diff --git a/SwifTools/SpellParser.cpp b/SwifTools/SpellParser.cpp
new file mode 100644
index 0000000..7208cdb
--- /dev/null
+++ b/SwifTools/SpellParser.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2011 Vlad Voicu
+ * Licensed under the Simplified BSD license.
+ * See Documentation/Licenses/BSD-simplified.txt for more information.
+ */
+
+#include <SwifTools/SpellParser.h>
+
+#include <boost/spirit/include/lex_lexertl.hpp>
+#include <boost/bind.hpp>
+#include <boost/ref.hpp>
+
+#include <string>
+
+namespace lex = boost::spirit::lex;
+
+namespace Swift {
+
+template <typename Lexer>
+struct word_count_tokens : lex::lexer<Lexer>
+{
+	word_count_tokens()
+	{
+		// define tokens (regular expressions) to match strings
+		// order is important
+		this->self.add
+			("w{3}.[^ ]+", ID_WWW)
+			("http:\\/\\/[^ ]+", ID_HTTP)
+			("\\w{1,}['?|\\-?]?\\w{1,}", ID_WORD)
+			(".", ID_CHAR);
+	}
+};
+
+struct counter
+{
+	typedef bool result_type;
+	// the function operator gets called for each of the matched tokens
+	template <typename Token>
+	bool operator()(Token const& t, PositionPairList& wordPositions, std::size_t& position) const
+	{
+		switch (t.id()) {
+			case ID_WWW:
+				position += t.value().size();
+				break;
+			case ID_HTTP:
+				position += t.value().size();
+				break;
+			case ID_WORD:       // matched a word
+				wordPositions.push_back(boost::tuples::make_tuple(position, position + t.value().size()));
+				position += t.value().size();
+				break;
+			case ID_CHAR:       // matched a single character
+				++position;
+				break;
+		}
+		return true;        // always continue to tokenize
+	}
+};
+
+void SpellParser::check(const std::string& fragment, PositionPairList& wordPositions) {
+	std::size_t position = 0;
+	// create the token definition instance needed to invoke the lexical analyzer
+	word_count_tokens<lex::lexertl::lexer<> > word_count_functor;
+	char const* first = fragment.c_str();
+	char const* last = &first[fragment.size()];
+	lex::tokenize(first, last, word_count_functor, boost::bind(counter(), _1, boost::ref(wordPositions), boost::ref(position)));
+}
+
+}
```
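For reference, a minimal caller of `SpellParser::check()` could look like the sketch below. This is not part of the patch: it assumes that `SwifTools/SpellParser.h` (not shown in this diff) declares a default-constructible `SpellParser`, the `ID_*` token ids, and `PositionPairList` as a standard sequence of `boost::tuple<std::size_t, std::size_t>` offset pairs, matching what the `counter` functor pushes above.

```cpp
// Hypothetical usage sketch of SpellParser::check(); the exact declarations in
// SwifTools/SpellParser.h are assumptions, not part of this diff.
#include <SwifTools/SpellParser.h>

#include <boost/tuple/tuple.hpp>

#include <iostream>
#include <string>

int main() {
	Swift::SpellParser parser;
	Swift::PositionPairList wordPositions;
	const std::string fragment = "check www.example.com and this sentnce";

	// Plain words are reported as (begin, end) offset pairs; www/http tokens and
	// single characters only advance the running position.
	parser.check(fragment, wordPositions);

	for (Swift::PositionPairList::const_iterator i = wordPositions.begin(); i != wordPositions.end(); ++i) {
		std::size_t begin = boost::tuples::get<0>(*i);
		std::size_t end = boost::tuples::get<1>(*i);
		std::cout << fragment.substr(begin, end - begin)
		          << " [" << begin << ", " << end << ")" << std::endl;
	}
	return 0;
}
```

Each reported (begin, end) pair marks a candidate word for the Hunspell-backed checker to look up; URLs and single characters are skipped but still advance the running offset so later positions stay correct.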