| author | Vlad Voicu <vladvoic@gmail.com> | 2011-11-28 16:37:32 (GMT) |
|---|---|---|
| committer | Vlad Voicu <vladvoic@gmail.com> | 2013-03-15 09:21:52 (GMT) |
| commit | 2061b06eccca67595c50edd81c44c5b961bf108b (patch) | |
| tree | 7fdc9e4cc80a9d8ddbe5364a531ef3449f72ab2b /SwifTools/SpellParser.cpp | |
| parent | a069a0df0f51a948a86e34d99f952a33eecd97ba (diff) | |
| download | swift-contrib-2061b06eccca67595c50edd81c44c5b961bf108b.zip, swift-contrib-2061b06eccca67595c50edd81c44c5b961bf108b.tar.bz2 | |
Spell checker implementation using Hunspell
Change-Id: Ia15b6532edf6eef7c45bdfb273e77f65ce998f13
License: This patch is BSD-licensed, see Documentation/Licenses/BSD-simplified.txt for details
Diffstat (limited to 'SwifTools/SpellParser.cpp')
| -rw-r--r-- | SwifTools/SpellParser.cpp | 69 | 
1 file changed, 69 insertions, 0 deletions
```diff
diff --git a/SwifTools/SpellParser.cpp b/SwifTools/SpellParser.cpp
new file mode 100644
index 0000000..7208cdb
--- /dev/null
+++ b/SwifTools/SpellParser.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2011 Vlad Voicu
+ * Licensed under the Simplified BSD license.
+ * See Documentation/Licenses/BSD-simplified.txt for more information.
+ */
+
+#include <SwifTools/SpellParser.h>
+
+#include <boost/spirit/include/lex_lexertl.hpp>
+#include <boost/bind.hpp>
+#include <boost/ref.hpp>
+
+#include <string>
+
+namespace lex = boost::spirit::lex;
+
+namespace Swift {
+
+template <typename Lexer>
+struct word_count_tokens : lex::lexer<Lexer>
+{
+	word_count_tokens()
+	{
+		// define tokens (regular expressions) to match strings
+		// order is important
+		this->self.add
+			("w{3}.[^ ]+", ID_WWW)
+			("http:\\/\\/[^ ]+", ID_HTTP)
+			("\\w{1,}['?|\\-?]?\\w{1,}", ID_WORD)
+			(".", ID_CHAR);
+	}
+};
+
+struct counter
+{
+	typedef bool result_type;
+	// the function operator gets called for each of the matched tokens
+	template <typename Token>
+	bool operator()(Token const& t, PositionPairList& wordPositions, std::size_t& position) const
+	{
+		switch (t.id()) {
+			case ID_WWW:
+				position += t.value().size();
+				break;
+			case ID_HTTP:
+				position += t.value().size();
+				break;
+			case ID_WORD:       // matched a word
+				wordPositions.push_back(boost::tuples::make_tuple(position, position + t.value().size()));
+				position += t.value().size();
+				break;
+			case ID_CHAR:       // matched a single character
+				++position;
+				break;
+		}
+		return true;        // always continue to tokenize
+	}
+};
+
+void SpellParser::check(const std::string& fragment, PositionPairList& wordPositions) {
+	std::size_t position = 0;
+	// create the token definition instance needed to invoke the lexical analyzer
+	word_count_tokens<lex::lexertl::lexer<> > word_count_functor;
+	char const* first = fragment.c_str();
+	char const* last = &first[fragment.size()];
+	lex::tokenize(first, last, word_count_functor, boost::bind(counter(), _1, boost::ref(wordPositions), boost::ref(position)));
+}
+
+}
```
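For reference, a minimal caller of `SpellParser::check()` could look like the sketch below. This is not part of the patch: it assumes that `SwifTools/SpellParser.h` (not shown in this diff) declares a default-constructible `SpellParser`, the `ID_*` token ids, and `PositionPairList` as a standard sequence of `boost::tuple<std::size_t, std::size_t>` offset pairs, matching what the `counter` functor pushes above.

```cpp
// Hypothetical usage sketch of SpellParser::check(); the exact declarations in
// SwifTools/SpellParser.h are assumptions, not part of this diff.
#include <SwifTools/SpellParser.h>

#include <boost/tuple/tuple.hpp>

#include <iostream>
#include <string>

int main() {
	Swift::SpellParser parser;
	Swift::PositionPairList wordPositions;
	const std::string fragment = "check www.example.com and this sentnce";

	// Plain words are reported as (begin, end) offset pairs; www/http tokens and
	// single characters only advance the running position.
	parser.check(fragment, wordPositions);

	for (Swift::PositionPairList::const_iterator i = wordPositions.begin(); i != wordPositions.end(); ++i) {
		std::size_t begin = boost::tuples::get<0>(*i);
		std::size_t end = boost::tuples::get<1>(*i);
		std::cout << fragment.substr(begin, end - begin)
		          << " [" << begin << ", " << end << ")" << std::endl;
	}
	return 0;
}
```

Each reported (begin, end) pair marks a candidate word for the Hunspell-backed checker to look up; URLs and single characters are skipped but still advance the running offset so later positions stay correct.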