summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVlad Voicu <vladv@rosedu.org>2012-01-19 21:49:08 (GMT)
committerKevin Smith <git@kismith.co.uk>2012-03-09 15:04:00 (GMT)
commit65679c27623512a79de7c6d92c75d1a9530fb756 (patch)
tree0f4baf79436fb592702bc6830c7b032528a0e55b /SwifTools
parent4ba6dfe66b5898c3b7162b263b2529c534feddc1 (diff)
downloadswift-contrib-65679c27623512a79de7c6d92c75d1a9530fb756.zip
swift-contrib-65679c27623512a79de7c6d92c75d1a9530fb756.tar.bz2
Big spell checker chunk
Diffstat (limited to 'SwifTools')
-rw-r--r--SwifTools/HunspellChecker.cpp13
-rw-r--r--SwifTools/HunspellChecker.h2
-rw-r--r--SwifTools/SConscript1
-rw-r--r--SwifTools/SpellChecker.h13
-rw-r--r--SwifTools/SpellParser.cpp69
-rw-r--r--SwifTools/SpellParser.h29
-rw-r--r--SwifTools/UnitTest/SConscript1
-rw-r--r--SwifTools/UnitTest/SpellParserTest.cpp51
8 files changed, 178 insertions, 1 deletions
diff --git a/SwifTools/HunspellChecker.cpp b/SwifTools/HunspellChecker.cpp
index a090311..ba7cedd 100644
--- a/SwifTools/HunspellChecker.cpp
+++ b/SwifTools/HunspellChecker.cpp
@@ -37,4 +37,17 @@ void HunspellChecker::getSuggestions(const std::string& word, std::vector<std::s
}
}
+/**
+ * Finds the misspelled words of a text fragment.
+ *
+ * The fragment is handed to the parser, which adds one (start, end) position
+ * pair per word to misspelledPositions. Every pair whose word is accepted by
+ * isCorrect() is then removed again, so only the rejected words remain.
+ * An empty fragment leaves misspelledPositions untouched.
+ *
+ * @param fragment text to check.
+ * @param misspelledPositions receives the positions of the rejected words.
+ */
+void HunspellChecker::checkFragment(const std::string& fragment, PositionPairVector& misspelledPositions) {
+    if (fragment.empty()) {
+        return;
+    }
+    parser_->check(fragment, misspelledPositions);
+    PositionPairVector::iterator it = misspelledPositions.begin();
+    while (it != misspelledPositions.end()) {
+        const int start = boost::get<0>(*it);
+        const int end = boost::get<1>(*it);
+        if (isCorrect(fragment.substr(start, end - start))) {
+            // Continue from the iterator returned by erase(): this is valid
+            // for every standard sequence container, whereas erase(it++)
+            // is only safe for node-based ones.
+            it = misspelledPositions.erase(it);
+        } else {
+            ++it;
+        }
+    }
+}
+
}
diff --git a/SwifTools/HunspellChecker.h b/SwifTools/HunspellChecker.h
index e016c08..bf56778 100644
--- a/SwifTools/HunspellChecker.h
+++ b/SwifTools/HunspellChecker.h
@@ -6,6 +6,7 @@
#include <vector>
#include <boost/algorithm/string.hpp>
+#include <boost/tuple/tuple.hpp>
#include <SwifTools/SpellChecker.h>
#pragma once
@@ -19,6 +20,7 @@ namespace Swift {
virtual ~HunspellChecker();
virtual bool isCorrect(const std::string& word);
virtual void getSuggestions(const std::string& word, std::vector<std::string>& list);
+ // Collects in misspelledPositions the positions of the words of fragment that isCorrect() rejects.
+ virtual void checkFragment(const std::string& fragment, PositionPairVector& misspelledPositions);
private:
Hunspell* speller_;
};
diff --git a/SwifTools/SConscript b/SwifTools/SConscript
index 2d5a657..41e5b74 100644
--- a/SwifTools/SConscript
+++ b/SwifTools/SConscript
@@ -30,6 +30,7 @@ if env["SCONS_STAGE"] == "build" :
"LastLineTracker.cpp",
"SpellCheckerFactory.cpp",
"HunspellChecker.cpp",
+ "SpellParser.cpp",
]
if swiftools_env.get("HAVE_SPARKLE", 0) :
diff --git a/SwifTools/SpellChecker.h b/SwifTools/SpellChecker.h
index a7272e9..a9cbe77 100644
--- a/SwifTools/SpellChecker.h
+++ b/SwifTools/SpellChecker.h
@@ -4,7 +4,10 @@
* See Documentation/Licenses/BSD-simplified.txt for more information.
*/
+#include <SwifTools/SpellParser.h>
+
#include <boost/algorithm/string.hpp>
+#include <boost/tuple/tuple.hpp>
#include <vector>
#pragma once
@@ -12,8 +15,16 @@
namespace Swift {
class SpellChecker {
public:
- virtual ~SpellChecker() { };
+ SpellChecker() {
+ parser_ = new SpellParser();
+ }
+ virtual ~SpellChecker() {
+ delete parser_;
+ };
virtual bool isCorrect(const std::string& word) = 0;
virtual void getSuggestions(const std::string& word, std::vector<std::string>& list) = 0;
+ virtual void checkFragment(const std::string& fragment, PositionPairVector& misspelledPositions) = 0;
+ protected:
+ SpellParser *parser_;
};
}
diff --git a/SwifTools/SpellParser.cpp b/SwifTools/SpellParser.cpp
new file mode 100644
index 0000000..8f5120b
--- /dev/null
+++ b/SwifTools/SpellParser.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2011 Vlad Voicu
+ * Licensed under the Simplified BSD license.
+ * See Documentation/Licenses/BSD-simplified.txt for more information.
+ */
+
+#include <SwifTools/SpellParser.h>
+
+#include <boost/spirit/include/lex_lexertl.hpp>
+#include <boost/bind.hpp>
+#include <boost/ref.hpp>
+
+#include <string>
+
+namespace lex = boost::spirit::lex;
+
+namespace Swift {
+
+/**
+ * Token definitions that split a text fragment into links, words and single
+ * characters. Each pattern is registered with the token_ids value that the
+ * token callback later switches on.
+ */
+template <typename Lexer>
+struct word_count_tokens : lex::lexer<Lexer>
+{
+    word_count_tokens()
+    {
+        // define tokens (regular expressions) to match strings
+        // order is important
+        this->self.add
+            // "www." followed by everything up to the next space; the dot is
+            // escaped so that it only matches a literal '.'
+            ("w{3}\\.[^ ]+", ID_WWW)
+            // "http://" or "https://" followed by everything up to the next space
+            ("https?:\\/\\/[^ ]+", ID_HTTP)
+            // two or more word characters, optionally joined by a single
+            // apostrophe or hyphen (e.g. "don't", "well-known")
+            ("\\w{1,}['\\-]?\\w{1,}", ID_WORD)
+            // any other single character
+            (".", ID_CHAR);
+    }
+};
+
+/**
+ * Callback invoked for every matched token. It keeps a running character
+ * offset in position and records the (start, end) offsets of each word
+ * token in wordPositions; link tokens and single characters only advance
+ * the offset.
+ */
+struct counter
+{
+    typedef bool result_type;
+
+    template <typename Token>
+    bool operator()(Token const& t, PositionPairVector& wordPositions, std::size_t& position) const
+    {
+        if (t.id() == ID_WWW || t.id() == ID_HTTP) {
+            // links are skipped as a whole
+            position += t.value().size();
+        } else if (t.id() == ID_WORD) {
+            // remember where the word starts and where it ends (exclusive)
+            const std::size_t wordEnd = position + t.value().size();
+            wordPositions.push_back(boost::tuples::make_tuple(position, wordEnd));
+            position = wordEnd;
+        } else if (t.id() == ID_CHAR) {
+            // a lone character moves the offset by one
+            ++position;
+        }
+        // returning true keeps the tokenizer going
+        return true;
+    }
+};
+
+/**
+ * Tokenizes fragment and appends the (start, end) offsets of every word
+ * token to wordPositions.
+ *
+ * @param fragment text to split into tokens.
+ * @param wordPositions list that the word offsets are appended to.
+ */
+void SpellParser::check(const std::string& fragment, PositionPairVector& wordPositions) {
+    // token definitions driving the lexical analyzer
+    word_count_tokens<lex::lexertl::lexer<> > tokens;
+    // running character offset, advanced by the callback for each token
+    std::size_t offset = 0;
+    char const* begin = fragment.c_str();
+    char const* end = begin + fragment.size();
+    lex::tokenize(begin, end, tokens, boost::bind(counter(), _1, boost::ref(wordPositions), boost::ref(offset)));
+}
+
+}
diff --git a/SwifTools/SpellParser.h b/SwifTools/SpellParser.h
new file mode 100644
index 0000000..2bc562d
--- /dev/null
+++ b/SwifTools/SpellParser.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2011 Vlad Voicu
+ * Licensed under the Simplified BSD license.
+ * See Documentation/Licenses/BSD-simplified.txt for more information.
+ */
+
+#pragma once
+
+#include <boost/algorithm/string.hpp>
+#include <boost/tuple/tuple.hpp>
+
+#include <list>
+#include <string>
+#include <vector>
+
+namespace Swift {
+    // Identifiers of the token kinds the parser distinguishes.
+    enum token_ids
+    {
+        ID_WWW = 1,
+        ID_HTTP = 2,
+        ID_WORD = 3,
+        ID_CHAR = 4
+    };
+    // Sequence of (start, end) character offsets. Despite the name this is
+    // a std::list, so it offers no operator[]; walk it with iterators.
+    typedef std::list<boost::tuple<int, int> > PositionPairVector;
+
+    // Splits a text fragment into tokens and reports where its words are.
+    class SpellParser{
+        public:
+            // Appends the (start, end) offsets of every word found in
+            // fragment to wordPositions.
+            void check(const std::string& fragment, PositionPairVector& wordPositions);
+    };
+}
diff --git a/SwifTools/UnitTest/SConscript b/SwifTools/UnitTest/SConscript
index e469deb..913ef37 100644
--- a/SwifTools/UnitTest/SConscript
+++ b/SwifTools/UnitTest/SConscript
@@ -4,4 +4,5 @@ env.Append(UNITTEST_SOURCES = [
File("LinkifyTest.cpp"),
File("TabCompleteTest.cpp"),
File("LastLineTrackerTest.cpp"),
+ File("SpellParserTest.cpp"),
])
diff --git a/SwifTools/UnitTest/SpellParserTest.cpp b/SwifTools/UnitTest/SpellParserTest.cpp
new file mode 100644
index 0000000..974f356
--- /dev/null
+++ b/SwifTools/UnitTest/SpellParserTest.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2012 Vlad Voicu
+ * Licensed under the Simplified BSD license.
+ * See Documentation/Licenses/BSD-simplified.txt for more information.
+ */
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/extensions/TestFactoryRegistry.h>
+
+#include <boost/algorithm/string.hpp>
+
+#include <SwifTools/SpellParser.h>
+
+using namespace Swift;
+
+/**
+ * Unit tests for SpellParser::check(): each test parses a short fragment and
+ * verifies the number of reported words and their (start, end) offsets.
+ */
+class SpellParserTest : public CppUnit::TestFixture {
+    CPPUNIT_TEST_SUITE(SpellParserTest);
+    CPPUNIT_TEST(testSimpleCheckFragment);
+    CPPUNIT_TEST(testWWWCheckFragment);
+    CPPUNIT_TEST_SUITE_END();
+    public:
+        void tearDown() {
+            position_.clear();
+        }
+        // Two plain words: both must be reported.
+        void testSimpleCheckFragment() {
+            parser_.check("fragment test", position_);
+            int size = position_.size();
+            CPPUNIT_ASSERT_EQUAL(2, size);
+            // Iterators are used instead of operator[] so that the test
+            // works whichever sequence container PositionPairVector is.
+            PositionPairVector::const_iterator word = position_.begin();
+            CPPUNIT_ASSERT_EQUAL(0, boost::get<0>(*word));
+            CPPUNIT_ASSERT_EQUAL(8, boost::get<1>(*word));
+            ++word;
+            CPPUNIT_ASSERT_EQUAL(9, boost::get<0>(*word));
+            CPPUNIT_ASSERT_EQUAL(13, boost::get<1>(*word));
+        }
+        // A leading "www." link must be skipped; only the two words after
+        // it are reported, with offsets counted from the fragment start.
+        void testWWWCheckFragment() {
+            parser_.check("www.link.com fragment test", position_);
+            int size = position_.size();
+            CPPUNIT_ASSERT_EQUAL(2, size);
+            PositionPairVector::const_iterator word = position_.begin();
+            CPPUNIT_ASSERT_EQUAL(13, boost::get<0>(*word));
+            CPPUNIT_ASSERT_EQUAL(21, boost::get<1>(*word));
+            ++word;
+            CPPUNIT_ASSERT_EQUAL(22, boost::get<0>(*word));
+            CPPUNIT_ASSERT_EQUAL(26, boost::get<1>(*word));
+        }
+    private:
+        // Held by value so that nothing has to be freed by hand.
+        SpellParser parser_;
+        PositionPairVector position_;
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(SpellParserTest);