diff options
Diffstat (limited to 'SwifTools')
-rw-r--r-- | SwifTools/HunspellChecker.cpp | 13 | ||||
-rw-r--r-- | SwifTools/HunspellChecker.h | 2 | ||||
-rw-r--r-- | SwifTools/SConscript | 1 | ||||
-rw-r--r-- | SwifTools/SpellChecker.h | 13 | ||||
-rw-r--r-- | SwifTools/SpellParser.cpp | 69 | ||||
-rw-r--r-- | SwifTools/SpellParser.h | 29 | ||||
-rw-r--r-- | SwifTools/UnitTest/SConscript | 1 | ||||
-rw-r--r-- | SwifTools/UnitTest/SpellParserTest.cpp | 51 |
8 files changed, 178 insertions, 1 deletions
diff --git a/SwifTools/HunspellChecker.cpp b/SwifTools/HunspellChecker.cpp index a090311..ba7cedd 100644 --- a/SwifTools/HunspellChecker.cpp +++ b/SwifTools/HunspellChecker.cpp @@ -37,4 +37,17 @@ void HunspellChecker::getSuggestions(const std::string& word, std::vector<std::s } } +void HunspellChecker::checkFragment(const std::string& fragment, PositionPairVector& misspelledPositions) { + if (!fragment.empty()) { + parser_->check(fragment, misspelledPositions); + for (PositionPairVector::iterator it = misspelledPositions.begin(); it != misspelledPositions.end();) { + if (isCorrect(fragment.substr(boost::get<0>(*it), boost::get<1>(*it) - boost::get<0>(*it)))) { + misspelledPositions.erase(it++); + } else { + ++it; + } + } + } +} + } diff --git a/SwifTools/HunspellChecker.h b/SwifTools/HunspellChecker.h index e016c08..bf56778 100644 --- a/SwifTools/HunspellChecker.h +++ b/SwifTools/HunspellChecker.h @@ -6,6 +6,7 @@ #include <vector> #include <boost/algorithm/string.hpp> +#include <boost/tuple/tuple.hpp> #include <SwifTools/SpellChecker.h> #pragma once @@ -19,6 +20,7 @@ namespace Swift { virtual ~HunspellChecker(); virtual bool isCorrect(const std::string& word); virtual void getSuggestions(const std::string& word, std::vector<std::string>& list); + virtual void checkFragment(const std::string& fragment, PositionPairVector& misspelledPositions); private: Hunspell* speller_; }; diff --git a/SwifTools/SConscript b/SwifTools/SConscript index 2d5a657..41e5b74 100644 --- a/SwifTools/SConscript +++ b/SwifTools/SConscript @@ -30,6 +30,7 @@ if env["SCONS_STAGE"] == "build" : "LastLineTracker.cpp", "SpellCheckerFactory.cpp", "HunspellChecker.cpp", + "SpellParser.cpp", ] if swiftools_env.get("HAVE_SPARKLE", 0) : diff --git a/SwifTools/SpellChecker.h b/SwifTools/SpellChecker.h index a7272e9..a9cbe77 100644 --- a/SwifTools/SpellChecker.h +++ b/SwifTools/SpellChecker.h @@ -4,7 +4,10 @@ * See Documentation/Licenses/BSD-simplified.txt for more information. */ +#include <SwifTools/SpellParser.h> + #include <boost/algorithm/string.hpp> +#include <boost/tuple/tuple.hpp> #include <vector> #pragma once @@ -12,8 +15,16 @@ namespace Swift { class SpellChecker { public: - virtual ~SpellChecker() { }; + SpellChecker() { + parser_ = new SpellParser(); + } + virtual ~SpellChecker() { + delete parser_; + }; virtual bool isCorrect(const std::string& word) = 0; virtual void getSuggestions(const std::string& word, std::vector<std::string>& list) = 0; + virtual void checkFragment(const std::string& fragment, PositionPairVector& misspelledPositions) = 0; + protected: + SpellParser *parser_; }; } diff --git a/SwifTools/SpellParser.cpp b/SwifTools/SpellParser.cpp new file mode 100644 index 0000000..8f5120b --- /dev/null +++ b/SwifTools/SpellParser.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 Vlad Voicu + * Licensed under the Simplified BSD license. + * See Documentation/Licenses/BSD-simplified.txt for more information. + */ + +#include <SwifTools/SpellParser.h> + +#include <boost/spirit/include/lex_lexertl.hpp> +#include <boost/bind.hpp> +#include <boost/ref.hpp> + +#include <string> + +namespace lex = boost::spirit::lex; + +namespace Swift { + +template <typename Lexer> +struct word_count_tokens : lex::lexer<Lexer> +{ + word_count_tokens() + { + // define tokens (regular expresions) to match strings + // order is important + this->self.add + ("w{3}.[^ ]+", ID_WWW) + ("http:\\/\\/[^ ]+", ID_HTTP) + ("\\w{1,}['?|\\-?]?\\w{1,}", ID_WORD) + (".", ID_CHAR); + } +}; + +struct counter +{ + typedef bool result_type; + // the function operator gets called for each of the matched tokens + template <typename Token> + bool operator()(Token const& t, PositionPairVector& wordPositions, std::size_t& position) const + { + switch (t.id()) { + case ID_WWW: + position += t.value().size(); + break; + case ID_HTTP: + position += t.value().size(); + break; + case ID_WORD: // matched a word + wordPositions.push_back(boost::tuples::make_tuple(position, position + t.value().size())); + position += t.value().size(); + break; + case ID_CHAR: // match a simple char + ++position; + break; + } + return true; // always continue to tokenize + } +}; + +void SpellParser::check(const std::string& fragment, PositionPairVector& wordPositions) { + std::size_t position = 0; + // create the token definition instance needed to invoke the lexical analyzer + word_count_tokens<lex::lexertl::lexer<> > word_count_functor; + char const* first = fragment.c_str(); + char const* last = &first[fragment.size()]; + lex::tokenize(first, last, word_count_functor, boost::bind(counter(), _1, boost::ref(wordPositions), boost::ref(position))); +} + +} diff --git a/SwifTools/SpellParser.h b/SwifTools/SpellParser.h new file mode 100644 index 0000000..2bc562d --- /dev/null +++ b/SwifTools/SpellParser.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2011 Vlad Voicu + * Licensed under the Simplified BSD license. + * See Documentation/Licenses/BSD-simplified.txt for more information. + */ + +#pragma once + +#include <boost/algorithm/string.hpp> +#include <boost/tuple/tuple.hpp> +#include <boost/algorithm/string.hpp> + +#include <vector> + +namespace Swift { + enum token_ids + { + ID_WWW = 1, + ID_HTTP = 2, + ID_WORD = 3, + ID_CHAR = 4, + }; + typedef std::list<boost::tuple<int, int> > PositionPairVector; + + class SpellParser{ + public: + void check(const std::string& fragment, PositionPairVector& wordPositions); + }; +} diff --git a/SwifTools/UnitTest/SConscript b/SwifTools/UnitTest/SConscript index e469deb..913ef37 100644 --- a/SwifTools/UnitTest/SConscript +++ b/SwifTools/UnitTest/SConscript @@ -4,4 +4,5 @@ env.Append(UNITTEST_SOURCES = [ File("LinkifyTest.cpp"), File("TabCompleteTest.cpp"), File("LastLineTrackerTest.cpp"), + File("SpellParserTest.cpp"), ]) diff --git a/SwifTools/UnitTest/SpellParserTest.cpp b/SwifTools/UnitTest/SpellParserTest.cpp new file mode 100644 index 0000000..974f356 --- /dev/null +++ b/SwifTools/UnitTest/SpellParserTest.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2012 Vlad Voicu + * Licensed under the Simplified BSD license. + * See Documentation/Licenses/BSD-simplified.txt for more information. + */ + +#include <cppunit/extensions/HelperMacros.h> +#include <cppunit/extensions/TestFactoryRegistry.h> + +#include <boost/algorithm/string.hpp> + +#include <SwifTools/SpellParser.h> + +using namespace Swift; + +class SpellParserTest : public CppUnit::TestFixture { + CPPUNIT_TEST_SUITE(SpellParserTest); + CPPUNIT_TEST(testSimpleCheckFragment); + CPPUNIT_TEST(testWWWCheckFragment); + CPPUNIT_TEST_SUITE_END(); + public: + SpellParserTest() { + parser_ = new SpellParser(); + }; + void tearDown() { + position_.clear(); + } + void testSimpleCheckFragment() { + parser_->check("fragment test", position_); + int size = position_.size(); + CPPUNIT_ASSERT_EQUAL(2, size); + CPPUNIT_ASSERT_EQUAL(0, boost::get<0>(position_[0])); + CPPUNIT_ASSERT_EQUAL(8, boost::get<1>(position_[0])); + CPPUNIT_ASSERT_EQUAL(9, boost::get<0>(position_[1])); + CPPUNIT_ASSERT_EQUAL(13, boost::get<1>(position_[1])); + } + void testWWWCheckFragment() { + parser_->check("www.link.com fragment test", position_); + int size = position_.size(); + CPPUNIT_ASSERT_EQUAL(2, size); + CPPUNIT_ASSERT_EQUAL(13, boost::get<0>(position_[0])); + CPPUNIT_ASSERT_EQUAL(21, boost::get<1>(position_[0])); + CPPUNIT_ASSERT_EQUAL(22, boost::get<0>(position_[1])); + CPPUNIT_ASSERT_EQUAL(26, boost::get<1>(position_[1])); + } + private: + SpellParser *parser_; + PositionPairVector position_; +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(SpellParserTest); |