diff options
Diffstat (limited to 'SwifTools')
-rw-r--r-- | SwifTools/HunspellChecker.cpp | 146 | ||||
-rw-r--r-- | SwifTools/HunspellChecker.h | 28 | ||||
-rw-r--r-- | SwifTools/MacOSXChecker.h | 7 | ||||
-rw-r--r-- | SwifTools/MacOSXChecker.mm | 16 | ||||
-rw-r--r-- | SwifTools/SpellChecker.h | 16 | ||||
-rw-r--r-- | SwifTools/SpellCheckerFactory.cpp | 18 | ||||
-rw-r--r-- | SwifTools/SpellCheckerFactory.h | 2 |
7 files changed, 193 insertions, 40 deletions
diff --git a/SwifTools/HunspellChecker.cpp b/SwifTools/HunspellChecker.cpp index fb1a5d6..1de369b 100644 --- a/SwifTools/HunspellChecker.cpp +++ b/SwifTools/HunspellChecker.cpp @@ -15,47 +15,151 @@ #include <algorithm> #include <boost/algorithm/string.hpp> +#include <boost/filesystem.hpp> +#include <boost/regex.hpp> #include <hunspell/hunspell.hxx> +#include <Swiften/Base/Log.h> + namespace Swift { -HunspellChecker::HunspellChecker(const char* affix_path, const char* dictionary_path) { - speller_ = new Hunspell(affix_path, dictionary_path); +static std::vector<std::string> recursiveFileSearch(const std::vector<std::string>& paths, const boost::regex& regex) { + std::vector<std::string> matches; + + for (auto& path : paths) { + if (boost::filesystem::exists(path)) { + boost::cmatch what; + for (auto filename : boost::make_iterator_range(boost::filesystem::directory_iterator(path), boost::filesystem::directory_iterator())) { + if (boost::filesystem::is_regular_file(filename) && boost::regex_match(filename.path().c_str(), what, regex)) { + matches.push_back(filename.path().string()); + } + } + } + } + return matches; +} + +HunspellChecker::HunspellChecker() { } HunspellChecker::~HunspellChecker() { - delete speller_; +} + +std::vector<std::string> HunspellChecker::hunspellDictionaryPaths() const { + // The following list of paths comes from the source of the Hunspell command line tool. + std::vector<std::string> paths = { + "/usr/share/hunspell", + "/usr/share/myspell", + "/usr/share/myspell/dicts", + "/Library/Spelling", + "/opt/openoffice.org/basis3.0/share/dict/ooo", + "/usr/lib/openoffice.org/basis3.0/share/dict/ooo", + "/opt/openoffice.org2.4/share/dict/ooo", + "/usr/lib/openoffice.org2.4/share/dict/ooo", + "/opt/openoffice.org2.3/share/dict/ooo", + "/usr/lib/openoffice.org2.3/share/dict/ooo", + "/opt/openoffice.org2.2/share/dict/ooo", + "/usr/lib/openoffice.org2.2/share/dict/ooo", + "/opt/openoffice.org2.1/share/dict/ooo", + "/usr/lib/openoffice.org2.1/share/dict/ooo", + "/opt/openoffice.org2.0/share/dict/ooo", + "/usr/lib/openoffice.org2.0/share/dict/ooo" + }; + + if (std::getenv("DICPATH")) { + std::string dicpathEnvironment(std::getenv("DICPATH")); + std::vector<std::string> dicpaths; + boost::split(dicpaths,dicpathEnvironment,boost::is_any_of(":")); + paths.insert(paths.begin(), dicpaths.begin(), dicpaths.end()); + } + + return paths; +} + +bool HunspellChecker::isAutomaticallyDetectingLanguage() { + return false; +} + +void HunspellChecker::setActiveLanguage(const std::string& language) { + auto dictionaries = detectedDictionaries(); + if (dictionaries.find(language) != dictionaries.end()) { + SWIFT_LOG(debug) << "Initialized Hunspell with dic,aff files " << dictionaries[language].dicPath << " , " << dictionaries[language].affPath << std::endl; + speller_ = std::unique_ptr<Hunspell>(new Hunspell(dictionaries[language].affPath.c_str(), dictionaries[language].dicPath.c_str())); + activeLangauge_ = language; + } + else { + SWIFT_LOG(warning) << "Unsupported language '" << language << "'" << std::endl; + } +} + +std::string HunspellChecker::activeLanguage() const { + return activeLangauge_.get_value_or(""); +} + +std::vector<std::string> HunspellChecker::supportedLanguages() const { + std::vector<std::string> languages; + + for (const auto& n : detectedDictionaries()) { + languages.push_back(n.first); + } + + return languages; +} + +std::unordered_map<std::string, HunspellChecker::Dictionary> HunspellChecker::detectedDictionaries() const { + std::unordered_map<std::string, HunspellChecker::Dictionary> dictionaries; + + auto dictionaryFiles = recursiveFileSearch(hunspellDictionaryPaths(), boost::regex(".*\\.dic$")); + for (const auto& dictionary : dictionaryFiles) { + std::string correspondingAffixPath = dictionary; + boost::replace_last(correspondingAffixPath, ".dic", ".aff"); + if (boost::filesystem::is_regular_file(correspondingAffixPath)) { + auto filenameWithoutExtension = boost::filesystem::basename(dictionary); + dictionaries[filenameWithoutExtension] = {dictionary, correspondingAffixPath}; + } + } + + return dictionaries; } bool HunspellChecker::isCorrect(const std::string& word) { - return speller_->spell(word.c_str()); + if (speller_) { + return speller_->spell(word.c_str()); + } + else { + return true; + } } void HunspellChecker::getSuggestions(const std::string& word, std::vector<std::string>& list) { - char **suggestList = NULL; - int words_returned = 0; - if (!word.empty()) { - words_returned = speller_->suggest(&suggestList, word.c_str()); - if (suggestList != NULL) { - for (int i = 0; i < words_returned; ++i) { - list.push_back(suggestList[i]); - free(suggestList[i]); + if (speller_) { + char **suggestList = NULL; + int words_returned = 0; + if (!word.empty()) { + words_returned = speller_->suggest(&suggestList, word.c_str()); + if (suggestList != NULL) { + for (int i = 0; i < words_returned; ++i) { + list.push_back(suggestList[i]); + free(suggestList[i]); + } + free(suggestList); } - free(suggestList); } } } void HunspellChecker::checkFragment(const std::string& fragment, PositionPairList& misspelledPositions) { - if (!fragment.empty()) { - parser_->check(fragment, misspelledPositions); - for (PositionPairList::iterator it = misspelledPositions.begin(); it != misspelledPositions.end();) { - if (isCorrect(fragment.substr(boost::get<0>(*it), boost::get<1>(*it) - boost::get<0>(*it)))) { - it = misspelledPositions.erase(it); - } - else { - ++it; + if (speller_) { + if (!fragment.empty()) { + parser_.check(fragment, misspelledPositions); + for (PositionPairList::iterator it = misspelledPositions.begin(); it != misspelledPositions.end();) { + if (isCorrect(fragment.substr(boost::get<0>(*it), boost::get<1>(*it) - boost::get<0>(*it)))) { + it = misspelledPositions.erase(it); + } + else { + ++it; + } } } } diff --git a/SwifTools/HunspellChecker.h b/SwifTools/HunspellChecker.h index 076b468..2d4831e 100644 --- a/SwifTools/HunspellChecker.h +++ b/SwifTools/HunspellChecker.h @@ -12,10 +12,13 @@ #pragma once +#include <memory> +#include <string> +#include <unordered_map> #include <vector> #include <boost/algorithm/string.hpp> -#include <boost/tuple/tuple.hpp> +#include <boost/optional.hpp> #include <SwifTools/SpellChecker.h> @@ -24,12 +27,31 @@ class Hunspell; namespace Swift { class HunspellChecker : public SpellChecker { public: - HunspellChecker(const char* affix_path, const char* dict_path); + HunspellChecker(); virtual ~HunspellChecker(); + + virtual bool isAutomaticallyDetectingLanguage(); + + virtual void setActiveLanguage(const std::string& language); + virtual std::string activeLanguage() const; + virtual std::vector<std::string> supportedLanguages() const; + virtual bool isCorrect(const std::string& word); virtual void getSuggestions(const std::string& word, std::vector<std::string>& list); virtual void checkFragment(const std::string& fragment, PositionPairList& misspelledPositions); + private: - Hunspell* speller_; + struct Dictionary { + std::string dicPath; + std::string affPath; + }; + + std::unordered_map<std::string, Dictionary> detectedDictionaries() const; + std::vector<std::string> hunspellDictionaryPaths() const; + + private: + std::unique_ptr<Hunspell> speller_; + boost::optional<std::string> activeLangauge_; + }; } diff --git a/SwifTools/MacOSXChecker.h b/SwifTools/MacOSXChecker.h index be9a32a..7587c99 100644 --- a/SwifTools/MacOSXChecker.h +++ b/SwifTools/MacOSXChecker.h @@ -23,6 +23,13 @@ namespace Swift { public: MacOSXChecker(); virtual ~MacOSXChecker(); + + virtual bool isAutomaticallyDetectingLanguage(); + + virtual void setActiveLanguage(const std::string& language); + virtual std::string activeLanguage() const; + virtual std::vector<std::string> supportedLanguages() const; + virtual bool isCorrect(const std::string& word); virtual void getSuggestions(const std::string& word, std::vector<std::string>& list); virtual void checkFragment(const std::string& fragment, PositionPairList& misspelledPositions); diff --git a/SwifTools/MacOSXChecker.mm b/SwifTools/MacOSXChecker.mm index 5f4f9c3..519f06c 100644 --- a/SwifTools/MacOSXChecker.mm +++ b/SwifTools/MacOSXChecker.mm @@ -13,6 +13,7 @@ #include <SwifTools/MacOSXChecker.h> #include <algorithm> +#include <cassert> #include <boost/algorithm/string.hpp> @@ -33,6 +34,21 @@ bool MacOSXChecker::isCorrect(const std::string& /*word*/) { return false; } +bool MacOSXChecker::isAutomaticallyDetectingLanguage() { + return true; +} + +void MacOSXChecker::setActiveLanguage(const std::string& /*language*/) { + assert(false); +} + +std::string MacOSXChecker::activeLanguage() const { + assert(false); +} +std::vector<std::string> MacOSXChecker::supportedLanguages() const { + assert(false); +} + void MacOSXChecker::getSuggestions(const std::string& word, std::vector<std::string>& list) { NSSpellChecker* spellChecker = [NSSpellChecker sharedSpellChecker]; NSString* wordString = [[NSString alloc] initWithUTF8String: word.c_str()]; diff --git a/SwifTools/SpellChecker.h b/SwifTools/SpellChecker.h index 415d3f6..664fc63 100644 --- a/SwifTools/SpellChecker.h +++ b/SwifTools/SpellChecker.h @@ -14,24 +14,28 @@ #include <vector> -#include <boost/algorithm/string.hpp> -#include <boost/tuple/tuple.hpp> - #include <SwifTools/SpellParser.h> namespace Swift { class SpellChecker { public: SpellChecker() { - parser_ = new SpellParser(); } + virtual ~SpellChecker() { - delete parser_; } + + virtual bool isAutomaticallyDetectingLanguage() = 0; + + virtual void setActiveLanguage(const std::string& language) = 0; + virtual std::string activeLanguage() const = 0; + virtual std::vector<std::string> supportedLanguages() const = 0; + virtual bool isCorrect(const std::string& word) = 0; virtual void getSuggestions(const std::string& word, std::vector<std::string>& list) = 0; virtual void checkFragment(const std::string& fragment, PositionPairList& misspelledPositions) = 0; + protected: - SpellParser *parser_; + SpellParser parser_; }; } diff --git a/SwifTools/SpellCheckerFactory.cpp b/SwifTools/SpellCheckerFactory.cpp index e53447e..bfd3d4a 100644 --- a/SwifTools/SpellCheckerFactory.cpp +++ b/SwifTools/SpellCheckerFactory.cpp @@ -4,6 +4,12 @@ * See Documentation/Licenses/BSD-simplified.txt for more information. */ +/* + * Copyright (c) 2016 Isode Limited. + * All rights reserved. + * See the COPYING file for more information. + */ + #include <boost/filesystem/operations.hpp> #include <SwifTools/SpellChecker.h> @@ -23,17 +29,11 @@ SpellCheckerFactory::SpellCheckerFactory() { } #ifdef HAVE_HUNSPELL -SpellChecker* SpellCheckerFactory::createSpellChecker(const std::string& dictFile) { - std::string affixFile(dictFile); - boost::replace_all(affixFile, ".dic", ".aff"); - if ((boost::filesystem::exists(dictFile)) && (boost::filesystem::exists(affixFile))) { - return new HunspellChecker(affixFile.c_str(), dictFile.c_str()); - } - // If dictionaries don't exist disable the checker - return NULL; +SpellChecker* SpellCheckerFactory::createSpellChecker() { + return new HunspellChecker(); } #elif defined(SWIFTEN_PLATFORM_MACOSX) -SpellChecker* SpellCheckerFactory::createSpellChecker(const std::string& /*dictFile*/) { +SpellChecker* SpellCheckerFactory::createSpellChecker() { return new MacOSXChecker(); } #endif diff --git a/SwifTools/SpellCheckerFactory.h b/SwifTools/SpellCheckerFactory.h index 2e1711a..eb2ade6 100644 --- a/SwifTools/SpellCheckerFactory.h +++ b/SwifTools/SpellCheckerFactory.h @@ -28,6 +28,6 @@ namespace Swift { class SpellCheckerFactory { public: SpellCheckerFactory(); - SpellChecker* createSpellChecker(const std::string& dictFile); + SpellChecker* createSpellChecker(); }; } |