summaryrefslogtreecommitdiffstats
blob: 5bafa6e555eec0c59ad78f763eeffa49b5281765 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/*
 * Copyright (c) 2011-2013 Vlad Voicu
 * Licensed under the Simplified BSD license.
 * See Documentation/Licenses/BSD-simplified.txt for more information.
 */

/*
 * Copyright (c) 2016 Isode Limited.
 * All rights reserved.
 * See the COPYING file for more information.
 */

#include <SwifTools/SpellParser.h>

#include <string>

#include <boost/bind.hpp>
#include <boost/numeric/conversion/cast.hpp>
#include <boost/ref.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

namespace lex = boost::spirit::lex;

namespace Swift {

/**
 * Lexer definition that splits a text fragment into four kinds of tokens:
 * "www." style addresses, http(s) URLs, words, and single characters.
 *
 * Every pattern is registered together with the token id that is reported
 * for its matches (ID_WWW, ID_HTTP, ID_WORD, ID_CHAR).
 */
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    word_count_tokens()
    {
        // Define tokens (regular expressions) to match strings.
        // Order is important.
        //
        // ID_WWW:  "www." followed by one or more non-space characters. The dot
        //          is escaped so that only a literal '.' is accepted; an
        //          unescaped '.' also matched a space or a letter, which made
        //          text such as "www foo" or "wwwow" count as an address.
        // ID_HTTP: "http://" or "https://" followed by one or more non-space
        //          characters.
        // ID_WORD: one or more word characters, optionally a single character
        //          out of the set ' ? | - , then one or more word characters
        //          (so a word token always has at least two word characters).
        //          NOTE(review): the literal '?' and '|' in the set look
        //          unintended; left unchanged to keep word matching as it was.
        // ID_CHAR: any single character not consumed by the patterns above.
        this->self.add
            ("w{3}\\.[^ ]+", ID_WWW)
            ("https?:\\/\\/[^ ]+", ID_HTTP)
            ("\\w{1,}['?|\\-?]?\\w{1,}", ID_WORD)
            (".", ID_CHAR);
    }
};

/**
 * Token callback: keeps a running offset into the tokenized text and records
 * the [begin, end) offsets of every word token.
 */
struct counter
{
    // Return type of operator(), exposed as a nested typedef.
    typedef bool result_type;

    /**
     * Called once for each matched token.
     *
     * @param t             the matched token; its id selects the handling and
     *                      its value's size is the matched length.
     * @param wordPositions receives a (begin, end) tuple for each ID_WORD token.
     * @param position      running offset, advanced by the length of the token.
     * @return always true, so tokenizing continues.
     */
    template <typename Token>
    bool operator()(Token const& t, PositionPairList& wordPositions, std::size_t& position) const
    {
        switch (t.id()) {
            case ID_WWW:
            case ID_HTTP:
                // Address-like tokens are not recorded; only the offset moves on.
                position += boost::numeric_cast<std::size_t>(t.value().size());
                break;
            case ID_WORD: {
                // Record the span first, then step over the word.
                const std::size_t wordEnd = position + boost::numeric_cast<std::size_t>(t.value().size());
                wordPositions.push_back(boost::tuples::make_tuple(position, wordEnd));
                position = wordEnd;
                break;
            }
            case ID_CHAR:
                // A single character: advance by one.
                position += 1;
                break;
        }
        return true;
    }
};

void SpellParser::check(const std::string& fragment, PositionPairList& wordPositions) {
    std::size_t position = 0;
    // create the token definition instance needed to invoke the lexical analyzer
    word_count_tokens<lex::lexertl::lexer<> > word_count_functor;
    char const* first = fragment.c_str();
    char const* last = &first[fragment.size()];
    lex::tokenize(first, last, word_count_functor, boost::bind(counter(), _1, boost::ref(wordPositions), boost::ref(position)));
}

}