summaryrefslogtreecommitdiffstats
blob: e733533a4169722ba60954ad36394c8d3e4954eb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
//  Copyright (c) 2001-2011 Hartmut Kaiser
// 
//  Distributed under the Boost Software License, Version 1.0. (See accompanying 
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM)
#define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM

#if defined(_MSC_VER)
#pragma once
#endif

#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/qi/detail/assign_to.hpp>
#include <boost/spirit/home/lex/reference.hpp>
#include <boost/spirit/home/lex/meta_compiler.hpp>
#include <boost/spirit/home/lex/lexer_type.hpp>
#include <boost/spirit/home/lex/lexer/token_def.hpp>
#include <boost/assert.hpp>
#include <boost/noncopyable.hpp>
#include <boost/detail/iterator.hpp>
#include <boost/fusion/include/vector.hpp>
#include <boost/mpl/assert.hpp>
#include <boost/range/iterator_range.hpp>
#include <string>

namespace boost { namespace spirit { namespace lex
{
    ///////////////////////////////////////////////////////////////////////////
    namespace detail
    {
        ///////////////////////////////////////////////////////////////////////
        template <typename LexerDef>
        struct lexer_def_
          : proto::extends<
                typename proto::terminal<
                   lex::reference<lexer_def_<LexerDef> const> 
                >::type
              , lexer_def_<LexerDef> >
          , qi::parser<lexer_def_<LexerDef> >
          , lex::lexer_type<lexer_def_<LexerDef> >
        {
        private:
            // avoid warnings about using 'this' in constructor
            lexer_def_& this_() { return *this; }

            typedef typename LexerDef::char_type char_type;
            typedef typename LexerDef::string_type string_type;
            typedef typename LexerDef::id_type id_type;

            typedef lex::reference<lexer_def_ const> reference_;
            typedef typename proto::terminal<reference_>::type terminal_type;
            typedef proto::extends<terminal_type, lexer_def_> proto_base_type;

            reference_ alias() const
            {
                return reference_(*this);
            }

        public:
            // Qi interface: metafunction calculating parser attribute type
            template <typename Context, typename Iterator>
            struct attribute
            {
                //  the return value of a token set contains the matched token 
                //  id, and the corresponding pair of iterators
                typedef typename Iterator::base_iterator_type iterator_type;
                typedef 
                    fusion::vector2<id_type, iterator_range<iterator_type> > 
                type;
            };

            // Qi interface: parse functionality
            template <typename Iterator, typename Context
              , typename Skipper, typename Attribute>
            bool parse(Iterator& first, Iterator const& last
              , Context& /*context*/, Skipper const& skipper
              , Attribute& attr) const
            {
                qi::skip_over(first, last, skipper);   // always do a pre-skip

                if (first != last) {
                    typedef typename 
                        boost::detail::iterator_traits<Iterator>::value_type 
                    token_type;

                    token_type const& t = *first;
                    if (token_is_valid(t) && t.state() == first.get_state()) {
                    // any of the token definitions matched
                        spirit::traits::assign_to(t, attr);
                        ++first;
                        return true;
                    }
                }
                return false;
            }

            // Qi interface: 'what' functionality
            template <typename Context>
            info what(Context& /*context*/) const
            {
                return info("lexer");
            }

        private:
            // allow to use the lexer.self.add("regex1", id1)("regex2", id2);
            // syntax
            struct adder
            {
                adder(lexer_def_& def_) 
                  : def(def_) {}

                // Add a token definition based on a single character as given
                // by the first parameter, the second parameter allows to 
                // specify the token id to use for the new token. If no token
                // id is given the character code is used.
                adder const& operator()(char_type c
                  , id_type token_id = id_type()) const
                {
                    if (id_type() == token_id)
                        token_id = static_cast<id_type>(c);
                    def.def.add_token (def.state.c_str(), c, token_id
                        , def.targetstate.empty() ? 0 : def.targetstate.c_str());
                    return *this;
                }

                // Add a token definition based on a character sequence as 
                // given by the first parameter, the second parameter allows to 
                // specify the token id to use for the new token. If no token
                // id is given this function will generate a unique id to be 
                // used as the token's id.
                adder const& operator()(string_type const& s
                  , id_type token_id = id_type()) const
                {
                    if (id_type() == token_id)
                        token_id = def.def.get_next_id();
                    def.def.add_token (def.state.c_str(), s, token_id
                        , def.targetstate.empty() ? 0 : def.targetstate.c_str());
                    return *this;
                }

                template <typename Attribute>
                adder const& operator()(
                    token_def<Attribute, char_type, id_type>& tokdef
                  , id_type token_id = id_type()) const
                {
                    // make sure we have a token id
                    if (id_type() == token_id) {
                        if (id_type() == tokdef.id()) {
                            token_id = def.def.get_next_id();
                            tokdef.id(token_id);
                        }
                        else {
                            token_id = tokdef.id();
                        }
                    }
                    else { 
                    // the following assertion makes sure that the token_def
                    // instance has not been assigned a different id earlier
                        BOOST_ASSERT(id_type() == tokdef.id() 
                                  || token_id == tokdef.id());
                        tokdef.id(token_id);
                    }

                    def.define(tokdef);
                    return *this;
                }

//                 template <typename F>
//                 adder const& operator()(char_type c, id_type token_id, F act) const
//                 {
//                     if (id_type() == token_id)
//                         token_id = def.def.get_next_id();
//                     std::size_t unique_id = 
//                         def.def.add_token (def.state.c_str(), s, token_id);
//                     def.def.add_action(unique_id, def.state.c_str(), act);
//                     return *this;
//                 }

                lexer_def_& def;

            private:
                // silence MSVC warning C4512: assignment operator could not be generated
                adder& operator= (adder const&);
            };
            friend struct adder;

            // allow to use lexer.self.add_pattern("pattern1", "regex1")(...);
            // syntax
            struct pattern_adder
            {
                pattern_adder(lexer_def_& def_) 
                  : def(def_) {}

                pattern_adder const& operator()(string_type const& p
                  , string_type const& s) const
                {
                    def.def.add_pattern (def.state.c_str(), p, s);
                    return *this;
                }

                lexer_def_& def;

            private:
                // silence MSVC warning C4512: assignment operator could not be generated
                pattern_adder& operator= (pattern_adder const&);
            };
            friend struct pattern_adder;

        private:
            // Helper function to invoke the necessary 2 step compilation
            // process on token definition expressions
            template <typename TokenExpr>
            void compile2pass(TokenExpr const& expr) 
            {
                expr.collect(def, state, targetstate);
                expr.add_actions(def);
            }

        public:
            ///////////////////////////////////////////////////////////////////
            template <typename Expr>
            void define(Expr const& expr)
            {
                compile2pass(compile<lex::domain>(expr));
            }

            lexer_def_(LexerDef& def_, string_type const& state_
                  , string_type const& targetstate_ = string_type())
              : proto_base_type(terminal_type::make(alias()))
              , add(this_()), add_pattern(this_()), def(def_)
              , state(state_), targetstate(targetstate_)
            {}

            // allow to switch states
            lexer_def_ operator()(char_type const* state) const
            {
                return lexer_def_(def, state);
            }
            lexer_def_ operator()(char_type const* state
              , char_type const* targetstate) const
            {
                return lexer_def_(def, state, targetstate);
            }
            lexer_def_ operator()(string_type const& state
              , string_type const& targetstate = string_type()) const
            {
                return lexer_def_(def, state, targetstate);
            }

            // allow to assign a token definition expression
            template <typename Expr>
            lexer_def_& operator= (Expr const& xpr)
            {
                // Report invalid expression error as early as possible.
                // If you got an error_invalid_expression error message here,
                // then the expression (expr) is not a valid spirit lex 
                // expression.
                BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);

                def.clear(state.c_str());
                define(xpr);
                return *this;
            }

            // explicitly tell the lexer that the given state will be defined
            // (useful in conjunction with "*")
            std::size_t add_state(char_type const* state = 0)
            {
                return def.add_state(state ? state : def.initial_state().c_str());
            }

            adder add;
            pattern_adder add_pattern;

        private:
            LexerDef& def;
            string_type state;
            string_type targetstate;

        private:
            // silence MSVC warning C4512: assignment operator could not be generated
            lexer_def_& operator= (lexer_def_ const&);
        };

#if defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
        // allow to assign a token definition expression
        template <typename LexerDef, typename Expr>
        inline lexer_def_<LexerDef>&
        operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr)
        {
            // Report invalid expression error as early as possible.
            // If you got an error_invalid_expression error message here,
            // then the expression (expr) is not a valid spirit lex 
            // expression.
            BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);

            lexdef.define(xpr);
            return lexdef;
        }
#else
        // allow to assign a token definition expression
        template <typename LexerDef, typename Expr>
        inline lexer_def_<LexerDef>&
        operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr)
        {
            // Report invalid expression error as early as possible.
            // If you got an error_invalid_expression error message here,
            // then the expression (expr) is not a valid spirit lex 
            // expression.
            BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);

            lexdef.define(xpr);
            return lexdef;
        }
#endif

        template <typename LexerDef, typename Expr>
        inline lexer_def_<LexerDef>& 
        operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr)
        {
            // Report invalid expression error as early as possible.
            // If you got an error_invalid_expression error message here,
            // then the expression (expr) is not a valid spirit lex 
            // expression.
            BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);

            lexdef.define(xpr);
            return lexdef;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    //  The match_flags flags are used to influence different matching 
    //  modes of the lexer
    struct match_flags
    {
        enum enum_type 
        {
            match_default = 0,          // no flags
            match_not_dot_newline = 1,  // the regex '.' doesn't match newlines
            match_icase = 2             // all matching operations are case insensitive
        };
    };

    ///////////////////////////////////////////////////////////////////////////
    //  This represents a lexer object
    ///////////////////////////////////////////////////////////////////////////

    ///////////////////////////////////////////////////////////////////////////
    // This is the first token id automatically assigned by the library 
    // if needed
    enum tokenids 
    {
        min_token_id = 0x10000
    };

    template <typename Lexer>
    class lexer : public Lexer
    {
    private:
        // avoid warnings about using 'this' in constructor
        lexer& this_() { return *this; }

        std::size_t next_token_id;   // has to be an integral type

    public:
        typedef Lexer lexer_type;
        typedef typename Lexer::id_type id_type;
        typedef typename Lexer::char_type char_type;
        typedef typename Lexer::iterator_type iterator_type;
        typedef lexer base_type;

        typedef detail::lexer_def_<lexer> lexer_def;
        typedef std::basic_string<char_type> string_type;

        lexer(unsigned int flags = match_flags::match_default
            , id_type first_id = id_type(min_token_id)) 
          : lexer_type(flags)
          , next_token_id(first_id)
          , self(this_(), lexer_type::initial_state()) 
        {}

        // access iterator interface
        template <typename Iterator>
        iterator_type begin(Iterator& first, Iterator const& last
                , char_type const* initial_state = 0) const
            { return this->lexer_type::begin(first, last, initial_state); }
        iterator_type end() const 
            { return this->lexer_type::end(); }

        std::size_t map_state(char_type const* state)
            { return this->lexer_type::add_state(state); }

        //  create a unique token id
        id_type get_next_id() { return id_type(next_token_id++); }

        lexer_def self;  // allow for easy token definition
    };

}}}

#endif