summaryrefslogtreecommitdiffstats
blob: db12935a830c00db38a7298fe70ef0f9ef73ea95 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
//  Copyright (c) 2001-2011 Hartmut Kaiser
// 
//  Distributed under the Boost Software License, Version 1.0. (See accompanying 
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#if !defined(BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM)
#define BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM

#if defined(_MSC_VER)
#pragma once
#endif

#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parse.hpp>
#include <boost/spirit/home/qi/nonterminal/grammar.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/lex/lexer.hpp>
#include <boost/mpl/assert.hpp>

namespace boost { namespace spirit { namespace lex
{
    ///////////////////////////////////////////////////////////////////////////
    //  Import skip_flag enumerator type from Qi namespace
    using qi::skip_flag;

    ///////////////////////////////////////////////////////////////////////////
    //
    //  The tokenize_and_parse() function is one of the main Spirit API 
    //  functions. It simplifies using a lexer as the underlying token source
    //  while parsing a given input sequence.
    //
    //  The function takes a pair of iterators spanning the underlying input 
    //  stream to parse, the lexer object (built from the token definitions) 
    //  and a parser object (built from the parser grammar definition).
    //
    //  The second version of this function additionally takes an attribute to 
    //  be used as the top level data structure instance the parser should use 
    //  to store the recognized input to.
    //
    //  The function returns true if the parsing succeeded (the given input
    //  sequence has been successfully matched by the given grammar).
    //
    //  first, last:    The pair of iterators spanning the underlying input 
    //                  sequence to parse. These iterators must at least 
    //                  conform to the requirements of the std::intput_iterator 
    //                  category.
    //                  On exit the iterator 'first' will be updated to the 
    //                  position right after the last successfully matched 
    //                  token. 
    //  lex:            The lexer object (encoding the token definitions) to be
    //                  used to convert the input sequence into a sequence of 
    //                  tokens. This token sequence is passed to the parsing 
    //                  process. The LexerExpr type must conform to the 
    //                  lexer interface described in the corresponding section
    //                  of the documentation.
    //  xpr:            The grammar object (encoding the parser grammar) to be
    //                  used to match the token sequence generated by the lex 
    //                  object instance. The ParserExpr type must conform to 
    //                  the grammar interface described in the corresponding 
    //                  section of the documentation.
    //  attr:           The top level attribute passed to the parser. It will 
    //                  be populated during the parsing of the input sequence.
    //                  On exit it will hold the 'parser result' corresponding 
    //                  to the matched input sequence.
    //
    ///////////////////////////////////////////////////////////////////////////
    template <typename Iterator, typename Lexer, typename ParserExpr>
    inline bool
    tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex,
        ParserExpr const& xpr)
    {
        // Report invalid expression error as early as possible.
        // If you got an error_invalid_expression error message here,
        // then the expression (expr) is not a valid spirit qi expression.
        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);

        typename Lexer::iterator_type iter = lex.begin(first, last);
        return compile<qi::domain>(xpr).parse(
            iter, lex.end(), unused, unused, unused);
    }

    ///////////////////////////////////////////////////////////////////////////
    template <typename Iterator, typename Lexer, typename ParserExpr
      , typename Attribute>
    inline bool
    tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex
      , ParserExpr const& xpr, Attribute& attr)
    {
        // Report invalid expression error as early as possible.
        // If you got an error_invalid_expression error message here,
        // then the expression (expr) is not a valid spirit qi expression.
        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);

        typename Lexer::iterator_type iter = lex.begin(first, last);
        return compile<qi::domain>(xpr).parse(
            iter, lex.end(), unused, unused, attr);
    }

    ///////////////////////////////////////////////////////////////////////////
    //
    //  The tokenize_and_phrase_parse() function is one of the main Spirit API 
    //  functions. It simplifies using a lexer as the underlying token source
    //  while phrase parsing a given input sequence.
    //
    //  The function takes a pair of iterators spanning the underlying input 
    //  stream to parse, the lexer object (built from the token definitions) 
    //  and a parser object (built from the parser grammar definition). The 
    //  additional skipper parameter will be used as the skip parser during
    //  the parsing process.
    //
    //  The second version of this function additionally takes an attribute to 
    //  be used as the top level data structure instance the parser should use 
    //  to store the recognized input to.
    //
    //  The function returns true if the parsing succeeded (the given input
    //  sequence has been successfully matched by the given grammar).
    //
    //  first, last:    The pair of iterators spanning the underlying input 
    //                  sequence to parse. These iterators must at least 
    //                  conform to the requirements of the std::intput_iterator 
    //                  category.
    //                  On exit the iterator 'first' will be updated to the 
    //                  position right after the last successfully matched 
    //                  token. 
    //  lex:            The lexer object (encoding the token definitions) to be
    //                  used to convert the input sequence into a sequence of 
    //                  tokens. This token sequence is passed to the parsing 
    //                  process. The LexerExpr type must conform to the 
    //                  lexer interface described in the corresponding section
    //                  of the documentation.
    //  xpr:            The grammar object (encoding the parser grammar) to be
    //                  used to match the token sequence generated by the lex 
    //                  object instance. The ParserExpr type must conform to 
    //                  the grammar interface described in the corresponding 
    //                  section of the documentation.
    //  skipper:        The skip parser to be used while parsing the given 
    //                  input sequence. Note, the skip parser will have to 
    //                  act on the same token sequence as the main parser 
    //                  'xpr'.
    //  post_skip:      The post_skip flag controls whether the function will
    //                  invoke an additional post skip after the main parser
    //                  returned.
    //  attr:           The top level attribute passed to the parser. It will 
    //                  be populated during the parsing of the input sequence.
    //                  On exit it will hold the 'parser result' corresponding 
    //                  to the matched input sequence.
    //
    ///////////////////////////////////////////////////////////////////////////
    template <typename Iterator, typename Lexer, typename ParserExpr
      , typename Skipper>
    inline bool
    tokenize_and_phrase_parse(Iterator& first, Iterator last
      , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
      , BOOST_SCOPED_ENUM(skip_flag) post_skip = skip_flag::postskip)
    {
        // Report invalid expression error as early as possible.
        // If you got an error_invalid_expression error message here,
        // then the expression (expr) is not a valid spirit qi expression.
        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);

        typedef
            typename spirit::result_of::compile<qi::domain, Skipper>::type
        skipper_type;
        skipper_type const skipper_ = compile<qi::domain>(skipper);

        typename Lexer::iterator_type iter = lex.begin(first, last);
        typename Lexer::iterator_type end = lex.end();
        if (!compile<qi::domain>(xpr).parse(
                iter, end, unused, skipper_, unused))
            return false;

        // do a final post-skip
        if (post_skip == skip_flag::postskip)
            qi::skip_over(iter, end, skipper_);
        return true;
    }

    template <typename Iterator, typename Lexer, typename ParserExpr
      , typename Skipper, typename Attribute>
    inline bool
    tokenize_and_phrase_parse(Iterator& first, Iterator last
      , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
      , BOOST_SCOPED_ENUM(skip_flag) post_skip, Attribute& attr)
    {
        // Report invalid expression error as early as possible.
        // If you got an error_invalid_expression error message here,
        // then the expression (expr) is not a valid spirit qi expression.
        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);

        typedef
            typename spirit::result_of::compile<qi::domain, Skipper>::type
        skipper_type;
        skipper_type const skipper_ = compile<qi::domain>(skipper);

        typename Lexer::iterator_type iter = lex.begin(first, last);
        typename Lexer::iterator_type end = lex.end();
        if (!compile<qi::domain>(xpr).parse(
                iter, end, unused, skipper_, attr))
            return false;

        // do a final post-skip
        if (post_skip == skip_flag::postskip)
            qi::skip_over(iter, end, skipper_);
        return true;
    }

    ///////////////////////////////////////////////////////////////////////////
    template <typename Iterator, typename Lexer, typename ParserExpr
      , typename Skipper, typename Attribute>
    inline bool
    tokenize_and_phrase_parse(Iterator& first, Iterator last
      , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
      , Attribute& attr)
    {
        return tokenize_and_phrase_parse(first, last, lex, xpr, skipper
          , skip_flag::postskip, attr);
    }

    ///////////////////////////////////////////////////////////////////////////
    //
    //  The tokenize() function is one of the main Spirit API functions. It 
    //  simplifies using a lexer to tokenize a given input sequence. It's main
    //  purpose is to use the lexer to tokenize all the input. 
    //
    //  The second version below discards all generated tokens afterwards. 
    //  This is useful whenever all the needed functionality has been 
    //  implemented directly inside the lexer semantic actions, which are being 
    //  executed while the tokens are matched. 
    //
    //  The function takes a pair of iterators spanning the underlying input 
    //  stream to scan, the lexer object (built from the token definitions),
    //  and a (optional) functor being called for each of the generated tokens. 
    //
    //  The function returns true if the scanning of the input succeeded (the 
    //  given input sequence has been successfully matched by the given token
    //  definitions).
    //
    //  first, last:    The pair of iterators spanning the underlying input 
    //                  sequence to parse. These iterators must at least 
    //                  conform to the requirements of the std::intput_iterator 
    //                  category.
    //                  On exit the iterator 'first' will be updated to the 
    //                  position right after the last successfully matched 
    //                  token. 
    //  lex:            The lexer object (encoding the token definitions) to be
    //                  used to convert the input sequence into a sequence of 
    //                  tokens. The LexerExpr type must conform to the 
    //                  lexer interface described in the corresponding section
    //                  of the documentation.
    //  f:              A functor (callable object) taking a single argument of
    //                  the token type and returning a bool, indicating whether
    //                  the tokenization should be canceled.
    //  initial_state:  The name of the state the lexer should start matching.
    //                  The default value is zero, causing the lexer to start 
    //                  in its 'INITIAL' state.
    //
    ///////////////////////////////////////////////////////////////////////////
    namespace detail
    {
        template <typename Token, typename F>
        bool tokenize_callback(Token const& t, F f)
        {
            return f(t);
        }

        template <typename Token, typename Eval>
        bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f)
        {
            f(t);
            return true;
        }

        template <typename Token>
        bool tokenize_callback(Token const& t, void (*f)(Token const&))
        {
            f(t);
            return true;
        }

        template <typename Token>
        bool tokenize_callback(Token const& t, bool (*f)(Token const&))
        {
            return f(t);
        }
    }

    template <typename Iterator, typename Lexer, typename F>
    inline bool
    tokenize(Iterator& first, Iterator last, Lexer const& lex, F f
      , typename Lexer::char_type const* initial_state = 0)
    {
        typedef typename Lexer::iterator_type iterator_type;

        iterator_type iter = lex.begin(first, last, initial_state);
        iterator_type end = lex.end();
        for (/**/; iter != end && token_is_valid(*iter); ++iter) 
        {
            if (!detail::tokenize_callback(*iter, f))
                return false;
        }
        return (iter == end) ? true : false;
    }

    ///////////////////////////////////////////////////////////////////////////
    template <typename Iterator, typename Lexer>
    inline bool
    tokenize(Iterator& first, Iterator last, Lexer const& lex
      , typename Lexer::char_type const* initial_state = 0)
    {
        typedef typename Lexer::iterator_type iterator_type;

        iterator_type iter = lex.begin(first, last, initial_state);
        iterator_type end = lex.end();

        while (iter != end && token_is_valid(*iter))
            ++iter;

        return (iter == end) ? true : false;
    }

}}}

#endif