1 files changed, 574 insertions, 0 deletions
diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp
new file mode 100644
index 0000000..7bdeb80
--- /dev/null
+++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser.hpp
@@ -0,0 +1,574 @@
+// re_tokeniser.hpp
+// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_RE_TOKENISER_HPP
+#define BOOST_LEXER_RE_TOKENISER_HPP
+
+// memcpy()
+#include <cstring>
+#include <map>
+#include "num_token.hpp"
+#include "../../runtime_error.hpp"
+#include "../../size_t.hpp"
+#include <sstream>
+#include "../../string_token.hpp"
+#include "re_tokeniser_helper.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+template<typename CharT>
+class basic_re_tokeniser
+{
+public:
+    typedef basic_num_token<CharT> num_token;
+    typedef basic_re_tokeniser_state<CharT> state;
+    typedef basic_string_token<CharT> string_token;
+    typedef typename string_token::string string;
+    typedef std::map<string_token, std::size_t> token_map;
+    typedef std::pair<string_token, std::size_t> token_pair;
+
+    // Reads the next regex token from state_ into token_.
+    //
+    // state_ : tokeniser position and flags; advanced past the consumed input.
+    // map_   : charset -> id table; may gain an entry when a new charset
+    //          is encountered.
+    // token_ : receives the token. Its min/comma/max fields are reset first.
+    //
+    // Throws runtime_error on malformed input (unterminated string,
+    // unbalanced parentheses, the unsupported '/' operator, or anything the
+    // helpers called from here reject).
+    static void next (state &state_, token_map &map_, num_token &token_)
+    {
+        CharT ch_ = 0;
+        bool eos_ = state_.next (ch_);
+
+        token_.min_max (0, false, 0);
+
+        // Quotes are never returned as tokens: each one just flips string
+        // mode, so consecutive quotes are swallowed here in one go.
+        for (; !eos_ && ch_ == '"'; eos_ = state_.next (ch_))
+        {
+            state_._in_string ^= 1;
+        }
+
+        if (eos_)
+        {
+            if (state_._in_string)
+            {
+                throw runtime_error ("Unexpected end of regex "
+                    "(missing '\"').");
+            }
+
+            if (state_._paren_count)
+            {
+                throw runtime_error ("Unexpected end of regex "
+                    "(missing ')').");
+            }
+
+            token_.set (num_token::END, null_token);
+            return;
+        }
+
+        if (ch_ == '\\')
+        {
+            // Escape sequences are honoured inside strings as well.
+            escape (state_, map_, token_);
+            return;
+        }
+
+        if (state_._in_string)
+        {
+            // Inside a string every remaining character is a literal.
+            create_charset_token (string (1, ch_), false, map_, token_);
+            return;
+        }
+
+        // Neither escaped nor quoted: look for meta characters.
+        switch (ch_)
+        {
+        case '(':
+            token_.set (num_token::OPENPAREN, null_token);
+            ++state_._paren_count;
+            read_options (state_);
+            break;
+        case ')':
+            if (--state_._paren_count < 0)
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Number of open parenthesis < 0 at index " <<
+                    state_.index () - 1 << '.';
+                throw runtime_error (ss_.str ().c_str ());
+            }
+
+            token_.set (num_token::CLOSEPAREN, null_token);
+
+            // Restore the flags that were saved when the group was opened.
+            if (!state_._flags_stack.empty ())
+            {
+                state_._flags = state_._flags_stack.top ();
+                state_._flags_stack.pop ();
+            }
+
+            break;
+        case '?':
+        {
+            // A directly following '?' selects the 'A' (AOPT) variant.
+            const bool lazy_ = !state_.eos () && *state_._curr == '?';
+
+            token_.set (lazy_ ? num_token::AOPT : num_token::OPT,
+                null_token);
+
+            if (lazy_)
+            {
+                state_.increment ();
+            }
+
+            break;
+        }
+        case '*':
+        {
+            const bool lazy_ = !state_.eos () && *state_._curr == '?';
+
+            token_.set (lazy_ ? num_token::AZEROORMORE :
+                num_token::ZEROORMORE, null_token);
+
+            if (lazy_)
+            {
+                state_.increment ();
+            }
+
+            break;
+        }
+        case '+':
+        {
+            const bool lazy_ = !state_.eos () && *state_._curr == '?';
+
+            token_.set (lazy_ ? num_token::AONEORMORE :
+                num_token::ONEORMORE, null_token);
+
+            if (lazy_)
+            {
+                state_.increment ();
+            }
+
+            break;
+        }
+        case '{':
+            open_curly (state_, token_);
+            break;
+        case '|':
+            token_.set (num_token::OR, null_token);
+            break;
+        case '^':
+            // Only a '^' that was read from state_._start is an assertion;
+            // anywhere else it is an ordinary character.
+            if (state_._curr - 1 != state_._start)
+            {
+                create_charset_token (string (1, ch_), false, map_, token_);
+            }
+            else
+            {
+                token_.set (num_token::CHARSET, bol_token);
+                state_._seen_BOL_assertion = true;
+            }
+
+            break;
+        case '$':
+            // Only a '$' that is the last character of the input is an
+            // assertion; anywhere else it is an ordinary character.
+            if (state_._curr != state_._end)
+            {
+                create_charset_token (string (1, ch_), false, map_, token_);
+            }
+            else
+            {
+                token_.set (num_token::CHARSET, eol_token);
+                state_._seen_EOL_assertion = true;
+            }
+
+            break;
+        case '.':
+        {
+            // '.' is a negated set: empty, or holding just '\n' when the
+            // dot_not_newline flag is active.
+            string excluded_;
+
+            if (state_._flags & dot_not_newline)
+            {
+                excluded_.push_back ('\n');
+            }
+
+            create_charset_token (excluded_, true, map_, token_);
+            break;
+        }
+        case '[':
+            charset (state_, map_, token_);
+            break;
+        case '/':
+            throw runtime_error("Lookahead ('/') is not supported yet.");
+        default:
+        {
+            string chars_ (1, ch_);
+
+            // With icase active a cased letter becomes the two character
+            // set {upper, lower}.
+            if ((state_._flags & icase) &&
+                (std::isupper (ch_, state_._locale) ||
+                std::islower (ch_, state_._locale)))
+            {
+                chars_[0] = std::toupper (ch_, state_._locale);
+                chars_ += std::tolower (ch_, state_._locale);
+            }
+
+            create_charset_token (chars_, false, map_, token_);
+            break;
+        }
+        }
+    }
+
+private:
+    typedef basic_re_tokeniser_helper<CharT> tokeniser_helper;
+
+    // Called right after a '(' has been consumed. Handles an optional
+    // "?<options>:" prefix, where <options> is any mix of:
+    //   i  - turn icase on
+    //   s  - turn dot_not_newline off
+    //   -  - invert the effect of the next option letter
+    // The flags in force before the group are pushed on _flags_stack so
+    // that the matching ')' can restore them. A group without options only
+    // pushes when the stack is already in use.
+    //
+    // Throws runtime_error for an unrecognised option character.
+    static void read_options (state &state_)
+    {
+        const bool has_options_ = !state_.eos () && *state_._curr == '?';
+
+        if (!has_options_)
+        {
+            if (!state_._flags_stack.empty ())
+            {
+                state_._flags_stack.push (state_._flags);
+            }
+
+            return;
+        }
+
+        // Step over the '?' itself.
+        state_.increment ();
+
+        CharT ch_ = 0;
+        bool eos_ = state_.next (ch_);
+        bool negate_ = false;
+
+        state_._flags_stack.push (state_._flags);
+
+        // Running out of input here is not reported: the end of string
+        // handling in next() deals with early termination.
+        for (; !eos_ && ch_ != ':'; eos_ = state_.next (ch_))
+        {
+            switch (ch_)
+            {
+            case '-':
+                negate_ = !negate_;
+                break;
+            case 'i':
+                state_._flags = static_cast<regex_flags> (negate_ ?
+                    (state_._flags & ~icase) : (state_._flags | icase));
+                negate_ = false;
+                break;
+            case 's':
+                // Note the inversion: plain 's' clears dot_not_newline,
+                // "-s" sets it.
+                state_._flags = static_cast<regex_flags> (negate_ ?
+                    (state_._flags | dot_not_newline) :
+                    (state_._flags & ~dot_not_newline));
+                negate_ = false;
+                break;
+            default:
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Unknown option at index " <<
+                    state_.index () - 1 << '.';
+                throw runtime_error (ss_.str ().c_str ());
+            }
+            }
+        }
+    }
+
+    // Tokenises an escape sequence (the backslash has already been read).
+    //
+    // tokeniser_helper::escape_sequence() yields either a single character
+    // (null pointer returned, character delivered through its second
+    // argument) or a pointer to a charset definition of the reported
+    // length. The latter is tokenised by charset() on a temporary state
+    // that starts one character into the definition (presumably skipping
+    // its opening '[' - confirm against the helper).
+    static void escape (state &state_, token_map &map_, num_token &token_)
+    {
+        CharT single_ = 0;
+        std::size_t set_len_ = 0;
+        const CharT * const set_ = tokeniser_helper::escape_sequence
+            (state_, single_, set_len_);
+
+        if (!set_)
+        {
+            create_charset_token (string (1, single_), false, map_, token_);
+            return;
+        }
+
+        state set_state_ (set_ + 1, set_ + set_len_, state_._flags,
+            state_._locale);
+
+        charset (set_state_, map_, token_);
+    }
+
+    // Lets tokeniser_helper::charset() read a character set from state_
+    // (members plus negation flag) and turns the result into a CHARSET
+    // token, registering the set in map_ if it has not been seen before.
+    static void charset (state &state_, token_map &map_, num_token &token_)
+    {
+        bool inverted_ = false;
+        string members_;
+
+        tokeniser_helper::charset (state_, members_, inverted_);
+        create_charset_token (members_, inverted_, map_, token_);
+    }
+
+    // Sets token_ to a CHARSET token whose id identifies the given set.
+    //
+    // The set is canonicalised (duplicates removed, then normalised) so
+    // that equivalent sets share one entry in map_. A set not yet present
+    // is added with the next free id, i.e. the size of map_ before the
+    // insertion.
+    static void create_charset_token (const string &charset_,
+        const bool negated_, token_map &map_, num_token &token_)
+    {
+        string_token key_ (negated_, charset_);
+
+        key_.remove_duplicates ();
+        key_.normalise ();
+
+        // insert() leaves an existing entry untouched and hands back an
+        // iterator to it, so a single call covers both lookup and add.
+        const std::pair<typename token_map::iterator, bool> entry_ =
+            map_.insert (token_pair (key_, map_.size ()));
+
+        token_.set (num_token::CHARSET, entry_.first->second);
+    }
+
+    // Handles the text following a '{' (already consumed): a leading digit
+    // introduces a repeat count ({n}, {n,}, {n,m}), anything else is
+    // treated as a macro reference ({NAME}).
+    //
+    // A '?' directly after a repeat selects the 'A' variant of the token,
+    // mirroring what next() does for '?', '*' and '+'.
+    //
+    // Throws runtime_error when the input ends right after the '{', or
+    // when repeat_n() / macro() reject the contents.
+    static void open_curly (state &state_, num_token &token_)
+    {
+        if (state_.eos ())
+        {
+            throw runtime_error ("Unexpected end of regex "
+                "(missing '}').");
+        }
+        else if (*state_._curr >= '0' && *state_._curr <= '9')
+        {
+            repeat_n (state_, token_);
+
+            if (!state_.eos () && *state_._curr == '?')
+            {
+                // repeat_n() rewrites {0,}, {1,} and {0,1} to ZEROORMORE,
+                // ONEORMORE and OPT. Forcing AREPEATN unconditionally
+                // would discard that rewrite, so each simplified form is
+                // mapped to its own 'A' counterpart instead.
+                if (token_._type == num_token::ZEROORMORE)
+                {
+                    token_._type = num_token::AZEROORMORE;
+                }
+                else if (token_._type == num_token::ONEORMORE)
+                {
+                    token_._type = num_token::AONEORMORE;
+                }
+                else if (token_._type == num_token::OPT)
+                {
+                    token_._type = num_token::AOPT;
+                }
+                else
+                {
+                    token_._type = num_token::AREPEATN;
+                }
+
+                state_.increment ();
+            }
+        }
+        else
+        {
+            macro (state_, token_);
+        }
+    }
+
+    // Returns value_ * 10 + (ch_ - '0') for the decimal digit ch_, throwing
+    // runtime_error instead of silently wrapping around when the result
+    // does not fit in a std::size_t.
+    // NOTE(review): assumes the repeat counters of num_token are
+    // std::size_t compatible - confirm against num_token.hpp.
+    static std::size_t append_digit (state &state_, const std::size_t value_,
+        const CharT ch_)
+    {
+        const std::size_t limit_ = static_cast<std::size_t> (-1);
+        const std::size_t digit_ = static_cast<std::size_t> (ch_ - '0');
+
+        if (value_ > (limit_ - digit_) / 10)
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Repeat count too large at index " <<
+                state_.index () - 1 << '.';
+            throw runtime_error (ss_.str ().c_str ());
+        }
+
+        return value_ * 10 + digit_;
+    }
+
+    // Parses a repeat count; the opening '{' has already been consumed and
+    // the caller has checked that a digit follows.
+    //
+    // SYNTAX:
+    //   {n[,[n]]}
+    // SEMANTIC RULES:
+    //   {0} - INVALID (throw exception)
+    //   {0,} = *
+    //   {0,0} - INVALID (throw exception)
+    //   {0,1} = ?
+    //   {1,} = +
+    //   {min,max} where min == max = {min}
+    //   {min,max} where max < min - INVALID (throw exception)
+    //
+    // On success token_ is ZEROORMORE, ONEORMORE, OPT or REPEATN, with
+    // _min, _comma and _max describing the parsed range.
+    //
+    // Throws runtime_error for a missing '}', a premature end of input, a
+    // count too large to represent, or a violation of the rules above.
+    static void repeat_n (state &state_, num_token &token_)
+    {
+        CharT ch_ = 0;
+        bool eos_ = state_.next (ch_);
+
+        while (!eos_ && ch_ >= '0' && ch_ <= '9')
+        {
+            token_._min = append_digit (state_, token_._min, ch_);
+            eos_ = state_.next (ch_);
+        }
+
+        if (eos_)
+        {
+            throw runtime_error ("Unexpected end of regex "
+                "(missing '}').");
+        }
+
+        bool min_max_ = false;
+        bool repeatn_ = true;
+
+        token_._comma = ch_ == ',';
+
+        if (token_._comma)
+        {
+            eos_ = state_.next (ch_);
+
+            if (eos_)
+            {
+                throw runtime_error ("Unexpected end of regex "
+                    "(missing '}').");
+            }
+
+            if (ch_ == '}')
+            {
+                // Small optimisation: Check for '*' equivalency.
+                if (token_._min == 0)
+                {
+                    token_.set (num_token::ZEROORMORE, null_token);
+                    repeatn_ = false;
+                }
+                // Small optimisation: Check for '+' equivalency.
+                else if (token_._min == 1)
+                {
+                    token_.set (num_token::ONEORMORE, null_token);
+                    repeatn_ = false;
+                }
+            }
+            else
+            {
+                if (ch_ < '0' || ch_ > '9')
+                {
+                    std::ostringstream ss_;
+
+                    ss_ << "Missing '}' at index " <<
+                        state_.index () - 1 << '.';
+                    throw runtime_error (ss_.str ().c_str ());
+                }
+
+                min_max_ = true;
+
+                // ch_ is known to be a digit on entry, hence do/while.
+                do
+                {
+                    token_._max = append_digit (state_, token_._max, ch_);
+                    eos_ = state_.next (ch_);
+                } while (!eos_ && ch_ >= '0' && ch_ <= '9');
+
+                if (eos_)
+                {
+                    throw runtime_error ("Unexpected end of regex "
+                        "(missing '}').");
+                }
+
+                // Small optimisation: Check for '?' equivalency.
+                if (token_._min == 0 && token_._max == 1)
+                {
+                    token_.set (num_token::OPT, null_token);
+                    repeatn_ = false;
+                }
+                // Small optimisation: if min == max, then min.
+                else if (token_._min == token_._max)
+                {
+                    token_._comma = false;
+                    min_max_ = false;
+                    token_._max = 0;
+                }
+            }
+        }
+
+        if (ch_ != '}')
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Missing '}' at index " << state_.index () - 1 << '.';
+            throw runtime_error (ss_.str ().c_str ());
+        }
+
+        if (repeatn_)
+        {
+            // SEMANTIC VALIDATION follows:
+            // NOTE: {0,} has already become *
+            // therefore we don't check for a comma.
+            if (token_._min == 0 && token_._max == 0)
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Cannot have exactly zero repeats preceding index " <<
+                    state_.index () << '.';
+                throw runtime_error (ss_.str ().c_str ());
+            }
+
+            if (min_max_ && token_._max < token_._min)
+            {
+                std::ostringstream ss_;
+
+                ss_ << "Max less than min preceding index " <<
+                    state_.index () << '.';
+                throw runtime_error (ss_.str ().c_str ());
+            }
+
+            token_.set (num_token::REPEATN, null_token);
+        }
+    }
+
+    // Parses a macro reference; the opening '{' has already been consumed.
+    //
+    // A name starts with a letter or '_' and continues with letters,
+    // digits, '_' or '-', up to the closing '}'. On success token_ becomes
+    // a MACRO token and the zero terminated name is stored in
+    // token_._macro.
+    //
+    // Throws runtime_error for an invalid first character, a missing '}',
+    // a premature end of input, or a name longer than max_macro_len.
+    static void macro (state &state_, num_token &token_)
+    {
+        const CharT *name_ = state_._curr;
+        CharT ch_ = 0;
+
+        state_.next (ch_);
+
+        const bool valid_first_ = ch_ == '_' ||
+            (ch_ >= 'A' && ch_ <= 'Z') || (ch_ >= 'a' && ch_ <= 'z');
+
+        if (!valid_first_)
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Invalid MACRO name at index " <<
+                state_.index () - 1 << '.';
+            throw runtime_error (ss_.str ().c_str ());
+        }
+
+        // Consume the rest of the name; ch_ ends up holding the first
+        // character that cannot be part of it.
+        for (;;)
+        {
+            if (state_.next (ch_))
+            {
+                throw runtime_error ("Unexpected end of regex "
+                    "(missing '}').");
+            }
+
+            const bool name_char_ = ch_ == '_' || ch_ == '-' ||
+                (ch_ >= 'A' && ch_ <= 'Z') || (ch_ >= 'a' && ch_ <= 'z') ||
+                (ch_ >= '0' && ch_ <= '9');
+
+            if (!name_char_)
+            {
+                break;
+            }
+        }
+
+        if (ch_ != '}')
+        {
+            std::ostringstream ss_;
+
+            ss_ << "Missing '}' at index " << state_.index () - 1 << '.';
+            throw runtime_error (ss_.str ().c_str ());
+        }
+
+        // _curr is one past the '}', so exclude it from the length.
+        const std::size_t len_ = state_._curr - 1 - name_;
+
+        if (len_ > max_macro_len)
+        {
+            // The wide stream exists only to narrow() the name for the
+            // (narrow) error message.
+            std::basic_stringstream<CharT> ss_;
+            std::ostringstream os_;
+
+            os_ << "MACRO name '";
+
+            for (const CharT * const end_ = name_ + len_; name_ != end_;
+                ++name_)
+            {
+                os_ << ss_.narrow (*name_, ' ');
+            }
+
+            os_ << "' too long.";
+            throw runtime_error (os_.str ());
+        }
+
+        token_.set (num_token::MACRO, null_token);
+
+        // Some systems have memcpy in namespace std.
+        using namespace std;
+
+        memcpy (token_._macro, name_, len_ * sizeof (CharT));
+        token_._macro[len_] = 0;
+    }
+};
+}
+}
+}
+
+#endif