Adding in the spirit Boost stuff

Change-Id: I4f127ce61667243b64081b0aa309028d5077045f
author: Kevin Smith <git@kismith.co.uk> 2013-01-12 18:41:34 (GMT)
committer: Swift Review <review@swift.im> 2013-01-13 10:36:26 (GMT)
commit: f3bc816af1b0d61452de973963e453bf3b3f95a2 (patch)
tree: e895f8afa3580e6cff6f5ad2017d45bf147a17c2 /3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp
parent: 188fc285c6555eadd3c9d50ab8a94adcade78d89 (diff)
download: swift-f3bc816af1b0d61452de973963e453bf3b3f95a2.zip
swift-f3bc816af1b0d61452de973963e453bf3b3f95a2.tar.bz2
1 files changed, 549 insertions, 0 deletions
diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp
new file mode 100644
index 0000000..6e0791e
--- /dev/null
+++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp
@@ -0,0 +1,549 @@
+// tokeniser_helper.hpp
+// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H
+#define BOOST_LEXER_RE_TOKENISER_HELPER_H
+
+#include "../../char_traits.hpp"
+// strlen()
+#include <cstring>
+#include "../../size_t.hpp"
+#include "re_tokeniser_state.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+template<typename CharT, typename Traits = char_traits<CharT> >
+class basic_re_tokeniser_helper
+{
+public:
+    typedef basic_re_tokeniser_state<CharT> state;
+    typedef std::basic_string<CharT> string;
+
+    // Decodes the escape whose introducing backslash the caller has just read.
+    // Charset shortcuts (\d \D \s \S \w \W) return their bracket expansion and
+    // set str_len_ to its length, leaving ch_ alone; anything else returns 0,
+    // zeroes str_len_ and stores the decoded character in ch_.
+    static const CharT *escape_sequence (state &state_, CharT &ch_,
+        std::size_t &str_len_)
+    {
+        if (state_.eos ())
+        {
+            throw runtime_error ("Unexpected end of regex "
+                "following '\\'.");
+        }
+
+        const CharT *shortcut_ = charset_shortcut (*state_._curr, str_len_);
+
+        if (shortcut_ == 0)
+        {
+            ch_ = chr (state_);
+            return 0;
+        }
+
+        state_.increment ();
+        return shortcut_;
+    }
+
+    // Reads a charset up to its ']' into chars_/negated_. Can call itself.
+    static void charset (state &state_, string &chars_, bool &negated_)
+    {
+        CharT ch_ = 0;
+        bool eos_ = state_.next (ch_);
+
+        if (eos_)
+        {
+            // No index in the message: the error is at the end of the string
+            throw runtime_error ("Unexpected end of regex "
+                "following '['.");
+        }
+
+        negated_ = ch_ == '^'; // a leading '^' negates the set
+
+        if (negated_)
+        {
+            eos_ = state_.next (ch_);
+
+            if (eos_)
+            {
+                // No index in the message: the error is at the end of the string
+                throw runtime_error ("Unexpected end of regex "
+                    "following '^'.");
+            }
+        }
+
+        bool chset_ = false; // true when the last item was a \d-style shortcut
+        CharT prev_ = 0; // last single character read; added to chars_ below
+
+        while (ch_ != ']')
+        {
+            if (ch_ == '\\')
+            {
+                std::size_t str_len_ = 0;
+                const CharT *str_ = escape_sequence (state_, prev_, str_len_);
+
+                chset_ = str_ != 0;
+
+                if (chset_)
+                {
+                    state temp_state_ (str_ + 1, str_ + str_len_, // skip its '['
+                        state_._flags, state_._locale);
+                    string temp_chars_;
+                    bool temp_negated_ = false;
+
+                    charset (temp_state_, temp_chars_, temp_negated_);
+
+                    if (negated_ != temp_negated_) // e.g. \D inside [...]
+                    {
+                        std::ostringstream ss_;
+
+                        ss_ << "Mismatch in charset negation preceding "
+                            "index " << state_.index () << '.';
+                        throw runtime_error (ss_.str ().c_str ());
+                    }
+
+                    chars_ += temp_chars_;
+                }
+            }
+/*
+            else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
+            {
+                // TODO: POSIX charsets
+            }
+*/
+            else
+            {
+                chset_ = false;
+                prev_ = ch_;
+            }
+
+            eos_ = state_.next (ch_);
+
+            // Covers preceding if, else if and else
+            if (eos_)
+            {
+                // No index in the message: the error is at the end of the string
+                throw runtime_error ("Unexpected end of regex "
+                    "(missing ']').");
+            }
+
+            if (ch_ == '-') // prev_ becomes the start of a range
+            {
+                charset_range (chset_, state_, eos_, ch_, prev_, chars_);
+            }
+            else if (!chset_) // single character: add it now
+            {
+                if ((state_._flags & icase) &&
+                    (std::isupper (prev_, state_._locale) ||
+                    std::islower (prev_, state_._locale)))
+                {
+                    CharT upper_ = std::toupper (prev_, state_._locale);
+                    CharT lower_ = std::tolower (prev_, state_._locale);
+
+                    chars_ += upper_; // icase: store both forms
+                    chars_ += lower_;
+                }
+                else
+                {
+                    chars_ += prev_;
+                }
+            }
+        }
+
+        if (!negated_ && chars_.empty ()) // "[]" is rejected, "[^]" is not
+        {
+            throw runtime_error ("Empty charsets not allowed.");
+        }
+    }
+
+    // Decodes the escape at the current position of state_ and consumes it.
+    // Octal (\0-\7), control (\c) and hex (\x) escapes are handed to their
+    // own decoders, the named escapes (\a \b \e \f \n \r \t \v) map to their
+    // control codes and any other character stands for itself.
+    static CharT chr (state &state_)
+    {
+        // eos_ has already been checked for.
+        switch (*state_._curr)
+        {
+            // These decoders advance state_ themselves.
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+                return decode_octal (state_);
+            case 'c':
+                return decode_control_char (state_);
+            case 'x':
+                return decode_hex (state_);
+            default:
+                break;
+        }
+
+        // Everything else is exactly one character long.
+        CharT decoded_ = *state_._curr;
+
+        switch (decoded_)
+        {
+            case 'a':
+                decoded_ = '\a';
+                break;
+            case 'b':
+                decoded_ = '\b';
+                break;
+            case 'e':
+                decoded_ = 27; // '\e' not recognised by compiler
+                break;
+            case 'f':
+                decoded_ = '\f';
+                break;
+            case 'n':
+                decoded_ = '\n';
+                break;
+            case 'r':
+                decoded_ = '\r';
+                break;
+            case 't':
+                decoded_ = '\t';
+                break;
+            case 'v':
+                decoded_ = '\v';
+                break;
+            default:
+                break;
+        }
+
+        state_.increment ();
+        return decoded_;
+    }
+
+private:
+    // Narrow-character overload.  Maps the letter of a \d, \D, \s, \S, \w or
+    // \W escape to the bracket expression it abbreviates and stores that
+    // expression's length in str_len_.  Any other character yields a null
+    // pointer and a length of zero.
+    static const char *charset_shortcut (const char ch_,
+        std::size_t &str_len_)
+    {
+        const char *expansion_ = 0;
+
+        switch (ch_)
+        {
+        case 'd':
+            expansion_ = "[0-9]";
+            break;
+        case 'D':
+            expansion_ = "[^0-9]";
+            break;
+        case 's':
+            expansion_ = "[ \t\n\r\f\v]";
+            break;
+        case 'S':
+            expansion_ = "[^ \t\n\r\f\v]";
+            break;
+        case 'w':
+            expansion_ = "[_0-9A-Za-z]";
+            break;
+        case 'W':
+            expansion_ = "[^_0-9A-Za-z]";
+            break;
+        default:
+            // Not a shortcut.
+            str_len_ = 0;
+            return 0;
+        }
+
+        // Some systems have strlen in namespace std.
+        using namespace std;
+
+        str_len_ = strlen (expansion_);
+        return expansion_;
+    }
+
+    // Wide-character overload.  Maps the letter of a \d, \D, \s, \S, \w or
+    // \W escape to the bracket expression it abbreviates and stores that
+    // expression's length in str_len_.  Any other character yields a null
+    // pointer and a length of zero.
+    static const wchar_t *charset_shortcut (const wchar_t ch_,
+        std::size_t &str_len_)
+    {
+        const wchar_t *expansion_ = 0;
+
+        switch (ch_)
+        {
+        case 'd':
+            expansion_ = L"[0-9]";
+            break;
+        case 'D':
+            expansion_ = L"[^0-9]";
+            break;
+        case 's':
+            expansion_ = L"[ \t\n\r\f\v]";
+            break;
+        case 'S':
+            expansion_ = L"[^ \t\n\r\f\v]";
+            break;
+        case 'w':
+            expansion_ = L"[_0-9A-Za-z]";
+            break;
+        case 'W':
+            expansion_ = L"[^_0-9A-Za-z]";
+            break;
+        default:
+            // Not a shortcut.
+            str_len_ = 0;
+            return 0;
+        }
+
+        // Some systems have wcslen in namespace std.
+        using namespace std;
+
+        str_len_ = wcslen (expansion_);
+        return expansion_;
+    }
+
+    // Decodes an octal escape of one to three digits beginning at the current
+    // character, which chr() has already seen to be '0'..'7'.  Stops early at
+    // the end of input or at the first non-octal character, which is left
+    // unconsumed.
+    static CharT decode_octal (state &state_)
+    {
+        std::size_t value_ = 0;
+        unsigned short remaining_ = 3;
+        CharT digit_ = *state_._curr;
+
+        do
+        {
+            value_ = value_ * 8 + (digit_ - '0');
+            --remaining_;
+            state_.increment ();
+
+            // Nothing left to read, or three digits already taken.
+            if (state_.eos () || remaining_ == 0)
+            {
+                break;
+            }
+
+            // Don't consume invalid chars!
+            digit_ = *state_._curr;
+        } while (digit_ >= '0' && digit_ <= '7');
+
+        return static_cast<CharT> (value_);
+    }
+
+    // Decodes a \cX control escape; the current character must be the 'c'.
+    // A letter X of either case yields its distance from 'a' (or 'A') plus
+    // one and '@' yields 0.  Throws runtime_error if the regex ends after the
+    // 'c' or if X is any other character.
+    static CharT decode_control_char (state &state_)
+    {
+        // Skip over 'c'
+        state_.increment ();
+
+        CharT ctrl_ = 0;
+
+        if (state_.next (ctrl_))
+        {
+            // Pointless returning index if at end of string
+            throw runtime_error ("Unexpected end of regex following \\c.");
+        }
+
+        if (ctrl_ == '@')
+        {
+            // Apparently...
+            return 0;
+        }
+
+        if (ctrl_ >= 'a' && ctrl_ <= 'z')
+        {
+            return static_cast<CharT> (ctrl_ - ('a' - 1));
+        }
+
+        if (ctrl_ >= 'A' && ctrl_ <= 'Z')
+        {
+            return static_cast<CharT> (ctrl_ - ('A' - 1));
+        }
+
+        // Neither a letter nor '@'.
+        std::ostringstream message_;
+
+        message_ << "Invalid control char at index " <<
+            state_.index () - 1 << '.';
+        throw runtime_error (message_.str ().c_str ());
+    }
+
+    // Decodes a \x escape; the current character must be the 'x'.  At least
+    // one hex digit is required and every hex digit that follows is consumed
+    // (there is no length limit); the value is then narrowed to CharT.
+    // Throws runtime_error if the regex ends after the 'x' or if the next
+    // character is not a hex digit.
+    static CharT decode_hex (state &state_)
+    {
+        // Skip over 'x'
+        state_.increment ();
+
+        CharT digit_ = 0;
+
+        if (state_.next (digit_))
+        {
+            // Pointless returning index if at end of string
+            throw runtime_error ("Unexpected end of regex following \\x.");
+        }
+
+        const bool first_is_hex_ = (digit_ >= '0' && digit_ <= '9') ||
+            (digit_ >= 'a' && digit_ <= 'f') ||
+            (digit_ >= 'A' && digit_ <= 'F');
+
+        if (!first_is_hex_)
+        {
+            std::ostringstream message_;
+
+            message_ << "Illegal char following \\x at index " <<
+                state_.index () - 1 << '.';
+            throw runtime_error (message_.str ().c_str ());
+        }
+
+        std::size_t value_ = 0;
+
+        // Accumulate digits until a non-hex character or the end of input.
+        for (;;)
+        {
+            // digit_ is always a hex digit at this point.
+            const std::size_t nibble_ =
+                (digit_ >= '0' && digit_ <= '9') ? digit_ - '0' :
+                (digit_ >= 'a' && digit_ <= 'f') ? 10 + (digit_ - 'a') :
+                10 + (digit_ - 'A');
+
+            value_ = value_ * 16 + nibble_;
+
+            if (state_.eos ())
+            {
+                break;
+            }
+
+            digit_ = *state_._curr;
+
+            // Don't consume invalid chars!
+            if (!((digit_ >= '0' && digit_ <= '9') ||
+                (digit_ >= 'a' && digit_ <= 'f') ||
+                (digit_ >= 'A' && digit_ <= 'F')))
+            {
+                break;
+            }
+
+            state_.increment ();
+        }
+
+        return static_cast<CharT> (value_);
+    }
+
+    // Handles a range inside a charset once charset() has read the '-'.
+    // prev_ is the first character of the range; the last one is read from
+    // state_ (it may be an escape, but not a \d-style shortcut).  Every
+    // character from prev_ to the last one inclusive is appended to chars_,
+    // in both cases when icase is set and the character is cased.  On
+    // return ch_ holds the character read by state_.next after the range.
+    // Throws runtime_error for a shortcut at either end of the range, a
+    // premature end of the regex, or a range whose end precedes its start.
+    static void charset_range (const bool chset_, state &state_, bool &eos_,
+        CharT &ch_, const CharT prev_, string &chars_)
+    {
+        if (chset_)
+        {
+            std::ostringstream message_;
+
+            message_ << "Charset cannot form start of range preceding "
+                "index " << state_.index () - 1 << '.';
+            throw runtime_error (message_.str ().c_str ());
+        }
+
+        eos_ = state_.next (ch_);
+
+        if (eos_)
+        {
+            // Pointless returning index if at end of string
+            throw runtime_error ("Unexpected end of regex "
+                "following '-'.");
+        }
+
+        CharT range_end_ = 0;
+
+        if (ch_ != '\\')
+        {
+            range_end_ = ch_;
+        }
+        else
+        {
+            std::size_t shortcut_len_ = 0;
+
+            if (escape_sequence (state_, range_end_, shortcut_len_))
+            {
+                std::ostringstream message_;
+
+                message_ << "Charset cannot form end of range preceding index "
+                    << state_.index () << '.';
+                throw runtime_error (message_.str ().c_str ());
+            }
+        }
+
+        eos_ = state_.next (ch_);
+
+        // Covers both the escaped and the literal case
+        if (eos_)
+        {
+            // Pointless returning index if at end of string
+            throw runtime_error ("Unexpected end of regex "
+                "(missing ']').");
+        }
+
+        const std::size_t first_ =
+            static_cast<typename Traits::index_type> (prev_);
+        const std::size_t last_ =
+            static_cast<typename Traits::index_type> (range_end_);
+
+        // Semantic check
+        if (last_ < first_)
+        {
+            std::ostringstream message_;
+
+            message_ << "Invalid range in charset preceding index " <<
+                state_.index () - 1 << '.';
+            throw runtime_error (message_.str ().c_str ());
+        }
+
+        chars_.reserve (chars_.size () + (last_ + 1 - first_));
+
+        for (std::size_t code_ = first_; code_ <= last_; ++code_)
+        {
+            const CharT member_ = static_cast<CharT> (code_);
+
+            // Under icase a cased character contributes both of its forms.
+            if (!(state_._flags & icase) ||
+                (!std::isupper (member_, state_._locale) &&
+                !std::islower (member_, state_._locale)))
+            {
+                chars_ += member_;
+                continue;
+            }
+
+            const CharT upper_ = std::toupper (member_, state_._locale);
+            const CharT lower_ = std::tolower (member_, state_._locale);
+
+            chars_ += upper_;
+            chars_ += lower_;
+        }
+    }
+};
+}
+}
+}
+
+#endif
author	Kevin Smith <git@kismith.co.uk>	2013-01-12 18:41:34 (GMT)
committer	Swift Review <review@swift.im>	2013-01-13 10:36:26 (GMT)
commit	f3bc816af1b0d61452de973963e453bf3b3f95a2 (patch)
tree	e895f8afa3580e6cff6f5ad2017d45bf147a17c2 /3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp
parent	188fc285c6555eadd3c9d50ab8a94adcade78d89 (diff)
download	swift-f3bc816af1b0d61452de973963e453bf3b3f95a2.zip swift-f3bc816af1b0d61452de973963e453bf3b3f95a2.tar.bz2