summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp')
-rw-r--r--3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp549
1 files changed, 549 insertions, 0 deletions
diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp
new file mode 100644
index 0000000..6e0791e
--- /dev/null
+++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp
@@ -0,0 +1,549 @@
+// tokeniser_helper.hpp
+// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
+//
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+#ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H
+#define BOOST_LEXER_RE_TOKENISER_HELPER_H
+
+#include "../../char_traits.hpp"
+// strlen()
+#include <cstring>
+#include "../../size_t.hpp"
+#include "re_tokeniser_state.hpp"
+
+namespace boost
+{
+namespace lexer
+{
+namespace detail
+{
+template<typename CharT, typename Traits = char_traits<CharT> >
+class basic_re_tokeniser_helper
+{
+public:
+ typedef basic_re_tokeniser_state<CharT> state;
+ typedef std::basic_string<CharT> string;
+
+ static const CharT *escape_sequence (state &state_, CharT &ch_,
+ std::size_t &str_len_)
+ {
+ bool eos_ = state_.eos ();
+
+ if (eos_)
+ {
+ throw runtime_error ("Unexpected end of regex "
+ "following '\\'.");
+ }
+
+ const CharT *str_ = charset_shortcut (*state_._curr, str_len_);
+
+ if (str_)
+ {
+ state_.increment ();
+ }
+ else
+ {
+ ch_ = chr (state_);
+ }
+
+ return str_;
+ }
+
+ // This function can call itself.
+ static void charset (state &state_, string &chars_, bool &negated_)
+ {
+ CharT ch_ = 0;
+ bool eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex "
+ "following '['.");
+ }
+
+ negated_ = ch_ == '^';
+
+ if (negated_)
+ {
+ eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex "
+ "following '^'.");
+ }
+ }
+
+ bool chset_ = false;
+ CharT prev_ = 0;
+
+ while (ch_ != ']')
+ {
+ if (ch_ == '\\')
+ {
+ std::size_t str_len_ = 0;
+ const CharT *str_ = escape_sequence (state_, prev_, str_len_);
+
+ chset_ = str_ != 0;
+
+ if (chset_)
+ {
+ state temp_state_ (str_ + 1, str_ + str_len_,
+ state_._flags, state_._locale);
+ string temp_chars_;
+ bool temp_negated_ = false;
+
+ charset (temp_state_, temp_chars_, temp_negated_);
+
+ if (negated_ != temp_negated_)
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Mismatch in charset negation preceding "
+ "index " << state_.index () << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ chars_ += temp_chars_;
+ }
+ }
+/*
+ else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
+ {
+ // TODO: POSIX charsets
+ }
+*/
+ else
+ {
+ chset_ = false;
+ prev_ = ch_;
+ }
+
+ eos_ = state_.next (ch_);
+
+ // Covers preceding if, else if and else
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex "
+ "(missing ']').");
+ }
+
+ if (ch_ == '-')
+ {
+ charset_range (chset_, state_, eos_, ch_, prev_, chars_);
+ }
+ else if (!chset_)
+ {
+ if ((state_._flags & icase) &&
+ (std::isupper (prev_, state_._locale) ||
+ std::islower (prev_, state_._locale)))
+ {
+ CharT upper_ = std::toupper (prev_, state_._locale);
+ CharT lower_ = std::tolower (prev_, state_._locale);
+
+ chars_ += upper_;
+ chars_ += lower_;
+ }
+ else
+ {
+ chars_ += prev_;
+ }
+ }
+ }
+
+ if (!negated_ && chars_.empty ())
+ {
+ throw runtime_error ("Empty charsets not allowed.");
+ }
+ }
+
+ static CharT chr (state &state_)
+ {
+ CharT ch_ = 0;
+
+ // eos_ has already been checked for.
+ switch (*state_._curr)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ ch_ = decode_octal (state_);
+ break;
+ case 'a':
+ ch_ = '\a';
+ state_.increment ();
+ break;
+ case 'b':
+ ch_ = '\b';
+ state_.increment ();
+ break;
+ case 'c':
+ ch_ = decode_control_char (state_);
+ break;
+ case 'e':
+ ch_ = 27; // '\e' not recognised by compiler
+ state_.increment ();
+ break;
+ case 'f':
+ ch_ = '\f';
+ state_.increment ();
+ break;
+ case 'n':
+ ch_ = '\n';
+ state_.increment ();
+ break;
+ case 'r':
+ ch_ = '\r';
+ state_.increment ();
+ break;
+ case 't':
+ ch_ = '\t';
+ state_.increment ();
+ break;
+ case 'v':
+ ch_ = '\v';
+ state_.increment ();
+ break;
+ case 'x':
+ ch_ = decode_hex (state_);
+ break;
+ default:
+ ch_ = *state_._curr;
+ state_.increment ();
+ break;
+ }
+
+ return ch_;
+ }
+
+private:
+ static const char *charset_shortcut (const char ch_,
+ std::size_t &str_len_)
+ {
+ const char *str_ = 0;
+
+ switch (ch_)
+ {
+ case 'd':
+ str_ = "[0-9]";
+ break;
+ case 'D':
+ str_ = "[^0-9]";
+ break;
+ case 's':
+ str_ = "[ \t\n\r\f\v]";
+ break;
+ case 'S':
+ str_ = "[^ \t\n\r\f\v]";
+ break;
+ case 'w':
+ str_ = "[_0-9A-Za-z]";
+ break;
+ case 'W':
+ str_ = "[^_0-9A-Za-z]";
+ break;
+ }
+
+ if (str_)
+ {
+ // Some systems have strlen in namespace std.
+ using namespace std;
+
+ str_len_ = strlen (str_);
+ }
+ else
+ {
+ str_len_ = 0;
+ }
+
+ return str_;
+ }
+
+ static const wchar_t *charset_shortcut (const wchar_t ch_,
+ std::size_t &str_len_)
+ {
+ const wchar_t *str_ = 0;
+
+ switch (ch_)
+ {
+ case 'd':
+ str_ = L"[0-9]";
+ break;
+ case 'D':
+ str_ = L"[^0-9]";
+ break;
+ case 's':
+ str_ = L"[ \t\n\r\f\v]";
+ break;
+ case 'S':
+ str_ = L"[^ \t\n\r\f\v]";
+ break;
+ case 'w':
+ str_ = L"[_0-9A-Za-z]";
+ break;
+ case 'W':
+ str_ = L"[^_0-9A-Za-z]";
+ break;
+ }
+
+ if (str_)
+ {
+ // Some systems have wcslen in namespace std.
+ using namespace std;
+
+ str_len_ = wcslen (str_);
+ }
+ else
+ {
+ str_len_ = 0;
+ }
+
+ return str_;
+ }
+
+ static CharT decode_octal (state &state_)
+ {
+ std::size_t accumulator_ = 0;
+ CharT ch_ = *state_._curr;
+ unsigned short count_ = 3;
+ bool eos_ = false;
+
+ for (;;)
+ {
+ accumulator_ *= 8;
+ accumulator_ += ch_ - '0';
+ --count_;
+ state_.increment ();
+ eos_ = state_.eos ();
+
+ if (!count_ || eos_) break;
+
+ ch_ = *state_._curr;
+
+ // Don't consume invalid chars!
+ if (ch_ < '0' || ch_ > '7')
+ {
+ break;
+ }
+ }
+
+ return static_cast<CharT> (accumulator_);
+ }
+
+ static CharT decode_control_char (state &state_)
+ {
+ // Skip over 'c'
+ state_.increment ();
+
+ CharT ch_ = 0;
+ bool eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex following \\c.");
+ }
+ else
+ {
+ if (ch_ >= 'a' && ch_ <= 'z')
+ {
+ ch_ -= 'a' - 1;
+ }
+ else if (ch_ >= 'A' && ch_ <= 'Z')
+ {
+ ch_ -= 'A' - 1;
+ }
+ else if (ch_ == '@')
+ {
+ // Apparently...
+ ch_ = 0;
+ }
+ else
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Invalid control char at index " <<
+ state_.index () - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+ }
+
+ return ch_;
+ }
+
+ static CharT decode_hex (state &state_)
+ {
+ // Skip over 'x'
+ state_.increment ();
+
+ CharT ch_ = 0;
+ bool eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex following \\x.");
+ }
+
+ if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') ||
+ (ch_ >= 'A' && ch_ <= 'F')))
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Illegal char following \\x at index " <<
+ state_.index () - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ std::size_t hex_ = 0;
+
+ do
+ {
+ hex_ *= 16;
+
+ if (ch_ >= '0' && ch_ <= '9')
+ {
+ hex_ += ch_ - '0';
+ }
+ else if (ch_ >= 'a' && ch_ <= 'f')
+ {
+ hex_ += 10 + (ch_ - 'a');
+ }
+ else
+ {
+ hex_ += 10 + (ch_ - 'A');
+ }
+
+ eos_ = state_.eos ();
+
+ if (!eos_)
+ {
+ ch_ = *state_._curr;
+
+ // Don't consume invalid chars!
+ if (((ch_ >= '0' && ch_ <= '9') ||
+ (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F')))
+ {
+ state_.increment ();
+ }
+ else
+ {
+ eos_ = true;
+ }
+ }
+ } while (!eos_);
+
+ return static_cast<CharT> (hex_);
+ }
+
+ static void charset_range (const bool chset_, state &state_, bool &eos_,
+ CharT &ch_, const CharT prev_, string &chars_)
+ {
+ if (chset_)
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Charset cannot form start of range preceding "
+ "index " << state_.index () - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ eos_ = state_.next (ch_);
+
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex "
+ "following '-'.");
+ }
+
+ CharT curr_ = 0;
+
+ if (ch_ == '\\')
+ {
+ std::size_t str_len_ = 0;
+
+ if (escape_sequence (state_, curr_, str_len_))
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Charset cannot form end of range preceding index "
+ << state_.index () << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+ }
+/*
+ else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
+ {
+ std::ostringstream ss_;
+
+ ss_ << "POSIX char class cannot form end of range at "
+ "index " << state_.index () - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+*/
+ else
+ {
+ curr_ = ch_;
+ }
+
+ eos_ = state_.next (ch_);
+
+ // Covers preceding if and else
+ if (eos_)
+ {
+ // Pointless returning index if at end of string
+ throw runtime_error ("Unexpected end of regex "
+ "(missing ']').");
+ }
+
+ std::size_t start_ = static_cast<typename Traits::index_type> (prev_);
+ std::size_t end_ = static_cast<typename Traits::index_type> (curr_);
+
+ // Semanic check
+ if (end_ < start_)
+ {
+ std::ostringstream ss_;
+
+ ss_ << "Invalid range in charset preceding index " <<
+ state_.index () - 1 << '.';
+ throw runtime_error (ss_.str ().c_str ());
+ }
+
+ chars_.reserve (chars_.size () + (end_ + 1 - start_));
+
+ for (; start_ <= end_; ++start_)
+ {
+ CharT ch_ = static_cast<CharT> (start_);
+
+ if ((state_._flags & icase) &&
+ (std::isupper (ch_, state_._locale) ||
+ std::islower (ch_, state_._locale)))
+ {
+ CharT upper_ = std::toupper (ch_, state_._locale);
+ CharT lower_ = std::tolower (ch_, state_._locale);
+
+ chars_ += (upper_);
+ chars_ += (lower_);
+ }
+ else
+ {
+ chars_ += (ch_);
+ }
+ }
+ }
+};
+}
+}
+}
+
+#endif