diff options
author | Kevin Smith <git@kismith.co.uk> | 2013-01-12 18:41:34 (GMT) |
---|---|---|
committer | Swift Review <review@swift.im> | 2013-01-13 10:36:26 (GMT) |
commit | f3bc816af1b0d61452de973963e453bf3b3f95a2 (patch) | |
tree | e895f8afa3580e6cff6f5ad2017d45bf147a17c2 /3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp | |
parent | 188fc285c6555eadd3c9d50ab8a94adcade78d89 (diff) | |
download | swift-f3bc816af1b0d61452de973963e453bf3b3f95a2.zip swift-f3bc816af1b0d61452de973963e453bf3b3f95a2.tar.bz2 |
Adding in the spirit Boost stuff
Change-Id: I4f127ce61667243b64081b0aa309028d5077045f
Diffstat (limited to '3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp')
-rw-r--r-- | 3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp new file mode 100644 index 0000000..6e0791e --- /dev/null +++ b/3rdParty/Boost/src/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp @@ -0,0 +1,549 @@ +// tokeniser_helper.hpp +// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H +#define BOOST_LEXER_RE_TOKENISER_HELPER_H + +#include "../../char_traits.hpp" +// strlen() +#include <cstring> +#include "../../size_t.hpp" +#include "re_tokeniser_state.hpp" + +namespace boost +{ +namespace lexer +{ +namespace detail +{ +template<typename CharT, typename Traits = char_traits<CharT> > +class basic_re_tokeniser_helper +{ +public: + typedef basic_re_tokeniser_state<CharT> state; + typedef std::basic_string<CharT> string; + + static const CharT *escape_sequence (state &state_, CharT &ch_, + std::size_t &str_len_) + { + bool eos_ = state_.eos (); + + if (eos_) + { + throw runtime_error ("Unexpected end of regex " + "following '\\'."); + } + + const CharT *str_ = charset_shortcut (*state_._curr, str_len_); + + if (str_) + { + state_.increment (); + } + else + { + ch_ = chr (state_); + } + + return str_; + } + + // This function can call itself. + static void charset (state &state_, string &chars_, bool &negated_) + { + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "following '['."); + } + + negated_ = ch_ == '^'; + + if (negated_) + { + eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "following '^'."); + } + } + + bool chset_ = false; + CharT prev_ = 0; + + while (ch_ != ']') + { + if (ch_ == '\\') + { + std::size_t str_len_ = 0; + const CharT *str_ = escape_sequence (state_, prev_, str_len_); + + chset_ = str_ != 0; + + if (chset_) + { + state temp_state_ (str_ + 1, str_ + str_len_, + state_._flags, state_._locale); + string temp_chars_; + bool temp_negated_ = false; + + charset (temp_state_, temp_chars_, temp_negated_); + + if (negated_ != temp_negated_) + { + std::ostringstream ss_; + + ss_ << "Mismatch in charset negation preceding " + "index " << state_.index () << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + chars_ += temp_chars_; + } + } +/* + else if (ch_ == '[' && !state_.eos () && *state_._curr == ':') + { + // TODO: POSIX charsets + } +*/ + else + { + chset_ = false; + prev_ = ch_; + } + + eos_ = state_.next (ch_); + + // Covers preceding if, else if and else + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "(missing ']')."); + } + + if (ch_ == '-') + { + charset_range (chset_, state_, eos_, ch_, prev_, chars_); + } + else if (!chset_) + { + if ((state_._flags & icase) && + (std::isupper (prev_, state_._locale) || + std::islower (prev_, state_._locale))) + { + CharT upper_ = std::toupper (prev_, state_._locale); + CharT lower_ = std::tolower (prev_, state_._locale); + + chars_ += upper_; + chars_ += lower_; + } + else + { + chars_ += prev_; + } + } + } + + if (!negated_ && chars_.empty ()) + { + throw runtime_error ("Empty charsets not allowed."); + } + } + + static CharT chr (state &state_) + { + CharT ch_ = 0; + + // eos_ has already been checked for. + switch (*state_._curr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + ch_ = decode_octal (state_); + break; + case 'a': + ch_ = '\a'; + state_.increment (); + break; + case 'b': + ch_ = '\b'; + state_.increment (); + break; + case 'c': + ch_ = decode_control_char (state_); + break; + case 'e': + ch_ = 27; // '\e' not recognised by compiler + state_.increment (); + break; + case 'f': + ch_ = '\f'; + state_.increment (); + break; + case 'n': + ch_ = '\n'; + state_.increment (); + break; + case 'r': + ch_ = '\r'; + state_.increment (); + break; + case 't': + ch_ = '\t'; + state_.increment (); + break; + case 'v': + ch_ = '\v'; + state_.increment (); + break; + case 'x': + ch_ = decode_hex (state_); + break; + default: + ch_ = *state_._curr; + state_.increment (); + break; + } + + return ch_; + } + +private: + static const char *charset_shortcut (const char ch_, + std::size_t &str_len_) + { + const char *str_ = 0; + + switch (ch_) + { + case 'd': + str_ = "[0-9]"; + break; + case 'D': + str_ = "[^0-9]"; + break; + case 's': + str_ = "[ \t\n\r\f\v]"; + break; + case 'S': + str_ = "[^ \t\n\r\f\v]"; + break; + case 'w': + str_ = "[_0-9A-Za-z]"; + break; + case 'W': + str_ = "[^_0-9A-Za-z]"; + break; + } + + if (str_) + { + // Some systems have strlen in namespace std. + using namespace std; + + str_len_ = strlen (str_); + } + else + { + str_len_ = 0; + } + + return str_; + } + + static const wchar_t *charset_shortcut (const wchar_t ch_, + std::size_t &str_len_) + { + const wchar_t *str_ = 0; + + switch (ch_) + { + case 'd': + str_ = L"[0-9]"; + break; + case 'D': + str_ = L"[^0-9]"; + break; + case 's': + str_ = L"[ \t\n\r\f\v]"; + break; + case 'S': + str_ = L"[^ \t\n\r\f\v]"; + break; + case 'w': + str_ = L"[_0-9A-Za-z]"; + break; + case 'W': + str_ = L"[^_0-9A-Za-z]"; + break; + } + + if (str_) + { + // Some systems have wcslen in namespace std. + using namespace std; + + str_len_ = wcslen (str_); + } + else + { + str_len_ = 0; + } + + return str_; + } + + static CharT decode_octal (state &state_) + { + std::size_t accumulator_ = 0; + CharT ch_ = *state_._curr; + unsigned short count_ = 3; + bool eos_ = false; + + for (;;) + { + accumulator_ *= 8; + accumulator_ += ch_ - '0'; + --count_; + state_.increment (); + eos_ = state_.eos (); + + if (!count_ || eos_) break; + + ch_ = *state_._curr; + + // Don't consume invalid chars! + if (ch_ < '0' || ch_ > '7') + { + break; + } + } + + return static_cast<CharT> (accumulator_); + } + + static CharT decode_control_char (state &state_) + { + // Skip over 'c' + state_.increment (); + + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex following \\c."); + } + else + { + if (ch_ >= 'a' && ch_ <= 'z') + { + ch_ -= 'a' - 1; + } + else if (ch_ >= 'A' && ch_ <= 'Z') + { + ch_ -= 'A' - 1; + } + else if (ch_ == '@') + { + // Apparently... + ch_ = 0; + } + else + { + std::ostringstream ss_; + + ss_ << "Invalid control char at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + } + + return ch_; + } + + static CharT decode_hex (state &state_) + { + // Skip over 'x' + state_.increment (); + + CharT ch_ = 0; + bool eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex following \\x."); + } + + if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') || + (ch_ >= 'A' && ch_ <= 'F'))) + { + std::ostringstream ss_; + + ss_ << "Illegal char following \\x at index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + std::size_t hex_ = 0; + + do + { + hex_ *= 16; + + if (ch_ >= '0' && ch_ <= '9') + { + hex_ += ch_ - '0'; + } + else if (ch_ >= 'a' && ch_ <= 'f') + { + hex_ += 10 + (ch_ - 'a'); + } + else + { + hex_ += 10 + (ch_ - 'A'); + } + + eos_ = state_.eos (); + + if (!eos_) + { + ch_ = *state_._curr; + + // Don't consume invalid chars! + if (((ch_ >= '0' && ch_ <= '9') || + (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F'))) + { + state_.increment (); + } + else + { + eos_ = true; + } + } + } while (!eos_); + + return static_cast<CharT> (hex_); + } + + static void charset_range (const bool chset_, state &state_, bool &eos_, + CharT &ch_, const CharT prev_, string &chars_) + { + if (chset_) + { + std::ostringstream ss_; + + ss_ << "Charset cannot form start of range preceding " + "index " << state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + eos_ = state_.next (ch_); + + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "following '-'."); + } + + CharT curr_ = 0; + + if (ch_ == '\\') + { + std::size_t str_len_ = 0; + + if (escape_sequence (state_, curr_, str_len_)) + { + std::ostringstream ss_; + + ss_ << "Charset cannot form end of range preceding index " + << state_.index () << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + } +/* + else if (ch_ == '[' && !state_.eos () && *state_._curr == ':') + { + std::ostringstream ss_; + + ss_ << "POSIX char class cannot form end of range at " + "index " << state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } +*/ + else + { + curr_ = ch_; + } + + eos_ = state_.next (ch_); + + // Covers preceding if and else + if (eos_) + { + // Pointless returning index if at end of string + throw runtime_error ("Unexpected end of regex " + "(missing ']')."); + } + + std::size_t start_ = static_cast<typename Traits::index_type> (prev_); + std::size_t end_ = static_cast<typename Traits::index_type> (curr_); + + // Semanic check + if (end_ < start_) + { + std::ostringstream ss_; + + ss_ << "Invalid range in charset preceding index " << + state_.index () - 1 << '.'; + throw runtime_error (ss_.str ().c_str ()); + } + + chars_.reserve (chars_.size () + (end_ + 1 - start_)); + + for (; start_ <= end_; ++start_) + { + CharT ch_ = static_cast<CharT> (start_); + + if ((state_._flags & icase) && + (std::isupper (ch_, state_._locale) || + std::islower (ch_, state_._locale))) + { + CharT upper_ = std::toupper (ch_, state_._locale); + CharT lower_ = std::tolower (ch_, state_._locale); + + chars_ += (upper_); + chars_ += (lower_); + } + else + { + chars_ += (ch_); + } + } + } +}; +} +} +} + +#endif |