diff options
Diffstat (limited to '3rdParty/Boost/src/boost/regex')
26 files changed, 2622 insertions, 414 deletions
diff --git a/3rdParty/Boost/src/boost/regex/config.hpp b/3rdParty/Boost/src/boost/regex/config.hpp index 8c8f524..e4b2138 100644 --- a/3rdParty/Boost/src/boost/regex/config.hpp +++ b/3rdParty/Boost/src/boost/regex/config.hpp @@ -92,6 +92,12 @@ #if defined(_MSC_VER) && !defined(_MSC_EXTENSIONS) # define BOOST_REGEX_NO_EXTERNAL_TEMPLATES #endif +/* + * Shared regex lib will crash without this, frankly it looks a lot like a gcc bug: + */ +#if defined(__MINGW32__) +# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES +#endif /* * If there isn't good enough wide character support then there will @@ -164,7 +170,7 @@ # pragma warning(push) # pragma warning(disable : 4251 4231 4660) # endif -# ifdef _DLL +# if defined(_DLL) && defined(BOOST_MSVC) && (BOOST_MSVC < 1600) # include <string> extern template class __declspec(dllimport) std::basic_string<unsigned short>; # endif diff --git a/3rdParty/Boost/src/boost/regex/icu.hpp b/3rdParty/Boost/src/boost/regex/icu.hpp index 7af1d67..c8770c6 100644 --- a/3rdParty/Boost/src/boost/regex/icu.hpp +++ b/3rdParty/Boost/src/boost/regex/icu.hpp @@ -184,7 +184,9 @@ private: offset_underscore = U_CHAR_CATEGORY_COUNT+3, offset_unicode = U_CHAR_CATEGORY_COUNT+4, offset_any = U_CHAR_CATEGORY_COUNT+5, - offset_ascii = U_CHAR_CATEGORY_COUNT+6 + offset_ascii = U_CHAR_CATEGORY_COUNT+6, + offset_horizontal = U_CHAR_CATEGORY_COUNT+7, + offset_vertical = U_CHAR_CATEGORY_COUNT+8 }; // @@ -197,6 +199,8 @@ private: static const char_class_type mask_unicode; static const char_class_type mask_any; static const char_class_type mask_ascii; + static const char_class_type mask_horizontal; + static const char_class_type mask_vertical; static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2); @@ -311,12 +315,12 @@ inline u32regex do_make_u32regex(InputIterator i, boost::regex_constants::syntax_option_type opt, const boost::mpl::int_<4>*) { - typedef std::vector<UCHAR32> vector_type; + typedef std::vector<UChar32> vector_type; vector_type v; while(i != j) { - v.push_back((UCHAR32)(*i)); - ++a; + v.push_back((UChar32)(*i)); + ++i; } if(v.size()) return u32regex(&*v.begin(), v.size(), opt); @@ -372,7 +376,7 @@ inline u32regex make_u32regex(const std::basic_string<C, T, A>& s, boost::regex_ // // Construction from ICU string type: // -inline u32regex make_u32regex(const UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) +inline u32regex make_u32regex(const U_NAMESPACE_QUALIFIER UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl) { return re_detail::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast<boost::mpl::int_<2> const*>(0)); } @@ -494,7 +498,7 @@ inline bool u32regex_match(const std::wstring& s, return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); } #endif -inline bool u32regex_match(const UnicodeString& s, +inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, match_results<const UChar*>& m, const u32regex& e, match_flag_type flags = match_default) @@ -558,7 +562,7 @@ inline bool u32regex_match(const std::wstring& s, return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); } #endif -inline bool u32regex_match(const UnicodeString& s, +inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, match_flag_type flags = match_default) { @@ -679,7 +683,7 @@ inline bool u32regex_search(const std::wstring& s, return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); } #endif -inline bool u32regex_search(const UnicodeString& s, +inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, match_results<const UChar*>& m, const u32regex& e, match_flag_type flags = match_default) @@ -740,7 +744,7 @@ inline bool u32regex_search(const std::wstring& s, return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0)); } #endif -inline bool u32regex_search(const UnicodeString& s, +inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, match_flag_type flags = match_default) { @@ -917,7 +921,7 @@ inline OutputIterator u32regex_replace(OutputIterator out, Iterator first, Iterator last, const u32regex& e, - const UnicodeString& fmt, + const U_NAMESPACE_QUALIFIER UnicodeString& fmt, match_flag_type flags = match_default) { return re_detail::extract_output_base @@ -962,9 +966,9 @@ namespace re_detail{ class unicode_string_out_iterator { - UnicodeString* out; + U_NAMESPACE_QUALIFIER UnicodeString* out; public: - unicode_string_out_iterator(UnicodeString& s) : out(&s) {} + unicode_string_out_iterator(U_NAMESPACE_QUALIFIER UnicodeString& s) : out(&s) {} unicode_string_out_iterator& operator++() { return *this; } unicode_string_out_iterator& operator++(int) { return *this; } unicode_string_out_iterator& operator*() { return *this; } @@ -982,23 +986,23 @@ public: } -inline UnicodeString u32regex_replace(const UnicodeString& s, +inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const UChar* fmt, match_flag_type flags = match_default) { - UnicodeString result; + U_NAMESPACE_QUALIFIER UnicodeString result; re_detail::unicode_string_out_iterator i(result); u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags); return result; } -inline UnicodeString u32regex_replace(const UnicodeString& s, +inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, - const UnicodeString& fmt, + const U_NAMESPACE_QUALIFIER UnicodeString& fmt, match_flag_type flags = match_default) { - UnicodeString result; + U_NAMESPACE_QUALIFIER UnicodeString result; re_detail::unicode_string_out_iterator i(result); re_detail::do_regex_replace( re_detail::make_utf32_out(i, static_cast<mpl::int_<2> const*>(0)), diff --git a/3rdParty/Boost/src/boost/regex/pending/object_cache.hpp b/3rdParty/Boost/src/boost/regex/pending/object_cache.hpp index 2a7e00b..e1aa191 100644 --- a/3rdParty/Boost/src/boost/regex/pending/object_cache.hpp +++ b/3rdParty/Boost/src/boost/regex/pending/object_cache.hpp @@ -73,7 +73,9 @@ boost::shared_ptr<Object const> object_cache<Key, Object>::get(const Key& k, siz // for now just throw, but we should never really get here... // ::boost::throw_exception(std::runtime_error("Error in thread safety code: could not acquire a lock")); +#ifdef BOOST_NO_UNREACHABLE_RETURN_DETECTION return boost::shared_ptr<Object>(); +#endif #else return do_get(k, max_cache_size); #endif diff --git a/3rdParty/Boost/src/boost/regex/pending/static_mutex.hpp b/3rdParty/Boost/src/boost/regex/pending/static_mutex.hpp index 218169c..334ef27 100644 --- a/3rdParty/Boost/src/boost/regex/pending/static_mutex.hpp +++ b/3rdParty/Boost/src/boost/regex/pending/static_mutex.hpp @@ -140,7 +140,7 @@ inline bool scoped_static_mutex_lock::locked()const namespace boost{ class BOOST_REGEX_DECL scoped_static_mutex_lock; -extern "C" BOOST_REGEX_DECL void free_static_mutex(); +extern "C" BOOST_REGEX_DECL void boost_regex_free_static_mutex(); class BOOST_REGEX_DECL static_mutex { diff --git a/3rdParty/Boost/src/boost/regex/v4/basic_regex.hpp b/3rdParty/Boost/src/boost/regex/v4/basic_regex.hpp index cb9ff3c..09b0467 100644 --- a/3rdParty/Boost/src/boost/regex/v4/basic_regex.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/basic_regex.hpp @@ -19,6 +19,9 @@ #ifndef BOOST_REGEX_V4_BASIC_REGEX_HPP #define BOOST_REGEX_V4_BASIC_REGEX_HPP +#include <boost/type_traits/is_same.hpp> +#include <boost/functional/hash.hpp> + #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4103) @@ -44,12 +47,160 @@ namespace re_detail{ template <class charT, class traits> class basic_regex_parser; +template <class I> +void bubble_down_one(I first, I last) +{ + if(first != last) + { + I next = last - 1; + while((next != first) && !(*(next-1) < *next)) + { + (next-1)->swap(*next); + --next; + } + } +} + +// +// Class named_subexpressions +// Contains information about named subexpressions within the regex. +// +template <class charT> +class named_subexpressions_base +{ +public: + virtual int get_id(const charT* i, const charT* j)const = 0; + virtual int get_id(std::size_t hash)const = 0; +#ifdef __GNUC__ + // warning supression: + virtual ~named_subexpressions_base(){} +#endif +}; + +template <class Iterator> +inline std::size_t hash_value_from_capture_name(Iterator i, Iterator j) +{ + std::size_t r = boost::hash_range(i, j); + r %= ((std::numeric_limits<int>::max)() - 10001); + r += 10000; + return r; +} + +template <class charT> +class named_subexpressions : public named_subexpressions_base<charT> +{ + struct name + { + name(const charT* i, const charT* j, int idx) + : /*n(i, j), */ index(idx) + { + hash = hash_value_from_capture_name(i, j); + } + name(std::size_t h, int idx) + : index(idx), hash(h) + { + } + //std::vector<charT> n; + int index; + std::size_t hash; + bool operator < (const name& other)const + { + return hash < other.hash; //std::lexicographical_compare(n.begin(), n.end(), other.n.begin(), other.n.end()); + } + bool operator == (const name& other)const + { + return hash == other.hash; //n == other.n; + } + void swap(name& other) + { + //n.swap(other.n); + std::swap(index, other.index); + std::swap(hash, other.hash); + } + }; +public: + named_subexpressions(){} + void set_name(const charT* i, const charT* j, int index) + { + m_sub_names.push_back(name(i, j, index)); + bubble_down_one(m_sub_names.begin(), m_sub_names.end()); + } + int get_id(const charT* i, const charT* j)const + { + name t(i, j, 0); + typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } + int get_id(std::size_t h)const + { + name t(h, 0); + typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } +private: + std::vector<name> m_sub_names; +}; + +template <class charT, class Other> +class named_subexpressions_converter : public named_subexpressions_base<charT> +{ + boost::shared_ptr<named_subexpressions<Other> > m_converter; +public: + named_subexpressions_converter(boost::shared_ptr<named_subexpressions<Other> > s) + : m_converter(s) {} + int get_id(const charT* i, const charT* j)const + { + if(i == j) + return -1; + std::vector<Other> v; + while(i != j) + { + v.push_back(*i); + ++i; + } + return m_converter->get_id(&v[0], &v[0] + v.size()); + } + int get_id(std::size_t h)const + { + return m_converter->get_id(h); + } +}; + +template <class To> +inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs_imp( + boost::shared_ptr<named_subexpressions<To> > s, + boost::integral_constant<bool,true> const&) +{ + return s; +} +template <class To, class From> +inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs_imp( + boost::shared_ptr<named_subexpressions<From> > s, + boost::integral_constant<bool,false> const&) +{ + return boost::shared_ptr<named_subexpressions_converter<To, From> >(new named_subexpressions_converter<To, From>(s)); +} +template <class To, class From> +inline boost::shared_ptr<named_subexpressions_base<To> > convert_to_named_subs( + boost::shared_ptr<named_subexpressions<From> > s) +{ + typedef typename boost::is_same<To, From>::type tag_type; + return convert_to_named_subs_imp<To>(s, tag_type()); +} // // class regex_data: // represents the data we wish to expose to the matching algorithms. // template <class charT, class traits> -struct regex_data +struct regex_data : public named_subexpressions<charT> { typedef regex_constants::syntax_option_type flag_type; typedef std::size_t size_type; @@ -77,6 +228,7 @@ struct regex_data std::vector< std::pair< std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*. + bool m_has_recursions; // whether we have recursive expressions; }; // // class basic_regex_implementation @@ -520,6 +672,10 @@ public: BOOST_ASSERT(0 != m_pimpl.get()); return m_pimpl->get_data(); } + boost::shared_ptr<re_detail::named_subexpressions<charT> > get_named_subs()const + { + return m_pimpl; + } private: shared_ptr<re_detail::basic_regex_implementation<charT, traits> > m_pimpl; diff --git a/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp b/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp index 9f2cbee..ee207d0 100644 --- a/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp @@ -240,6 +240,7 @@ protected: bool m_has_backrefs; // true if there are actually any backrefs unsigned m_backrefs; // bitmask of permitted backrefs boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; + bool m_has_recursions; // set when we have recursive expresisons to fixup typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character @@ -250,6 +251,7 @@ private: basic_regex_creator(const basic_regex_creator&); void fixup_pointers(re_syntax_base* state); + void fixup_recursions(re_syntax_base* state); void create_startmaps(re_syntax_base* state); int calculate_backstep(re_syntax_base* state); void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask); @@ -263,7 +265,7 @@ private: template <class charT, class traits> basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data) - : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0) + : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false) { m_pdata->m_data.clear(); m_pdata->m_status = ::boost::regex_constants::error_ok; @@ -675,6 +677,8 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set( template <class charT, class traits> void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2) { + if(this->m_pdata->m_status) + return; // we've added all the states we need, now finish things off. // start by adding a terminating state: append_state(syntax_element_match); @@ -692,6 +696,15 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data()); // fixup pointers in the machine: fixup_pointers(m_pdata->m_first_state); + if(m_has_recursions) + { + m_pdata->m_has_recursions = true; + fixup_recursions(m_pdata->m_first_state); + if(this->m_pdata->m_status) + return; + } + else + m_pdata->m_has_recursions = false; // create nested startmaps: create_startmaps(m_pdata->m_first_state); // create main startmap: @@ -713,6 +726,13 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state) { switch(state->type) { + case syntax_element_recurse: + m_has_recursions = true; + if(state->next.i) + state->next.p = getaddress(state->next.i, state); + else + state->next.p = 0; + break; case syntax_element_rep: case syntax_element_dot_rep: case syntax_element_char_rep: @@ -739,6 +759,128 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state) } template <class charT, class traits> +void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state) +{ + re_syntax_base* base = state; + while(state) + { + switch(state->type) + { + case syntax_element_assert_backref: + { + // just check that the index is valid: + int id = static_cast<const re_brace*>(state)->index; + if(id < 0) + { + id = -id-1; + if(id >= 10000) + { + id = m_pdata->get_id(id); + if(id <= 0) + { + // check of sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered a forward reference to a marked sub-expression that does not exist."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + } + } + break; + case syntax_element_recurse: + { + bool ok = false; + re_syntax_base* p = base; + int id = static_cast<re_jump*>(state)->alt.i; + if(id > 10000) + id = m_pdata->get_id(id); + while(p) + { + if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == id)) + { + // + // We've found the target of the recursion, set the jump target: + // + static_cast<re_jump*>(state)->alt.p = p; + ok = true; + // + // Now scan the target for nested repeats: + // + p = p->next.p; + int next_rep_id = 0; + while(p) + { + switch(p->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + next_rep_id = static_cast<re_repeat*>(p)->state_id; + break; + case syntax_element_endmark: + if(static_cast<const re_brace*>(p)->index == id) + next_rep_id = -1; + break; + default: + break; + } + if(next_rep_id) + break; + p = p->next.p; + } + if(next_rep_id > 0) + { + static_cast<re_recurse*>(state)->state_id = next_rep_id - 1; + } + + break; + } + p = p->next.p; + } + if(!ok) + { + // recursion to sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + default: + break; + } + state = state->next.p; + } +} + +template <class charT, class traits> void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state) { // non-recursive implementation: @@ -790,7 +932,7 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state) // if(0 == (this->flags() & regex_constants::no_except)) { - std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern); + std::string message = "Invalid lookbehind assertion encountered in the regular expression."; boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); e.raise(); } @@ -909,6 +1051,9 @@ template <class charT, class traits> void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask) { int not_last_jump = 1; + re_syntax_base* recursion_start = 0; + int recursion_sub = 0; + re_syntax_base* recursion_restart = 0; // track case sensitivity: bool l_icase = m_icase; @@ -953,6 +1098,41 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, create_startmap(state->next.p, 0, pnull, mask); return; } + case syntax_element_recurse: + { + if(recursion_start == state) + { + // Infinite recursion!! + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered an infinite recursion."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + else if(recursion_start == 0) + { + recursion_start = state; + recursion_restart = state->next.p; + state = static_cast<re_jump*>(state)->alt.p; + if(state->type == syntax_element_startmark) + recursion_sub = static_cast<re_brace*>(state)->index; + else + recursion_sub = 0; + break; + } + // fall through, can't handle nested recursion here... + } case syntax_element_backref: // can be null, and any character can match: if(pnull) @@ -1111,12 +1291,45 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, *pnull |= mask; return; } - else + else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index)) { - state = state->next.p; + // recursion termination: + recursion_start = 0; + state = recursion_restart; break; } + // + // Normally we just go to the next state... but if this sub-expression is + // the target of a recursion, then we might be ending a recursion, in which + // case we should check whatever follows that recursion, as well as whatever + // follows this state: + // + if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index) + { + bool ok = false; + re_syntax_base* p = m_pdata->m_first_state; + while(p) + { + if((p->type == syntax_element_recurse)) + { + re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p); + if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index)) + { + ok = true; + break; + } + } + p = p->next.p; + } + if(ok) + { + create_startmap(p->next.p, l_map, pnull, mask); + } + } + state = state->next.p; + break; + case syntax_element_startmark: // need to handle independent subs as a special case: if(static_cast<re_brace*>(state)->index == -3) diff --git a/3rdParty/Boost/src/boost/regex/v4/basic_regex_parser.hpp b/3rdParty/Boost/src/boost/regex/v4/basic_regex_parser.hpp index b8bc996..3ea4d64 100644 --- a/3rdParty/Boost/src/boost/regex/v4/basic_regex_parser.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/basic_regex_parser.hpp @@ -45,6 +45,11 @@ public: basic_regex_parser(regex_data<charT, traits>* data); void parse(const charT* p1, const charT* p2, unsigned flags); void fail(regex_constants::error_type error_code, std::ptrdiff_t position); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message) + { + fail(error_code, position, message, position); + } bool parse_all(); bool parse_basic(); @@ -78,6 +83,8 @@ private: const charT* m_end; // the end of the string being parsed const charT* m_position; // our current parser position unsigned m_mark_count; // how many sub-expressions we have + int m_mark_reset; // used to indicate that we're inside a (?|...) block. + unsigned m_max_mark; // largest mark count seen inside a (?|...) block. std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted). std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative bool m_has_case_change; // true if somewhere in the current block the case has changed @@ -96,7 +103,7 @@ private: template <class charT, class traits> basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data) - : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false) + : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false) { } @@ -123,14 +130,27 @@ void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, switch(l_flags & regbase::main_option_type) { case regbase::perl_syntax_group: - m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended; - break; + { + m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended; + // + // Add a leading paren with index zero to give recursions a target: + // + re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace))); + br->index = 0; + br->icase = this->flags() & regbase::icase; + break; + } case regbase::basic_syntax_group: m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic; break; case regbase::literal: m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal; break; + default: + // Ooops, someone has managed to set more than one of the main option flags, + // so this must be an error: + fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used."); + return; } // parse all our characters: @@ -145,7 +165,7 @@ void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, // have had an unexpected ')' : if(!result) { - fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position)); + fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis."); return; } // if an error has been set then give up now: @@ -159,12 +179,39 @@ void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, template <class charT, class traits> void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position) { + // get the error message: + std::string message = this->m_pdata->m_ptraits->error_string(error_code); + fail(error_code, position, message); +} + +template <class charT, class traits> +void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos) +{ if(0 == this->m_pdata->m_status) // update the error code if not already set this->m_pdata->m_status = error_code; m_position = m_end; // don't bother parsing anything else - // get the error message: - std::string message = this->m_pdata->m_ptraits->error_string(error_code); - // and raise the exception, this will do nothing if exceptions are disabled: + + // + // Augment error message with the regular expression text: + // + if(start_pos == position) + start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10)); + std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base)); + if(error_code != regex_constants::error_empty) + { + if((start_pos != 0) || (end_pos != (m_end - m_base))) + message += " The error occured while parsing the regular expression fragment: '"; + else + message += " The error occured while parsing the regular expression: '"; + if(start_pos != end_pos) + { + message += std::string(m_base + start_pos, m_base + position); + message += ">>>HERE>>>"; + message += std::string(m_base + position, m_base + end_pos); + } + message += "'."; + } + #ifndef BOOST_NO_EXCEPTIONS if(0 == (this->flags() & regex_constants::no_except)) { @@ -272,7 +319,7 @@ bool basic_regex_parser<charT, traits>::parse_extended() case regex_constants::syntax_star: if(m_position == this->m_base) { - fail(regex_constants::error_badrepeat, 0); + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression."); return false; } ++m_position; @@ -280,7 +327,7 @@ bool basic_regex_parser<charT, traits>::parse_extended() case regex_constants::syntax_question: if(m_position == this->m_base) { - fail(regex_constants::error_badrepeat, 0); + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression."); return false; } ++m_position; @@ -288,7 +335,7 @@ bool basic_regex_parser<charT, traits>::parse_extended() case regex_constants::syntax_plus: if(m_position == this->m_base) { - fail(regex_constants::error_badrepeat, 0); + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression."); return false; } ++m_position; @@ -297,7 +344,7 @@ bool basic_regex_parser<charT, traits>::parse_extended() ++m_position; return parse_repeat_range(false); case regex_constants::syntax_close_brace: - fail(regex_constants::error_brace, this->m_position - this->m_end); + fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {."); return false; case regex_constants::syntax_or: return parse_alt(); @@ -375,11 +422,17 @@ bool basic_regex_parser<charT, traits>::parse_open_paren() if(0 == (this->flags() & regbase::nosubs)) { markid = ++m_mark_count; +#ifndef BOOST_NO_STD_DISTANCE if(this->flags() & regbase::save_subexpression_location) this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0)); +#else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0)); +#endif } re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace))); pb->index = markid; + pb->icase = this->flags() & regbase::icase; std::ptrdiff_t last_paren_start = this->getoffset(pb); // back up insertion point for alternations, and set new point: std::ptrdiff_t last_alt_point = m_alt_insert_point; @@ -392,6 +445,11 @@ bool basic_regex_parser<charT, traits>::parse_open_paren() bool old_case_change = m_has_case_change; m_has_case_change = false; // no changes to this scope as yet... // + // Back up branch reset data in case we have a nested (?|...) + // + int mark_reset = m_mark_reset; + m_mark_reset = -1; + // // now recursively add more states, this will terminate when we get to a // matching ')' : // @@ -416,6 +474,10 @@ bool basic_regex_parser<charT, traits>::parse_open_paren() this->flags(opts); m_has_case_change = old_case_change; // + // restore branch reset: + // + m_mark_reset = mark_reset; + // // we either have a ')' or we have run out of characters prematurely: // if(m_position == m_end) @@ -424,14 +486,20 @@ bool basic_regex_parser<charT, traits>::parse_open_paren() return false; } BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); +#ifndef BOOST_NO_STD_DISTANCE if(markid && (this->flags() & regbase::save_subexpression_location)) this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position); +#else + if(markid && (this->flags() & regbase::save_subexpression_location)) + this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base); +#endif ++m_position; // // append closing parenthesis state: // pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace))); pb->index = markid; + pb->icase = this->flags() & regbase::icase; this->m_paren_start = last_paren_start; // // restore the alternate insertion point: @@ -481,7 +549,7 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape() case regex_constants::syntax_close_brace: if(this->flags() & regbase::no_intervals) return parse_literal(); - fail(regex_constants::error_brace, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {."); return false; case regex_constants::syntax_or: if(this->flags() & regbase::bk_vbar) @@ -576,7 +644,7 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape() case 'c': case 'C': // not supported yet: - fail(regex_constants::error_escape, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead."); return false; default: break; @@ -600,6 +668,7 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape() // fall through: case regex_constants::escape_type_class: { +escape_type_class_jump: typedef typename traits::char_class_type mask_type; mask_type m = this->m_traits.lookup_classname(m_position, m_position+1); if(m != 0) @@ -673,7 +742,7 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape() char_class_type m; if(m_position == m_end) { - fail(regex_constants::error_escape, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found."); return false; } // maybe have \p{ddd} @@ -685,7 +754,7 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape() ++m_position; if(m_position == m_end) { - fail(regex_constants::error_escape, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence."); return false; } m = this->m_traits.lookup_classname(++base, m_position++); @@ -708,8 +777,106 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape() } return true; } - fail(regex_constants::error_ctype, m_position - m_base); + fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name."); + return false; + } + case regex_constants::escape_type_reset_start_mark: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->index = -5; + pb->icase = this->flags() & regbase::icase; + this->m_pdata->m_data.align(); + ++m_position; + return true; + } + goto escape_type_class_jump; + case regex_constants::escape_type_line_ending: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + const charT* e = get_escape_R_string<charT>(); + const charT* old_position = m_position; + const charT* old_end = m_end; + const charT* old_base = m_base; + m_position = e; + m_base = e; + m_end = e + traits::length(e); + bool r = parse_all(); + m_position = ++old_position; + m_end = old_end; + m_base = old_base; + return r; + } + goto escape_type_class_jump; + case regex_constants::escape_type_extended_backref: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + bool have_brace = false; + bool negative = false; + static const char* incomplete_message = "Incomplete \\g escape found."; + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + // maybe have \g{ddd} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + have_brace = true; + } + negative = (*m_position == static_cast<charT>('-')); + if((negative) && (++m_position == m_end)) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + const charT* pc = m_position; + int i = this->m_traits.toi(pc, m_end, 10); + if(i < 0) + { + // Check for a named capture: + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + ++m_position; + i = this->m_pdata->get_id(base, m_position); + pc = m_position; + } + if(negative) + i = 1 + m_mark_count - i; + if((i > 0) && (this->m_backrefs & (1u << (i-1)))) + { + m_position = pc; + re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace))); + pb->index = i; + pb->icase = this->flags() & regbase::icase; + } + else + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + m_position = pc; + if(have_brace) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + ++m_position; + } + return true; } + goto escape_type_class_jump; + case regex_constants::escape_type_control_v: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + goto escape_type_class_jump; + // fallthrough: default: this->append_literal(unescape_character()); break; @@ -737,6 +904,7 @@ template <class charT, class traits> bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high) { bool greedy = true; + bool pocessive = false; std::size_t insert_point; // // when we get to here we may have a non-greedy ? mark still to come: @@ -748,16 +916,23 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_ ) ) { - // OK we have a perl regex, check for a '?': + // OK we have a perl or emacs regex, check for a '?': if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) { greedy = false; ++m_position; } + // for perl regexes only check for pocessive ++ repeats. + if((0 == (this->flags() & regbase::main_option_type)) + && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus)) + { + pocessive = true; + ++m_position; + } } if(0 == this->m_last_state) { - fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position)); + fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position), "Nothing to repeat."); return false; } if(this->m_last_state->type == syntax_element_endmark) @@ -822,12 +997,29 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_ // now fill in the alt jump for the repeat: rep = static_cast<re_repeat*>(this->getaddress(rep_off)); rep->alt.i = this->m_pdata->m_data.size() - rep_off; + // + // If the repeat is pocessive then bracket the repeat with a (?>...) + // independent sub-expression construct: + // + if(pocessive) + { + re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace))); + pb->index = -3; + pb->icase = this->flags() & regbase::icase; + jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp); + pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = -3; + pb->icase = this->flags() & regbase::icase; + } return true; } template <class charT, class traits> bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic) { + static const char* incomplete_message = "Missing } in quantified repetition."; // // parse a repeat-range: // @@ -839,7 +1031,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic) // fail if at end: if(this->m_position == this->m_end) { - fail(regex_constants::error_brace, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); return false; } // get min: @@ -854,7 +1046,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic) } else if(this->m_position == this->m_end) { - fail(regex_constants::error_brace, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); return false; } min = v; @@ -868,7 +1060,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic) ++m_position; if(this->m_position == this->m_end) { - fail(regex_constants::error_brace, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); return false; } // get the value if any: @@ -886,7 +1078,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic) // OK now check trailing }: if(this->m_position == this->m_end) { - fail(regex_constants::error_brace, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); return false; } if(isbasic) @@ -896,13 +1088,13 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic) ++m_position; if(this->m_position == this->m_end) { - fail(regex_constants::error_brace, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); return false; } } else { - fail(regex_constants::error_badbrace, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); return false; } } @@ -910,7 +1102,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic) ++m_position; else { - fail(regex_constants::error_badbrace, this->m_position - this->m_base); + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); return false; } // @@ -918,7 +1110,11 @@ bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic) // if(min > max) { - fail(regex_constants::error_badbrace, this->m_position - this->m_base); + // Backtrack to error location: + m_position -= 2; + while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position; + ++m_position; + fail(regex_constants::error_badbrace, m_position - m_base); return false; } return parse_repeat(min, max); @@ -941,9 +1137,17 @@ bool basic_regex_parser<charT, traits>::parse_alt() ) ) { - fail(regex_constants::error_empty, this->m_position - this->m_base); + fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression can start with the alternation operator |."); return false; } + // + // Reset mark count if required: + // + if(m_max_mark < m_mark_count) + m_max_mark = m_mark_count; + if(m_mark_reset >= 0) + m_mark_count = m_mark_reset; + ++m_position; // // we need to append a trailing jump: @@ -985,10 +1189,11 @@ bool basic_regex_parser<charT, traits>::parse_alt() template <class charT, class traits> bool basic_regex_parser<charT, traits>::parse_set() { + static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content."; ++m_position; if(m_position == m_end) { - fail(regex_constants::error_brack, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); return false; } basic_char_set<charT, traits> char_set; @@ -1021,7 +1226,7 @@ bool basic_regex_parser<charT, traits>::parse_set() ++m_position; if(0 == this->append_set(char_set)) { - fail(regex_constants::error_range, m_position - m_base); + fail(regex_constants::error_ctype, m_position - m_base); return false; } } @@ -1076,6 +1281,7 @@ bool basic_regex_parser<charT, traits>::parse_set() template <class charT, class traits> bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set) { + static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content."; // // we have either a character class [:name:] // a collating element [.name.] @@ -1083,7 +1289,7 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr // if(m_end == ++m_position) { - fail(regex_constants::error_brack, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); return false; } switch(this->m_traits.syntax_type(*m_position)) @@ -1108,14 +1314,14 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr // skip the ':' if(m_end == ++m_position) { - fail(regex_constants::error_brack, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); return false; } const charT* name_first = m_position; // skip at least one character, then find the matching ':]' if(m_end == ++m_position) { - fail(regex_constants::error_brack, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); return false; } while((m_position != m_end) @@ -1124,13 +1330,13 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr const charT* name_last = m_position; if(m_end == m_position) { - fail(regex_constants::error_brack, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); return false; } if((m_end == ++m_position) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) { - fail(regex_constants::error_brack, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); return false; } // @@ -1185,14 +1391,14 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr // skip the '=' if(m_end == ++m_position) { - fail(regex_constants::error_brack, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); return false; } const charT* name_first = m_position; // skip at least one character, then find the matching '=]' if(m_end == ++m_position) { - fail(regex_constants::error_brack, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); return false; } while((m_position != m_end) @@ -1201,13 +1407,13 @@ bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, tr const charT* name_last = m_position; if(m_end == m_position) { - fail(regex_constants::error_brack, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); return false; } if((m_end == ++m_position) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) { - fail(regex_constants::error_brack, m_position - m_base); + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); return false; } string_type m = this->m_traits.lookup_collatename(name_first, name_last); @@ -1397,7 +1603,7 @@ charT basic_regex_parser<charT, traits>::unescape_character() charT result(0); if(m_position == m_end) { - fail(regex_constants::error_escape, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely."); return false; } switch(this->m_traits.escape_syntax_type(*m_position)) @@ -1430,24 +1636,22 @@ charT basic_regex_parser<charT, traits>::unescape_character() ++m_position; if(m_position == m_end) { - fail(regex_constants::error_escape, m_position - m_base); - return result; - } - /* - if((*m_position < charT('@')) - || (*m_position > charT(125)) ) - { - fail(regex_constants::error_escape, m_position - m_base); + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely."); return result; } - */ result = static_cast<charT>(*m_position % 32); break; case regex_constants::escape_type_hex: ++m_position; if(m_position == m_end) { - fail(regex_constants::error_escape, m_position - m_base); + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely."); return result; } // maybe have \x{ddd} @@ -1456,16 +1660,22 @@ charT basic_regex_parser<charT, traits>::unescape_character() ++m_position; if(m_position == m_end) { - fail(regex_constants::error_escape, m_position - m_base); + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence."); return result; } int i = this->m_traits.toi(m_position, m_end, 16); if((m_position == m_end) || (i < 0) - || ((std::numeric_limits<charT>::is_specialized) && (charT(i) > (std::numeric_limits<charT>::max)())) + || ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)())) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) { - fail(regex_constants::error_badbrace, m_position - m_base); + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid."); return result; } ++m_position; @@ -1473,12 +1683,15 @@ charT basic_regex_parser<charT, traits>::unescape_character() } else { - std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position); + std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position)); int i = this->m_traits.toi(m_position, m_position + len, 16); if((i < 0) || !valid_value(charT(0), i)) { - fail(regex_constants::error_escape, m_position - m_base); + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character."); return result; } result = charT(i); @@ -1493,14 +1706,20 @@ charT basic_regex_parser<charT, traits>::unescape_character() int val = this->m_traits.toi(bp, bp + 1, 8); if(val != 0) { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; // Oops not an octal escape after all: - fail(regex_constants::error_escape, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence."); return result; } val = this->m_traits.toi(m_position, m_position + len, 8); if(val < 0) { - fail(regex_constants::error_escape, m_position - m_base); + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid."); return result; } return static_cast<charT>(val); @@ -1510,6 +1729,9 @@ charT basic_regex_parser<charT, traits>::unescape_character() ++m_position; if(m_position == m_end) { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; fail(regex_constants::error_escape, m_position - m_base); return false; } @@ -1522,12 +1744,18 @@ charT basic_regex_parser<charT, traits>::unescape_character() ++m_position; if(m_position == m_end) { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; fail(regex_constants::error_escape, m_position - m_base); return false; } string_type s = this->m_traits.lookup_collatename(++base, m_position++); if(s.empty()) { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; fail(regex_constants::error_collate, m_position - m_base); return false; } @@ -1537,6 +1765,9 @@ charT basic_regex_parser<charT, traits>::unescape_character() } } // fall through is a failure: + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; fail(regex_constants::error_escape, m_position - m_base); return false; } @@ -1568,10 +1799,14 @@ bool basic_regex_parser<charT, traits>::parse_backref() m_position = pc; re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace))); pb->index = i; + pb->icase = this->flags() & regbase::icase; } else { - fail(regex_constants::error_backref, m_position - m_end); + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_backref, m_position - m_base); return false; } return true; @@ -1603,7 +1838,7 @@ bool basic_regex_parser<charT, traits>::parse_QE() } if(++m_position == m_end) // skip the escape { - fail(regex_constants::error_escape, m_position - m_base); + fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence."); return false; } // check to see if it's a \E: @@ -1634,7 +1869,10 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() { if(++m_position == m_end) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } // @@ -1655,6 +1893,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() int markid = 0; std::ptrdiff_t jump_offset = 0; re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->icase = this->flags() & regbase::icase; std::ptrdiff_t last_paren_start = this->getoffset(pb); // back up insertion point for alternations, and set new point: std::ptrdiff_t last_alt_point = m_alt_insert_point; @@ -1665,11 +1904,18 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() regex_constants::syntax_option_type old_flags = this->flags(); bool old_case_change = m_has_case_change; m_has_case_change = false; + charT name_delim; + int mark_reset = m_mark_reset; + m_mark_reset = -1; + int v; // // select the actual extension used: // switch(this->m_traits.syntax_type(*m_position)) { + case regex_constants::syntax_or: + m_mark_reset = m_mark_count; + // fall through: case regex_constants::syntax_colon: // // a non-capturing mark: @@ -1677,6 +1923,68 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() pb->index = markid = 0; ++m_position; break; + case regex_constants::syntax_digit: + { + // + // a recursive subexpression: + // + v = this->m_traits.toi(m_position, m_end, 10); + if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated."); + return false; + } +insert_recursion: + pb->index = markid = 0; + re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse))); + pr->alt.i = v; + pr->state_id = 0; + static_cast<re_case*>( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = this->flags() & regbase::icase; + break; + } + case regex_constants::syntax_plus: + // + // A forward-relative recursive subexpression: + // + ++m_position; + v = this->m_traits.toi(m_position, m_end, 10); + if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression."); + return false; + } + v += m_mark_count; + goto insert_recursion; + case regex_constants::syntax_dash: + // + // Possibly a backward-relative recursive subexpression: + // + ++m_position; + v = this->m_traits.toi(m_position, m_end, 10); + if(v <= 0) + { + --m_position; + // Oops not a relative recursion at all, but a (?-imsx) group: + goto option_group_jump; + } + v = m_mark_count + 1 - v; + if(v <= 0) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression."); + return false; + } + goto insert_recursion; case regex_constants::syntax_equal: pb->index = markid = -1; ++m_position; @@ -1696,7 +2004,10 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() // a lookbehind assertion: if(++m_position == m_end) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position); @@ -1706,8 +2017,10 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() pb->index = markid = -1; else { - fail(regex_constants::error_badrepeat, m_position - m_base); - return false; + // Probably a named capture which also starts (?< : + name_delim = '>'; + --m_position; + goto named_capture_jump; } ++m_position; jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); @@ -1732,22 +2045,152 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() pb->index = markid = -4; if(++m_position == m_end) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } - int v = this->m_traits.toi(m_position, m_end, 10); - if(v > 0) + v = this->m_traits.toi(m_position, m_end, 10); + if(*m_position == charT('R')) + { + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(*m_position == charT('&')) + { + const charT* base = ++m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = -static_cast<int>(hash_value_from_capture_name(base, m_position)); + } + else + { + v = -this->m_traits.toi(m_position, m_end, 10); + } + re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v < 0 ? (v - 1) : 0; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if((*m_position == charT('\'')) || (*m_position == charT('<'))) + { + const charT* base = ++m_position; + while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\''))) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast<int>(hash_value_from_capture_name(base, m_position)); + re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v; + if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture."); + return false; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if(*m_position == charT('D')) + { + const char* def = "DEFINE"; + while(*def && (m_position != m_end) && (*m_position == charT(*def))) + ++m_position, ++def; + if((m_position == m_end) || *def) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = 9999; // special magic value! + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if(v > 0) { re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); br->index = v; if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } if(++m_position == m_end) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } } @@ -1756,25 +2199,37 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() // verify that we have a lookahead or lookbehind assert: if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } if(++m_position == m_end) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word) { if(++m_position == m_end) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } m_position -= 3; @@ -1784,7 +2239,10 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } m_position -= 2; @@ -1793,15 +2251,125 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() break; } case regex_constants::syntax_close_mark: - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; + case regex_constants::escape_type_end_buffer: + { + name_delim = *m_position; +named_capture_jump: + markid = 0; + if(0 == (this->flags() & regbase::nosubs)) + { + markid = ++m_mark_count; + #ifndef BOOST_NO_STD_DISTANCE + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0)); + #else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0)); + #endif + } + pb->index = markid; + const charT* base = ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + while((m_position != m_end) && (*m_position != name_delim)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + this->m_pdata->set_name(base, m_position, markid); + ++m_position; + break; + } default: + if(*m_position == charT('R')) + { + ++m_position; + v = 0; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + goto insert_recursion; + } + if(*m_position == charT('&')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast<int>(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + if(*m_position == charT('P')) + { + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(*m_position == charT('>')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast<int>(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + } // // lets assume that we have a (?imsx) group and try and parse it: // +option_group_jump: regex_constants::syntax_option_type opts = parse_options(); if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; + } // make a note of whether we have a case change: m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase)); pb->index = markid = 0; @@ -1820,7 +2388,10 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() } else { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); return false; } @@ -1842,12 +2413,21 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() // Unwind alternatives: // if(0 == unwind_alts(last_paren_start)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block."); return false; + } // // we either have a ')' or we have run out of characters prematurely: // if(m_position == m_end) { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end)); return false; } @@ -1878,7 +2458,10 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() if(this->m_last_state == jmp) { // Oops... we didn't have anything inside the assertion: - fail(regex_constants::error_empty, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion."); return false; } } @@ -1897,16 +2480,36 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() } else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt) { - fail(regex_constants::error_bad_pattern, m_position - m_base); + // Can't have seen more than one alternative: + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression."); return false; } + else + { + // We must *not* have seen an alternative inside a (DEFINE) block: + b = this->getaddress(b->next.i, b); + if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block."); + return false; + } + } // check for invalid repetition of next state: b = this->getaddress(expected_alt_point); b = this->getaddress(static_cast<re_alt*>(b)->next.i, b); if((b->type != syntax_element_assert_backref) && (b->type != syntax_element_startmark)) { - fail(regex_constants::error_badrepeat, m_position - m_base); + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion."); return false; } } @@ -1915,6 +2518,7 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() // pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace))); pb->index = markid; + pb->icase = this->flags() & regbase::icase; this->m_paren_start = last_paren_start; // // restore the alternate insertion point: @@ -1924,6 +2528,31 @@ bool basic_regex_parser<charT, traits>::parse_perl_extension() // and the case change data: // m_has_case_change = old_case_change; + // + // And the mark_reset data: + // + if(m_max_mark > m_mark_count) + { + m_mark_count = m_max_mark; + } + m_mark_reset = mark_reset; + + + if(markid > 0) + { +#ifndef BOOST_NO_STD_DISTANCE + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1; +#else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1; +#endif + // + // allow backrefs to this mark: + // + if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT))) + this->m_backrefs |= 1u << (markid - 1); + } return true; } @@ -1935,6 +2564,9 @@ bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate) // if(++m_position == m_end) { + // Rewind to start of sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; fail(regex_constants::error_escape, m_position - m_base); return false; } @@ -2035,6 +2667,9 @@ regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_opt } if(++m_position == m_end) { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; fail(regex_constants::error_paren, m_position - m_base); return false; } @@ -2045,6 +2680,9 @@ regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_opt { if(++m_position == m_end) { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; fail(regex_constants::error_paren, m_position - m_base); return false; } @@ -2071,6 +2709,9 @@ regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_opt } if(++m_position == m_end) { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; fail(regex_constants::error_paren, m_position - m_base); return false; } @@ -2097,7 +2738,7 @@ bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_st ) ) { - fail(regex_constants::error_empty, this->m_position - this->m_base); + fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |."); return false; } // diff --git a/3rdParty/Boost/src/boost/regex/v4/cpp_regex_traits.hpp b/3rdParty/Boost/src/boost/regex/v4/cpp_regex_traits.hpp index 89fe49d..bb40aa1 100644 --- a/3rdParty/Boost/src/boost/regex/v4/cpp_regex_traits.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/cpp_regex_traits.hpp @@ -187,6 +187,7 @@ struct cpp_regex_traits_base #ifndef BOOST_NO_STD_MESSAGES if(m_pmessages == b.m_pmessages) { + return m_pcollate < b.m_pcollate; } return m_pmessages < b.m_pmessages; #else @@ -212,7 +213,7 @@ std::locale cpp_regex_traits_base<charT>::imbue(const std::locale& l) m_locale = l; m_pctype = &BOOST_USE_FACET(std::ctype<charT>, l); #ifndef BOOST_NO_STD_MESSAGES - m_pmessages = &BOOST_USE_FACET(std::messages<charT>, l); + m_pmessages = BOOST_HAS_FACET(std::messages<charT>, l) ? &BOOST_USE_FACET(std::messages<charT>, l) : 0; #endif m_pcollate = &BOOST_USE_FACET(std::collate<charT>, l); return result; @@ -276,7 +277,7 @@ void cpp_regex_traits_char_layer<charT>::init() typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1); #endif std::string cat_name(cpp_regex_traits<charT>::get_catalog_name()); - if(cat_name.size()) + if(cat_name.size() && (this->m_pmessages != 0)) { cat = this->m_pmessages->open( cat_name, @@ -309,7 +310,8 @@ void cpp_regex_traits_char_layer<charT>::init() } catch(...) { - this->m_pmessages->close(cat); + if(this->m_pmessages) + this->m_pmessages->close(cat); throw; } #endif @@ -394,7 +396,9 @@ enum char_class_graph=char_class_alnum|char_class_punct, char_class_blank=1<<9, char_class_word=1<<10, - char_class_unicode=1<<11 + char_class_unicode=1<<11, + char_class_horizontal_space=1<<12, + char_class_vertical_space=1<<13 }; #endif @@ -413,6 +417,8 @@ public: BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 24); BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 25); BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 26); + BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 1u << 27); + BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 1u << 28); #endif typedef std::basic_string<charT> string_type; @@ -477,6 +483,10 @@ template <class charT> typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_word; template <class charT> typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_unicode; +template <class charT> +typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_vertical; +template <class charT> +typename cpp_regex_traits_implementation<charT>::char_class_type const cpp_regex_traits_implementation<charT>::mask_horizontal; #endif #endif @@ -645,7 +655,7 @@ void cpp_regex_traits_implementation<charT>::init() typename std::messages<charT>::catalog cat = reinterpret_cast<std::messages<char>::catalog>(-1); #endif std::string cat_name(cpp_regex_traits<charT>::get_catalog_name()); - if(cat_name.size()) + if(cat_name.size() && (this->m_pmessages != 0)) { cat = this->m_pmessages->open( cat_name, @@ -688,36 +698,40 @@ void cpp_regex_traits_implementation<charT>::init() // Custom class names: // #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET - static const char_class_type masks[14] = + static const char_class_type masks[16] = { std::ctype<charT>::alnum, std::ctype<charT>::alpha, std::ctype<charT>::cntrl, std::ctype<charT>::digit, std::ctype<charT>::graph, + cpp_regex_traits_implementation<charT>::mask_horizontal, std::ctype<charT>::lower, std::ctype<charT>::print, std::ctype<charT>::punct, std::ctype<charT>::space, std::ctype<charT>::upper, + cpp_regex_traits_implementation<charT>::mask_vertical, std::ctype<charT>::xdigit, cpp_regex_traits_implementation<charT>::mask_blank, cpp_regex_traits_implementation<charT>::mask_word, cpp_regex_traits_implementation<charT>::mask_unicode, }; #else - static const char_class_type masks[14] = + static const char_class_type masks[16] = { ::boost::re_detail::char_class_alnum, ::boost::re_detail::char_class_alpha, ::boost::re_detail::char_class_cntrl, ::boost::re_detail::char_class_digit, ::boost::re_detail::char_class_graph, + ::boost::re_detail::char_class_horizontal_space, ::boost::re_detail::char_class_lower, ::boost::re_detail::char_class_print, ::boost::re_detail::char_class_punct, ::boost::re_detail::char_class_space, ::boost::re_detail::char_class_upper, + ::boost::re_detail::char_class_vertical_space, ::boost::re_detail::char_class_xdigit, ::boost::re_detail::char_class_blank, ::boost::re_detail::char_class_word, @@ -744,7 +758,7 @@ typename cpp_regex_traits_implementation<charT>::char_class_type cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const { #ifndef BOOST_REGEX_BUGGY_CTYPE_FACET - static const char_class_type masks[20] = + static const char_class_type masks[22] = { 0, std::ctype<char>::alnum, @@ -754,6 +768,7 @@ typename cpp_regex_traits_implementation<charT>::char_class_type std::ctype<char>::digit, std::ctype<char>::digit, std::ctype<char>::graph, + cpp_regex_traits_implementation<charT>::mask_horizontal, std::ctype<char>::lower, std::ctype<char>::lower, std::ctype<char>::print, @@ -763,12 +778,13 @@ typename cpp_regex_traits_implementation<charT>::char_class_type std::ctype<char>::upper, cpp_regex_traits_implementation<charT>::mask_unicode, std::ctype<char>::upper, + cpp_regex_traits_implementation<charT>::mask_vertical, std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word, std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word, std::ctype<char>::xdigit, }; #else - static const char_class_type masks[20] = + static const char_class_type masks[22] = { 0, ::boost::re_detail::char_class_alnum, @@ -778,6 +794,7 @@ typename cpp_regex_traits_implementation<charT>::char_class_type ::boost::re_detail::char_class_digit, ::boost::re_detail::char_class_digit, ::boost::re_detail::char_class_graph, + ::boost::re_detail::char_class_horizontal_space, ::boost::re_detail::char_class_lower, ::boost::re_detail::char_class_lower, ::boost::re_detail::char_class_print, @@ -787,6 +804,7 @@ typename cpp_regex_traits_implementation<charT>::char_class_type ::boost::re_detail::char_class_upper, ::boost::re_detail::char_class_unicode, ::boost::re_detail::char_class_upper, + ::boost::re_detail::char_class_vertical_space, ::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word, ::boost::re_detail::char_class_alnum | ::boost::re_detail::char_class_word, ::boost::re_detail::char_class_xdigit, @@ -820,7 +838,9 @@ bool cpp_regex_traits_implementation<charT>::isctype(const charT c, char_class_t || ((mask & ::boost::re_detail::char_class_xdigit) && (m_pctype->is(std::ctype<charT>::xdigit, c))) || ((mask & ::boost::re_detail::char_class_blank) && (m_pctype->is(std::ctype<charT>::space, c)) && !::boost::re_detail::is_separator(c)) || ((mask & ::boost::re_detail::char_class_word) && (c == '_')) - || ((mask & ::boost::re_detail::char_class_unicode) && ::boost::re_detail::is_extended(c)); + || ((mask & ::boost::re_detail::char_class_unicode) && ::boost::re_detail::is_extended(c)) + || ((mask & ::boost::re_detail::char_class_vertical_space) && (is_separator(c) || (c == '\v'))) + || ((mask & ::boost::re_detail::char_class_horizontal_space) && m_pctype->is(std::ctype<charT>::space, c) && !(is_separator(c) || (c == '\v'))); } #endif @@ -930,6 +950,12 @@ public: && m_pimpl->m_pctype->is(std::ctype<charT>::space, c) && !re_detail::is_separator(c)) return true; + else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_vertical) + && (::boost::re_detail::is_separator(c) || (c == '\v'))) + return true; + else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_horizontal) + && this->isctype(c, std::ctype<charT>::space) && !this->isctype(c, re_detail::cpp_regex_traits_implementation<charT>::mask_vertical)) + return true; return false; #else return m_pimpl->isctype(c, f); diff --git a/3rdParty/Boost/src/boost/regex/v4/cregex.hpp b/3rdParty/Boost/src/boost/regex/v4/cregex.hpp index cafe396..7b3df1e 100644 --- a/3rdParty/Boost/src/boost/regex/v4/cregex.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/cregex.hpp @@ -124,11 +124,11 @@ typedef enum{ REG_STARTEND = 00004 } reg_exec_flags; -// -// POSIX error codes: -// +/* + * POSIX error codes: + */ typedef unsigned reg_error_t; -typedef reg_error_t reg_errcode_t; // backwards compatibility +typedef reg_error_t reg_errcode_t; /* backwards compatibility */ static const reg_error_t REG_NOERROR = 0; /* Success. */ static const reg_error_t REG_NOMATCH = 1; /* Didn't find a match (for regexec). */ @@ -154,8 +154,9 @@ static const reg_error_t REG_EMPTY = 17; /* empty expression */ static const reg_error_t REG_E_MEMORY = 15; /* = REG_ESIZE : out of memory */ static const reg_error_t REG_ECOMPLEXITY = 18; /* complexity too high */ static const reg_error_t REG_ESTACK = 19; /* out of stack space */ -static const reg_error_t REG_E_UNKNOWN = 20; /* unknown error */ -static const reg_error_t REG_ENOSYS = 20; /* = REG_E_UNKNOWN : Reserved. */ +static const reg_error_t REG_E_PERL = 20; /* Perl (?...) error */ +static const reg_error_t REG_E_UNKNOWN = 21; /* unknown error */ +static const reg_error_t REG_ENOSYS = 21; /* = REG_E_UNKNOWN : Reserved. */ BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompA(regex_tA*, const char*, int); BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorA(int, const regex_tA*, char*, regsize_t); @@ -195,14 +196,14 @@ BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeW(regex_tW*); #endif #ifdef __cplusplus -} // extern "C" -} // namespace +} /* extern "C" */ +} /* namespace */ #endif -// -// C++ high level wrapper goes here: -// #if defined(__cplusplus) +/* + * C++ high level wrapper goes here: + */ #include <string> #include <vector> namespace boost{ @@ -228,7 +229,7 @@ struct pred2; struct pred3; struct pred4; -} // namespace re_detail +} /* namespace re_detail */ #if (defined(BOOST_MSVC) || defined(__BORLANDC__)) && !defined(BOOST_DISABLE_WIN32) typedef bool (__cdecl *GrepCallback)(const RegEx& expression); @@ -257,9 +258,9 @@ public: unsigned int SetExpression(const std::string& s, bool icase = false){ return SetExpression(s.c_str(), icase); } std::string Expression()const; unsigned int error_code()const; - // - // now matching operators: - // + /* + * now matching operators: + */ bool Match(const char* p, match_flag_type flags = match_default); bool Match(const std::string& s, match_flag_type flags = match_default) { return Match(s.c_str(), flags); } bool Search(const char* p, match_flag_type flags = match_default); @@ -283,9 +284,9 @@ public: bool copy = true, match_flag_type flags = match_default); std::size_t Split(std::vector<std::string>& v, std::string& s, match_flag_type flags = match_default, unsigned max_count = ~0); - // - // now operators for returning what matched in more detail: - // + /* + * now operators for returning what matched in more detail: + */ std::size_t Position(int i = 0)const; std::size_t Length(int i = 0)const; bool Matched(int i = 0)const; @@ -312,11 +313,11 @@ public: #pragma warning(pop) #endif -} // namespace boost +} /* namespace boost */ -#endif +#endif /* __cplusplus */ -#endif // include guard +#endif /* include guard */ diff --git a/3rdParty/Boost/src/boost/regex/v4/error_type.hpp b/3rdParty/Boost/src/boost/regex/v4/error_type.hpp index b6633a0..afcc71e 100644 --- a/3rdParty/Boost/src/boost/regex/v4/error_type.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/error_type.hpp @@ -28,8 +28,8 @@ namespace regex_constants{ enum error_type{ - error_ok = 0, // not used - error_no_match = 1, // not used + error_ok = 0, /* not used */ + error_no_match = 1, /* not used */ error_bad_pattern = 2, error_collate = 3, error_ctype = 4, @@ -42,17 +42,18 @@ enum error_type{ error_range = 11, error_space = 12, error_badrepeat = 13, - error_end = 14, // not used + error_end = 14, /* not used */ error_size = 15, - error_right_paren = 16, // not used + error_right_paren = 16, /* not used */ error_empty = 17, error_complexity = 18, error_stack = 19, - error_unknown = 20 + error_perl_extension = 20, + error_unknown = 21 }; } } -#endif // __cplusplus +#endif /* __cplusplus */ #endif diff --git a/3rdParty/Boost/src/boost/regex/v4/iterator_category.hpp b/3rdParty/Boost/src/boost/regex/v4/iterator_category.hpp index 20870a0..9e40142 100644 --- a/3rdParty/Boost/src/boost/regex/v4/iterator_category.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/iterator_category.hpp @@ -31,10 +31,14 @@ namespace detail{ template <class I> struct is_random_imp { +#ifndef BOOST_NO_STD_ITERATOR_TRAITS private: typedef typename std::iterator_traits<I>::iterator_category cat; public: BOOST_STATIC_CONSTANT(bool, value = (::boost::is_convertible<cat*, std::random_access_iterator_tag*>::value)); +#else + BOOST_STATIC_CONSTANT(bool, value = false); +#endif }; template <class I> diff --git a/3rdParty/Boost/src/boost/regex/v4/match_flags.hpp b/3rdParty/Boost/src/boost/regex/v4/match_flags.hpp index 9585aca..26bde9a 100644 --- a/3rdParty/Boost/src/boost/regex/v4/match_flags.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/match_flags.hpp @@ -31,43 +31,43 @@ namespace boost{ typedef enum _match_flags { match_default = 0, - match_not_bol = 1, // first is not start of line - match_not_eol = match_not_bol << 1, // last is not end of line - match_not_bob = match_not_eol << 1, // first is not start of buffer - match_not_eob = match_not_bob << 1, // last is not end of buffer - match_not_bow = match_not_eob << 1, // first is not start of word - match_not_eow = match_not_bow << 1, // last is not end of word - match_not_dot_newline = match_not_eow << 1, // \n is not matched by '.' - match_not_dot_null = match_not_dot_newline << 1, // '\0' is not matched by '.' - match_prev_avail = match_not_dot_null << 1, // *--first is a valid expression - match_init = match_prev_avail << 1, // internal use - match_any = match_init << 1, // don't care what we match - match_not_null = match_any << 1, // string can't be null - match_continuous = match_not_null << 1, // each grep match must continue from - // uninterupted from the previous one - match_partial = match_continuous << 1, // find partial matches + match_not_bol = 1, /* first is not start of line */ + match_not_eol = match_not_bol << 1, /* last is not end of line */ + match_not_bob = match_not_eol << 1, /* first is not start of buffer */ + match_not_eob = match_not_bob << 1, /* last is not end of buffer */ + match_not_bow = match_not_eob << 1, /* first is not start of word */ + match_not_eow = match_not_bow << 1, /* last is not end of word */ + match_not_dot_newline = match_not_eow << 1, /* \n is not matched by '.' */ + match_not_dot_null = match_not_dot_newline << 1, /* '\0' is not matched by '.' */ + match_prev_avail = match_not_dot_null << 1, /* *--first is a valid expression */ + match_init = match_prev_avail << 1, /* internal use */ + match_any = match_init << 1, /* don't care what we match */ + match_not_null = match_any << 1, /* string can't be null */ + match_continuous = match_not_null << 1, /* each grep match must continue from */ + /* uninterupted from the previous one */ + match_partial = match_continuous << 1, /* find partial matches */ - match_stop = match_partial << 1, // stop after first match (grep) V3 only - match_not_initial_null = match_stop, // don't match initial null, V4 only - match_all = match_stop << 1, // must find the whole of input even if match_any is set - match_perl = match_all << 1, // Use perl matching rules - match_posix = match_perl << 1, // Use POSIX matching rules - match_nosubs = match_posix << 1, // don't trap marked subs - match_extra = match_nosubs << 1, // include full capture information for repeated captures - match_single_line = match_extra << 1, // treat text as single line and ignor any \n's when matching ^ and $. - match_unused1 = match_single_line << 1, // unused - match_unused2 = match_unused1 << 1, // unused - match_unused3 = match_unused2 << 1, // unused + match_stop = match_partial << 1, /* stop after first match (grep) V3 only */ + match_not_initial_null = match_stop, /* don't match initial null, V4 only */ + match_all = match_stop << 1, /* must find the whole of input even if match_any is set */ + match_perl = match_all << 1, /* Use perl matching rules */ + match_posix = match_perl << 1, /* Use POSIX matching rules */ + match_nosubs = match_posix << 1, /* don't trap marked subs */ + match_extra = match_nosubs << 1, /* include full capture information for repeated captures */ + match_single_line = match_extra << 1, /* treat text as single line and ignor any \n's when matching ^ and $. */ + match_unused1 = match_single_line << 1, /* unused */ + match_unused2 = match_unused1 << 1, /* unused */ + match_unused3 = match_unused2 << 1, /* unused */ match_max = match_unused3, - format_perl = 0, // perl style replacement - format_default = 0, // ditto. - format_sed = match_max << 1, // sed style replacement. - format_all = format_sed << 1, // enable all extentions to sytax. - format_no_copy = format_all << 1, // don't copy non-matching segments. - format_first_only = format_no_copy << 1, // Only replace first occurance. - format_is_if = format_first_only << 1, // internal use only. - format_literal = format_is_if << 1 // treat string as a literal + format_perl = 0, /* perl style replacement */ + format_default = 0, /* ditto. */ + format_sed = match_max << 1, /* sed style replacement. */ + format_all = format_sed << 1, /* enable all extentions to sytax. */ + format_no_copy = format_all << 1, /* don't copy non-matching segments. */ + format_first_only = format_no_copy << 1, /* Only replace first occurance. */ + format_is_if = format_first_only << 1, /* internal use only. */ + format_literal = format_is_if << 1 /* treat string as a literal */ } match_flags; @@ -96,10 +96,10 @@ inline match_flags& operator^=(match_flags& m1, match_flags m2) #endif #ifdef __cplusplus -} // namespace regex_constants -// -// import names into boost for backwards compatiblity: -// +} /* namespace regex_constants */ +/* + * import names into boost for backwards compatiblity: + */ using regex_constants::match_flag_type; using regex_constants::match_default; using regex_constants::match_not_bol; @@ -111,28 +111,28 @@ using regex_constants::match_not_eow; using regex_constants::match_not_dot_newline; using regex_constants::match_not_dot_null; using regex_constants::match_prev_avail; -//using regex_constants::match_init; +/* using regex_constants::match_init; */ using regex_constants::match_any; using regex_constants::match_not_null; using regex_constants::match_continuous; using regex_constants::match_partial; -//using regex_constants::match_stop; +/*using regex_constants::match_stop; */ using regex_constants::match_all; using regex_constants::match_perl; using regex_constants::match_posix; using regex_constants::match_nosubs; using regex_constants::match_extra; using regex_constants::match_single_line; -//using regex_constants::match_max; +/*using regex_constants::match_max; */ using regex_constants::format_all; using regex_constants::format_sed; using regex_constants::format_perl; using regex_constants::format_default; using regex_constants::format_no_copy; using regex_constants::format_first_only; -//using regex_constants::format_is_if; +/*using regex_constants::format_is_if;*/ -} // namespace boost -#endif // __cplusplus -#endif // include guard +} /* namespace boost */ +#endif /* __cplusplus */ +#endif /* include guard */ diff --git a/3rdParty/Boost/src/boost/regex/v4/match_results.hpp b/3rdParty/Boost/src/boost/regex/v4/match_results.hpp index acf509f..cd6b9eb 100644 --- a/3rdParty/Boost/src/boost/regex/v4/match_results.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/match_results.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (c) 1998-2002 + * Copyright (c) 1998-2009 * John Maddock * * Use, modification and distribution are subject to the @@ -36,6 +36,13 @@ namespace boost{ #pragma warning(disable : 4251 4231 4660) #endif +namespace re_detail{ + +template <class charT> +class named_subexpressions; + +} + template <class BidiIterator, class Allocator> class match_results { @@ -62,20 +69,35 @@ public: typedef typename re_detail::regex_iterator_traits< BidiIterator>::value_type char_type; typedef std::basic_string<char_type> string_type; + typedef re_detail::named_subexpressions_base<char_type> named_sub_type; // construct/copy/destroy: explicit match_results(const Allocator& a = Allocator()) #ifndef BOOST_NO_STD_ALLOCATOR - : m_subs(a), m_base() {} + : m_subs(a), m_base(), m_last_closed_paren(0), m_is_singular(true) {} #else - : m_subs(), m_base() { (void)a; } + : m_subs(), m_base(), m_last_closed_paren(0), m_is_singular(true) { (void)a; } #endif match_results(const match_results& m) - : m_subs(m.m_subs), m_base(m.m_base) {} + : m_subs(m.m_subs), m_named_subs(m.m_named_subs), m_last_closed_paren(m.m_last_closed_paren), m_is_singular(m.m_is_singular) + { + if(!m_is_singular) + { + m_base = m.m_base; + m_null = m.m_null; + } + } match_results& operator=(const match_results& m) { m_subs = m.m_subs; - m_base = m.m_base; + m_named_subs = m.m_named_subs; + m_last_closed_paren = m.m_last_closed_paren; + m_is_singular = m.m_is_singular; + if(!m_is_singular) + { + m_base = m.m_base; + m_null = m.m_null; + } return *this; } ~match_results(){} @@ -90,13 +112,39 @@ public: // element access: difference_type length(int sub = 0) const { + if(m_is_singular) + raise_logic_error(); sub += 2; if((sub < (int)m_subs.size()) && (sub > 0)) return m_subs[sub].length(); return 0; } + difference_type length(const char_type* sub) const + { + if(m_is_singular) + raise_logic_error(); + const char_type* sub_end = sub; + while(*sub_end) ++sub_end; + return length(named_subexpression_index(sub, sub_end)); + } + template <class charT> + difference_type length(const charT* sub) const + { + if(m_is_singular) + raise_logic_error(); + const charT* sub_end = sub; + while(*sub_end) ++sub_end; + return length(named_subexpression_index(sub, sub_end)); + } + template <class charT, class Traits, class A> + difference_type length(const std::basic_string<charT, Traits, A>& sub) const + { + return length(sub.c_str()); + } difference_type position(size_type sub = 0) const { + if(m_is_singular) + raise_logic_error(); sub += 2; if(sub < m_subs.size()) { @@ -108,8 +156,28 @@ public: } return ~static_cast<difference_type>(0); } + difference_type position(const char_type* sub) const + { + const char_type* sub_end = sub; + while(*sub_end) ++sub_end; + return position(named_subexpression_index(sub, sub_end)); + } + template <class charT> + difference_type position(const charT* sub) const + { + const charT* sub_end = sub; + while(*sub_end) ++sub_end; + return position(named_subexpression_index(sub, sub_end)); + } + template <class charT, class Traits, class A> + difference_type position(const std::basic_string<charT, Traits, A>& sub) const + { + return position(sub.c_str()); + } string_type str(int sub = 0) const { + if(m_is_singular) + raise_logic_error(); sub += 2; string_type result; if(sub < (int)m_subs.size() && (sub > 0)) @@ -122,8 +190,29 @@ public: } return result; } + string_type str(const char_type* sub) const + { + return (*this)[sub].str(); + } + template <class Traits, class A> + string_type str(const std::basic_string<char_type, Traits, A>& sub) const + { + return (*this)[sub].str(); + } + template <class charT> + string_type str(const charT* sub) const + { + return (*this)[sub].str(); + } + template <class charT, class Traits, class A> + string_type str(const std::basic_string<charT, Traits, A>& sub) const + { + return (*this)[sub].str(); + } const_reference operator[](int sub) const { + if(m_is_singular && m_subs.empty()) + raise_logic_error(); sub += 2; if(sub < (int)m_subs.size() && (sub >= 0)) { @@ -131,14 +220,91 @@ public: } return m_null; } + // + // Named sub-expressions: + // + const_reference named_subexpression(const char_type* i, const char_type* j) const + { + if(m_is_singular) + raise_logic_error(); + int index = m_named_subs->get_id(i, j); + return index > 0 ? (*this)[index] : m_null; + } + template <class charT> + const_reference named_subexpression(const charT* i, const charT* j) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(i == j) + return m_null; + std::vector<char_type> s; + while(i != j) + s.insert(s.end(), *i++); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + int named_subexpression_index(const char_type* i, const char_type* j) const + { + if(m_is_singular) + raise_logic_error(); + int index = m_named_subs->get_id(i, j); + return index > 0 ? index : -20; + } + template <class charT> + int named_subexpression_index(const charT* i, const charT* j) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(i == j) + return -20; + std::vector<char_type> s; + while(i != j) + s.insert(s.end(), *i++); + return named_subexpression_index(&*s.begin(), &*s.begin() + s.size()); + } + template <class Traits, class A> + const_reference operator[](const std::basic_string<char_type, Traits, A>& s) const + { + return named_subexpression(s.c_str(), s.c_str() + s.size()); + } + const_reference operator[](const char_type* p) const + { + const char_type* e = p; + while(*e) ++e; + return named_subexpression(p, e); + } + + template <class charT> + const_reference operator[](const charT* p) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(*p == 0) + return m_null; + std::vector<char_type> s; + while(*p) + s.insert(s.end(), *p++); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + template <class charT, class Traits, class A> + const_reference operator[](const std::basic_string<charT, Traits, A>& ns) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(ns.empty()) + return m_null; + std::vector<char_type> s; + for(unsigned i = 0; i < ns.size(); ++i) + s.insert(s.end(), ns[i]); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } const_reference prefix() const { + if(m_is_singular) + raise_logic_error(); return (*this)[-1]; } const_reference suffix() const { + if(m_is_singular) + raise_logic_error(); return (*this)[-2]; } const_iterator begin() const @@ -150,43 +316,70 @@ public: return m_subs.end(); } // format: - template <class OutputIterator> + template <class OutputIterator, class Functor> OutputIterator format(OutputIterator out, - const string_type& fmt, + Functor fmt, match_flag_type flags = format_default) const { - re_detail::trivial_format_traits<char_type> traits; - return re_detail::regex_format_imp(out, *this, fmt.data(), fmt.data() + fmt.size(), flags, traits); + if(m_is_singular) + raise_logic_error(); + typedef typename re_detail::compute_functor_type<Functor, match_results<BidiIterator, Allocator>, OutputIterator>::type F; + F func(fmt); + return func(*this, out, flags); } - string_type format(const string_type& fmt, - match_flag_type flags = format_default) const + template <class Functor> + string_type format(Functor fmt, match_flag_type flags = format_default) const { - string_type result; - re_detail::string_out_iterator<string_type> i(result); - re_detail::trivial_format_traits<char_type> traits; - re_detail::regex_format_imp(i, *this, fmt.data(), fmt.data() + fmt.size(), flags, traits); + if(m_is_singular) + raise_logic_error(); + std::basic_string<char_type> result; + re_detail::string_out_iterator<std::basic_string<char_type> > i(result); + + typedef typename re_detail::compute_functor_type<Functor, match_results<BidiIterator, Allocator>, re_detail::string_out_iterator<std::basic_string<char_type> > >::type F; + F func(fmt); + + func(*this, i, flags); return result; } // format with locale: - template <class OutputIterator, class RegexT> + template <class OutputIterator, class Functor, class RegexT> OutputIterator format(OutputIterator out, - const string_type& fmt, + Functor fmt, match_flag_type flags, const RegexT& re) const { - return ::boost::re_detail::regex_format_imp(out, *this, fmt.data(), fmt.data() + fmt.size(), flags, re.get_traits()); + if(m_is_singular) + raise_logic_error(); + typedef ::boost::regex_traits_wrapper<typename RegexT::traits_type> traits_type; + typedef typename re_detail::compute_functor_type<Functor, match_results<BidiIterator, Allocator>, OutputIterator, traits_type>::type F; + F func(fmt); + return func(*this, out, flags, re.get_traits()); } - template <class RegexT> - string_type format(const string_type& fmt, + template <class RegexT, class Functor> + string_type format(Functor fmt, match_flag_type flags, const RegexT& re) const { - string_type result; - re_detail::string_out_iterator<string_type> i(result); - ::boost::re_detail::regex_format_imp(i, *this, fmt.data(), fmt.data() + fmt.size(), flags, re.get_traits()); + if(m_is_singular) + raise_logic_error(); + typedef ::boost::regex_traits_wrapper<typename RegexT::traits_type> traits_type; + std::basic_string<char_type> result; + re_detail::string_out_iterator<std::basic_string<char_type> > i(result); + + typedef typename re_detail::compute_functor_type<Functor, match_results<BidiIterator, Allocator>, re_detail::string_out_iterator<std::basic_string<char_type> >, traits_type >::type F; + F func(fmt); + + func(*this, i, flags, re.get_traits()); return result; } + const_reference get_last_closed_paren()const + { + if(m_is_singular) + raise_logic_error(); + return m_last_closed_paren == 0 ? m_null : (*this)[m_last_closed_paren]; + } + allocator_type get_allocator() const { #ifndef BOOST_NO_STD_ALLOCATOR @@ -198,11 +391,39 @@ public: void swap(match_results& that) { std::swap(m_subs, that.m_subs); - std::swap(m_base, that.m_base); + std::swap(m_named_subs, that.m_named_subs); + std::swap(m_last_closed_paren, that.m_last_closed_paren); + std::swap(m_is_singular, that.m_is_singular); + if(m_is_singular) + { + if(!that.m_is_singular) + { + m_base = that.m_base; + m_null = that.m_null; + } + } + else if(that.m_is_singular) + { + that.m_base = m_base; + that.m_null = m_null; + } + else + { + std::swap(m_base, that.m_base); + std::swap(m_null, that.m_null); + } } bool operator==(const match_results& that)const { - return (m_subs == that.m_subs) && (m_base == that.m_base); + if(m_is_singular) + { + return that.m_is_singular; + } + else if(that.m_is_singular) + { + return false; + } + return (m_subs == that.m_subs) && (m_base == that.m_base) && (m_last_closed_paren == that.m_last_closed_paren); } bool operator!=(const match_results& that)const { return !(*this == that); } @@ -212,6 +433,8 @@ public: const capture_sequence_type& captures(int i)const { + if(m_is_singular) + raise_logic_error(); return (*this)[i].captures(); } #endif @@ -228,21 +451,25 @@ public: m_null.first = i; m_null.second = i; m_null.matched = false; + m_is_singular = false; } - void BOOST_REGEX_CALL set_second(BidiIterator i, size_type pos, bool m = true) + void BOOST_REGEX_CALL set_second(BidiIterator i, size_type pos, bool m = true, bool escape_k = false) { + if(pos) + m_last_closed_paren = pos; pos += 2; BOOST_ASSERT(m_subs.size() > pos); m_subs[pos].second = i; m_subs[pos].matched = m; - if(pos == 2) + if((pos == 2) && !escape_k) { m_subs[0].first = i; m_subs[0].matched = (m_subs[0].first != m_subs[0].second); m_null.first = i; m_null.second = i; m_null.matched = false; + m_is_singular = false; } } void BOOST_REGEX_CALL set_size(size_type n, BidiIterator i, BidiIterator j) @@ -261,6 +488,7 @@ public: m_subs.insert(m_subs.end(), n+2-len, v); } m_subs[1].first = i; + m_last_closed_paren = 0; } void BOOST_REGEX_CALL set_base(BidiIterator pos) { @@ -272,6 +500,7 @@ public: } void BOOST_REGEX_CALL set_first(BidiIterator i) { + BOOST_ASSERT(m_subs.size() > 2); // set up prefix: m_subs[1].second = i; m_subs[1].matched = (m_subs[1].first != i); @@ -284,26 +513,55 @@ public: m_subs[n].matched = false; } } - void BOOST_REGEX_CALL set_first(BidiIterator i, size_type pos) + void BOOST_REGEX_CALL set_first(BidiIterator i, size_type pos, bool escape_k = false) { BOOST_ASSERT(pos+2 < m_subs.size()); - if(pos) + if(pos || escape_k) + { m_subs[pos+2].first = i; + if(escape_k) + { + m_subs[1].second = i; + m_subs[1].matched = (m_subs[1].first != m_subs[1].second); + } + } else set_first(i); } void BOOST_REGEX_CALL maybe_assign(const match_results<BidiIterator, Allocator>& m); + void BOOST_REGEX_CALL set_named_subs(boost::shared_ptr<named_sub_type> subs) + { + m_named_subs = subs; + } private: - vector_type m_subs; // subexpressions - BidiIterator m_base; // where the search started from - sub_match<BidiIterator> m_null; // a null match + // + // Error handler called when an uninitialized match_results is accessed: + // + static void raise_logic_error() + { + std::logic_error e("Attempt to access an uninitialzed boost::match_results<> class."); + boost::throw_exception(e); + } + + + vector_type m_subs; // subexpressions + BidiIterator m_base; // where the search started from + sub_match<BidiIterator> m_null; // a null match + boost::shared_ptr<named_sub_type> m_named_subs; // Shared copy of named subs in the regex object + int m_last_closed_paren; // Last ) to be seen - used for formatting + bool m_is_singular; // True if our stored iterators are singular }; template <class BidiIterator, class Allocator> void BOOST_REGEX_CALL match_results<BidiIterator, Allocator>::maybe_assign(const match_results<BidiIterator, Allocator>& m) { + if(m_is_singular) + { + *this = m; + return; + } const_iterator p1, p2; p1 = begin(); p2 = m.begin(); diff --git a/3rdParty/Boost/src/boost/regex/v4/perl_matcher.hpp b/3rdParty/Boost/src/boost/regex/v4/perl_matcher.hpp index 33afe6e..52e0bce 100644 --- a/3rdParty/Boost/src/boost/regex/v4/perl_matcher.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/perl_matcher.hpp @@ -61,7 +61,7 @@ inline bool can_start(unsigned short c, const unsigned char* map, unsigned char { return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask); } -#if !defined(__hpux) // WCHAR_MIN not usable in pp-directives. +#if !defined(__hpux) && !defined(__WINSCW__)// WCHAR_MIN not usable in pp-directives. #if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask) { @@ -277,15 +277,21 @@ public: else { repeater_count* p = next; - while(p->state_id != state_id) + while(p && (p->state_id != state_id)) p = p->next; - count = p->count; - start_pos = p->start_pos; + if(p) + { + count = p->count; + start_pos = p->start_pos; + } + else + count = 0; } } ~repeater_count() { - *stack = next; + if(next) + *stack = next; } std::size_t get_count() { return count; } int get_id() { return state_id; } @@ -325,6 +331,17 @@ enum saved_state_type saved_state_count = 14 }; +template <class Results> +struct recursion_info +{ + typedef typename Results::value_type value_type; + typedef typename value_type::iterator iterator; + int id; + const re_syntax_base* preturn_address; + Results results; + repeater_count<iterator>* repeater_stack; +}; + #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable : 4251 4231 4660) @@ -340,6 +357,7 @@ public: typedef std::size_t traits_size_type; typedef typename is_byte<char_type>::width_type width_type; typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type; + typedef match_results<BidiIterator, Allocator> results_type; perl_matcher(BidiIterator first, BidiIterator end, match_results<BidiIterator, Allocator>& what, @@ -348,7 +366,7 @@ public: BidiIterator l_base) : m_result(what), base(first), last(end), position(first), backstop(l_base), re(e), traits_inst(e.get_traits()), - m_independent(false), next_count(&rep_obj), rep_obj(&next_count) + m_independent(false), next_count(&rep_obj), rep_obj(&next_count), recursion_stack_position(0) { construct_init(e, f); } @@ -403,12 +421,17 @@ private: bool match_char_repeat(); bool match_dot_repeat_fast(); bool match_dot_repeat_slow(); + bool match_dot_repeat_dispatch() + { + return ::boost::is_random_access_iterator<BidiIterator>::value ? match_dot_repeat_fast() : match_dot_repeat_slow(); + } bool match_backstep(); bool match_assert_backref(); bool match_toggle_case(); #ifdef BOOST_REGEX_RECURSIVE bool backtrack_till_match(std::size_t count); #endif + bool match_recursion(); // find procs stored in s_find_vtable: bool find_restart_any(); @@ -464,6 +487,9 @@ private: typename traits::char_class_type m_word_mask; // the bitmask to use when determining whether a match_any matches a newline or not: unsigned char match_any_mask; + // recursion information: + recursion_info<results_type> recursion_stack[50]; + unsigned recursion_stack_position; #ifdef BOOST_REGEX_NON_RECURSIVE // @@ -487,6 +513,8 @@ private: bool unwind_short_set_repeat(bool); bool unwind_long_set_repeat(bool); bool unwind_non_greedy_repeat(bool); + bool unwind_recursion(bool); + bool unwind_recursion_pop(bool); void destroy_single_repeat(); void push_matched_paren(int index, const sub_match<BidiIterator>& sub); void push_recursion_stopper(); @@ -495,7 +523,8 @@ private: void push_repeater_count(int i, repeater_count<BidiIterator>** s); void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id); void push_non_greedy_repeat(const re_syntax_base* ps); - + void push_recursion(int id, const re_syntax_base* p, results_type* presults); + void push_recursion_pop(); // pointer to base of stack: saved_state* m_stack_base; diff --git a/3rdParty/Boost/src/boost/regex/v4/perl_matcher_common.hpp b/3rdParty/Boost/src/boost/regex/v4/perl_matcher_common.hpp index 399caa3..fd439f8 100644 --- a/3rdParty/Boost/src/boost/regex/v4/perl_matcher_common.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/perl_matcher_common.hpp @@ -200,12 +200,13 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_imp() m_match_flags |= regex_constants::match_all; m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last); m_presult->set_base(base); + m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs())); if(m_match_flags & match_posix) m_result = *m_presult; verify_options(re.flags(), m_match_flags); if(0 == match_prefix()) return false; - return m_result[0].second == last; + return (m_result[0].second == last) && (m_result[0].first == base); #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS) } @@ -261,6 +262,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_imp() pstate = re.get_first_state(); m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last); m_presult->set_base(base); + m_presult->set_named_subs(re_detail::convert_to_named_subs<typename match_results<BidiIterator>::char_type>(this->re.get_named_subs())); m_match_flags |= regex_constants::match_init; } else @@ -344,25 +346,6 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix() } template <class BidiIterator, class Allocator, class traits> -bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark() -{ - int index = static_cast<const re_brace*>(pstate)->index; - if(index > 0) - { - if((m_match_flags & match_nosubs) == 0) - m_presult->set_second(position, index); - } - else if((index < 0) && (index != -4)) - { - // matched forward lookahead: - pstate = 0; - return true; - } - pstate = pstate->next.p; - return true; -} - -template <class BidiIterator, class Allocator, class traits> bool perl_matcher<BidiIterator, Allocator, traits>::match_literal() { unsigned int len = static_cast<const re_literal*>(pstate)->length; @@ -463,35 +446,6 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_wild() } template <class BidiIterator, class Allocator, class traits> -bool perl_matcher<BidiIterator, Allocator, traits>::match_match() -{ - if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) - return false; - if((m_match_flags & match_all) && (position != last)) - return false; - if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) - return false; - m_presult->set_second(position); - pstate = 0; - m_has_found_match = true; - if((m_match_flags & match_posix) == match_posix) - { - m_result.maybe_assign(*m_presult); - if((m_match_flags & match_any) == 0) - return false; - } -#ifdef BOOST_REGEX_MATCH_EXTRA - if(match_extra & m_match_flags) - { - for(unsigned i = 0; i < m_presult->size(); ++i) - if((*m_presult)[i].matched) - ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); - } -#endif - return true; -} - -template <class BidiIterator, class Allocator, class traits> bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary() { bool b; // indcates whether next character is a word character @@ -758,8 +712,32 @@ template <class BidiIterator, class Allocator, class traits> inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref() { // return true if marked sub-expression N has been matched: - bool result = (*m_presult)[static_cast<const re_brace*>(pstate)->index].matched; - pstate = pstate->next.p; + int index = static_cast<const re_brace*>(pstate)->index; + bool result; + if(index == 9999) + { + // Magic value for a (DEFINE) block: + return false; + } + else if(index > 0) + { + // Check if index is a hash value: + if(index >= 10000) + index = re.get_data().get_id(index); + // Have we matched subexpression "index"? + result = (*m_presult)[index].matched; + pstate = pstate->next.p; + } + else + { + // Have we recursed into subexpression "index"? + // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1. + int id = -index-1; + if(id >= 10000) + id = re.get_data().get_id(id); + result = recursion_stack_position && ((recursion_stack[recursion_stack_position-1].id == id) || (index == 0)); + pstate = pstate->next.p; + } return result; } diff --git a/3rdParty/Boost/src/boost/regex/v4/perl_matcher_non_recursive.hpp b/3rdParty/Boost/src/boost/regex/v4/perl_matcher_non_recursive.hpp index 10e0347..0fcd454 100644 --- a/3rdParty/Boost/src/boost/regex/v4/perl_matcher_non_recursive.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -127,10 +127,21 @@ struct saved_single_repeat : public saved_state : saved_state(arg_id), count(c), rep(r), last_position(lp){} }; +template <class Results> +struct saved_recursion : public saved_state +{ + saved_recursion(int id, const re_syntax_base* p, Results* pr) + : saved_state(14), recursion_id(id), preturn_address(p), results(*pr) + {} + int recursion_id; + const re_syntax_base* preturn_address; + Results results; +}; + template <class BidiIterator, class Allocator, class traits> bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() { - static matcher_proc_type const s_match_vtable[29] = + static matcher_proc_type const s_match_vtable[30] = { (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark), &perl_matcher<BidiIterator, Allocator, traits>::match_endmark, @@ -154,13 +165,18 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() &perl_matcher<BidiIterator, Allocator, traits>::match_combining, &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end, &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue, - (::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow), + // Although this next line *should* be evaluated at compile time, in practice + // some compilers (VC++) emit run-time initialisation which breaks thread + // safety, so use a dispatch function instead: + //(::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow), + &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch, &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat, &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat, &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat, &perl_matcher<BidiIterator, Allocator, traits>::match_backstep, &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref, &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case, + &perl_matcher<BidiIterator, Allocator, traits>::match_recursion, }; push_recursion_stopper(); @@ -172,7 +188,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() if(!(this->*proc)()) { if(state_count > max_state_count) - raise_error(traits_inst, regex_constants::error_space); + raise_error(traits_inst, regex_constants::error_complexity); if((m_match_flags & match_partial) && (position == last) && (position != search_base)) m_has_partial_match = true; bool successful_unwind = unwind(false); @@ -203,13 +219,13 @@ void perl_matcher<BidiIterator, Allocator, traits>::extend_stack() m_backup_state = block; } else - raise_error(traits_inst, regex_constants::error_size); + raise_error(traits_inst, regex_constants::error_stack); } template <class BidiIterator, class Allocator, class traits> inline void perl_matcher<BidiIterator, Allocator, traits>::push_matched_paren(int index, const sub_match<BidiIterator>& sub) { - BOOST_ASSERT(index); + //BOOST_ASSERT(index); saved_matched_paren<BidiIterator>* pmp = static_cast<saved_matched_paren<BidiIterator>*>(m_backup_state); --pmp; if(pmp < m_stack_base) @@ -313,9 +329,25 @@ inline void perl_matcher<BidiIterator, Allocator, traits>::push_single_repeat(st } template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_recursion(int id, const re_syntax_base* p, results_type* presults) +{ + saved_recursion<results_type>* pmp = static_cast<saved_recursion<results_type>*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_recursion<results_type>*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_recursion<results_type>(id, p, presults); + m_backup_state = pmp; +} + +template <class BidiIterator, class Allocator, class traits> bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark() { int index = static_cast<const re_brace*>(pstate)->index; + icase = static_cast<const re_brace*>(pstate)->icase; switch(index) { case 0: @@ -400,6 +432,13 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark() break; } } + case -5: + { + push_matched_paren(0, (*m_presult)[0]); + m_presult->set_first(position, 0, true); + pstate = pstate->next.p; + break; + } default: { BOOST_ASSERT(index > 0); @@ -848,6 +887,105 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat() #endif } +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion() +{ + BOOST_ASSERT(pstate->type == syntax_element_recurse); + // + // Backup call stack: + // + push_recursion_pop(); + // + // Set new call stack: + // + if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0]))) + { + return false; + } + recursion_stack[recursion_stack_position].preturn_address = pstate->next.p; + recursion_stack[recursion_stack_position].results = *m_presult; + if(static_cast<const re_recurse*>(pstate)->state_id > 0) + { + push_repeater_count(static_cast<const re_recurse*>(pstate)->state_id, &next_count); + } + pstate = static_cast<const re_jump*>(pstate)->alt.p; + recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index; + ++recursion_stack_position; + //BOOST_ASSERT(recursion_stack[recursion_stack_position-1].id); + + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark() +{ + int index = static_cast<const re_brace*>(pstate)->index; + icase = static_cast<const re_brace*>(pstate)->icase; + if(index > 0) + { + if((m_match_flags & match_nosubs) == 0) + { + m_presult->set_second(position, index); + } + if(recursion_stack_position) + { + if(index == recursion_stack[recursion_stack_position-1].id) + { + --recursion_stack_position; + pstate = recursion_stack[recursion_stack_position].preturn_address; + *m_presult = recursion_stack[recursion_stack_position].results; + push_recursion(recursion_stack[recursion_stack_position].id, recursion_stack[recursion_stack_position].preturn_address, &recursion_stack[recursion_stack_position].results); + } + } + } + else if((index < 0) && (index != -4)) + { + // matched forward lookahead: + pstate = 0; + return true; + } + pstate = pstate->next.p; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_match() +{ + if(recursion_stack_position) + { + BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id); + --recursion_stack_position; + pstate = recursion_stack[recursion_stack_position].preturn_address; + *m_presult = recursion_stack[recursion_stack_position].results; + push_recursion(recursion_stack[recursion_stack_position].id, recursion_stack[recursion_stack_position].preturn_address, &recursion_stack[recursion_stack_position].results); + return true; + } + if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) + return false; + if((m_match_flags & match_all) && (position != last)) + return false; + if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) + return false; + m_presult->set_second(position); + pstate = 0; + m_has_found_match = true; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + if((m_match_flags & match_any) == 0) + return false; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(match_extra & m_match_flags) + { + for(unsigned i = 0; i < m_presult->size(); ++i) + if((*m_presult)[i].matched) + ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); + } +#endif + return true; +} + /**************************************************************************** Unwind and associated proceedures follow, these perform what normal stack @@ -858,7 +996,7 @@ unwinding does in the recursive implementation. template <class BidiIterator, class Allocator, class traits> bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match) { - static unwind_proc_type const s_unwind_table[14] = + static unwind_proc_type const s_unwind_table[18] = { &perl_matcher<BidiIterator, Allocator, traits>::unwind_end, &perl_matcher<BidiIterator, Allocator, traits>::unwind_paren, @@ -874,6 +1012,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind(bool have_match) &perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat, &perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat, &perl_matcher<BidiIterator, Allocator, traits>::unwind_non_greedy_repeat, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion, + &perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop, }; m_recursive_result = have_match; @@ -907,8 +1047,8 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_paren(bool have_match // restore previous values if no match was found: if(have_match == false) { - m_presult->set_first(pmp->sub.first, pmp->index); - m_presult->set_second(pmp->sub.second, pmp->index, pmp->sub.matched); + m_presult->set_first(pmp->sub.first, pmp->index, pmp->index == 0); + m_presult->set_second(pmp->sub.second, pmp->index, pmp->sub.matched, pmp->index == 0); } #ifdef BOOST_REGEX_MATCH_EXTRA // @@ -1377,6 +1517,106 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_non_greedy_repeat(boo return r; } +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion(bool r) +{ + saved_recursion<results_type>* pmp = static_cast<saved_recursion<results_type>*>(m_backup_state); + if(!r) + { + recursion_stack[recursion_stack_position].id = pmp->recursion_id; + recursion_stack[recursion_stack_position].preturn_address = pmp->preturn_address; + recursion_stack[recursion_stack_position].results = pmp->results; + ++recursion_stack_position; + } + boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_recursion_pop(bool r) +{ + saved_state* pmp = static_cast<saved_state*>(m_backup_state); + if(!r) + { + --recursion_stack_position; + } + boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +void perl_matcher<BidiIterator, Allocator, traits>::push_recursion_pop() +{ + saved_state* pmp = static_cast<saved_state*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_state*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_state(15); + m_backup_state = pmp; +} +/* +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_parenthesis_pop(bool r) +{ + saved_state* pmp = static_cast<saved_state*>(m_backup_state); + if(!r) + { + --parenthesis_stack_position; + } + boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +void perl_matcher<BidiIterator, Allocator, traits>::push_parenthesis_pop() +{ + saved_state* pmp = static_cast<saved_state*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_state*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_state(16); + m_backup_state = pmp; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::unwind_parenthesis_push(bool r) +{ + saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + if(!r) + { + parenthesis_stack[parenthesis_stack_position++] = pmp->position; + } + boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +inline void perl_matcher<BidiIterator, Allocator, traits>::push_parenthesis_push(BidiIterator p) +{ + saved_position<BidiIterator>* pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast<saved_position<BidiIterator>*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position<BidiIterator>(0, p, 17); + m_backup_state = pmp; +} +*/ } // namespace re_detail } // namespace boost diff --git a/3rdParty/Boost/src/boost/regex/v4/perl_matcher_recursive.hpp b/3rdParty/Boost/src/boost/regex/v4/perl_matcher_recursive.hpp index 68e1aac..d0de802 100644 --- a/3rdParty/Boost/src/boost/regex/v4/perl_matcher_recursive.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/perl_matcher_recursive.hpp @@ -51,8 +51,8 @@ public: template <class A> void restore(match_results<BidiIterator, A>& w) { - w.set_first(sub.first, index); - w.set_second(sub.second, index, sub.matched); + w.set_first(sub.first, index, index == 0); + w.set_second(sub.second, index, sub.matched, index == 0); } const sub_match<BidiIterator>& get() { return sub; } }; @@ -60,7 +60,7 @@ public: template <class BidiIterator, class Allocator, class traits> bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() { - static matcher_proc_type const s_match_vtable[29] = + static matcher_proc_type const s_match_vtable[30] = { (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark), &perl_matcher<BidiIterator, Allocator, traits>::match_endmark, @@ -84,17 +84,22 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states() &perl_matcher<BidiIterator, Allocator, traits>::match_combining, &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end, &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue, - (::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow), + // Although this next line *should* be evaluated at compile time, in practice + // some compilers (VC++) emit run-time initialisation which breaks thread + // safety, so use a dispatch function instead: + //(::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow), + &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch, &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat, &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat, &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat, &perl_matcher<BidiIterator, Allocator, traits>::match_backstep, &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref, &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case, + &perl_matcher<BidiIterator, Allocator, traits>::match_recursion, }; if(state_count > max_state_count) - raise_error(traits_inst, regex_constants::error_space); + raise_error(traits_inst, regex_constants::error_complexity); while(pstate) { matcher_proc_type proc = s_match_vtable[pstate->type]; @@ -113,6 +118,7 @@ template <class BidiIterator, class Allocator, class traits> bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark() { int index = static_cast<const re_brace*>(pstate)->index; + icase = static_cast<const re_brace*>(pstate)->icase; bool r = true; switch(index) { @@ -205,6 +211,17 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark() break; } } + case -5: + { + // Reset start of $0, since we have a \K escape + backup_subex<BidiIterator> sub(*m_presult, 0); + m_presult->set_first(position, 0, true); + pstate = pstate->next.p; + r = match_all_states(); + if(r == false) + sub.restore(*m_presult); + break; + } default: { BOOST_ASSERT(index > 0); @@ -833,6 +850,127 @@ bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::si #endif } +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion() +{ + BOOST_ASSERT(pstate->type == syntax_element_recurse); + // + // Set new call stack: + // + if(recursion_stack_position >= static_cast<int>(sizeof(recursion_stack)/sizeof(recursion_stack[0]))) + { + return false; + } + recursion_stack[recursion_stack_position].preturn_address = pstate->next.p; + recursion_stack[recursion_stack_position].results = *m_presult; + recursion_stack[recursion_stack_position].repeater_stack = next_count; + pstate = static_cast<const re_jump*>(pstate)->alt.p; + recursion_stack[recursion_stack_position].id = static_cast<const re_brace*>(pstate)->index; + ++recursion_stack_position; + + repeater_count<BidiIterator>* saved = next_count; + repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those + next_count = &r; + bool result = match_all_states(); + next_count = saved; + + if(!result) + { + --recursion_stack_position; + next_count = recursion_stack[recursion_stack_position].repeater_stack; + *m_presult = recursion_stack[recursion_stack_position].results; + return false; + } + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark() +{ + int index = static_cast<const re_brace*>(pstate)->index; + icase = static_cast<const re_brace*>(pstate)->icase; + if(index > 0) + { + if((m_match_flags & match_nosubs) == 0) + { + m_presult->set_second(position, index); + } + if(recursion_stack_position) + { + if(index == recursion_stack[recursion_stack_position-1].id) + { + --recursion_stack_position; + recursion_info<results_type> saved = recursion_stack[recursion_stack_position]; + const re_syntax_base* saved_state = pstate = saved.preturn_address; + repeater_count<BidiIterator>* saved_count = next_count; + next_count = saved.repeater_stack; + *m_presult = saved.results; + if(!match_all_states()) + { + recursion_stack[recursion_stack_position] = saved; + ++recursion_stack_position; + next_count = saved_count; + return false; + } + } + } + } + else if((index < 0) && (index != -4)) + { + // matched forward lookahead: + pstate = 0; + return true; + } + pstate = pstate ? pstate->next.p : 0; + return true; +} + +template <class BidiIterator, class Allocator, class traits> +bool perl_matcher<BidiIterator, Allocator, traits>::match_match() +{ + if(recursion_stack_position) + { + BOOST_ASSERT(0 == recursion_stack[recursion_stack_position-1].id); + --recursion_stack_position; + const re_syntax_base* saved_state = pstate = recursion_stack[recursion_stack_position].preturn_address; + *m_presult = recursion_stack[recursion_stack_position].results; + if(!match_all_states()) + { + recursion_stack[recursion_stack_position].preturn_address = saved_state; + recursion_stack[recursion_stack_position].results = *m_presult; + ++recursion_stack_position; + return false; + } + return true; + } + if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) + return false; + if((m_match_flags & match_all) && (position != last)) + return false; + if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) + return false; + m_presult->set_second(position); + pstate = 0; + m_has_found_match = true; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + if((m_match_flags & match_any) == 0) + return false; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(match_extra & m_match_flags) + { + for(unsigned i = 0; i < m_presult->size(); ++i) + if((*m_presult)[i].matched) + ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); + } +#endif + return true; +} + + + } // namespace re_detail } // namespace boost #ifdef BOOST_MSVC diff --git a/3rdParty/Boost/src/boost/regex/v4/regex_format.hpp b/3rdParty/Boost/src/boost/regex/v4/regex_format.hpp index d114c2e..0ca9baa 100644 --- a/3rdParty/Boost/src/boost/regex/v4/regex_format.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/regex_format.hpp @@ -1,7 +1,7 @@ /* * - * Copyright (c) 1998-2002 - * John Maddock + * Copyright (c) 1998-2009 John Maddock + * Copyright 2008 Eric Niebler. * * Use, modification and distribution are subject to the * Boost Software License, Version 1.0. (See accompanying file @@ -21,6 +21,19 @@ #ifndef BOOST_REGEX_FORMAT_HPP #define BOOST_REGEX_FORMAT_HPP +#include <boost/type_traits/is_pointer.hpp> +#include <boost/type_traits/is_function.hpp> +#include <boost/type_traits/is_class.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/type_traits/is_convertible.hpp> +#include <boost/type_traits/remove_pointer.hpp> +#include <boost/type_traits/remove_cv.hpp> +#include <boost/mpl/if.hpp> +#include <boost/mpl/and.hpp> +#include <boost/mpl/not.hpp> +#ifndef BOOST_NO_SFINAE +#include <boost/mpl/has_xxx.hpp> +#endif namespace boost{ @@ -76,15 +89,15 @@ struct trivial_format_traits } }; -template <class OutputIterator, class Results, class traits> +template <class OutputIterator, class Results, class traits, class ForwardIter> class basic_regex_formatter { public: typedef typename traits::char_type char_type; basic_regex_formatter(OutputIterator o, const Results& r, const traits& t) : m_traits(t), m_results(r), m_out(o), m_state(output_copy), m_restore_state(output_copy), m_have_conditional(false) {} - OutputIterator format(const char_type* p1, const char_type* p2, match_flag_type f); - OutputIterator format(const char_type* p1, match_flag_type f) + OutputIterator format(ForwardIter p1, ForwardIter p2, match_flag_type f); + OutputIterator format(ForwardIter p1, match_flag_type f) { return format(p1, p1 + m_traits.length(p1), f); } @@ -107,23 +120,77 @@ private: void format_escape(); void format_conditional(); void format_until_scope_end(); + bool handle_perl_verb(bool have_brace); - const traits& m_traits; // the traits class for localised formatting operations - const Results& m_results; // the match_results being used. - OutputIterator m_out; // where to send output. - const char_type* m_position; // format string, current position - const char_type* m_end; // format string end - match_flag_type m_flags; // format flags to use - output_state m_state; // what to do with the next character - output_state m_restore_state; // what state to restore to. - bool m_have_conditional; // we are parsing a conditional + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const mpl::false_&) + { + std::vector<char_type> v(i, j); + return (i != j) ? this->m_results.named_subexpression(&v[0], &v[0] + v.size()) + : this->m_results.named_subexpression(static_cast<const char_type*>(0), static_cast<const char_type*>(0)); + } + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const mpl::true_&) + { + return this->m_results.named_subexpression(i, j); + } + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j) + { + typedef typename boost::is_convertible<ForwardIter, const char_type*>::type tag_type; + return get_named_sub(i, j, tag_type()); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j, const mpl::false_&) + { + std::vector<char_type> v(i, j); + return (i != j) ? this->m_results.named_subexpression_index(&v[0], &v[0] + v.size()) + : this->m_results.named_subexpression_index(static_cast<const char_type*>(0), static_cast<const char_type*>(0)); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j, const mpl::true_&) + { + return this->m_results.named_subexpression_index(i, j); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j) + { + typedef typename boost::is_convertible<ForwardIter, const char_type*>::type tag_type; + return get_named_sub_index(i, j, tag_type()); + } + inline int toi(ForwardIter& i, ForwardIter j, int base, const boost::mpl::false_&) + { + if(i != j) + { + std::vector<char_type> v(i, j); + const char_type* start = &v[0]; + const char_type* pos = start; + int r = m_traits.toi(pos, &v[0] + v.size(), base); + std::advance(i, pos - start); + return r; + } + return -1; + } + inline int toi(ForwardIter& i, ForwardIter j, int base, const boost::mpl::true_&) + { + return m_traits.toi(i, j, base); + } + inline int toi(ForwardIter& i, ForwardIter j, int base) + { + typedef typename boost::is_convertible<ForwardIter, const char_type*&>::type tag_type; + return toi(i, j, base, tag_type()); + } + + const traits& m_traits; // the traits class for localised formatting operations + const Results& m_results; // the match_results being used. + OutputIterator m_out; // where to send output. + ForwardIter m_position; // format string, current position + ForwardIter m_end; // format string end + match_flag_type m_flags; // format flags to use + output_state m_state; // what to do with the next character + output_state m_restore_state; // what state to restore to. + bool m_have_conditional; // we are parsing a conditional private: basic_regex_formatter(const basic_regex_formatter&); basic_regex_formatter& operator=(const basic_regex_formatter&); }; -template <class OutputIterator, class Results, class traits> -OutputIterator basic_regex_formatter<OutputIterator, Results, traits>::format(const char_type* p1, const char_type* p2, match_flag_type f) +template <class OutputIterator, class Results, class traits, class ForwardIter> +OutputIterator basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format(ForwardIter p1, ForwardIter p2, match_flag_type f) { m_position = p1; m_end = p2; @@ -132,8 +199,8 @@ OutputIterator basic_regex_formatter<OutputIterator, Results, traits>::format(co return m_out; } -template <class OutputIterator, class Results, class traits> -void basic_regex_formatter<OutputIterator, Results, traits>::format_all() +template <class OutputIterator, class Results, class traits, class ForwardIter> +void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_all() { // over and over: while(m_position != m_end) @@ -210,8 +277,8 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_all() } } -template <class OutputIterator, class Results, class traits> -void basic_regex_formatter<OutputIterator, Results, traits>::format_perl() +template <class OutputIterator, class Results, class traits, class ForwardIter> +void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_perl() { // // On entry *m_position points to a '$' character @@ -232,7 +299,7 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl() // OK find out what kind it is: // bool have_brace = false; - const char_type* save_position = m_position; + ForwardIter save_position = m_position; switch(*m_position) { case '&': @@ -250,6 +317,25 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl() case '$': put(*m_position++); break; + case '+': + if((++m_position != m_end) && (*m_position == '{')) + { + ForwardIter base = ++m_position; + while((m_position != m_end) && (*m_position != '}')) ++m_position; + if(m_position != m_end) + { + // Named sub-expression: + put(get_named_sub(base, m_position)); + ++m_position; + break; + } + else + { + m_position = --base; + } + } + put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]); + break; case '{': have_brace = true; ++m_position; @@ -258,14 +344,18 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl() // see if we have a number: { std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); - len = (std::min)(static_cast<std::ptrdiff_t>(2), len); - int v = m_traits.toi(m_position, m_position + len, 10); + //len = (std::min)(static_cast<std::ptrdiff_t>(2), len); + int v = this->toi(m_position, m_position + len, 10); if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}')))) { - // leave the $ as is, and carry on: - m_position = --save_position; - put(*m_position); - ++m_position; + // Look for a Perl-5.10 verb: + if(!handle_perl_verb(have_brace)) + { + // leave the $ as is, and carry on: + m_position = --save_position; + put(*m_position); + ++m_position; + } break; } // otherwise output sub v: @@ -276,8 +366,127 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_perl() } } -template <class OutputIterator, class Results, class traits> -void basic_regex_formatter<OutputIterator, Results, traits>::format_escape() +template <class OutputIterator, class Results, class traits, class ForwardIter> +bool basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::handle_perl_verb(bool have_brace) +{ + // + // We may have a capitalised string containing a Perl action: + // + static const char_type MATCH[] = { 'M', 'A', 'T', 'C', 'H' }; + static const char_type PREMATCH[] = { 'P', 'R', 'E', 'M', 'A', 'T', 'C', 'H' }; + static const char_type POSTMATCH[] = { 'P', 'O', 'S', 'T', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_PAREN_MATCH[] = { 'L', 'A', 'S', 'T', '_', 'P', 'A', 'R', 'E', 'N', '_', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_SUBMATCH_RESULT[] = { 'L', 'A', 'S', 'T', '_', 'S', 'U', 'B', 'M', 'A', 'T', 'C', 'H', '_', 'R', 'E', 'S', 'U', 'L', 'T' }; + static const char_type LAST_SUBMATCH_RESULT_ALT[] = { '^', 'N' }; + + if(m_position == m_end) + return false; + if(have_brace && (*m_position == '^')) + ++m_position; + + int max_len = m_end - m_position; + + if((max_len >= 5) && std::equal(m_position, m_position + 5, MATCH)) + { + m_position += 5; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 5; + return false; + } + } + put(this->m_results[0]); + return true; + } + if((max_len >= 8) && std::equal(m_position, m_position + 8, PREMATCH)) + { + m_position += 8; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 8; + return false; + } + } + put(this->m_results.prefix()); + return true; + } + if((max_len >= 9) && std::equal(m_position, m_position + 9, POSTMATCH)) + { + m_position += 9; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 9; + return false; + } + } + put(this->m_results.suffix()); + return true; + } + if((max_len >= 16) && std::equal(m_position, m_position + 16, LAST_PAREN_MATCH)) + { + m_position += 16; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 16; + return false; + } + } + put((this->m_results)[this->m_results.size() > 1 ? this->m_results.size() - 1 : 1]); + return true; + } + if((max_len >= 20) && std::equal(m_position, m_position + 20, LAST_SUBMATCH_RESULT)) + { + m_position += 20; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 20; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + if((max_len >= 2) && std::equal(m_position, m_position + 2, LAST_SUBMATCH_RESULT_ALT)) + { + m_position += 2; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 2; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + return false; +} + +template <class OutputIterator, class Results, class traits, class ForwardIter> +void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_escape() { // skip the escape and check for trailing escape: if(++m_position == m_end) @@ -322,7 +531,7 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_escape() if(*m_position == static_cast<char_type>('{')) { ++m_position; - int val = m_traits.toi(m_position, m_end, 16); + int val = this->toi(m_position, m_end, 16); if(val < 0) { // invalid value treat everything as literals: @@ -330,8 +539,9 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_escape() put(static_cast<char_type>('{')); return; } - if(*m_position != static_cast<char_type>('}')) + if((m_position == m_end) || (*m_position != static_cast<char_type>('}'))) { + --m_position; while(*m_position != static_cast<char_type>('\\')) --m_position; ++m_position; @@ -346,7 +556,7 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_escape() { std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); len = (std::min)(static_cast<std::ptrdiff_t>(2), len); - int val = m_traits.toi(m_position, m_position + len, 16); + int val = this->toi(m_position, m_position + len, 16); if(val < 0) { --m_position; @@ -408,7 +618,9 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_escape() break; } // see if we have a \n sed style backreference: - int v = m_traits.toi(m_position, m_position+1, 10); + std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); + len = (std::min)(static_cast<std::ptrdiff_t>(1), len); + int v = this->toi(m_position, m_position+len, 10); if((v > 0) || ((v == 0) && (m_flags & ::boost::regex_constants::format_sed))) { put(m_results[v]); @@ -418,9 +630,9 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_escape() { // octal ecape sequence: --m_position; - std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); + len = ::boost::re_detail::distance(m_position, m_end); len = (std::min)(static_cast<std::ptrdiff_t>(4), len); - v = m_traits.toi(m_position, m_position + len, 8); + v = this->toi(m_position, m_position + len, 8); BOOST_ASSERT(v >= 0); put(static_cast<char_type>(v)); break; @@ -431,8 +643,8 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_escape() } } -template <class OutputIterator, class Results, class traits> -void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional() +template <class OutputIterator, class Results, class traits, class ForwardIter> +void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_conditional() { if(m_position == m_end) { @@ -440,9 +652,35 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional( put(static_cast<char_type>('?')); return; } - std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); - len = (std::min)(static_cast<std::ptrdiff_t>(2), len); - int v = m_traits.toi(m_position, m_position + len, 10); + int v; + if(*m_position == '{') + { + ForwardIter base = m_position; + ++m_position; + v = this->toi(m_position, m_end, 10); + if(v < 0) + { + // Try a named subexpression: + while((m_position != m_end) && (*m_position != '}')) + ++m_position; + v = this->get_named_sub_index(base + 1, m_position); + } + if((v < 0) || (*m_position != '}')) + { + m_position = base; + // oops trailing '?': + put(static_cast<char_type>('?')); + return; + } + // Skip trailing '}': + ++m_position; + } + else + { + std::ptrdiff_t len = ::boost::re_detail::distance(m_position, m_end); + len = (std::min)(static_cast<std::ptrdiff_t>(2), len); + v = this->toi(m_position, m_position + len, 10); + } if(v < 0) { // oops not a number: @@ -490,8 +728,8 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional( } } -template <class OutputIterator, class Results, class traits> -void basic_regex_formatter<OutputIterator, Results, traits>::format_until_scope_end() +template <class OutputIterator, class Results, class traits, class ForwardIter> +void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::format_until_scope_end() { do { @@ -502,8 +740,8 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_until_scope_ }while(m_position != m_end); } -template <class OutputIterator, class Results, class traits> -void basic_regex_formatter<OutputIterator, Results, traits>::put(char_type c) +template <class OutputIterator, class Results, class traits, class ForwardIter> +void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::put(char_type c) { // write a single character to output // according to which case translation mode we are in: @@ -532,8 +770,8 @@ void basic_regex_formatter<OutputIterator, Results, traits>::put(char_type c) ++m_out; } -template <class OutputIterator, class Results, class traits> -void basic_regex_formatter<OutputIterator, Results, traits>::put(const sub_match_type& sub) +template <class OutputIterator, class Results, class traits, class ForwardIter> +void basic_regex_formatter<OutputIterator, Results, traits, ForwardIter>::put(const sub_match_type& sub) { typedef typename sub_match_type::iterator iterator_type; iterator_type i = sub.first; @@ -571,10 +809,10 @@ public: #endif }; -template <class OutputIterator, class Iterator, class Alloc, class charT, class traits> +template <class OutputIterator, class Iterator, class Alloc, class ForwardIter, class traits> OutputIterator regex_format_imp(OutputIterator out, const match_results<Iterator, Alloc>& m, - const charT* p1, const charT* p2, + ForwardIter p1, ForwardIter p2, match_flag_type flags, const traits& t ) @@ -587,57 +825,289 @@ OutputIterator regex_format_imp(OutputIterator out, re_detail::basic_regex_formatter< OutputIterator, match_results<Iterator, Alloc>, - traits > f(out, m, t); + traits, ForwardIter> f(out, m, t); return f.format(p1, p2, flags); } +#ifndef BOOST_NO_SFINAE -} // namespace re_detail +BOOST_MPL_HAS_XXX_TRAIT_DEF(const_iterator) -template <class OutputIterator, class Iterator, class charT> -OutputIterator regex_format(OutputIterator out, - const match_results<Iterator>& m, - const charT* fmt, - match_flag_type flags = format_all - ) +struct any_type { any_type(...); }; +typedef char no_type; +typedef char (&unary_type)[2]; +typedef char (&binary_type)[3]; +typedef char (&ternary_type)[4]; + +no_type check_is_formatter(unary_type, binary_type, ternary_type); +template<typename T> +unary_type check_is_formatter(T const &, binary_type, ternary_type); +template<typename T> +binary_type check_is_formatter(unary_type, T const &, ternary_type); +template<typename T, typename U> +binary_type check_is_formatter(T const &, U const &, ternary_type); +template<typename T> +ternary_type check_is_formatter(unary_type, binary_type, T const &); +template<typename T, typename U> +ternary_type check_is_formatter(T const &, binary_type, U const &); +template<typename T, typename U> +ternary_type check_is_formatter(unary_type, T const &, U const &); +template<typename T, typename U, typename V> +ternary_type check_is_formatter(T const &, U const &, V const &); + +struct unary_binary_ternary { - re_detail::trivial_format_traits<charT> traits; - return re_detail::regex_format_imp(out, m, fmt, fmt + traits.length(fmt), flags, traits); -} + typedef unary_type (*unary_fun)(any_type); + typedef binary_type (*binary_fun)(any_type, any_type); + typedef ternary_type (*ternary_fun)(any_type, any_type, any_type); + operator unary_fun(); + operator binary_fun(); + operator ternary_fun(); +}; -template <class OutputIterator, class Iterator, class charT> -OutputIterator regex_format(OutputIterator out, - const match_results<Iterator>& m, - const std::basic_string<charT>& fmt, - match_flag_type flags = format_all - ) +template<typename Formatter, bool IsFunction = boost::is_function<Formatter>::value> +struct formatter_wrapper + : Formatter + , unary_binary_ternary { - re_detail::trivial_format_traits<charT> traits; - return re_detail::regex_format_imp(out, m, fmt.data(), fmt.data() + fmt.size(), flags, traits); -} + formatter_wrapper(){} +}; -template <class Iterator, class charT> -std::basic_string<charT> regex_format(const match_results<Iterator>& m, - const charT* fmt, - match_flag_type flags = format_all) +template<typename Formatter> +struct formatter_wrapper<Formatter, true> + : unary_binary_ternary +{ + operator Formatter *(); +}; + +template<typename Formatter> +struct formatter_wrapper<Formatter *, false> + : unary_binary_ternary +{ + operator Formatter *(); +}; + +template <class F, class M, class O> +struct format_traits_imp +{ +private: + // + // F must be a pointer, a function, or a class with a function call operator: + // + BOOST_STATIC_ASSERT((::boost::is_pointer<F>::value || ::boost::is_function<F>::value || ::boost::is_class<F>::value)); + static formatter_wrapper<F> f; + static M m; + static O out; + static boost::regex_constants::match_flag_type flags; +public: + BOOST_STATIC_CONSTANT(int, value = sizeof(check_is_formatter(f(m), f(m, out), f(m, out, flags)))); +}; + +template <class F, class M, class O> +struct format_traits +{ +public: + // + // Type is mpl::int_<N> where N is one of: + // + // 0 : F is a pointer to a presumably null-terminated string. + // 1 : F is a character-container such as a std::string. + // 2 : F is a Unary Functor. + // 3 : F is a Binary Functor. + // 4 : F is a Ternary Functor. + // + typedef typename boost::mpl::if_< + boost::mpl::and_<boost::is_pointer<F>, boost::mpl::not_<boost::is_function<typename boost::remove_pointer<F>::type> > >, + boost::mpl::int_<0>, + typename boost::mpl::if_< + has_const_iterator<F>, + boost::mpl::int_<1>, + boost::mpl::int_<format_traits_imp<F, M, O>::value> + >::type + >::type type; + // + // This static assertion will fail if the functor passed does not accept + // the same type of arguments passed. + // + BOOST_STATIC_ASSERT( boost::is_class<F>::value && !has_const_iterator<F>::value ? (type::value > 1) : true); +}; + +#else // BOOST_NO_SFINAE + +template <class F, class M, class O> +struct format_traits +{ +public: + // + // Type is mpl::int_<N> where N is one of: + // + // 0 : F is a pointer to a presumably null-terminated string. + // 1 : F is a character-container such as a std::string. + // + // Other options such as F being a Functor are not supported without + // SFINAE support. + // + typedef typename boost::mpl::if_< + boost::is_pointer<F>, + boost::mpl::int_<0>, + boost::mpl::int_<1> + >::type type; +}; + +#endif // BOOST_NO_SFINAE + +template <class Base, class Match> +struct format_functor3 +{ + format_functor3(Base b) : func(b) {} + template <class OutputIter> + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f) + { + return func(m, i, f); + } + template <class OutputIter, class Traits> + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor3(const format_functor3&); + format_functor3& operator=(const format_functor3&); +}; + +template <class Base, class Match> +struct format_functor2 +{ + format_functor2(Base b) : func(b) {} + template <class OutputIter> + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type /*f*/) + { + return func(m, i); + } + template <class OutputIter, class Traits> + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor2(const format_functor2&); + format_functor2& operator=(const format_functor2&); +}; + +template <class Base, class Match> +struct format_functor1 +{ + format_functor1(Base b) : func(b) {} + + template <class S, class OutputIter> + OutputIter do_format_string(const S& s, OutputIter i) + { + return re_detail::copy(s.begin(), s.end(), i); + } + template <class S, class OutputIter> + inline OutputIter do_format_string(const S* s, OutputIter i) + { + while(s && *s) + { + *i = *s; + ++i; + ++s; + } + return i; + } + template <class OutputIter> + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type /*f*/) + { + return do_format_string(func(m), i); + } + template <class OutputIter, class Traits> + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor1(const format_functor1&); + format_functor1& operator=(const format_functor1&); +}; + +template <class charT, class Match, class Traits> +struct format_functor_c_string +{ + format_functor_c_string(const charT* ps) : func(ps) {} + + template <class OutputIter> + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits& t = Traits()) + { + typedef typename Match::char_type char_type; + const charT* end = func; + while(*end) ++end; + return regex_format_imp(i, m, func, end, f, t); + } +private: + const charT* func; + format_functor_c_string(const format_functor_c_string&); + format_functor_c_string& operator=(const format_functor_c_string&); +}; + +template <class Container, class Match, class Traits> +struct format_functor_container +{ + format_functor_container(const Container& c) : func(c) {} + + template <class OutputIter> + OutputIter operator()(const Match& m, OutputIter i, boost::regex_constants::match_flag_type f, const Traits& t = Traits()) + { + typedef typename Match::char_type char_type; + return re_detail::regex_format_imp(i, m, func.begin(), func.end(), f, t); + } +private: + const Container& func; + format_functor_container(const format_functor_container&); + format_functor_container& operator=(const format_functor_container&); +}; + +template <class Func, class Match, class OutputIterator, class Traits = re_detail::trivial_format_traits<typename Match::char_type> > +struct compute_functor_type +{ + typedef typename format_traits<Func, Match, OutputIterator>::type tag; + typedef typename boost::remove_cv< typename boost::remove_pointer<Func>::type>::type maybe_char_type; + + typedef typename mpl::if_< + ::boost::is_same<tag, mpl::int_<0> >, format_functor_c_string<maybe_char_type, Match, Traits>, + typename mpl::if_< + ::boost::is_same<tag, mpl::int_<1> >, format_functor_container<Func, Match, Traits>, + typename mpl::if_< + ::boost::is_same<tag, mpl::int_<2> >, format_functor1<Func, Match>, + typename mpl::if_< + ::boost::is_same<tag, mpl::int_<3> >, format_functor2<Func, Match>, + format_functor3<Func, Match> + >::type + >::type + >::type + >::type type; +}; + +} // namespace re_detail + +template <class OutputIterator, class Iterator, class Allocator, class Functor> +inline OutputIterator regex_format(OutputIterator out, + const match_results<Iterator, Allocator>& m, + Functor fmt, + match_flag_type flags = format_all + ) { - std::basic_string<charT> result; - re_detail::string_out_iterator<std::basic_string<charT> > i(result); - re_detail::trivial_format_traits<charT> traits; - re_detail::regex_format_imp(i, m, fmt, fmt + traits.length(fmt), flags, traits); - return result; + return m.format(out, fmt, flags); } -template <class Iterator, class charT> -std::basic_string<charT> regex_format(const match_results<Iterator>& m, - const std::basic_string<charT>& fmt, +template <class Iterator, class Allocator, class Functor> +inline std::basic_string<typename match_results<Iterator, Allocator>::char_type> regex_format(const match_results<Iterator, Allocator>& m, + Functor fmt, match_flag_type flags = format_all) { - std::basic_string<charT> result; - re_detail::string_out_iterator<std::basic_string<charT> > i(result); - re_detail::trivial_format_traits<charT> traits; - re_detail::regex_format_imp(i, m, fmt.data(), fmt.data() + fmt.size(), flags, traits); - return result; + return m.format(fmt, flags); } #ifdef BOOST_MSVC diff --git a/3rdParty/Boost/src/boost/regex/v4/regex_replace.hpp b/3rdParty/Boost/src/boost/regex/v4/regex_replace.hpp index c4544c0..ac8e3c8 100644 --- a/3rdParty/Boost/src/boost/regex/v4/regex_replace.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/regex_replace.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (c) 1998-2002 + * Copyright (c) 1998-2009 * John Maddock * * Use, modification and distribution are subject to the @@ -35,12 +35,12 @@ namespace boost{ #pragma warning(pop) #endif -template <class OutputIterator, class BidirectionalIterator, class traits, class charT> +template <class OutputIterator, class BidirectionalIterator, class traits, class charT, class Formatter> OutputIterator regex_replace(OutputIterator out, BidirectionalIterator first, BidirectionalIterator last, const basic_regex<charT, traits>& e, - const charT* fmt, + Formatter fmt, match_flag_type flags = match_default) { regex_iterator<BidirectionalIterator, charT, traits> i(first, last, e, flags); @@ -69,21 +69,10 @@ OutputIterator regex_replace(OutputIterator out, return out; } -template <class OutputIterator, class Iterator, class traits, class charT> -inline OutputIterator regex_replace(OutputIterator out, - Iterator first, - Iterator last, - const basic_regex<charT, traits>& e, - const std::basic_string<charT>& fmt, - match_flag_type flags = match_default) -{ - return regex_replace(out, first, last, e, fmt.c_str(), flags); -} - -template <class traits, class charT> +template <class traits, class charT, class Formatter> std::basic_string<charT> regex_replace(const std::basic_string<charT>& s, const basic_regex<charT, traits>& e, - const charT* fmt, + Formatter fmt, match_flag_type flags = match_default) { std::basic_string<charT> result; @@ -92,18 +81,6 @@ std::basic_string<charT> regex_replace(const std::basic_string<charT>& s, return result; } -template <class traits, class charT> -std::basic_string<charT> regex_replace(const std::basic_string<charT>& s, - const basic_regex<charT, traits>& e, - const std::basic_string<charT>& fmt, - match_flag_type flags = match_default) -{ - std::basic_string<charT> result; - re_detail::string_out_iterator<std::basic_string<charT> > i(result); - regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags); - return result; -} - #ifdef BOOST_MSVC #pragma warning(push) #pragma warning(disable: 4103) diff --git a/3rdParty/Boost/src/boost/regex/v4/regex_traits_defaults.hpp b/3rdParty/Boost/src/boost/regex/v4/regex_traits_defaults.hpp index 42428dd..ca13f19 100644 --- a/3rdParty/Boost/src/boost/regex/v4/regex_traits_defaults.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/regex_traits_defaults.hpp @@ -84,7 +84,7 @@ inline bool is_combining<unsigned char>(unsigned char) { return false; } -#ifndef __hpux // can't use WCHAR_MAX/MIN in pp-directives +#if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives #ifdef _MSC_VER template<> inline bool is_combining<wchar_t>(wchar_t c) @@ -159,7 +159,7 @@ struct character_pointer_range template <class charT> int get_default_class_id(const charT* p1, const charT* p2) { - static const charT data[72] = { + static const charT data[73] = { 'a', 'l', 'n', 'u', 'm', 'a', 'l', 'p', 'h', 'a', 'b', 'l', 'a', 'n', 'k', @@ -172,11 +172,12 @@ int get_default_class_id(const charT* p1, const charT* p2) 's', 'p', 'a', 'c', 'e', 'u', 'n', 'i', 'c', 'o', 'd', 'e', 'u', 'p', 'p', 'e', 'r', + 'v', 'w', 'o', 'r', 'd', 'x', 'd', 'i', 'g', 'i', 't', }; - static const character_pointer_range<charT> ranges[19] = + static const character_pointer_range<charT> ranges[21] = { {data+0, data+5,}, // alnum {data+5, data+10,}, // alpha @@ -185,6 +186,7 @@ int get_default_class_id(const charT* p1, const charT* p2) {data+20, data+21,}, // d {data+20, data+25,}, // digit {data+25, data+30,}, // graph + {data+29, data+30,}, // h {data+30, data+31,}, // l {data+30, data+35,}, // lower {data+35, data+40,}, // print @@ -194,9 +196,10 @@ int get_default_class_id(const charT* p1, const charT* p2) {data+57, data+58,}, // u {data+50, data+57,}, // unicode {data+57, data+62,}, // upper - {data+62, data+63,}, // w - {data+62, data+66,}, // word - {data+66, data+72,}, // xdigit + {data+62, data+63,}, // v + {data+63, data+64,}, // w + {data+63, data+67,}, // word + {data+67, data+73,}, // xdigit }; static const character_pointer_range<charT>* ranges_begin = ranges; static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0])); @@ -314,6 +317,43 @@ int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t) return result; } +template <class charT> +inline const charT* get_escape_R_string() +{ +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable:4309 4245) +#endif + static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', '\x85', '\\', 'x', '{', '2', '0', '2', '8', '}', + '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' }; + static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; + + charT c = static_cast<charT>(0x2029u); + bool b = (static_cast<unsigned>(c) == 0x2029u); + + return (b ? e1 : e2); +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif +} + +template <> +inline const char* get_escape_R_string<char>() +{ +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable:4309) +#endif + static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; + return e2; +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif +} + } // re_detail } // boost diff --git a/3rdParty/Boost/src/boost/regex/v4/regex_workaround.hpp b/3rdParty/Boost/src/boost/regex/v4/regex_workaround.hpp index fc3c212..06527f1 100644 --- a/3rdParty/Boost/src/boost/regex/v4/regex_workaround.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/regex_workaround.hpp @@ -124,7 +124,7 @@ inline void pointer_construct(T* p, const T& t) #ifdef __cplusplus namespace boost{ namespace re_detail{ -#if BOOST_WORKAROUND(BOOST_MSVC,>=1400) && defined(_CPPLIB_VER) && defined(BOOST_DINKUMWARE_STDLIB) && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) +#if BOOST_WORKAROUND(BOOST_MSVC,>=1400) && BOOST_WORKAROUND(BOOST_MSVC, <1600) && defined(_CPPLIB_VER) && defined(BOOST_DINKUMWARE_STDLIB) && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) // // MSVC 8 will either emit warnings or else refuse to compile // code that makes perfectly legitimate use of std::copy, when diff --git a/3rdParty/Boost/src/boost/regex/v4/states.hpp b/3rdParty/Boost/src/boost/regex/v4/states.hpp index 44dd2b4..38690b2 100644 --- a/3rdParty/Boost/src/boost/regex/v4/states.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/states.hpp @@ -118,7 +118,9 @@ enum syntax_element_type syntax_element_backstep = syntax_element_long_set_rep + 1, // an assertion that a mark was matched: syntax_element_assert_backref = syntax_element_backstep + 1, - syntax_element_toggle_case = syntax_element_assert_backref + 1 + syntax_element_toggle_case = syntax_element_assert_backref + 1, + // a recursive expression: + syntax_element_recurse = syntax_element_toggle_case + 1 }; #ifdef BOOST_REGEX_DEBUG @@ -156,6 +158,7 @@ struct re_brace : public re_syntax_base // The index to match, can be zero (don't mark the sub-expression) // or negative (for perl style (?...) extentions): int index; + bool icase; }; /*** struct re_dot ************************************************** @@ -245,6 +248,14 @@ struct re_repeat : public re_alt bool greedy; // True if this is a greedy repeat }; +/*** struct re_recurse ************************************************ +Recurse to a particular subexpression. +**********************************************************************/ +struct re_recurse : public re_jump +{ + int state_id; // identifier of first nested repeat within the recursion. +}; + /*** enum re_jump_size_type ******************************************* Provides compiled size of re_jump structure (allowing for trailing alignment). We provide this so we know how manybytes to insert when constructing the machine diff --git a/3rdParty/Boost/src/boost/regex/v4/syntax_type.hpp b/3rdParty/Boost/src/boost/regex/v4/syntax_type.hpp index 92c00d4..3efdf0b 100644 --- a/3rdParty/Boost/src/boost/regex/v4/syntax_type.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/syntax_type.hpp @@ -92,8 +92,11 @@ static const escape_syntax_type escape_type_G = 52; / static const escape_syntax_type escape_type_property = 54; // for \p static const escape_syntax_type escape_type_not_property = 55; // for \P static const escape_syntax_type escape_type_named_char = 56; // for \N +static const escape_syntax_type escape_type_extended_backref = 57; // for \g +static const escape_syntax_type escape_type_reset_start_mark = 58; // for \K +static const escape_syntax_type escape_type_line_ending = 59; // for \R -static const escape_syntax_type syntax_max = 57; +static const escape_syntax_type syntax_max = 60; } } diff --git a/3rdParty/Boost/src/boost/regex/v4/u32regex_iterator.hpp b/3rdParty/Boost/src/boost/regex/v4/u32regex_iterator.hpp index 7e893e6..65ebd7f 100644 --- a/3rdParty/Boost/src/boost/regex/v4/u32regex_iterator.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/u32regex_iterator.hpp @@ -178,7 +178,7 @@ inline u32regex_iterator<typename std::basic_string<charT, Traits, Alloc>::const typedef typename std::basic_string<charT, Traits, Alloc>::const_iterator iter_type; return u32regex_iterator<iter_type>(p.begin(), p.end(), e, m); } -inline u32regex_iterator<const UChar*> make_u32regex_iterator(const UnicodeString& s, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +inline u32regex_iterator<const UChar*> make_u32regex_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) { return u32regex_iterator<const UChar*>(s.getBuffer(), s.getBuffer() + s.length(), e, m); } diff --git a/3rdParty/Boost/src/boost/regex/v4/u32regex_token_iterator.hpp b/3rdParty/Boost/src/boost/regex/v4/u32regex_token_iterator.hpp index 2726d48..c47a219 100644 --- a/3rdParty/Boost/src/boost/regex/v4/u32regex_token_iterator.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/u32regex_token_iterator.hpp @@ -294,7 +294,7 @@ inline u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>: typedef typename std::basic_string<charT, Traits, Alloc>::const_iterator iter_type; return u32regex_token_iterator<iter_type>(p.begin(), p.end(), e, m); } -inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UnicodeString& s, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) { return u32regex_token_iterator<const UChar*>(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); } @@ -327,7 +327,7 @@ inline u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>: return u32regex_token_iterator<iter_type>(p.begin(), p.end(), e, m); } template <std::size_t N> -inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UnicodeString& s, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) { return u32regex_token_iterator<const UChar*>(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); } @@ -356,7 +356,7 @@ inline u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>: typedef typename std::basic_string<charT, Traits, Alloc>::const_iterator iter_type; return u32regex_token_iterator<iter_type>(p.begin(), p.end(), e, m); } -inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const UnicodeString& s, const u32regex& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +inline u32regex_token_iterator<const UChar*> make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default) { return u32regex_token_iterator<const UChar*>(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); } diff --git a/3rdParty/Boost/src/boost/regex/v4/w32_regex_traits.hpp b/3rdParty/Boost/src/boost/regex/v4/w32_regex_traits.hpp index 21a9694..d556207 100644 --- a/3rdParty/Boost/src/boost/regex/v4/w32_regex_traits.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/w32_regex_traits.hpp @@ -294,6 +294,8 @@ public: typedef typename w32_regex_traits<charT>::char_class_type char_class_type; BOOST_STATIC_CONSTANT(char_class_type, mask_word = 0x0400); // must be C1_DEFINED << 1 BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 0x0800); // must be C1_DEFINED << 2 + BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 0x1000); // must be C1_DEFINED << 3 + BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 0x2000); // must be C1_DEFINED << 4 BOOST_STATIC_CONSTANT(char_class_type, mask_base = 0x3ff); // all the masks used by the CT_CTYPE1 group typedef std::basic_string<charT> string_type; @@ -510,7 +512,7 @@ template <class charT> typename w32_regex_traits_implementation<charT>::char_class_type w32_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const { - static const char_class_type masks[20] = + static const char_class_type masks[22] = { 0, 0x0104u, // C1_ALPHA | C1_DIGIT @@ -520,6 +522,7 @@ typename w32_regex_traits_implementation<charT>::char_class_type 0x0004u, // C1_DIGIT 0x0004u, // C1_DIGIT (~(0x0020u|0x0008u|0x0040) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE or C1_BLANK + w32_regex_traits_implementation<charT>::mask_horizontal, 0x0002u, // C1_LOWER 0x0002u, // C1_LOWER (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL @@ -529,6 +532,7 @@ typename w32_regex_traits_implementation<charT>::char_class_type 0x0001u, // C1_UPPER w32_regex_traits_implementation<charT>::mask_unicode, 0x0001u, // C1_UPPER + w32_regex_traits_implementation<charT>::mask_vertical, 0x0104u | w32_regex_traits_implementation<charT>::mask_word, 0x0104u | w32_regex_traits_implementation<charT>::mask_word, 0x0080u, // C1_XDIGIT @@ -628,6 +632,12 @@ public: return true; else if((f & re_detail::w32_regex_traits_implementation<charT>::mask_word) && (c == '_')) return true; + else if((f & re_detail::w32_regex_traits_implementation<charT>::mask_vertical) + && (::boost::re_detail::is_separator(c) || (c == '\v'))) + return true; + else if((f & re_detail::w32_regex_traits_implementation<charT>::mask_horizontal) + && this->isctype(c, 0x0008u) && !this->isctype(c, re_detail::w32_regex_traits_implementation<charT>::mask_vertical)) + return true; return false; } int toi(const charT*& p1, const charT* p2, int radix)const |