diff options
Diffstat (limited to '3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp')
-rw-r--r-- | 3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp | 221 |
1 files changed, 217 insertions, 4 deletions
diff --git a/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp b/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp index 9f2cbee..ee207d0 100644 --- a/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp +++ b/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp @@ -240,6 +240,7 @@ protected: bool m_has_backrefs; // true if there are actually any backrefs unsigned m_backrefs; // bitmask of permitted backrefs boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; + bool m_has_recursions; // set when we have recursive expresisons to fixup typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character @@ -250,6 +251,7 @@ private: basic_regex_creator(const basic_regex_creator&); void fixup_pointers(re_syntax_base* state); + void fixup_recursions(re_syntax_base* state); void create_startmaps(re_syntax_base* state); int calculate_backstep(re_syntax_base* state); void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask); @@ -263,7 +265,7 @@ private: template <class charT, class traits> basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data) - : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0) + : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false) { m_pdata->m_data.clear(); m_pdata->m_status = ::boost::regex_constants::error_ok; @@ -675,6 +677,8 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set( template <class charT, class traits> void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2) { + if(this->m_pdata->m_status) + return; // we've added all the states we need, now finish things off. // start by adding a terminating state: append_state(syntax_element_match); @@ -692,6 +696,15 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data()); // fixup pointers in the machine: fixup_pointers(m_pdata->m_first_state); + if(m_has_recursions) + { + m_pdata->m_has_recursions = true; + fixup_recursions(m_pdata->m_first_state); + if(this->m_pdata->m_status) + return; + } + else + m_pdata->m_has_recursions = false; // create nested startmaps: create_startmaps(m_pdata->m_first_state); // create main startmap: @@ -713,6 +726,13 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state) { switch(state->type) { + case syntax_element_recurse: + m_has_recursions = true; + if(state->next.i) + state->next.p = getaddress(state->next.i, state); + else + state->next.p = 0; + break; case syntax_element_rep: case syntax_element_dot_rep: case syntax_element_char_rep: @@ -739,6 +759,128 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state) } template <class charT, class traits> +void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state) +{ + re_syntax_base* base = state; + while(state) + { + switch(state->type) + { + case syntax_element_assert_backref: + { + // just check that the index is valid: + int id = static_cast<const re_brace*>(state)->index; + if(id < 0) + { + id = -id-1; + if(id >= 10000) + { + id = m_pdata->get_id(id); + if(id <= 0) + { + // check of sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered a forward reference to a marked sub-expression that does not exist."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + } + } + break; + case syntax_element_recurse: + { + bool ok = false; + re_syntax_base* p = base; + int id = static_cast<re_jump*>(state)->alt.i; + if(id > 10000) + id = m_pdata->get_id(id); + while(p) + { + if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == id)) + { + // + // We've found the target of the recursion, set the jump target: + // + static_cast<re_jump*>(state)->alt.p = p; + ok = true; + // + // Now scan the target for nested repeats: + // + p = p->next.p; + int next_rep_id = 0; + while(p) + { + switch(p->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + next_rep_id = static_cast<re_repeat*>(p)->state_id; + break; + case syntax_element_endmark: + if(static_cast<const re_brace*>(p)->index == id) + next_rep_id = -1; + break; + default: + break; + } + if(next_rep_id) + break; + p = p->next.p; + } + if(next_rep_id > 0) + { + static_cast<re_recurse*>(state)->state_id = next_rep_id - 1; + } + + break; + } + p = p->next.p; + } + if(!ok) + { + // recursion to sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + default: + break; + } + state = state->next.p; + } +} + +template <class charT, class traits> void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state) { // non-recursive implementation: @@ -790,7 +932,7 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state) // if(0 == (this->flags() & regex_constants::no_except)) { - std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern); + std::string message = "Invalid lookbehind assertion encountered in the regular expression."; boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); e.raise(); } @@ -909,6 +1051,9 @@ template <class charT, class traits> void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask) { int not_last_jump = 1; + re_syntax_base* recursion_start = 0; + int recursion_sub = 0; + re_syntax_base* recursion_restart = 0; // track case sensitivity: bool l_icase = m_icase; @@ -953,6 +1098,41 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, create_startmap(state->next.p, 0, pnull, mask); return; } + case syntax_element_recurse: + { + if(recursion_start == state) + { + // Infinite recursion!! + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered an infinite recursion."; + boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + else if(recursion_start == 0) + { + recursion_start = state; + recursion_restart = state->next.p; + state = static_cast<re_jump*>(state)->alt.p; + if(state->type == syntax_element_startmark) + recursion_sub = static_cast<re_brace*>(state)->index; + else + recursion_sub = 0; + break; + } + // fall through, can't handle nested recursion here... + } case syntax_element_backref: // can be null, and any character can match: if(pnull) @@ -1111,12 +1291,45 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, *pnull |= mask; return; } - else + else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index)) { - state = state->next.p; + // recursion termination: + recursion_start = 0; + state = recursion_restart; break; } + // + // Normally we just go to the next state... but if this sub-expression is + // the target of a recursion, then we might be ending a recursion, in which + // case we should check whatever follows that recursion, as well as whatever + // follows this state: + // + if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index) + { + bool ok = false; + re_syntax_base* p = m_pdata->m_first_state; + while(p) + { + if((p->type == syntax_element_recurse)) + { + re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p); + if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index)) + { + ok = true; + break; + } + } + p = p->next.p; + } + if(ok) + { + create_startmap(p->next.p, l_map, pnull, mask); + } + } + state = state->next.p; + break; + case syntax_element_startmark: // need to handle independent subs as a special case: if(static_cast<re_brace*>(state)->index == -3) |