summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp')
-rw-r--r--3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp221
1 files changed, 217 insertions, 4 deletions
diff --git a/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp b/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp
index 9f2cbee..ee207d0 100644
--- a/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp
+++ b/3rdParty/Boost/src/boost/regex/v4/basic_regex_creator.hpp
@@ -240,6 +240,7 @@ protected:
bool m_has_backrefs; // true if there are actually any backrefs
unsigned m_backrefs; // bitmask of permitted backrefs
boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for;
+ bool m_has_recursions; // set when we have recursive expresisons to fixup
typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character
@@ -250,6 +251,7 @@ private:
basic_regex_creator(const basic_regex_creator&);
void fixup_pointers(re_syntax_base* state);
+ void fixup_recursions(re_syntax_base* state);
void create_startmaps(re_syntax_base* state);
int calculate_backstep(re_syntax_base* state);
void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
@@ -263,7 +265,7 @@ private:
template <class charT, class traits>
basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
- : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0)
+ : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false)
{
m_pdata->m_data.clear();
m_pdata->m_status = ::boost::regex_constants::error_ok;
@@ -675,6 +677,8 @@ re_syntax_base* basic_regex_creator<charT, traits>::append_set(
template <class charT, class traits>
void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
{
+ if(this->m_pdata->m_status)
+ return;
// we've added all the states we need, now finish things off.
// start by adding a terminating state:
append_state(syntax_element_match);
@@ -692,6 +696,15 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());
// fixup pointers in the machine:
fixup_pointers(m_pdata->m_first_state);
+ if(m_has_recursions)
+ {
+ m_pdata->m_has_recursions = true;
+ fixup_recursions(m_pdata->m_first_state);
+ if(this->m_pdata->m_status)
+ return;
+ }
+ else
+ m_pdata->m_has_recursions = false;
// create nested startmaps:
create_startmaps(m_pdata->m_first_state);
// create main startmap:
@@ -713,6 +726,13 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
{
switch(state->type)
{
+ case syntax_element_recurse:
+ m_has_recursions = true;
+ if(state->next.i)
+ state->next.p = getaddress(state->next.i, state);
+ else
+ state->next.p = 0;
+ break;
case syntax_element_rep:
case syntax_element_dot_rep:
case syntax_element_char_rep:
@@ -739,6 +759,128 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
}
template <class charT, class traits>
+void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
+{
+ re_syntax_base* base = state;
+ while(state)
+ {
+ switch(state->type)
+ {
+ case syntax_element_assert_backref:
+ {
+ // just check that the index is valid:
+ int id = static_cast<const re_brace*>(state)->index;
+ if(id < 0)
+ {
+ id = -id-1;
+ if(id >= 10000)
+ {
+ id = m_pdata->get_id(id);
+ if(id <= 0)
+ {
+ // check of sub-expression that doesn't exist:
+ if(0 == this->m_pdata->m_status) // update the error code if not already set
+ this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
+ //
+ // clear the expression, we should be empty:
+ //
+ this->m_pdata->m_expression = 0;
+ this->m_pdata->m_expression_len = 0;
+ //
+ // and throw if required:
+ //
+ if(0 == (this->flags() & regex_constants::no_except))
+ {
+ std::string message = "Encountered a forward reference to a marked sub-expression that does not exist.";
+ boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
+ e.raise();
+ }
+ }
+ }
+ }
+ }
+ break;
+ case syntax_element_recurse:
+ {
+ bool ok = false;
+ re_syntax_base* p = base;
+ int id = static_cast<re_jump*>(state)->alt.i;
+ if(id > 10000)
+ id = m_pdata->get_id(id);
+ while(p)
+ {
+ if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == id))
+ {
+ //
+ // We've found the target of the recursion, set the jump target:
+ //
+ static_cast<re_jump*>(state)->alt.p = p;
+ ok = true;
+ //
+ // Now scan the target for nested repeats:
+ //
+ p = p->next.p;
+ int next_rep_id = 0;
+ while(p)
+ {
+ switch(p->type)
+ {
+ case syntax_element_rep:
+ case syntax_element_dot_rep:
+ case syntax_element_char_rep:
+ case syntax_element_short_set_rep:
+ case syntax_element_long_set_rep:
+ next_rep_id = static_cast<re_repeat*>(p)->state_id;
+ break;
+ case syntax_element_endmark:
+ if(static_cast<const re_brace*>(p)->index == id)
+ next_rep_id = -1;
+ break;
+ default:
+ break;
+ }
+ if(next_rep_id)
+ break;
+ p = p->next.p;
+ }
+ if(next_rep_id > 0)
+ {
+ static_cast<re_recurse*>(state)->state_id = next_rep_id - 1;
+ }
+
+ break;
+ }
+ p = p->next.p;
+ }
+ if(!ok)
+ {
+ // recursion to sub-expression that doesn't exist:
+ if(0 == this->m_pdata->m_status) // update the error code if not already set
+ this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
+ //
+ // clear the expression, we should be empty:
+ //
+ this->m_pdata->m_expression = 0;
+ this->m_pdata->m_expression_len = 0;
+ //
+ // and throw if required:
+ //
+ if(0 == (this->flags() & regex_constants::no_except))
+ {
+ std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist.";
+ boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
+ e.raise();
+ }
+ }
+ }
+ default:
+ break;
+ }
+ state = state->next.p;
+ }
+}
+
+template <class charT, class traits>
void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
{
// non-recursive implementation:
@@ -790,7 +932,7 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
//
if(0 == (this->flags() & regex_constants::no_except))
{
- std::string message = this->m_pdata->m_ptraits->error_string(boost::regex_constants::error_bad_pattern);
+ std::string message = "Invalid lookbehind assertion encountered in the regular expression.";
boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
e.raise();
}
@@ -909,6 +1051,9 @@ template <class charT, class traits>
void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
{
int not_last_jump = 1;
+ re_syntax_base* recursion_start = 0;
+ int recursion_sub = 0;
+ re_syntax_base* recursion_restart = 0;
// track case sensitivity:
bool l_icase = m_icase;
@@ -953,6 +1098,41 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
create_startmap(state->next.p, 0, pnull, mask);
return;
}
+ case syntax_element_recurse:
+ {
+ if(recursion_start == state)
+ {
+ // Infinite recursion!!
+ if(0 == this->m_pdata->m_status) // update the error code if not already set
+ this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
+ //
+ // clear the expression, we should be empty:
+ //
+ this->m_pdata->m_expression = 0;
+ this->m_pdata->m_expression_len = 0;
+ //
+ // and throw if required:
+ //
+ if(0 == (this->flags() & regex_constants::no_except))
+ {
+ std::string message = "Encountered an infinite recursion.";
+ boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
+ e.raise();
+ }
+ }
+ else if(recursion_start == 0)
+ {
+ recursion_start = state;
+ recursion_restart = state->next.p;
+ state = static_cast<re_jump*>(state)->alt.p;
+ if(state->type == syntax_element_startmark)
+ recursion_sub = static_cast<re_brace*>(state)->index;
+ else
+ recursion_sub = 0;
+ break;
+ }
+ // fall through, can't handle nested recursion here...
+ }
case syntax_element_backref:
// can be null, and any character can match:
if(pnull)
@@ -1111,12 +1291,45 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
*pnull |= mask;
return;
}
- else
+ else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index))
{
- state = state->next.p;
+ // recursion termination:
+ recursion_start = 0;
+ state = recursion_restart;
break;
}
+ //
+ // Normally we just go to the next state... but if this sub-expression is
+ // the target of a recursion, then we might be ending a recursion, in which
+ // case we should check whatever follows that recursion, as well as whatever
+ // follows this state:
+ //
+ if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index)
+ {
+ bool ok = false;
+ re_syntax_base* p = m_pdata->m_first_state;
+ while(p)
+ {
+ if((p->type == syntax_element_recurse))
+ {
+ re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p);
+ if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index))
+ {
+ ok = true;
+ break;
+ }
+ }
+ p = p->next.p;
+ }
+ if(ok)
+ {
+ create_startmap(p->next.p, l_map, pnull, mask);
+ }
+ }
+ state = state->next.p;
+ break;
+
case syntax_element_startmark:
// need to handle independent subs as a special case:
if(static_cast<re_brace*>(state)->index == -3)