summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '3rdParty/Boost/boost/regex/v4/states.hpp')
-rw-r--r--3rdParty/Boost/boost/regex/v4/states.hpp290
1 files changed, 290 insertions, 0 deletions
diff --git a/3rdParty/Boost/boost/regex/v4/states.hpp b/3rdParty/Boost/boost/regex/v4/states.hpp
new file mode 100644
index 0000000..44dd2b4
--- /dev/null
+++ b/3rdParty/Boost/boost/regex/v4/states.hpp
@@ -0,0 +1,290 @@
+/*
+ *
+ * Copyright (c) 1998-2002
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+ * LOCATION: see http://www.boost.org for most recent version.
+ * FILE states.cpp
+ * VERSION see <boost/version.hpp>
+ * DESCRIPTION: Declares internal state machine structures.
+ */
+
+#ifndef BOOST_REGEX_V4_STATES_HPP
+#define BOOST_REGEX_V4_STATES_HPP
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_PREFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+namespace boost{
+namespace re_detail{
+
+/*** mask_type *******************************************************
+Whenever we have a choice of two alternatives, we use an array of bytes
+to indicate which of the two alternatives it is possible to take for any
+given input character. If mask_take is set, then we can take the next
+state, and if mask_skip is set then we can take the alternative.
+***********************************************************************/
+enum mask_type
+{
+ mask_take = 1,
+ mask_skip = 2,
+ mask_init = 4,
+ mask_any = mask_skip | mask_take,
+ mask_all = mask_any
+};
+
+/*** helpers **********************************************************
+These helpers let us use function overload resolution to detect whether
+we have narrow or wide character strings:
+***********************************************************************/
+struct _narrow_type{};
+struct _wide_type{};
+template <class charT> struct is_byte;
+template<> struct is_byte<char> { typedef _narrow_type width_type; };
+template<> struct is_byte<unsigned char>{ typedef _narrow_type width_type; };
+template<> struct is_byte<signed char> { typedef _narrow_type width_type; };
+template <class charT> struct is_byte { typedef _wide_type width_type; };
+
+/*** enum syntax_element_type ******************************************
+Every record in the state machine falls into one of the following types:
+***********************************************************************/
+enum syntax_element_type
+{
+ // start of a marked sub-expression, or perl-style (?...) extension
+ syntax_element_startmark = 0,
+ // end of a marked sub-expression, or perl-style (?...) extension
+ syntax_element_endmark = syntax_element_startmark + 1,
+ // any sequence of literal characters
+ syntax_element_literal = syntax_element_endmark + 1,
+ // start of line assertion: ^
+ syntax_element_start_line = syntax_element_literal + 1,
+ // end of line assertion $
+ syntax_element_end_line = syntax_element_start_line + 1,
+ // match any character: .
+ syntax_element_wild = syntax_element_end_line + 1,
+ // end of expression: we have a match when we get here
+ syntax_element_match = syntax_element_wild + 1,
+ // perl style word boundary: \b
+ syntax_element_word_boundary = syntax_element_match + 1,
+ // perl style within word boundary: \B
+ syntax_element_within_word = syntax_element_word_boundary + 1,
+ // start of word assertion: \<
+ syntax_element_word_start = syntax_element_within_word + 1,
+ // end of word assertion: \>
+ syntax_element_word_end = syntax_element_word_start + 1,
+ // start of buffer assertion: \`
+ syntax_element_buffer_start = syntax_element_word_end + 1,
+ // end of buffer assertion: \'
+ syntax_element_buffer_end = syntax_element_buffer_start + 1,
+ // backreference to previously matched sub-expression
+ syntax_element_backref = syntax_element_buffer_end + 1,
+ // either a wide character set [..] or one with multicharacter collating elements:
+ syntax_element_long_set = syntax_element_backref + 1,
+ // narrow character set: [...]
+ syntax_element_set = syntax_element_long_set + 1,
+ // jump to a new state in the machine:
+ syntax_element_jump = syntax_element_set + 1,
+ // choose between two production states:
+ syntax_element_alt = syntax_element_jump + 1,
+ // a repeat
+ syntax_element_rep = syntax_element_alt + 1,
+ // match a combining character sequence
+ syntax_element_combining = syntax_element_rep + 1,
+ // perl style soft buffer end: \z
+ syntax_element_soft_buffer_end = syntax_element_combining + 1,
+ // perl style continuation: \G
+ syntax_element_restart_continue = syntax_element_soft_buffer_end + 1,
+ // single character repeats:
+ syntax_element_dot_rep = syntax_element_restart_continue + 1,
+ syntax_element_char_rep = syntax_element_dot_rep + 1,
+ syntax_element_short_set_rep = syntax_element_char_rep + 1,
+ syntax_element_long_set_rep = syntax_element_short_set_rep + 1,
+ // a backstep for lookbehind repeats:
+ syntax_element_backstep = syntax_element_long_set_rep + 1,
+ // an assertion that a mark was matched:
+ syntax_element_assert_backref = syntax_element_backstep + 1,
+ syntax_element_toggle_case = syntax_element_assert_backref + 1
+};
+
+#ifdef BOOST_REGEX_DEBUG
+// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion
+std::ostream& operator<<(std::ostream&, syntax_element_type);
+#endif
+
+struct re_syntax_base;
+
+/*** union offset_type ************************************************
+Points to another state in the machine. During machine construction
+we use integral offsets, but these are converted to pointers before
+execution of the machine.
+***********************************************************************/
+union offset_type
+{
+ re_syntax_base* p;
+ std::ptrdiff_t i;
+};
+
+/*** struct re_syntax_base ********************************************
+Base class for all states in the machine.
+***********************************************************************/
+struct re_syntax_base
+{
+ syntax_element_type type; // what kind of state this is
+ offset_type next; // next state in the machine
+};
+
+/*** struct re_brace **************************************************
+A marked parenthesis.
+***********************************************************************/
+struct re_brace : public re_syntax_base
+{
+ // The index to match, can be zero (don't mark the sub-expression)
+ // or negative (for perl style (?...) extentions):
+ int index;
+};
+
+/*** struct re_dot **************************************************
+Match anything.
+***********************************************************************/
+enum
+{
+ dont_care = 1,
+ force_not_newline = 0,
+ force_newline = 2,
+
+ test_not_newline = 2,
+ test_newline = 3
+};
+struct re_dot : public re_syntax_base
+{
+ unsigned char mask;
+};
+
+/*** struct re_literal ************************************************
+A string of literals, following this structure will be an
+array of characters: charT[length]
+***********************************************************************/
+struct re_literal : public re_syntax_base
+{
+ unsigned int length;
+};
+
+/*** struct re_case ************************************************
+Indicates whether we are moving to a case insensive block or not
+***********************************************************************/
+struct re_case : public re_syntax_base
+{
+ bool icase;
+};
+
+/*** struct re_set_long ***********************************************
+A wide character set of characters, following this structure will be
+an array of type charT:
+First csingles null-terminated strings
+Then 2 * cranges NULL terminated strings
+Then cequivalents NULL terminated strings
+***********************************************************************/
+template <class mask_type>
+struct re_set_long : public re_syntax_base
+{
+ unsigned int csingles, cranges, cequivalents;
+ mask_type cclasses;
+ mask_type cnclasses;
+ bool isnot;
+ bool singleton;
+};
+
+/*** struct re_set ****************************************************
+A set of narrow-characters, matches any of _map which is none-zero
+***********************************************************************/
+struct re_set : public re_syntax_base
+{
+ unsigned char _map[1 << CHAR_BIT];
+};
+
+/*** struct re_jump ***************************************************
+Jump to a new location in the machine (not next).
+***********************************************************************/
+struct re_jump : public re_syntax_base
+{
+ offset_type alt; // location to jump to
+};
+
+/*** struct re_alt ***************************************************
+Jump to a new location in the machine (possibly next).
+***********************************************************************/
+struct re_alt : public re_jump
+{
+ unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump
+ unsigned int can_be_null; // true if we match a NULL string
+};
+
+/*** struct re_repeat *************************************************
+Repeat a section of the machine
+***********************************************************************/
+struct re_repeat : public re_alt
+{
+ std::size_t min, max; // min and max allowable repeats
+ int state_id; // Unique identifier for this repeat
+ bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches)
+ bool greedy; // True if this is a greedy repeat
+};
+
+/*** enum re_jump_size_type *******************************************
+Provides compiled size of re_jump structure (allowing for trailing alignment).
+We provide this so we know how manybytes to insert when constructing the machine
+(The value of padding_mask is defined in regex_raw_buffer.hpp).
+***********************************************************************/
+enum re_jump_size_type
+{
+ re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
+ re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask),
+ re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask)
+};
+
+/*** proc re_is_set_member *********************************************
+Forward declaration: we'll need this one later...
+***********************************************************************/
+
+template<class charT, class traits>
+struct regex_data;
+
+template <class iterator, class charT, class traits_type, class char_classT>
+iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
+ iterator last,
+ const re_set_long<char_classT>* set_,
+ const regex_data<charT, traits_type>& e, bool icase);
+
+} // namespace re_detail
+
+} // namespace boost
+
+#ifdef BOOST_MSVC
+#pragma warning(push)
+#pragma warning(disable: 4103)
+#endif
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_SUFFIX
+#endif
+#ifdef BOOST_MSVC
+#pragma warning(pop)
+#endif
+
+#endif
+
+