diff options
Diffstat (limited to '3rdParty/Expat/src/xmltok.c')
-rw-r--r-- | 3rdParty/Expat/src/xmltok.c | 244 |
1 files changed, 170 insertions, 74 deletions
diff --git a/3rdParty/Expat/src/xmltok.c b/3rdParty/Expat/src/xmltok.c index bf09dfc..cdf0720 100644 --- a/3rdParty/Expat/src/xmltok.c +++ b/3rdParty/Expat/src/xmltok.c @@ -6,10 +6,4 @@ -#ifdef COMPILED_FROM_DSP +#ifdef _WIN32 #include "winconfig.h" -#elif defined(MACOS_CLASSIC) -#include "macconfig.h" -#elif defined(__amigaos__) -#include "amigaconfig.h" -#elif defined(__WATCOMC__) -#include "watcomconfig.h" #else @@ -18,3 +12,3 @@ #endif -#endif /* ndef COMPILED_FROM_DSP */ +#endif /* ndef _WIN32 */ @@ -48,3 +42,3 @@ #define UCS2_GET_NAMING(pages, hi, lo) \ - (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) + (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F))) @@ -58,3 +52,3 @@ + ((((byte)[1]) >> 5) & 1)] \ - & (1 << (((byte)[1]) & 0x1F))) + & (1u << (((byte)[1]) & 0x1F))) @@ -71,3 +65,3 @@ + ((((byte)[2]) >> 5) & 1)] \ - & (1 << (((byte)[2]) & 0x1F))) + & (1u << (((byte)[2]) & 0x1F))) @@ -124,3 +118,3 @@ static int PTRFASTCALL -isNever(const ENCODING *enc, const char *p) +isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p)) { @@ -130,3 +124,3 @@ isNever(const ENCODING *enc, const char *p) static int PTRFASTCALL -utf8_isName2(const ENCODING *enc, const char *p) +utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p) { @@ -136,3 +130,3 @@ utf8_isName2(const ENCODING *enc, const char *p) static int PTRFASTCALL -utf8_isName3(const ENCODING *enc, const char *p) +utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p) { @@ -144,3 +138,3 @@ utf8_isName3(const ENCODING *enc, const char *p) static int PTRFASTCALL -utf8_isNmstrt2(const ENCODING *enc, const char *p) +utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p) { @@ -150,3 +144,3 @@ utf8_isNmstrt2(const ENCODING *enc, const char *p) static int PTRFASTCALL -utf8_isNmstrt3(const ENCODING *enc, const char *p) +utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p) { @@ -158,3 +152,3 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p) static int PTRFASTCALL -utf8_isInvalid2(const ENCODING *enc, const char *p) +utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p) { @@ -164,3 +158,3 @@ utf8_isInvalid2(const ENCODING *enc, const char *p) static int PTRFASTCALL -utf8_isInvalid3(const ENCODING *enc, const char *p) +utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p) { @@ -170,3 +164,3 @@ utf8_isInvalid3(const ENCODING *enc, const char *p) static int PTRFASTCALL -utf8_isInvalid4(const ENCODING *enc, const char *p) +utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p) { @@ -224,2 +218,13 @@ struct normal_encoding { +#define NULL_VTABLE \ + /* isName2 */ NULL, \ + /* isName3 */ NULL, \ + /* isName4 */ NULL, \ + /* isNmstrt2 */ NULL, \ + /* isNmstrt3 */ NULL, \ + /* isNmstrt4 */ NULL, \ + /* isInvalid2 */ NULL, \ + /* isInvalid3 */ NULL, \ + /* isInvalid4 */ NULL + static int FASTCALL checkCharRefNumber(int); @@ -320,4 +325,39 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ -static void PTRCALL -utf8_toUtf8(const ENCODING *enc, +void +align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef) +{ + const char * fromLim = *fromLimRef; + size_t walked = 0; + for (; fromLim > from; fromLim--, walked++) { + const unsigned char prev = (unsigned char)fromLim[-1]; + if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */ + if (walked + 1 >= 4) { + fromLim += 4 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */ + if (walked + 1 >= 3) { + fromLim += 3 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */ + if (walked + 1 >= 2) { + fromLim += 2 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ + break; + } + } + *fromLimRef = fromLim; +} + +static enum XML_Convert_Result PTRCALL +utf8_toUtf8(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, @@ -327,9 +367,8 @@ utf8_toUtf8(const ENCODING *enc, const char *from; - if (fromLim - *fromP > toLim - *toP) { - /* Avoid copying partial characters. */ - for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) - if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) - break; - } - for (to = *toP, from = *fromP; from != fromLim; from++, to++) + const char *fromLimInitial = fromLim; + + /* Avoid copying partial characters. */ + align_limit_to_full_utf8_characters(*fromP, &fromLim); + + for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++) *to = *from; @@ -337,5 +376,12 @@ utf8_toUtf8(const ENCODING *enc, *toP = to; + + if (fromLim < fromLimInitial) + return XML_CONVERT_INPUT_INCOMPLETE; + else if ((to == toLim) && (from < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } -static void PTRCALL +static enum XML_Convert_Result PTRCALL utf8_toUtf16(const ENCODING *enc, @@ -344,7 +390,12 @@ utf8_toUtf16(const ENCODING *enc, { + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; unsigned short *to = *toP; const char *from = *fromP; - while (from != fromLim && to != toLim) { + while (from < fromLim && to < toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: + if (fromLim - from < 2) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); @@ -353,2 +404,6 @@ utf8_toUtf16(const ENCODING *enc, case BT_LEAD3: + if (fromLim - from < 3) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } *to++ = (unsigned short)(((from[0] & 0xf) << 12) @@ -360,4 +415,10 @@ utf8_toUtf16(const ENCODING *enc, unsigned long n; - if (to + 1 == toLim) + if (toLim - to < 2) { + res = XML_CONVERT_OUTPUT_EXHAUSTED; + goto after; + } + if (fromLim - from < 4) { + res = XML_CONVERT_INPUT_INCOMPLETE; goto after; + } n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) @@ -376,2 +437,4 @@ utf8_toUtf16(const ENCODING *enc, } + if (from < fromLim) + res = XML_CONVERT_OUTPUT_EXHAUSTED; after: @@ -379,2 +442,3 @@ after: *toP = to; + return res; } @@ -427,4 +491,4 @@ static const struct normal_encoding internal_utf8_encoding = { -static void PTRCALL -latin1_toUtf8(const ENCODING *enc, +static enum XML_Convert_Result PTRCALL +latin1_toUtf8(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, @@ -435,3 +499,3 @@ latin1_toUtf8(const ENCODING *enc, if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; c = (unsigned char)**fromP; @@ -439,3 +503,3 @@ latin1_toUtf8(const ENCODING *enc, if (toLim - *toP < 2) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = (char)((c >> 6) | UTF8_cval2); @@ -446,3 +510,3 @@ latin1_toUtf8(const ENCODING *enc, if (*toP == toLim) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = *(*fromP)++; @@ -452,4 +516,4 @@ latin1_toUtf8(const ENCODING *enc, -static void PTRCALL -latin1_toUtf16(const ENCODING *enc, +static enum XML_Convert_Result PTRCALL +latin1_toUtf16(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, @@ -457,4 +521,9 @@ latin1_toUtf16(const ENCODING *enc, { - while (*fromP != fromLim && *toP != toLim) + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = (unsigned char)*(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } @@ -469,3 +538,3 @@ static const struct normal_encoding latin1_encoding_ns = { }, - STANDARD_VTABLE(sb_) + STANDARD_VTABLE(sb_) NULL_VTABLE }; @@ -482,7 +551,7 @@ static const struct normal_encoding latin1_encoding = { }, - STANDARD_VTABLE(sb_) + STANDARD_VTABLE(sb_) NULL_VTABLE }; -static void PTRCALL -ascii_toUtf8(const ENCODING *enc, +static enum XML_Convert_Result PTRCALL +ascii_toUtf8(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, @@ -490,4 +559,9 @@ ascii_toUtf8(const ENCODING *enc, { - while (*fromP != fromLim && *toP != toLim) + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = *(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } @@ -502,3 +576,3 @@ static const struct normal_encoding ascii_encoding_ns = { }, - STANDARD_VTABLE(sb_) + STANDARD_VTABLE(sb_) NULL_VTABLE }; @@ -515,3 +589,3 @@ static const struct normal_encoding ascii_encoding = { }, - STANDARD_VTABLE(sb_) + STANDARD_VTABLE(sb_) NULL_VTABLE }; @@ -538,4 +612,4 @@ unicode_byte_type(char hi, char lo) #define DEFINE_UTF16_TO_UTF8(E) \ -static void PTRCALL \ -E ## toUtf8(const ENCODING *enc, \ +static enum XML_Convert_Result PTRCALL \ +E ## toUtf8(const ENCODING *UNUSED_P(enc), \ const char **fromP, const char *fromLim, \ @@ -543,4 +617,5 @@ E ## toUtf8(const ENCODING *enc, \ { \ - const char *from; \ - for (from = *fromP; from != fromLim; from += 2) { \ + const char *from = *fromP; \ + fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ + for (; from < fromLim; from += 2) { \ int plane; \ @@ -554,3 +629,3 @@ E ## toUtf8(const ENCODING *enc, \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ @@ -564,3 +639,3 @@ E ## toUtf8(const ENCODING *enc, \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ @@ -572,3 +647,3 @@ E ## toUtf8(const ENCODING *enc, \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ @@ -582,3 +657,7 @@ E ## toUtf8(const ENCODING *enc, \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + if (fromLim - from < 4) { \ + *fromP = from; \ + return XML_CONVERT_INPUT_INCOMPLETE; \ } \ @@ -598,2 +677,6 @@ E ## toUtf8(const ENCODING *enc, \ *fromP = from; \ + if (from < fromLim) \ + return XML_CONVERT_INPUT_INCOMPLETE; \ + else \ + return XML_CONVERT_COMPLETED; \ } @@ -601,4 +684,4 @@ E ## toUtf8(const ENCODING *enc, \ #define DEFINE_UTF16_TO_UTF16(E) \ -static void PTRCALL \ -E ## toUtf16(const ENCODING *enc, \ +static enum XML_Convert_Result PTRCALL \ +E ## toUtf16(const ENCODING *UNUSED_P(enc), \ const char **fromP, const char *fromLim, \ @@ -606,8 +689,16 @@ E ## toUtf16(const ENCODING *enc, \ { \ + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ + fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ /* Avoid copying first half only of surrogate */ \ if (fromLim - *fromP > ((toLim - *toP) << 1) \ - && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ + && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ fromLim -= 2; \ - for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ + res = XML_CONVERT_INPUT_INCOMPLETE; \ + } \ + for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ + if ((*toP == toLim) && (*fromP < fromLim)) \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + else \ + return res; \ } @@ -728,3 +819,3 @@ static const struct normal_encoding little2_encoding_ns = { }, - STANDARD_VTABLE(little2_) + STANDARD_VTABLE(little2_) NULL_VTABLE }; @@ -747,3 +838,3 @@ static const struct normal_encoding little2_encoding = { }, - STANDARD_VTABLE(little2_) + STANDARD_VTABLE(little2_) NULL_VTABLE }; @@ -760,3 +851,3 @@ static const struct normal_encoding internal_little2_encoding_ns = { }, - STANDARD_VTABLE(little2_) + STANDARD_VTABLE(little2_) NULL_VTABLE }; @@ -773,3 +864,3 @@ static const struct normal_encoding internal_little2_encoding = { }, - STANDARD_VTABLE(little2_) + STANDARD_VTABLE(little2_) NULL_VTABLE }; @@ -869,3 +960,3 @@ static const struct normal_encoding big2_encoding_ns = { }, - STANDARD_VTABLE(big2_) + STANDARD_VTABLE(big2_) NULL_VTABLE }; @@ -888,3 +979,3 @@ static const struct normal_encoding big2_encoding = { }, - STANDARD_VTABLE(big2_) + STANDARD_VTABLE(big2_) NULL_VTABLE }; @@ -901,3 +992,3 @@ static const struct normal_encoding internal_big2_encoding_ns = { }, - STANDARD_VTABLE(big2_) + STANDARD_VTABLE(big2_) NULL_VTABLE }; @@ -914,3 +1005,3 @@ static const struct normal_encoding internal_big2_encoding = { }, - STANDARD_VTABLE(big2_) + STANDARD_VTABLE(big2_) NULL_VTABLE }; @@ -940,3 +1031,3 @@ streqci(const char *s1, const char *s2) static void PTRCALL -initUpdatePosition(const ENCODING *enc, const char *ptr, +initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end, POSITION *pos) @@ -1290,3 +1381,3 @@ unknown_isInvalid(const ENCODING *enc, const char *p) -static void PTRCALL +static enum XML_Convert_Result PTRCALL unknown_toUtf8(const ENCODING *enc, @@ -1301,3 +1392,3 @@ unknown_toUtf8(const ENCODING *enc, if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; utf8 = uenc->utf8[(unsigned char)**fromP]; @@ -1308,3 +1399,3 @@ unknown_toUtf8(const ENCODING *enc, if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; utf8 = buf; @@ -1315,3 +1406,3 @@ unknown_toUtf8(const ENCODING *enc, if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; (*fromP)++; @@ -1324,3 +1415,3 @@ unknown_toUtf8(const ENCODING *enc, -static void PTRCALL +static enum XML_Convert_Result PTRCALL unknown_toUtf16(const ENCODING *enc, @@ -1330,3 +1421,3 @@ unknown_toUtf16(const ENCODING *enc, const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); - while (*fromP != fromLim && *toP != toLim) { + while (*fromP < fromLim && *toP < toLim) { unsigned short c = uenc->utf16[(unsigned char)**fromP]; @@ -1342,2 +1433,7 @@ unknown_toUtf16(const ENCODING *enc, } + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } @@ -1505,3 +1601,3 @@ initScan(const ENCODING * const *encodingTable, - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; |