summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
Diffstat (limited to '3rdParty/Expat/src/xmltok.c')
-rw-r--r-- 3rdParty/Expat/src/xmltok.c 244
1 files changed, 170 insertions, 74 deletions
diff --git a/3rdParty/Expat/src/xmltok.c b/3rdParty/Expat/src/xmltok.c
index bf09dfc..cdf0720 100644
--- a/3rdParty/Expat/src/xmltok.c
+++ b/3rdParty/Expat/src/xmltok.c
@@ -6,10 +6,4 @@
-#ifdef COMPILED_FROM_DSP
+#ifdef _WIN32
#include "winconfig.h"
-#elif defined(MACOS_CLASSIC)
-#include "macconfig.h"
-#elif defined(__amigaos__)
-#include "amigaconfig.h"
-#elif defined(__WATCOMC__)
-#include "watcomconfig.h"
#else
@@ -18,3 +12,3 @@
#endif
-#endif /* ndef COMPILED_FROM_DSP */
+#endif /* ndef _WIN32 */
@@ -48,3 +42,3 @@
#define UCS2_GET_NAMING(pages, hi, lo) \
- (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
+ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
@@ -58,3 +52,3 @@
+ ((((byte)[1]) >> 5) & 1)] \
- & (1 << (((byte)[1]) & 0x1F)))
+ & (1u << (((byte)[1]) & 0x1F)))
@@ -71,3 +65,3 @@
+ ((((byte)[2]) >> 5) & 1)] \
- & (1 << (((byte)[2]) & 0x1F)))
+ & (1u << (((byte)[2]) & 0x1F)))
@@ -124,3 +118,3 @@
static int PTRFASTCALL
-isNever(const ENCODING *enc, const char *p)
+isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p))
{
@@ -130,3 +124,3 @@ isNever(const ENCODING *enc, const char *p)
static int PTRFASTCALL
-utf8_isName2(const ENCODING *enc, const char *p)
+utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p)
{
@@ -136,3 +130,3 @@ utf8_isName2(const ENCODING *enc, const char *p)
static int PTRFASTCALL
-utf8_isName3(const ENCODING *enc, const char *p)
+utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p)
{
@@ -144,3 +138,3 @@ utf8_isName3(const ENCODING *enc, const char *p)
static int PTRFASTCALL
-utf8_isNmstrt2(const ENCODING *enc, const char *p)
+utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p)
{
@@ -150,3 +144,3 @@ utf8_isNmstrt2(const ENCODING *enc, const char *p)
static int PTRFASTCALL
-utf8_isNmstrt3(const ENCODING *enc, const char *p)
+utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p)
{
@@ -158,3 +152,3 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p)
static int PTRFASTCALL
-utf8_isInvalid2(const ENCODING *enc, const char *p)
+utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p)
{
@@ -164,3 +158,3 @@ utf8_isInvalid2(const ENCODING *enc, const char *p)
static int PTRFASTCALL
-utf8_isInvalid3(const ENCODING *enc, const char *p)
+utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p)
{
@@ -170,3 +164,3 @@ utf8_isInvalid3(const ENCODING *enc, const char *p)
static int PTRFASTCALL
-utf8_isInvalid4(const ENCODING *enc, const char *p)
+utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p)
{
@@ -224,2 +218,13 @@ struct normal_encoding {
+#define NULL_VTABLE /* expands to nine comma-separated NULL initializers, one per slot named in the labels below; written right after STANDARD_VTABLE(...) in the encoding tables of this diff */ \
+ /* isName2 */ NULL, \
+ /* isName3 */ NULL, \
+ /* isName4 */ NULL, \
+ /* isNmstrt2 */ NULL, \
+ /* isNmstrt3 */ NULL, \
+ /* isNmstrt4 */ NULL, \
+ /* isInvalid2 */ NULL, \
+ /* isInvalid3 */ NULL, \
+ /* isInvalid4 */ NULL
+
static int FASTCALL checkCharRefNumber(int);
@@ -320,4 +325,39 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
-static void PTRCALL
-utf8_toUtf8(const ENCODING *enc,
+void
+align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
+{ /* Moves *fromLimRef backwards so that the range [from, *fromLimRef) does
+   * not end with a multi-byte UTF-8 sequence that is cut short.
+   *
+   * from       - start of the input; the scan never goes before it.
+   * fromLimRef - in: current end of input; out: the adjusted end, which is
+   *              never before `from`.
+   *
+   * The scan looks at the byte just before the candidate limit and steps
+   * backwards one byte at a time.  `walked` counts the bytes stepped over
+   * since the start or since the last reset. */
+ const char * fromLim = *fromLimRef;
+ size_t walked = 0;
+ for (; fromLim > from; fromLim--, walked++) {
+ const unsigned char prev = (unsigned char)fromLim[-1]; /* byte just before the candidate limit */
+ if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
+ if (walked + 1 >= 4) { /* lead byte plus the bytes walked over cover all 4 bytes */
+ fromLim += 4 - 1; /* fromLim is one past the lead byte; put it one past the 4th byte */
+ break;
+ } else {
+ walked = 0; /* sequence is cut short; the loop step then moves the limit before this lead byte */
+ }
+ } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */
+ if (walked + 1 >= 3) { /* lead byte plus the bytes walked over cover all 3 bytes */
+ fromLim += 3 - 1; /* put the limit one past the 3rd byte of the sequence */
+ break;
+ } else {
+ walked = 0; /* sequence is cut short; the loop step then moves the limit before this lead byte */
+ }
+ } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */
+ if (walked + 1 >= 2) { /* lead byte plus the bytes walked over cover both bytes */
+ fromLim += 2 - 1; /* put the limit one past the 2nd byte of the sequence */
+ break;
+ } else {
+ walked = 0; /* sequence is cut short; the loop step then moves the limit before this lead byte */
+ }
+ } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */
+ break; /* the limit stays right after this single-byte character */
+ }
+ } /* any other byte (e.g. 0b10xxxxxx) matches no branch above, so the scan keeps walking back */
+ *fromLimRef = fromLim; /* equals `from` when the loop ran out without a break */
+}
+
+static enum XML_Convert_Result PTRCALL
+utf8_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
@@ -327,9 +367,8 @@ utf8_toUtf8(const ENCODING *enc,
const char *from;
- if (fromLim - *fromP > toLim - *toP) {
- /* Avoid copying partial characters. */
- for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
- if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
- break;
- }
- for (to = *toP, from = *fromP; from != fromLim; from++, to++)
+ const char *fromLimInitial = fromLim;
+
+ /* Avoid copying partial characters. */
+ align_limit_to_full_utf8_characters(*fromP, &fromLim);
+
+ for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
*to = *from;
@@ -337,5 +376,12 @@ utf8_toUtf8(const ENCODING *enc,
*toP = to;
+
+ if (fromLim < fromLimInitial)
+ return XML_CONVERT_INPUT_INCOMPLETE;
+ else if ((to == toLim) && (from < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING *enc,
@@ -344,7 +390,12 @@ utf8_toUtf16(const ENCODING *enc,
{
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
unsigned short *to = *toP;
const char *from = *fromP;
- while (from != fromLim && to != toLim) {
+ while (from < fromLim && to < toLim) {
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
case BT_LEAD2:
+ if (fromLim - from < 2) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
+ goto after;
+ }
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
@@ -353,2 +404,6 @@ utf8_toUtf16(const ENCODING *enc,
case BT_LEAD3:
+ if (fromLim - from < 3) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
+ goto after;
+ }
*to++ = (unsigned short)(((from[0] & 0xf) << 12)
@@ -360,4 +415,10 @@ utf8_toUtf16(const ENCODING *enc,
unsigned long n;
- if (to + 1 == toLim)
+ if (toLim - to < 2) {
+ res = XML_CONVERT_OUTPUT_EXHAUSTED;
+ goto after;
+ }
+ if (fromLim - from < 4) {
+ res = XML_CONVERT_INPUT_INCOMPLETE;
goto after;
+ }
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
@@ -376,2 +437,4 @@ utf8_toUtf16(const ENCODING *enc,
}
+ if (from < fromLim)
+ res = XML_CONVERT_OUTPUT_EXHAUSTED;
after:
@@ -379,2 +442,3 @@ after:
*toP = to;
+ return res;
}
@@ -427,4 +491,4 @@ static const struct normal_encoding internal_utf8_encoding = {
-static void PTRCALL
-latin1_toUtf8(const ENCODING *enc,
+static enum XML_Convert_Result PTRCALL
+latin1_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
@@ -435,3 +499,3 @@ latin1_toUtf8(const ENCODING *enc,
if (*fromP == fromLim)
- break;
+ return XML_CONVERT_COMPLETED;
c = (unsigned char)**fromP;
@@ -439,3 +503,3 @@ latin1_toUtf8(const ENCODING *enc,
if (toLim - *toP < 2)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = (char)((c >> 6) | UTF8_cval2);
@@ -446,3 +510,3 @@ latin1_toUtf8(const ENCODING *enc,
if (*toP == toLim)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
*(*toP)++ = *(*fromP)++;
@@ -452,4 +516,4 @@ latin1_toUtf8(const ENCODING *enc,
-static void PTRCALL
-latin1_toUtf16(const ENCODING *enc,
+static enum XML_Convert_Result PTRCALL
+latin1_toUtf16(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
@@ -457,4 +521,9 @@ latin1_toUtf16(const ENCODING *enc,
{
- while (*fromP != fromLim && *toP != toLim)
+ while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = (unsigned char)*(*fromP)++;
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
@@ -469,3 +538,3 @@ static const struct normal_encoding latin1_encoding_ns = {
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
@@ -482,7 +551,7 @@ static const struct normal_encoding latin1_encoding = {
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
-static void PTRCALL
-ascii_toUtf8(const ENCODING *enc,
+static enum XML_Convert_Result PTRCALL
+ascii_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
@@ -490,4 +559,9 @@ ascii_toUtf8(const ENCODING *enc,
{
- while (*fromP != fromLim && *toP != toLim)
+ while (*fromP < fromLim && *toP < toLim)
*(*toP)++ = *(*fromP)++;
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
@@ -502,3 +576,3 @@ static const struct normal_encoding ascii_encoding_ns = {
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
@@ -515,3 +589,3 @@ static const struct normal_encoding ascii_encoding = {
},
- STANDARD_VTABLE(sb_)
+ STANDARD_VTABLE(sb_) NULL_VTABLE
};
@@ -538,4 +612,4 @@ unicode_byte_type(char hi, char lo)
#define DEFINE_UTF16_TO_UTF8(E) \
-static void PTRCALL \
-E ## toUtf8(const ENCODING *enc, \
+static enum XML_Convert_Result PTRCALL \
+E ## toUtf8(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \
@@ -543,4 +617,5 @@ E ## toUtf8(const ENCODING *enc, \
{ \
- const char *from; \
- for (from = *fromP; from != fromLim; from += 2) { \
+ const char *from = *fromP; \
+ fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \
+ for (; from < fromLim; from += 2) { \
int plane; \
@@ -554,3 +629,3 @@ E ## toUtf8(const ENCODING *enc, \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
@@ -564,3 +639,3 @@ E ## toUtf8(const ENCODING *enc, \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
@@ -572,3 +647,3 @@ E ## toUtf8(const ENCODING *enc, \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
} \
@@ -582,3 +657,7 @@ E ## toUtf8(const ENCODING *enc, \
*fromP = from; \
- return; \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
+ } \
+ if (fromLim - from < 4) { \
+ *fromP = from; \
+ return XML_CONVERT_INPUT_INCOMPLETE; \
} \
@@ -598,2 +677,6 @@ E ## toUtf8(const ENCODING *enc, \
*fromP = from; \
+ if (from < fromLim) \
+ return XML_CONVERT_INPUT_INCOMPLETE; \
+ else \
+ return XML_CONVERT_COMPLETED; \
}
@@ -601,4 +684,4 @@ E ## toUtf8(const ENCODING *enc, \
#define DEFINE_UTF16_TO_UTF16(E) \
-static void PTRCALL \
-E ## toUtf16(const ENCODING *enc, \
+static enum XML_Convert_Result PTRCALL \
+E ## toUtf16(const ENCODING *UNUSED_P(enc), \
const char **fromP, const char *fromLim, \
@@ -606,8 +689,16 @@ E ## toUtf16(const ENCODING *enc, \
{ \
+ enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \
+ fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \
/* Avoid copying first half only of surrogate */ \
if (fromLim - *fromP > ((toLim - *toP) << 1) \
- && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
+ && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
fromLim -= 2; \
- for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
+ res = XML_CONVERT_INPUT_INCOMPLETE; \
+ } \
+ for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
+ if ((*toP == toLim) && (*fromP < fromLim)) \
+ return XML_CONVERT_OUTPUT_EXHAUSTED; \
+ else \
+ return res; \
}
@@ -728,3 +819,3 @@ static const struct normal_encoding little2_encoding_ns = {
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
@@ -747,3 +838,3 @@ static const struct normal_encoding little2_encoding = {
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
@@ -760,3 +851,3 @@ static const struct normal_encoding internal_little2_encoding_ns = {
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
@@ -773,3 +864,3 @@ static const struct normal_encoding internal_little2_encoding = {
},
- STANDARD_VTABLE(little2_)
+ STANDARD_VTABLE(little2_) NULL_VTABLE
};
@@ -869,3 +960,3 @@ static const struct normal_encoding big2_encoding_ns = {
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
@@ -888,3 +979,3 @@ static const struct normal_encoding big2_encoding = {
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
@@ -901,3 +992,3 @@ static const struct normal_encoding internal_big2_encoding_ns = {
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
@@ -914,3 +1005,3 @@ static const struct normal_encoding internal_big2_encoding = {
},
- STANDARD_VTABLE(big2_)
+ STANDARD_VTABLE(big2_) NULL_VTABLE
};
@@ -940,3 +1031,3 @@ streqci(const char *s1, const char *s2)
static void PTRCALL
-initUpdatePosition(const ENCODING *enc, const char *ptr,
+initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr,
const char *end, POSITION *pos)
@@ -1290,3 +1381,3 @@ unknown_isInvalid(const ENCODING *enc, const char *p)
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING *enc,
@@ -1301,3 +1392,3 @@ unknown_toUtf8(const ENCODING *enc,
if (*fromP == fromLim)
- break;
+ return XML_CONVERT_COMPLETED;
utf8 = uenc->utf8[(unsigned char)**fromP];
@@ -1308,3 +1399,3 @@ unknown_toUtf8(const ENCODING *enc,
if (n > toLim - *toP)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
utf8 = buf;
@@ -1315,3 +1406,3 @@ unknown_toUtf8(const ENCODING *enc,
if (n > toLim - *toP)
- break;
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
(*fromP)++;
@@ -1324,3 +1415,3 @@ unknown_toUtf8(const ENCODING *enc,
-static void PTRCALL
+static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING *enc,
@@ -1330,3 +1421,3 @@ unknown_toUtf16(const ENCODING *enc,
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
- while (*fromP != fromLim && *toP != toLim) {
+ while (*fromP < fromLim && *toP < toLim) {
unsigned short c = uenc->utf16[(unsigned char)**fromP];
@@ -1342,2 +1433,7 @@ unknown_toUtf16(const ENCODING *enc,
}
+
+ if ((*toP == toLim) && (*fromP < fromLim))
+ return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else
+ return XML_CONVERT_COMPLETED;
}
@@ -1505,3 +1601,3 @@ initScan(const ENCODING * const *encodingTable,
- if (ptr == end)
+ if (ptr >= end)
return XML_TOK_NONE;