diff options
Diffstat (limited to '3rdParty/Expat/src/xmltok_impl.c')
-rw-r--r-- | 3rdParty/Expat/src/xmltok_impl.c | 348 |
1 files changed, 164 insertions, 184 deletions
diff --git a/3rdParty/Expat/src/xmltok_impl.c b/3rdParty/Expat/src/xmltok_impl.c index 9c2895b..4d9ae7d 100644 --- a/3rdParty/Expat/src/xmltok_impl.c +++ b/3rdParty/Expat/src/xmltok_impl.c @@ -1,8 +1,35 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. +/* This file is included! + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + + Copyright (c) 1997-2000 Thai Open Source Software Center Ltd + Copyright (c) 2000-2017 Expat development team + Licensed under the MIT license: + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to permit + persons to whom the Software is furnished to do so, subject to the + following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* This file is included! */ #ifdef XML_TOK_IMPL_C #ifndef IS_INVALID_CHAR @@ -47,6 +74,7 @@ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ + /* fall through */ \ case BT_NMSTRT: \ case BT_HEX: \ case BT_DIGIT: \ @@ -75,6 +103,7 @@ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ + /* fall through */ \ case BT_NMSTRT: \ case BT_HEX: \ ptr += MINBPC(enc); \ @@ -87,27 +116,45 @@ #define PREFIX(ident) ident #endif + +#define HAS_CHARS(enc, ptr, end, count) \ + (end - ptr >= count * MINBPC(enc)) + +#define HAS_CHAR(enc, ptr, end) \ + HAS_CHARS(enc, ptr, end, 1) + +#define REQUIRE_CHARS(enc, ptr, end, count) \ + { \ + if (! HAS_CHARS(enc, ptr, end, count)) { \ + return XML_TOK_PARTIAL; \ + } \ + } + +#define REQUIRE_CHAR(enc, ptr, end) \ + REQUIRE_CHARS(enc, ptr, end, 1) + + /* ptr points to character following "<!-" */ static int PTRCALL PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (HAS_CHAR(enc, ptr, end)) { if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_MINUS: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -131,8 +178,7 @@ static int PTRCALL PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -147,11 +193,10 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { case BT_PERCNT: - if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 2); /* don't allow <!ENTITY% foo "whatever"> */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: @@ -175,7 +220,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, +PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end, int *tokPtr) { int upper = 0; @@ -225,15 +270,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, { int tok; const char *target = ptr; - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: @@ -242,13 +286,12 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, return XML_TOK_INVALID; } ptr += MINBPC(enc); - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_QUEST: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; @@ -266,8 +309,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, return XML_TOK_INVALID; } ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; @@ -282,15 +324,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, +PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end, const char **nextTokPtr) { static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; int i; /* CDATA[ */ - if (end - ptr < 6 * MINBPC(enc)) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 6); for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { *nextTokPtr = ptr; @@ -305,7 +346,7 @@ static int PTRCALL PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -319,13 +360,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, switch (BYTE_TYPE(enc, ptr)) { case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; @@ -334,8 +373,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, return XML_TOK_CDATA_SECT_CLOSE; case BT_CR: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; @@ -348,7 +386,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, ptr += MINBPC(enc); break; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -383,19 +421,18 @@ static int PTRCALL PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: break; @@ -432,7 +469,7 @@ static int PTRCALL PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -441,7 +478,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -464,7 +501,7 @@ static int PTRCALL PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (HAS_CHAR(enc, ptr, end)) { if (CHAR_MATCHES(enc, ptr, ASCII_x)) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); switch (BYTE_TYPE(enc, ptr)) { @@ -474,7 +511,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: break; @@ -496,8 +533,7 @@ static int PTRCALL PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_NUM: @@ -506,7 +542,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: @@ -529,7 +565,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS int hadColon = 0; #endif - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS @@ -540,8 +576,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, } hadColon = 1; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: @@ -555,8 +590,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, int t; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); t = BYTE_TYPE(enc, ptr); if (t == BT_EQUALS) break; @@ -570,7 +604,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_INVALID; } } - /* fall through */ + /* fall through */ case BT_EQUALS: { int open; @@ -579,8 +613,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, #endif for (;;) { ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); open = BYTE_TYPE(enc, ptr); if (open == BT_QUOT || open == BT_APOS) break; @@ -598,8 +631,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, /* in attribute value */ for (;;) { int t; - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); t = BYTE_TYPE(enc, ptr); if (t == open) break; @@ -624,8 +656,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, } } ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: @@ -642,8 +673,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to closing quote */ for (;;) { ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: @@ -655,8 +685,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, case BT_SOL: sol: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -688,13 +717,12 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS int hadColon; #endif - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_EXCL: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -716,7 +744,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, hadColon = 0; #endif /* we have a start-tag */ - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS @@ -727,8 +755,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, } hadColon = 1; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: @@ -740,7 +767,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_CR: case BT_LF: { ptr += MINBPC(enc); - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_GT: @@ -765,8 +792,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_SOL: sol: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -785,7 +811,7 @@ static int PTRCALL PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -803,7 +829,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_CR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -814,12 +840,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_DATA_NEWLINE; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); @@ -832,7 +858,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); break; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -845,12 +871,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_RSQB: - if (ptr + MINBPC(enc) != end) { + if (HAS_CHARS(enc, ptr, end, 2)) { if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { ptr += MINBPC(enc); break; } - if (ptr + 2*MINBPC(enc) != end) { + if (HAS_CHARS(enc, ptr, end, 3)) { if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { ptr += MINBPC(enc); break; @@ -884,8 +910,7 @@ static int PTRCALL PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: @@ -895,7 +920,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: @@ -913,15 +938,14 @@ static int PTRCALL PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_CR: case BT_LF: case BT_S: @@ -941,7 +965,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { int t = BYTE_TYPE(enc, ptr); switch (t) { INVALID_CASES(ptr, nextTokPtr) @@ -950,7 +974,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc, ptr += MINBPC(enc); if (t != open) break; - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { @@ -973,7 +997,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { int tok; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -992,8 +1016,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_LT: { ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_EXCL: return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1021,7 +1044,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_LF: for (;;) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) break; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_LF: @@ -1048,11 +1071,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_BRACKET; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_BRACKET; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 2); if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { *nextTokPtr = ptr + 2*MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; @@ -1065,7 +1087,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_PAREN; case BT_RPAR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_PAREN; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: @@ -1141,7 +1163,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_GT: case BT_RPAR: case BT_COMMA: @@ -1154,8 +1176,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); tok = XML_TOK_PREFIXED_NAME; switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) @@ -1204,10 +1225,18 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; @@ -1232,7 +1261,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1262,10 +1291,18 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the paranoia + * check is still valuable, however. + */ + return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ + } start = ptr; - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; @@ -1294,7 +1331,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1326,15 +1363,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, end = ptr + n; } } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_LT: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { ++level; ptr += MINBPC(enc); @@ -1342,11 +1379,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, } break; case BT_RSQB: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr += MINBPC(enc); if (level == 0) { @@ -1373,7 +1410,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, { ptr += MINBPC(enc); end -= MINBPC(enc); - for (; ptr != end; ptr += MINBPC(enc)) { + for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -1407,6 +1444,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, case BT_NMSTRT: if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; + /* fall through */ default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ @@ -1521,7 +1559,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, } static int PTRFASTCALL -PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) +PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr) { int result = 0; /* skip &# */ @@ -1565,7 +1603,7 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) } static int PTRCALL -PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, +PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end) { switch ((end - ptr)/MINBPC(enc)) { @@ -1619,76 +1657,18 @@ PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) -{ - for (;;) { - switch (BYTE_TYPE(enc, ptr1)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (*ptr1++ != *ptr2++) \ - return 0; - LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) -#undef LEAD_CASE - /* fall through */ - if (*ptr1++ != *ptr2++) - return 0; - break; - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 1) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 2) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 3) { - if (*ptr2++ != *ptr1++) - return 0; - } - } - } - break; - default: - if (MINBPC(enc) == 1 && *ptr1 == *ptr2) - return 1; - switch (BYTE_TYPE(enc, ptr2)) { - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - return 0; - default: - return 1; - } - } - } - /* not reached */ -} - -static int PTRCALL -PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, +PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { - if (ptr1 == end1) - return 0; + if (end1 - ptr1 < MINBPC(enc)) { + /* This line cannot be executed. The incoming data has already + * been tokenized once, so incomplete characters like this have + * already been eliminated from the input. Retaining the + * paranoia check is still valuable, however. + */ + return 0; /* LCOV_EXCL_LINE */ + } if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } @@ -1744,7 +1724,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *end, POSITION *pos) { - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -1760,7 +1740,7 @@ PREFIX(updatePosition)(const ENCODING *enc, case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); - if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) + if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); pos->columnNumber = (XML_Size)-1; break; |