summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTobias Markmann <tm@ayena.de>2019-07-26 09:04:38 (GMT)
committerTobias Markmann <tm@ayena.de>2019-07-26 09:31:03 (GMT)
commite74a38b60d600d547b3b2e4dfdd7ddb543855e3f (patch)
treee46b154a4f52026bc5d4be7e2768d45a8d1c28f2 /3rdParty/Expat/src/xmltok.c
parentf6fb85ba98fdd6601c4b8323c51c8367ccc4b52e (diff)
downloadswift-e74a38b60d600d547b3b2e4dfdd7ddb543855e3f.zip
swift-e74a38b60d600d547b3b2e4dfdd7ddb543855e3f.tar.bz2
Update 3rdParty/Expat to 2.2.7
Test-Information: None yet. Change-Id: Ia5b570c918b8059561b52062e8d43496f188ee4a
Diffstat (limited to '3rdParty/Expat/src/xmltok.c')
-rw-r--r--3rdParty/Expat/src/xmltok.c103
1 files changed, 81 insertions, 22 deletions
diff --git a/3rdParty/Expat/src/xmltok.c b/3rdParty/Expat/src/xmltok.c
index cdf0720..6b415d8 100644
--- a/3rdParty/Expat/src/xmltok.c
+++ b/3rdParty/Expat/src/xmltok.c
@@ -1,14 +1,53 @@
-/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
- See the file COPYING for copying permission.
+/*
+ __ __ _
+ ___\ \/ /_ __ __ _| |_
+ / _ \\ /| '_ \ / _` | __|
+ | __// \| |_) | (_| | |_
+ \___/_/\_\ .__/ \__,_|\__|
+ |_| XML parser
+
+ Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
+ Copyright (c) 2000-2017 Expat development team
+ Licensed under the MIT license:
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to permit
+ persons to whom the Software is furnished to do so, subject to the
+ following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+ NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <stddef.h>
+#include <string.h> /* memcpy */
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1700)
+ /* for vs2012/11.0/1700 and earlier Visual Studio compilers */
+# define bool int
+# define false 0
+# define true 1
+#else
+# include <stdbool.h>
+#endif
+
#ifdef _WIN32
#include "winconfig.h"
#else
#ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h>
#endif
#endif /* ndef _WIN32 */
@@ -21,19 +60,18 @@
#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#else
#define IGNORE_SECTION_TOK_VTABLE /* as nothing */
#endif
#define VTABLE1 \
{ PREFIX(prologTok), PREFIX(contentTok), \
PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
{ PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
- PREFIX(sameName), \
PREFIX(nameMatchesAscii), \
PREFIX(nameLength), \
PREFIX(skipS), \
PREFIX(getAtts), \
PREFIX(charRefNumber), \
PREFIX(predefinedEntityName), \
PREFIX(updatePosition), \
PREFIX(isPublicId)
@@ -318,19 +356,19 @@ sb_charMatches(const ENCODING *enc, const char *p, int c)
enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
UTF8_cval1 = 0x00,
UTF8_cval2 = 0xc0,
UTF8_cval3 = 0xe0,
UTF8_cval4 = 0xf0
};
void
-align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
+_INTERNAL_trim_to_complete_utf8_characters(const char * from, const char ** fromLimRef)
{
const char * fromLim = *fromLimRef;
size_t walked = 0;
for (; fromLim > from; fromLim--, walked++) {
const unsigned char prev = (unsigned char)fromLim[-1];
if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */
if (walked + 1 >= 4) {
fromLim += 4 - 1;
break;
@@ -357,34 +395,49 @@ align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef)
}
*fromLimRef = fromLim;
}
static enum XML_Convert_Result PTRCALL
utf8_toUtf8(const ENCODING *UNUSED_P(enc),
const char **fromP, const char *fromLim,
char **toP, const char *toLim)
{
- char *to;
- const char *from;
- const char *fromLimInitial = fromLim;
+ bool input_incomplete = false;
+ bool output_exhausted = false;
+
+ /* Avoid copying partial characters (due to limited space). */
+ const ptrdiff_t bytesAvailable = fromLim - *fromP;
+ const ptrdiff_t bytesStorable = toLim - *toP;
+ if (bytesAvailable > bytesStorable) {
+ fromLim = *fromP + bytesStorable;
+ output_exhausted = true;
+ }
- /* Avoid copying partial characters. */
- align_limit_to_full_utf8_characters(*fromP, &fromLim);
+ /* Avoid copying partial characters (from incomplete input). */
+ {
+ const char * const fromLimBefore = fromLim;
+ _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
+ if (fromLim < fromLimBefore) {
+ input_incomplete = true;
+ }
+ }
- for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++)
- *to = *from;
- *fromP = from;
- *toP = to;
+ {
+ const ptrdiff_t bytesToCopy = fromLim - *fromP;
+ memcpy(*toP, *fromP, bytesToCopy);
+ *fromP += bytesToCopy;
+ *toP += bytesToCopy;
+ }
- if (fromLim < fromLimInitial)
- return XML_CONVERT_INPUT_INCOMPLETE;
- else if ((to == toLim) && (from < fromLim))
+ if (output_exhausted) /* needs to go first */
return XML_CONVERT_OUTPUT_EXHAUSTED;
+ else if (input_incomplete)
+ return XML_CONVERT_INPUT_INCOMPLETE;
else
return XML_CONVERT_COMPLETED;
}
static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
@@ -1013,19 +1066,23 @@ static const struct normal_encoding internal_big2_encoding = {
static int FASTCALL
streqci(const char *s1, const char *s2)
{
for (;;) {
char c1 = *s1++;
char c2 = *s2++;
if (ASCII_a <= c1 && c1 <= ASCII_z)
c1 += ASCII_A - ASCII_a;
if (ASCII_a <= c2 && c2 <= ASCII_z)
- c2 += ASCII_A - ASCII_a;
+ /* The following line will never get executed. streqci() is
+ * only called from two places, both of which guarantee to put
+ * upper-case strings into s2.
+ */
+ c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
if (c1 != c2)
return 0;
if (!c1)
break;
}
return 1;
}
static void PTRCALL
@@ -1285,19 +1342,19 @@ XmlUtf8Encode(int c, char *buf)
{
enum {
/* minN is minimum legal resulting value for N byte sequence */
min2 = 0x80,
min3 = 0x800,
min4 = 0x10000
};
if (c < 0)
- return 0;
+ return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
if (c < min2) {
buf[0] = (char)(c | UTF8_cval1);
return 1;
}
if (c < min3) {
buf[0] = (char)((c >> 6) | UTF8_cval2);
buf[1] = (char)((c & 0x3f) | 0x80);
return 2;
}
@@ -1308,19 +1365,19 @@ XmlUtf8Encode(int c, char *buf)
return 3;
}
if (c < 0x110000) {
buf[0] = (char)((c >> 18) | UTF8_cval4);
buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
buf[3] = (char)((c & 0x3f) | 0x80);
return 4;
}
- return 0;
+ return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
}
int FASTCALL
XmlUtf16Encode(int charNum, unsigned short *buf)
{
if (charNum < 0)
return 0;
if (charNum < 0x10000) {
buf[0] = (unsigned short)charNum;
@@ -1401,21 +1458,20 @@ unknown_toUtf8(const ENCODING *enc,
utf8 = buf;
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
- (BT_LEAD2 - 2));
}
else {
if (n > toLim - *toP)
return XML_CONVERT_OUTPUT_EXHAUSTED;
(*fromP)++;
}
- do {
- *(*toP)++ = *utf8++;
- } while (--n != 0);
+ memcpy(*toP, utf8, n);
+ *toP += n;
}
}
static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING *enc,
const char **fromP, const char *fromLim,
unsigned short **toP, const unsigned short *toLim)
{
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
@@ -1459,18 +1515,21 @@ XmlInitUnknownEncoding(void *mem,
e->normal.type[i] = BT_MALFORM;
/* This shouldn't really get used. */
e->utf16[i] = 0xFFFF;
e->utf8[i][0] = 1;
e->utf8[i][1] = 0;
}
else if (c < 0) {
if (c < -4)
return 0;
+ /* Multi-byte sequences need a converter function */
+ if (!convert)
+ return 0;
e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
e->utf8[i][0] = 0;
e->utf16[i] = 0;
}
else if (c < 0x80) {
if (latin1_encoding.type[c] != BT_OTHER
&& latin1_encoding.type[c] != BT_NONXML
&& c != i)
return 0;