From a616265f7a5a48c5769262027795f99df91a6ae8 Mon Sep 17 00:00:00 2001 From: Edwin Mons Date: Wed, 30 Oct 2019 01:09:29 +0100 Subject: Fix libxml2 crash on certain invalid input When the libxml2 parser is fed data with an odd combination of invalid input (triggering the parser to assume 2 or 4 byte encodings were in play), I/O errors might occur. In that case, the parser context will not have its internal error set, but the call to xmlParseChunk will return the right error. The parse() method now uses the output of xmlParseChunk directly instead of trying to obtain the error from the parser context afterwards. Encoding errors during parsing were emitted to stderr because the default error handlers were still in place. These have been replaced with custom handlers that suppress the output. Test-Information: Unit tests pass on Debian 9 Change-Id: Ie01db4be467e5197203c9d07d3356f5d44d8b9b4 diff --git a/Swiften/Parser/LibXMLParser.cpp b/Swiften/Parser/LibXMLParser.cpp index b8d941c..158958b 100644 --- a/Swiften/Parser/LibXMLParser.cpp +++ b/Swiften/Parser/LibXMLParser.cpp @@ -100,12 +100,20 @@ static void handleError(void*, const char* /*m*/, ... ) { static void handleWarning(void*, const char*, ... ) { } +static void handleGenericError(void*, const char*, ... ) { +} + +static void handleStructuredError(void*, xmlErrorPtr) { +} + bool LibXMLParser::initialized = false; LibXMLParser::LibXMLParser(XMLParserClient* client, bool allowComments) : XMLParser(client, allowComments), p(new Private()) { // Initialize libXML for multithreaded applications if (!initialized) { xmlInitParser(); + xmlSetGenericErrorFunc(nullptr, handleGenericError); + xmlSetStructuredErrorFunc(nullptr, handleStructuredError); initialized = true; } @@ -136,12 +144,12 @@ bool LibXMLParser::parse(const std::string& data, bool finalData) { if (data.size() > std::numeric_limits::max()) { return false; } - if (xmlParseChunk(p->context_, data.c_str(), static_cast(data.size()), finalData) == XML_ERR_OK) { + auto error = xmlParseChunk(p->context_, data.c_str(), static_cast(data.size()), finalData); + if (error == XML_ERR_OK) { return true; } if (stopped_) return false; - xmlError* error = xmlCtxtGetLastError(p->context_); - if (error->code == XML_WAR_NS_URI || error->code == XML_WAR_NS_URI_RELATIVE) { + if (error == XML_WAR_NS_URI || error == XML_WAR_NS_URI_RELATIVE) { xmlCtxtResetLastError(p->context_); p->context_->errNo = XML_ERR_OK; return true; diff --git a/Swiften/Parser/UnitTest/XMLParserTest.cpp b/Swiften/Parser/UnitTest/XMLParserTest.cpp index d38c1cc..89229c9 100644 --- a/Swiften/Parser/UnitTest/XMLParserTest.cpp +++ b/Swiften/Parser/UnitTest/XMLParserTest.cpp @@ -45,6 +45,7 @@ class XMLParserTest : public CppUnit::TestFixture { CPPUNIT_TEST(testParse_Doctype); CPPUNIT_TEST(testParse_ProcessingInstructions); CPPUNIT_TEST(testParse_ProcessingPrefixedElement); + CPPUNIT_TEST(testParse_InvalidlyEncodedInput); CPPUNIT_TEST_SUITE_END(); public: @@ -410,6 +411,20 @@ class XMLParserTest : public CppUnit::TestFixture { CPPUNIT_ASSERT_EQUAL(std::string("uriPrefix"), client_.events[1].ns); } + void testParse_InvalidlyEncodedInput() { + ParserType testling(&client_); + + // The following input was generated by a fuzzer, and triggered a crash in the LibXML2 parser because + // some types of error (buffer I/O errors, for instance) will not update the error in the parser context, + // and the code used to rely on that error always being set if parsing failed. + // This particular input will trick the parser into believing the encoding is UTF-16LE, which eventually will lead + // to two invalid encodings, followed by an I/O error. The latter will end parsing without updating the + // error in the parsing context, which used to trigger a crash. + testling.parse(std::string("<\0?\0\x80q type='get' id='aab9a'<\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9br:i>", 271)); + testling.parse("O/iq>"); + testling.parse("<\xff\xff\xff\x7fype:'get' to='won\x84" "erland.lit' id='aabea'>abber.org/protocol/disco#Nnfo'/>"); + } + private: class Client : public XMLParserClient { public: -- cgit v0.10.2-6-g49f6