From a616265f7a5a48c5769262027795f99df91a6ae8 Mon Sep 17 00:00:00 2001
From: Edwin Mons <edwin.mons@isode.com>
Date: Wed, 30 Oct 2019 01:09:29 +0100
Subject: Fix libxml2 crash on certain invalid input

When the libxml2 parser is fed data with an odd combination of invalid
input (triggering the parser to assume 2 or 4 byte encodings were in
play), I/O errors might occur.  In that case, the parser context will
not have its internal error set, but the call to xmlParseChunk will
return the right error.  The parse() method now uses the output of
xmlParseChunk directly instead of trying to obtain the error from the
parser context afterwards.

Encoding errors during parsing were emitted to stderr because the
default error handlers were still in place.  These have been replaced
with custom handlers that suppress the output.

Test-Information:

Unit tests pass on Debian 9

Change-Id: Ie01db4be467e5197203c9d07d3356f5d44d8b9b4

diff --git a/Swiften/Parser/LibXMLParser.cpp b/Swiften/Parser/LibXMLParser.cpp
index b8d941c..158958b 100644
--- a/Swiften/Parser/LibXMLParser.cpp
+++ b/Swiften/Parser/LibXMLParser.cpp
@@ -100,12 +100,20 @@ static void handleError(void*, const char* /*m*/, ... ) {
 static void handleWarning(void*, const char*, ... ) {
 }
 
+static void handleGenericError(void*, const char*, ... ) {
+}
+
+static void handleStructuredError(void*, xmlErrorPtr) {
+}
+
 bool LibXMLParser::initialized = false;
 
 LibXMLParser::LibXMLParser(XMLParserClient* client, bool allowComments) : XMLParser(client, allowComments), p(new Private()) {
     // Initialize libXML for multithreaded applications
     if (!initialized) {
         xmlInitParser();
+        xmlSetGenericErrorFunc(nullptr, handleGenericError);
+        xmlSetStructuredErrorFunc(nullptr, handleStructuredError);
         initialized = true;
     }
 
@@ -136,12 +144,12 @@ bool LibXMLParser::parse(const std::string& data, bool finalData) {
     if (data.size() > std::numeric_limits<int>::max()) {
         return false;
     }
-    if (xmlParseChunk(p->context_, data.c_str(), static_cast<int>(data.size()), finalData) == XML_ERR_OK) {
+    auto error = xmlParseChunk(p->context_, data.c_str(), static_cast<int>(data.size()), finalData);
+    if (error == XML_ERR_OK) {
         return true;
     }
     if (stopped_) return false;
-    xmlError* error = xmlCtxtGetLastError(p->context_);
-    if (error->code == XML_WAR_NS_URI || error->code == XML_WAR_NS_URI_RELATIVE) {
+    if (error == XML_WAR_NS_URI || error == XML_WAR_NS_URI_RELATIVE) {
         xmlCtxtResetLastError(p->context_);
         p->context_->errNo = XML_ERR_OK;
         return true;
diff --git a/Swiften/Parser/UnitTest/XMLParserTest.cpp b/Swiften/Parser/UnitTest/XMLParserTest.cpp
index d38c1cc..89229c9 100644
--- a/Swiften/Parser/UnitTest/XMLParserTest.cpp
+++ b/Swiften/Parser/UnitTest/XMLParserTest.cpp
@@ -45,6 +45,7 @@ class XMLParserTest : public CppUnit::TestFixture {
         CPPUNIT_TEST(testParse_Doctype);
         CPPUNIT_TEST(testParse_ProcessingInstructions);
         CPPUNIT_TEST(testParse_ProcessingPrefixedElement);
+        CPPUNIT_TEST(testParse_InvalidlyEncodedInput);
         CPPUNIT_TEST_SUITE_END();
 
     public:
@@ -410,6 +411,20 @@ class XMLParserTest : public CppUnit::TestFixture {
             CPPUNIT_ASSERT_EQUAL(std::string("uriPrefix"), client_.events[1].ns);
         }
 
+        void testParse_InvalidlyEncodedInput() {
+            ParserType testling(&client_);
+
+            // The following input was generated by a fuzzer, and triggered a crash in the LibXML2 parser because
+            // some types of error (buffer I/O errors, for instance) will not update the error in the parser context,
+            // and the code used to rely on that error always being set if parsing failed.
+            // This particular input will trick the parser into believing the encoding is UTF-16LE, which eventually will lead
+            // to two invalid encodings, followed by an I/O error.  The latter will end parsing without updating the
+            // error in the parsing context, which used to trigger a crash.
+            testling.parse(std::string("<\0?\0\x80q type='get' id='aab9a'<<query xmlns='jabber:iq:roster'/>\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9b\x9br:i><quq:private'><storage xml s='s'\x00\x10</query></iq>", 271));
+            testling.parse("<iq type='get'\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e\x9e to='ad5d8d2b25' ext='ca cs min@wonderland.t' id='aabda'><vCard xmlnr='vcard-temp'/>O/iq>");
+            testling.parse("<\xff\xff\xff\x7fype:'get' to='won\x84" "erland.lit' id='aabea'><tuery xmlns='\xd8Vtp://jabber.org/p\x88ot\x8b" "col/disco#info'/>abber.org/protocol/disco#Nnfo'/></iq>");
+        }
+
     private:
         class Client : public XMLParserClient {
             public:
-- 
cgit v0.10.2-6-g49f6