From 23e2766dab6d4a3f6158eca7649cd36b644634d3 Mon Sep 17 00:00:00 2001
From: Joanna Hulboj <joanna.hulboj@isode.com>
Date: Thu, 19 Sep 2019 20:48:44 +0100
Subject: Process attribute and element prefixes

The XML parsers (Expat and LibXML) now process prefix information.
Attribute prefixes are stored in the Attribute objects themselves.
Element prefixes are passed to the client in an additional callback
(only if a prefix is present).

Test-information:
Unit tests pass on Windows 10 and Ubuntu 18.04.1 LTS.

Change-Id: Ib6b5087feed758c31895f426df6a3c7ea975f248

diff --git a/Swiften/Parser/Attribute.h b/Swiften/Parser/Attribute.h
index f54317e..07e63b4 100644
--- a/Swiften/Parser/Attribute.h
+++ b/Swiften/Parser/Attribute.h
@@ -14,6 +14,9 @@ namespace Swift {
             Attribute(const std::string& name, const std::string& ns) : name(name), ns(ns) {
             }
 
+            Attribute(const std::string& name, const std::string& ns, const std::string& prefix) : name(name), ns(ns), prefix(prefix) {
+            }
+
             const std::string& getName() const {
                 return name;
             }
@@ -22,6 +25,10 @@ namespace Swift {
                 return ns;
             }
 
+            const std::string& getPrefix() const {
+                return prefix;
+            }
+
             bool operator==(const Attribute& o) const {
                 return o.name == name && o.ns == ns;
             }
@@ -29,5 +36,6 @@ namespace Swift {
         private:
             std::string name;
             std::string ns;
+            std::string prefix;
     };
 }
diff --git a/Swiften/Parser/AttributeMap.cpp b/Swiften/Parser/AttributeMap.cpp
index f6767de..7814a64 100644
--- a/Swiften/Parser/AttributeMap.cpp
+++ b/Swiften/Parser/AttributeMap.cpp
@@ -54,3 +54,7 @@ boost::optional<std::string> AttributeMap::getAttributeValue(const std::string&
 void AttributeMap::addAttribute(const std::string& name, const std::string& ns, const std::string& value) {
     attributes.push_back(Entry(Attribute(name, ns), value));
 }
+
+void AttributeMap::addAttribute(const std::string& name, const std::string& ns, const std::string& prefix, const std::string& value) {
+    attributes.push_back(Entry(Attribute(name, ns, prefix), value));
+}
diff --git a/Swiften/Parser/AttributeMap.h b/Swiften/Parser/AttributeMap.h
index 804d6aa..26d5826 100644
--- a/Swiften/Parser/AttributeMap.h
+++ b/Swiften/Parser/AttributeMap.h
@@ -43,6 +43,7 @@ namespace Swift {
             boost::optional<std::string> getAttributeValue(const std::string&) const;
 
             void addAttribute(const std::string& name, const std::string& ns, const std::string& value);
+            void addAttribute(const std::string& name, const std::string& ns, const std::string& prefix, const std::string& value);
 
             const std::vector<Entry>& getEntries() const {
                 return attributes;
diff --git a/Swiften/Parser/ExpatParser.cpp b/Swiften/Parser/ExpatParser.cpp
index 640d561..6c3845a 100644
--- a/Swiften/Parser/ExpatParser.cpp
+++ b/Swiften/Parser/ExpatParser.cpp
@@ -11,6 +11,8 @@
 #include <memory>
 #include <string>
 
+#include <boost/algorithm/string.hpp>
+
 #include <expat.h>
 
 #include <Swiften/Base/String.h>
@@ -18,6 +20,33 @@
 
 #pragma clang diagnostic ignored "-Wdisabled-macro-expansion"
 
+namespace {
+struct XmlInfo {
+    std::string prefix;
+    std::string uri;
+    std::string name;
+};
+
+XmlInfo splitExpatInfo(const std::string& s, char sep) {
+    // name
+    // uri|name
+    // uri|name|prefix
+    std::vector<std::string> v;
+    boost::split(v, s, [sep](char c) {return c == sep; });
+    switch (v.size()) {
+    case 1:
+        return{ "", "", std::move(v[0]) };
+    case 2:
+        return{ "", std::move(v[0]), std::move(v[1]) };
+    case 3:
+        return{ std::move(v[2]), std::move(v[0]), std::move(v[1]) };
+    default:
+        return{ "", "", "" };
+    }
+}
+}
+
+
 namespace Swift {
 
 static const char NAMESPACE_SEPARATOR = '\x01';
@@ -27,33 +56,24 @@ struct ExpatParser::Private {
 };
 
 static void handleStartElement(void* parser, const XML_Char* name, const XML_Char** attributes) {
-    std::pair<std::string,std::string> nsTagPair = String::getSplittedAtFirst(name, NAMESPACE_SEPARATOR);
-    if (nsTagPair.second == "") {
-        nsTagPair.second = nsTagPair.first;
-        nsTagPair.first = "";
-    }
+    auto elemInfo = splitExpatInfo(name, NAMESPACE_SEPARATOR);
+
     AttributeMap attributeValues;
     const XML_Char** currentAttribute = attributes;
     while (*currentAttribute) {
-        std::pair<std::string,std::string> nsAttributePair = String::getSplittedAtFirst(*currentAttribute, NAMESPACE_SEPARATOR);
-        if (nsAttributePair.second == "") {
-            nsAttributePair.second = nsAttributePair.first;
-            nsAttributePair.first = "";
-        }
-        attributeValues.addAttribute(nsAttributePair.second, nsAttributePair.first, std::string(*(currentAttribute+1)));
+        auto attribInfo = splitExpatInfo(*currentAttribute, NAMESPACE_SEPARATOR);
+        attributeValues.addAttribute(attribInfo.name, attribInfo.uri, attribInfo.prefix, std::string(*(currentAttribute+1)));
         currentAttribute += 2;
     }
 
-    static_cast<XMLParser*>(parser)->getClient()->handleStartElement(nsTagPair.second, nsTagPair.first, attributeValues);
+    auto* client = static_cast<XMLParser*>(parser)->getClient();
+    client->handleStartElementPrefix(elemInfo.prefix, elemInfo.uri, elemInfo.name, elemInfo.name, elemInfo.uri, attributeValues);
+    client->handleStartElement(elemInfo.name, elemInfo.uri, attributeValues);
 }
 
 static void handleEndElement(void* parser, const XML_Char* name) {
-    std::pair<std::string,std::string> nsTagPair = String::getSplittedAtFirst(name, NAMESPACE_SEPARATOR);
-    if (nsTagPair.second == "") {
-        nsTagPair.second = nsTagPair.first;
-        nsTagPair.first = "";
-    }
-    static_cast<XMLParser*>(parser)->getClient()->handleEndElement(nsTagPair.second, nsTagPair.first);
+    auto elemInfo = splitExpatInfo(name, NAMESPACE_SEPARATOR);
+    static_cast<XMLParser*>(parser)->getClient()->handleEndElement(elemInfo.name, elemInfo.uri);
 }
 
 static void handleCharacterData(void* parser, const XML_Char* data, int len) {
@@ -88,6 +108,7 @@ static void handleDoctypeDeclaration(void* parser, const XML_Char* /*doctypeName
 
 ExpatParser::ExpatParser(XMLParserClient* client, bool allowComments) : XMLParser(client, allowComments), p(new Private()) {
     p->parser_ = XML_ParserCreateNS("UTF-8", NAMESPACE_SEPARATOR);
+    XML_SetReturnNSTriplet(p->parser_, true);
     XML_SetUserData(p->parser_, this);
     XML_SetElementHandler(p->parser_, handleStartElement, handleEndElement);
     XML_SetCharacterDataHandler(p->parser_, handleCharacterData);
diff --git a/Swiften/Parser/LibXMLParser.cpp b/Swiften/Parser/LibXMLParser.cpp
index 4e02059..71515a7 100644
--- a/Swiften/Parser/LibXMLParser.cpp
+++ b/Swiften/Parser/LibXMLParser.cpp
@@ -17,6 +17,12 @@
 #include <Swiften/Base/Log.h>
 #include <Swiften/Parser/XMLParserClient.h>
 
+namespace {
+std::string asString(const unsigned char* s) {
+    return s ? std::string(reinterpret_cast<const char*>(s)) : std::string();
+}
+}
+
 namespace Swift {
 
 struct LibXMLParser::Private {
@@ -24,34 +30,39 @@ struct LibXMLParser::Private {
     xmlParserCtxtPtr context_;
 };
 
-static void handleStartElement(void* parser, const xmlChar* name, const xmlChar*, const xmlChar* xmlns, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar ** attributes) {
+static void handleStartElement(void* parser, const xmlChar* name, const xmlChar* prefix, const xmlChar* xmlns, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar ** attributes) {
     AttributeMap attributeValues;
     if (nbDefaulted != 0) {
         // Just because i don't understand what this means yet :-)
         SWIFT_LOG(error) << "Unexpected nbDefaulted on XML element" << std::endl;
     }
     for (int i = 0; i < nbAttributes*5; i += 5) {
-        std::string attributeNS = "";
-        if (attributes[i+2]) {
-            attributeNS = std::string(reinterpret_cast<const char*>(attributes[i+2]));
-        }
+        std::string attributeName = asString(attributes[i]);
+        std::string attributePrefix = asString(attributes[i+1]);
+        std::string attributeNS = asString(attributes[i+2]);
         assert(attributes[i+4] >= attributes[i+3]);
         attributeValues.addAttribute(
-                std::string(reinterpret_cast<const char*>(attributes[i])),
+                attributeName,
                 attributeNS,
+                attributePrefix,
                 std::string(reinterpret_cast<const char*>(attributes[i+3]),
                     static_cast<size_t>(attributes[i+4]-attributes[i+3])));
     }
+    auto* client = static_cast<XMLParser*>(parser)->getClient();
     for (auto i = 0; i < nbNamespaces * 2; i += 2) {
-        const auto prefix = namespaces[i] ? std::string(reinterpret_cast<const char*>(namespaces[i])) : "";
-        const auto uri = std::string(reinterpret_cast<const char*>(namespaces[i + 1]));
-        static_cast<XMLParser*>(parser)->getClient()->handleNamespaceDeclaration(prefix, uri);
+        const auto prefix = asString(namespaces[i]);
+        const auto uri = asString(namespaces[i + 1]);
+        client->handleNamespaceDeclaration(prefix, uri);
     }
-    static_cast<XMLParser*>(parser)->getClient()->handleStartElement(reinterpret_cast<const char*>(name), (xmlns ? reinterpret_cast<const char*>(xmlns) : std::string()), attributeValues);
+    auto nameStr = asString(name);
+    auto xmlsnsStr = asString(xmlns);
+    auto prefixStr = asString(prefix);
+    client->handleStartElementPrefix(prefixStr, xmlsnsStr, nameStr, nameStr, xmlsnsStr, attributeValues);
+    client->handleStartElement(nameStr, xmlsnsStr, attributeValues);
 }
 
 static void handleEndElement(void *parser, const xmlChar* name, const xmlChar*, const xmlChar* xmlns) {
-    static_cast<XMLParser*>(parser)->getClient()->handleEndElement(reinterpret_cast<const char*>(name), (xmlns ? reinterpret_cast<const char*>(xmlns) : std::string()));
+    static_cast<XMLParser*>(parser)->getClient()->handleEndElement(asString(name), asString(xmlns));
 }
 
 static void handleCharacterData(void* parser, const xmlChar* data, int len) {
diff --git a/Swiften/Parser/UnitTest/AttributeMapTest.cpp b/Swiften/Parser/UnitTest/AttributeMapTest.cpp
index 4529eac..d9335c1 100644
--- a/Swiften/Parser/UnitTest/AttributeMapTest.cpp
+++ b/Swiften/Parser/UnitTest/AttributeMapTest.cpp
@@ -15,6 +15,7 @@ class AttributeMapTest : public CppUnit::TestFixture
 {
         CPPUNIT_TEST_SUITE(AttributeMapTest);
         CPPUNIT_TEST(testGetAttribute_Namespaced);
+        CPPUNIT_TEST(testGetAttribute_Namespaced_Prefix);
         CPPUNIT_TEST(testGetBoolAttribute_True);
         CPPUNIT_TEST(testGetBoolAttribute_1);
         CPPUNIT_TEST(testGetBoolAttribute_False);
@@ -34,6 +35,22 @@ class AttributeMapTest : public CppUnit::TestFixture
             CPPUNIT_ASSERT_EQUAL(std::string("en"), testling.getAttribute("lang", "http://www.w3.org/XML/1998/namespace"));
         }
 
+        void testGetAttribute_Namespaced_Prefix() {
+            AttributeMap testling;
+            testling.addAttribute("lang", "", "prefix", "nl");
+            testling.addAttribute("lang", "http://www.w3.org/XML/1998/namespace", "prefix", "en");
+            testling.addAttribute("lang", "", "prefix", "fr");
+
+            CPPUNIT_ASSERT_EQUAL(std::string("en"), testling.getAttribute("lang", "http://www.w3.org/XML/1998/namespace"));
+            const auto& entries = testling.getEntries();
+            auto it = std::find_if(entries.begin(), entries.end(), [](const AttributeMap::Entry& e) {
+                return e.getValue() == "en";
+            });
+            const bool found = it != entries.end();
+            CPPUNIT_ASSERT_EQUAL(true, found);
+            CPPUNIT_ASSERT_EQUAL(std::string("prefix"), it->getAttribute().getPrefix());
+        }
+
         void testGetBoolAttribute_True() {
             AttributeMap testling;
             testling.addAttribute("foo", "", "true");
diff --git a/Swiften/Parser/UnitTest/XMLParserTest.cpp b/Swiften/Parser/UnitTest/XMLParserTest.cpp
index 63d30ea..4db694e 100644
--- a/Swiften/Parser/UnitTest/XMLParserTest.cpp
+++ b/Swiften/Parser/UnitTest/XMLParserTest.cpp
@@ -35,6 +35,7 @@ class XMLParserTest : public CppUnit::TestFixture {
         CPPUNIT_TEST(testParse_WhitespaceInAttribute);
         CPPUNIT_TEST(testParse_AttributeWithoutNamespace);
         CPPUNIT_TEST(testParse_AttributeWithNamespace);
+        CPPUNIT_TEST(testParse_AttributeWithNamespaceNoPrefix);
         CPPUNIT_TEST(testParse_BillionLaughs);
         CPPUNIT_TEST(testParse_InternalEntity);
         //CPPUNIT_TEST(testParse_UndefinedPrefix);
@@ -43,6 +44,7 @@ class XMLParserTest : public CppUnit::TestFixture {
         CPPUNIT_TEST(testParse_DisallowCommentsInXML);
         CPPUNIT_TEST(testParse_Doctype);
         CPPUNIT_TEST(testParse_ProcessingInstructions);
+        CPPUNIT_TEST(testParse_ProcessingPrefixedElement);
         CPPUNIT_TEST_SUITE_END();
 
     public:
@@ -264,6 +266,7 @@ class XMLParserTest : public CppUnit::TestFixture {
             CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), client_.events[0].attributes.getEntries().size());
             CPPUNIT_ASSERT_EQUAL(std::string("attr"), client_.events[0].attributes.getEntries()[0].getAttribute().getName());
             CPPUNIT_ASSERT_EQUAL(std::string(""), client_.events[0].attributes.getEntries()[0].getAttribute().getNamespace());
+            CPPUNIT_ASSERT_EQUAL(std::string(""), client_.events[0].attributes.getEntries()[0].getAttribute().getPrefix());
         }
 
         void testParse_AttributeWithNamespace() {
@@ -275,6 +278,22 @@ class XMLParserTest : public CppUnit::TestFixture {
             CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), client_.events[0].attributes.getEntries().size());
             CPPUNIT_ASSERT_EQUAL(std::string("attr"), client_.events[0].attributes.getEntries()[0].getAttribute().getName());
             CPPUNIT_ASSERT_EQUAL(std::string("http://swift.im/f"), client_.events[0].attributes.getEntries()[0].getAttribute().getNamespace());
+            CPPUNIT_ASSERT_EQUAL(std::string("f"), client_.events[0].attributes.getEntries()[0].getAttribute().getPrefix());
+            CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), client_.events[0].namespaces.size());
+            CPPUNIT_ASSERT_EQUAL(std::string("http://swift.im"), client_.events[0].namespaces[""]);
+            CPPUNIT_ASSERT_EQUAL(std::string("http://swift.im/f"), client_.events[0].namespaces["f"]);
+        }
+
+        void testParse_AttributeWithNamespaceNoPrefix() {
+            ParserType testling(&client_);
+
+            CPPUNIT_ASSERT(testling.parse(
+                "<query xmlns='http://swift.im' xmlns:f='http://swift.im/f' attr='3'/>"));
+
+            CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), client_.events[0].attributes.getEntries().size());
+            CPPUNIT_ASSERT_EQUAL(std::string("attr"), client_.events[0].attributes.getEntries()[0].getAttribute().getName());
+            CPPUNIT_ASSERT_EQUAL(std::string(""), client_.events[0].attributes.getEntries()[0].getAttribute().getNamespace());
+            CPPUNIT_ASSERT_EQUAL(std::string(""), client_.events[0].attributes.getEntries()[0].getAttribute().getPrefix());
             CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), client_.events[0].namespaces.size());
             CPPUNIT_ASSERT_EQUAL(std::string("http://swift.im"), client_.events[0].namespaces[""]);
             CPPUNIT_ASSERT_EQUAL(std::string("http://swift.im/f"), client_.events[0].namespaces["f"]);
@@ -373,25 +392,52 @@ class XMLParserTest : public CppUnit::TestFixture {
             CPPUNIT_ASSERT(!testling.parse("<?xml-stylesheet type=\"text/xsl\" href=\"Sample.xsl\"?>"));
         }
 
+        void testParse_ProcessingPrefixedElement() {
+            client_.testingStartElementPrefix = true;
+            ParserType testling(&client_);
+
+            CPPUNIT_ASSERT(testling.parse("<prefix:message xmlns='uri' xmlns:prefix='uriPrefix'/>"));
+
+            CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), client_.events.size());
+
+            CPPUNIT_ASSERT_EQUAL(Client::StartElementPrefix, client_.events[0].type);
+            CPPUNIT_ASSERT_EQUAL(std::string("message"), client_.events[0].data);
+            CPPUNIT_ASSERT_EQUAL(std::string("uriPrefix"), client_.events[0].ns);
+            CPPUNIT_ASSERT_EQUAL(std::string("prefix"), client_.events[0].prefix);
+
+            CPPUNIT_ASSERT_EQUAL(Client::EndElement, client_.events[1].type);
+            CPPUNIT_ASSERT_EQUAL(std::string("message"), client_.events[1].data);
+            CPPUNIT_ASSERT_EQUAL(std::string("uriPrefix"), client_.events[1].ns);
+        }
+
     private:
         class Client : public XMLParserClient {
             public:
                 using NamespaceMap = std::unordered_map<std::string /* prefix */, std::string /* uri */>;
-                enum Type { StartElement, EndElement, CharacterData, NamespaceDefined };
+                enum Type { StartElement, StartElementPrefix, EndElement, CharacterData, NamespaceDefined };
                 struct Event {
                     Event(
                             Type type,
                             const std::string& data,
                             const std::string& ns,
+                            const std::string& prefix,
+                            const AttributeMap& attributes,
+                            NamespaceMap namespaces)
+                                : type(type), data(data), ns(ns), prefix(prefix), attributes(attributes), namespaces(std::move(namespaces)) {}
+                    Event(
+                            Type type,
+                            const std::string& data,
+                            const std::string& ns,
                             const AttributeMap& attributes,
                             NamespaceMap namespaces = {})
-                                : type(type), data(data), ns(ns), attributes(attributes), namespaces(std::move(namespaces)) {}
+                                : Event(type, data, ns, {}, attributes, std::move(namespaces)) {}
                     Event(Type type, const std::string& data, const std::string& ns = std::string())
-                                : type(type), data(data), ns(ns) {}
+                                : Event(type, data, ns, "", AttributeMap(), NamespaceMap()) {}
 
                     Type type;
                     std::string data;
                     std::string ns;
+                    std::string prefix;
                     AttributeMap attributes;
                     NamespaceMap namespaces;
                 };
@@ -399,9 +445,15 @@ class XMLParserTest : public CppUnit::TestFixture {
                 Client() {}
 
                 void handleStartElement(const std::string& element, const std::string& ns, const AttributeMap& attributes) override {
+                    if (testingStartElementPrefix) return;
                     events.push_back(Event(StartElement, element, ns, attributes, std::move(namespaces_)));
                 }
 
+                void handleStartElementPrefix(const std::string& prefix, const std::string& uri, const std::string& name, const std::string&, const std::string&, const AttributeMap&) override {
+                    if (!testingStartElementPrefix) return;
+                    events.push_back(Event(StartElementPrefix, name, uri, prefix, AttributeMap(), NamespaceMap()));
+                }
+
                 void handleEndElement(const std::string& element, const std::string& ns) override {
                     events.push_back(Event(EndElement, element, ns));
                 }
@@ -415,6 +467,7 @@ class XMLParserTest : public CppUnit::TestFixture {
                 }
 
                 std::vector<Event> events;
+                bool testingStartElementPrefix = false;
             private:
                 NamespaceMap namespaces_;
         } client_;
diff --git a/Swiften/Parser/XMLParserClient.cpp b/Swiften/Parser/XMLParserClient.cpp
index 40be4e8..847e1f3 100644
--- a/Swiften/Parser/XMLParserClient.cpp
+++ b/Swiften/Parser/XMLParserClient.cpp
@@ -11,6 +11,12 @@ namespace Swift {
 XMLParserClient::~XMLParserClient() {
 }
 
+void XMLParserClient::handleStartElement(const std::string&, const std::string&, const AttributeMap&) {
+}
+
+void XMLParserClient::handleStartElementPrefix(const std::string&, const std::string&, const std::string&, const std::string&, const std::string&, const AttributeMap&) {
+}
+
 void XMLParserClient::handleNamespaceDeclaration(const std::string&, const std::string&) {
 }
 
diff --git a/Swiften/Parser/XMLParserClient.h b/Swiften/Parser/XMLParserClient.h
index 0682320..f519646 100644
--- a/Swiften/Parser/XMLParserClient.h
+++ b/Swiften/Parser/XMLParserClient.h
@@ -14,7 +14,13 @@ namespace Swift {
         public:
             virtual ~XMLParserClient();
 
-            virtual void handleStartElement(const std::string& element, const std::string& ns, const AttributeMap& attributes) = 0;
+            /**
+             * A client only has to override one of the following two methods, depending on whether
+             * it is interested in processing the element prefix or not.
+             */
+            virtual void handleStartElement(const std::string& element, const std::string& ns, const AttributeMap& attributes);
+            virtual void handleStartElementPrefix(const std::string& prefix, const std::string& uri, const std::string& name, const std::string& element, const std::string& ns, const AttributeMap& attributes);
+
             virtual void handleEndElement(const std::string& element, const std::string& ns) = 0;
             virtual void handleCharacterData(const std::string& data) = 0;
 
-- 
cgit v0.10.2-6-g49f6