From 23e2766dab6d4a3f6158eca7649cd36b644634d3 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Thu, 19 Sep 2019 20:48:44 +0100 Subject: Process attribute and element prefixes XML (Expat/LibXML) parsing modified to process prefix information. Prefixes for attributes stored within attributes. Prefixes for elements passed in additional callback (only if prefix present). Test-information: Unit tests pass on Windows 10 and Ubuntu 18.04.1 LTS. Change-Id: Ib6b5087feed758c31895f426df6a3c7ea975f248 diff --git a/Swiften/Parser/Attribute.h b/Swiften/Parser/Attribute.h index f54317e..07e63b4 100644 --- a/Swiften/Parser/Attribute.h +++ b/Swiften/Parser/Attribute.h @@ -14,6 +14,9 @@ namespace Swift { Attribute(const std::string& name, const std::string& ns) : name(name), ns(ns) { } + Attribute(const std::string& name, const std::string& ns, const std::string& prefix) : name(name), ns(ns), prefix(prefix) { + } + const std::string& getName() const { return name; } @@ -22,6 +25,10 @@ namespace Swift { return ns; } + const std::string& getPrefix() const { + return prefix; + } + bool operator==(const Attribute& o) const { return o.name == name && o.ns == ns; } @@ -29,5 +36,6 @@ namespace Swift { private: std::string name; std::string ns; + std::string prefix; }; } diff --git a/Swiften/Parser/AttributeMap.cpp b/Swiften/Parser/AttributeMap.cpp index f6767de..7814a64 100644 --- a/Swiften/Parser/AttributeMap.cpp +++ b/Swiften/Parser/AttributeMap.cpp @@ -54,3 +54,7 @@ boost::optional AttributeMap::getAttributeValue(const std::string& void AttributeMap::addAttribute(const std::string& name, const std::string& ns, const std::string& value) { attributes.push_back(Entry(Attribute(name, ns), value)); } + +void AttributeMap::addAttribute(const std::string& name, const std::string& ns, const std::string& prefix, const std::string& value) { + attributes.push_back(Entry(Attribute(name, ns, prefix), value)); +} diff --git a/Swiften/Parser/AttributeMap.h b/Swiften/Parser/AttributeMap.h index 804d6aa..26d5826 100644 --- a/Swiften/Parser/AttributeMap.h +++ b/Swiften/Parser/AttributeMap.h @@ -43,6 +43,7 @@ namespace Swift { boost::optional getAttributeValue(const std::string&) const; void addAttribute(const std::string& name, const std::string& ns, const std::string& value); + void addAttribute(const std::string& name, const std::string& ns, const std::string& prefix, const std::string& value); const std::vector& getEntries() const { return attributes; diff --git a/Swiften/Parser/ExpatParser.cpp b/Swiften/Parser/ExpatParser.cpp index 640d561..6c3845a 100644 --- a/Swiften/Parser/ExpatParser.cpp +++ b/Swiften/Parser/ExpatParser.cpp @@ -11,6 +11,8 @@ #include #include +#include + #include #include @@ -18,6 +20,33 @@ #pragma clang diagnostic ignored "-Wdisabled-macro-expansion" +namespace { +struct XmlInfo { + std::string prefix; + std::string uri; + std::string name; +}; + +XmlInfo splitExpatInfo(const std::string& s, char sep) { + // name + // uri|name + // uri|name|prefix + std::vector v; + boost::split(v, s, [sep](char c) {return c == sep; }); + switch (v.size()) { + case 1: + return{ "", "", std::move(v[0]) }; + case 2: + return{ "", std::move(v[0]), std::move(v[1]) }; + case 3: + return{ std::move(v[2]), std::move(v[0]), std::move(v[1]) }; + default: + return{ "", "", "" }; + } +} +} + + namespace Swift { static const char NAMESPACE_SEPARATOR = '\x01'; @@ -27,33 +56,24 @@ struct ExpatParser::Private { }; static void handleStartElement(void* parser, const XML_Char* name, const XML_Char** attributes) { - std::pair nsTagPair = String::getSplittedAtFirst(name, NAMESPACE_SEPARATOR); - if (nsTagPair.second == "") { - nsTagPair.second = nsTagPair.first; - nsTagPair.first = ""; - } + auto elemInfo = splitExpatInfo(name, NAMESPACE_SEPARATOR); + AttributeMap attributeValues; const XML_Char** currentAttribute = attributes; while (*currentAttribute) { - std::pair nsAttributePair = String::getSplittedAtFirst(*currentAttribute, NAMESPACE_SEPARATOR); - if (nsAttributePair.second == "") { - nsAttributePair.second = nsAttributePair.first; - nsAttributePair.first = ""; - } - attributeValues.addAttribute(nsAttributePair.second, nsAttributePair.first, std::string(*(currentAttribute+1))); + auto attribInfo = splitExpatInfo(*currentAttribute, NAMESPACE_SEPARATOR); + attributeValues.addAttribute(attribInfo.name, attribInfo.uri, attribInfo.prefix, std::string(*(currentAttribute+1))); currentAttribute += 2; } - static_cast(parser)->getClient()->handleStartElement(nsTagPair.second, nsTagPair.first, attributeValues); + auto* client = static_cast(parser)->getClient(); + client->handleStartElementPrefix(elemInfo.prefix, elemInfo.uri, elemInfo.name, elemInfo.name, elemInfo.uri, attributeValues); + client->handleStartElement(elemInfo.name, elemInfo.uri, attributeValues); } static void handleEndElement(void* parser, const XML_Char* name) { - std::pair nsTagPair = String::getSplittedAtFirst(name, NAMESPACE_SEPARATOR); - if (nsTagPair.second == "") { - nsTagPair.second = nsTagPair.first; - nsTagPair.first = ""; - } - static_cast(parser)->getClient()->handleEndElement(nsTagPair.second, nsTagPair.first); + auto elemInfo = splitExpatInfo(name, NAMESPACE_SEPARATOR); + static_cast(parser)->getClient()->handleEndElement(elemInfo.name, elemInfo.uri); } static void handleCharacterData(void* parser, const XML_Char* data, int len) { @@ -88,6 +108,7 @@ static void handleDoctypeDeclaration(void* parser, const XML_Char* /*doctypeName ExpatParser::ExpatParser(XMLParserClient* client, bool allowComments) : XMLParser(client, allowComments), p(new Private()) { p->parser_ = XML_ParserCreateNS("UTF-8", NAMESPACE_SEPARATOR); + XML_SetReturnNSTriplet(p->parser_, true); XML_SetUserData(p->parser_, this); XML_SetElementHandler(p->parser_, handleStartElement, handleEndElement); XML_SetCharacterDataHandler(p->parser_, handleCharacterData); diff --git a/Swiften/Parser/LibXMLParser.cpp b/Swiften/Parser/LibXMLParser.cpp index 4e02059..71515a7 100644 --- a/Swiften/Parser/LibXMLParser.cpp +++ b/Swiften/Parser/LibXMLParser.cpp @@ -17,6 +17,12 @@ #include #include +namespace { +std::string asString(const unsigned char* s) { + return s ? std::string(reinterpret_cast(s)) : std::string(); +} +} + namespace Swift { struct LibXMLParser::Private { @@ -24,34 +30,39 @@ struct LibXMLParser::Private { xmlParserCtxtPtr context_; }; -static void handleStartElement(void* parser, const xmlChar* name, const xmlChar*, const xmlChar* xmlns, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar ** attributes) { +static void handleStartElement(void* parser, const xmlChar* name, const xmlChar* prefix, const xmlChar* xmlns, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar ** attributes) { AttributeMap attributeValues; if (nbDefaulted != 0) { // Just because i don't understand what this means yet :-) SWIFT_LOG(error) << "Unexpected nbDefaulted on XML element" << std::endl; } for (int i = 0; i < nbAttributes*5; i += 5) { - std::string attributeNS = ""; - if (attributes[i+2]) { - attributeNS = std::string(reinterpret_cast(attributes[i+2])); - } + std::string attributeName = asString(attributes[i]); + std::string attributePrefix = asString(attributes[i+1]); + std::string attributeNS = asString(attributes[i+2]); assert(attributes[i+4] >= attributes[i+3]); attributeValues.addAttribute( - std::string(reinterpret_cast(attributes[i])), + attributeName, attributeNS, + attributePrefix, std::string(reinterpret_cast(attributes[i+3]), static_cast(attributes[i+4]-attributes[i+3]))); } + auto* client = static_cast(parser)->getClient(); for (auto i = 0; i < nbNamespaces * 2; i += 2) { - const auto prefix = namespaces[i] ? std::string(reinterpret_cast(namespaces[i])) : ""; - const auto uri = std::string(reinterpret_cast(namespaces[i + 1])); - static_cast(parser)->getClient()->handleNamespaceDeclaration(prefix, uri); + const auto prefix = asString(namespaces[i]); + const auto uri = asString(namespaces[i + 1]); + client->handleNamespaceDeclaration(prefix, uri); } - static_cast(parser)->getClient()->handleStartElement(reinterpret_cast(name), (xmlns ? reinterpret_cast(xmlns) : std::string()), attributeValues); + auto nameStr = asString(name); + auto xmlsnsStr = asString(xmlns); + auto prefixStr = asString(prefix); + client->handleStartElementPrefix(prefixStr, xmlsnsStr, nameStr, nameStr, xmlsnsStr, attributeValues); + client->handleStartElement(nameStr, xmlsnsStr, attributeValues); } static void handleEndElement(void *parser, const xmlChar* name, const xmlChar*, const xmlChar* xmlns) { - static_cast(parser)->getClient()->handleEndElement(reinterpret_cast(name), (xmlns ? reinterpret_cast(xmlns) : std::string())); + static_cast(parser)->getClient()->handleEndElement(asString(name), asString(xmlns)); } static void handleCharacterData(void* parser, const xmlChar* data, int len) { diff --git a/Swiften/Parser/UnitTest/AttributeMapTest.cpp b/Swiften/Parser/UnitTest/AttributeMapTest.cpp index 4529eac..d9335c1 100644 --- a/Swiften/Parser/UnitTest/AttributeMapTest.cpp +++ b/Swiften/Parser/UnitTest/AttributeMapTest.cpp @@ -15,6 +15,7 @@ class AttributeMapTest : public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(AttributeMapTest); CPPUNIT_TEST(testGetAttribute_Namespaced); + CPPUNIT_TEST(testGetAttribute_Namespaced_Prefix); CPPUNIT_TEST(testGetBoolAttribute_True); CPPUNIT_TEST(testGetBoolAttribute_1); CPPUNIT_TEST(testGetBoolAttribute_False); @@ -34,6 +35,22 @@ class AttributeMapTest : public CppUnit::TestFixture CPPUNIT_ASSERT_EQUAL(std::string("en"), testling.getAttribute("lang", "http://www.w3.org/XML/1998/namespace")); } + void testGetAttribute_Namespaced_Prefix() { + AttributeMap testling; + testling.addAttribute("lang", "", "prefix", "nl"); + testling.addAttribute("lang", "http://www.w3.org/XML/1998/namespace", "prefix", "en"); + testling.addAttribute("lang", "", "prefix", "fr"); + + CPPUNIT_ASSERT_EQUAL(std::string("en"), testling.getAttribute("lang", "http://www.w3.org/XML/1998/namespace")); + const auto& entries = testling.getEntries(); + auto it = std::find_if(entries.begin(), entries.end(), [](const AttributeMap::Entry& e) { + return e.getValue() == "en"; + }); + const bool found = it != entries.end(); + CPPUNIT_ASSERT_EQUAL(true, found); + CPPUNIT_ASSERT_EQUAL(std::string("prefix"), it->getAttribute().getPrefix()); + } + void testGetBoolAttribute_True() { AttributeMap testling; testling.addAttribute("foo", "", "true"); diff --git a/Swiften/Parser/UnitTest/XMLParserTest.cpp b/Swiften/Parser/UnitTest/XMLParserTest.cpp index 63d30ea..4db694e 100644 --- a/Swiften/Parser/UnitTest/XMLParserTest.cpp +++ b/Swiften/Parser/UnitTest/XMLParserTest.cpp @@ -35,6 +35,7 @@ class XMLParserTest : public CppUnit::TestFixture { CPPUNIT_TEST(testParse_WhitespaceInAttribute); CPPUNIT_TEST(testParse_AttributeWithoutNamespace); CPPUNIT_TEST(testParse_AttributeWithNamespace); + CPPUNIT_TEST(testParse_AttributeWithNamespaceNoPrefix); CPPUNIT_TEST(testParse_BillionLaughs); CPPUNIT_TEST(testParse_InternalEntity); //CPPUNIT_TEST(testParse_UndefinedPrefix); @@ -43,6 +44,7 @@ class XMLParserTest : public CppUnit::TestFixture { CPPUNIT_TEST(testParse_DisallowCommentsInXML); CPPUNIT_TEST(testParse_Doctype); CPPUNIT_TEST(testParse_ProcessingInstructions); + CPPUNIT_TEST(testParse_ProcessingPrefixedElement); CPPUNIT_TEST_SUITE_END(); public: @@ -264,6 +266,7 @@ class XMLParserTest : public CppUnit::TestFixture { CPPUNIT_ASSERT_EQUAL(static_cast(1), client_.events[0].attributes.getEntries().size()); CPPUNIT_ASSERT_EQUAL(std::string("attr"), client_.events[0].attributes.getEntries()[0].getAttribute().getName()); CPPUNIT_ASSERT_EQUAL(std::string(""), client_.events[0].attributes.getEntries()[0].getAttribute().getNamespace()); + CPPUNIT_ASSERT_EQUAL(std::string(""), client_.events[0].attributes.getEntries()[0].getAttribute().getPrefix()); } void testParse_AttributeWithNamespace() { @@ -275,6 +278,22 @@ class XMLParserTest : public CppUnit::TestFixture { CPPUNIT_ASSERT_EQUAL(static_cast(1), client_.events[0].attributes.getEntries().size()); CPPUNIT_ASSERT_EQUAL(std::string("attr"), client_.events[0].attributes.getEntries()[0].getAttribute().getName()); CPPUNIT_ASSERT_EQUAL(std::string("http://swift.im/f"), client_.events[0].attributes.getEntries()[0].getAttribute().getNamespace()); + CPPUNIT_ASSERT_EQUAL(std::string("f"), client_.events[0].attributes.getEntries()[0].getAttribute().getPrefix()); + CPPUNIT_ASSERT_EQUAL(static_cast(2), client_.events[0].namespaces.size()); + CPPUNIT_ASSERT_EQUAL(std::string("http://swift.im"), client_.events[0].namespaces[""]); + CPPUNIT_ASSERT_EQUAL(std::string("http://swift.im/f"), client_.events[0].namespaces["f"]); + } + + void testParse_AttributeWithNamespaceNoPrefix() { + ParserType testling(&client_); + + CPPUNIT_ASSERT(testling.parse( + "")); + + CPPUNIT_ASSERT_EQUAL(static_cast(1), client_.events[0].attributes.getEntries().size()); + CPPUNIT_ASSERT_EQUAL(std::string("attr"), client_.events[0].attributes.getEntries()[0].getAttribute().getName()); + CPPUNIT_ASSERT_EQUAL(std::string(""), client_.events[0].attributes.getEntries()[0].getAttribute().getNamespace()); + CPPUNIT_ASSERT_EQUAL(std::string(""), client_.events[0].attributes.getEntries()[0].getAttribute().getPrefix()); CPPUNIT_ASSERT_EQUAL(static_cast(2), client_.events[0].namespaces.size()); CPPUNIT_ASSERT_EQUAL(std::string("http://swift.im"), client_.events[0].namespaces[""]); CPPUNIT_ASSERT_EQUAL(std::string("http://swift.im/f"), client_.events[0].namespaces["f"]); @@ -373,25 +392,52 @@ class XMLParserTest : public CppUnit::TestFixture { CPPUNIT_ASSERT(!testling.parse("")); } + void testParse_ProcessingPrefixedElement() { + client_.testingStartElementPrefix = true; + ParserType testling(&client_); + + CPPUNIT_ASSERT(testling.parse("")); + + CPPUNIT_ASSERT_EQUAL(static_cast(2), client_.events.size()); + + CPPUNIT_ASSERT_EQUAL(Client::StartElementPrefix, client_.events[0].type); + CPPUNIT_ASSERT_EQUAL(std::string("message"), client_.events[0].data); + CPPUNIT_ASSERT_EQUAL(std::string("uriPrefix"), client_.events[0].ns); + CPPUNIT_ASSERT_EQUAL(std::string("prefix"), client_.events[0].prefix); + + CPPUNIT_ASSERT_EQUAL(Client::EndElement, client_.events[1].type); + CPPUNIT_ASSERT_EQUAL(std::string("message"), client_.events[1].data); + CPPUNIT_ASSERT_EQUAL(std::string("uriPrefix"), client_.events[1].ns); + } + private: class Client : public XMLParserClient { public: using NamespaceMap = std::unordered_map; - enum Type { StartElement, EndElement, CharacterData, NamespaceDefined }; + enum Type { StartElement, StartElementPrefix, EndElement, CharacterData, NamespaceDefined }; struct Event { Event( Type type, const std::string& data, const std::string& ns, + const std::string& prefix, + const AttributeMap& attributes, + NamespaceMap namespaces) + : type(type), data(data), ns(ns), prefix(prefix), attributes(attributes), namespaces(std::move(namespaces)) {} + Event( + Type type, + const std::string& data, + const std::string& ns, const AttributeMap& attributes, NamespaceMap namespaces = {}) - : type(type), data(data), ns(ns), attributes(attributes), namespaces(std::move(namespaces)) {} + : Event(type, data, ns, {}, attributes, std::move(namespaces)) {} Event(Type type, const std::string& data, const std::string& ns = std::string()) - : type(type), data(data), ns(ns) {} + : Event(type, data, ns, "", AttributeMap(), NamespaceMap()) {} Type type; std::string data; std::string ns; + std::string prefix; AttributeMap attributes; NamespaceMap namespaces; }; @@ -399,9 +445,15 @@ class XMLParserTest : public CppUnit::TestFixture { Client() {} void handleStartElement(const std::string& element, const std::string& ns, const AttributeMap& attributes) override { + if (testingStartElementPrefix) return; events.push_back(Event(StartElement, element, ns, attributes, std::move(namespaces_))); } + void handleStartElementPrefix(const std::string& prefix, const std::string& uri, const std::string& name, const std::string&, const std::string&, const AttributeMap&) override { + if (!testingStartElementPrefix) return; + events.push_back(Event(StartElementPrefix, name, uri, prefix, AttributeMap(), NamespaceMap())); + } + void handleEndElement(const std::string& element, const std::string& ns) override { events.push_back(Event(EndElement, element, ns)); } @@ -415,6 +467,7 @@ class XMLParserTest : public CppUnit::TestFixture { } std::vector events; + bool testingStartElementPrefix = false; private: NamespaceMap namespaces_; } client_; diff --git a/Swiften/Parser/XMLParserClient.cpp b/Swiften/Parser/XMLParserClient.cpp index 40be4e8..847e1f3 100644 --- a/Swiften/Parser/XMLParserClient.cpp +++ b/Swiften/Parser/XMLParserClient.cpp @@ -11,6 +11,12 @@ namespace Swift { XMLParserClient::~XMLParserClient() { } +void XMLParserClient::handleStartElement(const std::string&, const std::string&, const AttributeMap&) { +} + +void XMLParserClient::handleStartElementPrefix(const std::string&, const std::string&, const std::string&, const std::string&, const std::string&, const AttributeMap&) { +} + void XMLParserClient::handleNamespaceDeclaration(const std::string&, const std::string&) { } diff --git a/Swiften/Parser/XMLParserClient.h b/Swiften/Parser/XMLParserClient.h index 0682320..f519646 100644 --- a/Swiften/Parser/XMLParserClient.h +++ b/Swiften/Parser/XMLParserClient.h @@ -14,7 +14,13 @@ namespace Swift { public: virtual ~XMLParserClient(); - virtual void handleStartElement(const std::string& element, const std::string& ns, const AttributeMap& attributes) = 0; + /** + * Client will have to implement only one of the following methods depending on whether + * he is interested in processing the element prefix or not. + */ + virtual void handleStartElement(const std::string& element, const std::string& ns, const AttributeMap& attributes); + virtual void handleStartElementPrefix(const std::string& prefix, const std::string& uri, const std::string& name, const std::string& element, const std::string& ns, const AttributeMap& attributes); + virtual void handleEndElement(const std::string& element, const std::string& ns) = 0; virtual void handleCharacterData(const std::string& data) = 0; -- cgit v0.10.2-6-g49f6