From 8e1bdd6727a2e0d4e5b0894a83905c275348b037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Remko=20Tron=C3=A7on?= <git@el-tramo.be> Date: Fri, 15 Apr 2011 22:53:27 +0200 Subject: JID escaping algorithm refactoring. Made algorithm more efficient. Fixed some incorrect escaping. Added more tests from the XEP. diff --git a/SwifTools/URIHandler/XMPPURI.cpp b/SwifTools/URIHandler/XMPPURI.cpp index 496871c..de83ac2 100644 --- a/SwifTools/URIHandler/XMPPURI.cpp +++ b/SwifTools/URIHandler/XMPPURI.cpp @@ -48,7 +48,8 @@ struct PercentUnencodeFormatter { if (s.fail() || s.bad()) { throw std::runtime_error("Invalid escape character"); } - return std::string(reinterpret_cast<const char*>(&value), 1); + unsigned char charValue = static_cast<unsigned char>(value); + return std::string(reinterpret_cast<const char*>(&charValue), 1); } }; diff --git a/Swiften/JID/JID.cpp b/Swiften/JID/JID.cpp index 5fd7524..00adf34 100644 --- a/Swiften/JID/JID.cpp +++ b/Swiften/JID/JID.cpp @@ -7,13 +7,17 @@ #define SWIFTEN_CACHE_JID_PREP #include <vector> +#include <list> #include <iostream> #include <string> #ifdef SWIFTEN_CACHE_JID_PREP #include <boost/unordered_map.hpp> #endif -#include <boost/algorithm/string/replace.hpp> +#include <boost/assign/list_of.hpp> +#include <boost/algorithm/string/find_format.hpp> +#include <boost/algorithm/string/finder.hpp> +#include <sstream> #include <stringprep.h> #include <Swiften/Base/String.h> @@ -28,6 +32,75 @@ static PrepCache domainPrepCache; static PrepCache resourcePrepCache; #endif +static const std::list<char> escapedChars = boost::assign::list_of(' ')('"')('&')('\'')('/')('<')('>')('@')(':'); + +bool getEscapeSequenceValue(const std::string& sequence, unsigned char& value) { + std::stringstream s; + unsigned int v; + s << std::hex << sequence; + s >> v; + value = static_cast<unsigned char>(v); + return (!s.fail() && !s.bad() && (value == 0x5C || std::find(escapedChars.begin(), escapedChars.end(), value) != escapedChars.end())); +} + +struct UnescapedCharacterFinder { + template<typename Iterator> boost::iterator_range<Iterator> operator()(Iterator begin, Iterator end) { + for (; begin != end; ++begin) { + if (std::find(escapedChars.begin(), escapedChars.end(), *begin) != escapedChars.end()) { + return boost::iterator_range<Iterator>(begin, begin + 1); + } + else if (*begin == '\\') { + // Check if we have an escaped dissalowed character sequence + Iterator innerBegin = begin + 1; + if (innerBegin != end && innerBegin + 1 != end) { + Iterator innerEnd = innerBegin + 2; + unsigned char value; + if (getEscapeSequenceValue(std::string(innerBegin, innerEnd), value)) { + return boost::iterator_range<Iterator>(begin, begin + 1); + } + } + } + } + return boost::iterator_range<Iterator>(end, end); + } +}; + +struct UnescapedCharacterFormatter { + template<typename FindResult> std::string operator()(const FindResult& match) const { + std::ostringstream s; + s << '\\' << std::hex << static_cast<int>(*match.begin()); + return s.str(); + } +}; + +struct EscapedCharacterFinder { + template<typename Iterator> boost::iterator_range<Iterator> operator()(Iterator begin, Iterator end) { + for (; begin != end; ++begin) { + if (*begin == '\\') { + Iterator innerEnd = begin + 1; + for (size_t i = 0; i < 2 && innerEnd != end; ++i, ++innerEnd) { + } + unsigned char value; + if (getEscapeSequenceValue(std::string(begin + 1, innerEnd), value)) { + return boost::iterator_range<Iterator>(begin, innerEnd); + } + } + } + return boost::iterator_range<Iterator>(end, end); + } +}; + +struct EscapedCharacterFormatter { + template<typename FindResult> std::string operator()(const FindResult& match) const { + unsigned char value; + if (getEscapeSequenceValue(std::string(match.begin() + 1, match.end()), value)) { + return std::string(reinterpret_cast<const char*>(&value), 1); + } + return boost::copy_range<std::string>(match); + } +}; + + namespace Swift { JID::JID(const char* jid) { @@ -128,38 +201,11 @@ int JID::compare(const Swift::JID& o, CompareType compareType) const { } std::string JID::getEscapedNode(const std::string& node) { - std::string escaped = node; - - boost::algorithm::replace_all(escaped, "\\", "\\5c"); - boost::algorithm::replace_all(escaped, " ", "\\20"); - boost::algorithm::replace_all(escaped, "\"", "\\22"); - boost::algorithm::replace_all(escaped, "&", "\\26"); - boost::algorithm::replace_all(escaped, "'", "\\27"); - boost::algorithm::replace_all(escaped, "/", "\\2f"); - boost::algorithm::replace_all(escaped, "<", "\\3c"); - boost::algorithm::replace_all(escaped, ">", "\\3e"); - boost::algorithm::replace_all(escaped, "@", "\\40"); - boost::algorithm::replace_all(escaped, ":", "\\3a"); - - return escaped; + return boost::find_format_all_copy(node, UnescapedCharacterFinder(), UnescapedCharacterFormatter()); } std::string JID::getUnescapedNode() const { - std::string unescaped = node_; - - boost::algorithm::replace_all(unescaped, "\\20", " "); - boost::algorithm::replace_all(unescaped, "\\22", "\""); - boost::algorithm::replace_all(unescaped, "\\26", "&"); - boost::algorithm::replace_all(unescaped, "\\27", "'"); - boost::algorithm::replace_all(unescaped, "\\2f", "/"); - boost::algorithm::replace_all(unescaped, "\\3c", "<"); - boost::algorithm::replace_all(unescaped, "\\3e", ">"); - boost::algorithm::replace_all(unescaped, "\\40", "@"); - boost::algorithm::replace_all(unescaped, "\\3a", ":"); - boost::algorithm::replace_all(unescaped, "\\5c", "\\"); - - - return unescaped; + return boost::find_format_all_copy(node_, EscapedCharacterFinder(), EscapedCharacterFormatter()); } } // namespace Swift diff --git a/Swiften/JID/JID.h b/Swiften/JID/JID.h index f2a95be..98b42da 100644 --- a/Swiften/JID/JID.h +++ b/Swiften/JID/JID.h @@ -38,8 +38,16 @@ namespace Swift { return !hasResource_; } + /** + * Returns the given node, escaped according to XEP-0106. + * The resulting node is a valid node for a JID, whereas the input value can contain characters + * that are not allowed. + */ static std::string getEscapedNode(const std::string& node); + /** + * Returns the node of the current JID, unescaped according to XEP-0106. + */ std::string getUnescapedNode() const; JID toBare() const { diff --git a/Swiften/JID/UnitTest/JIDTest.cpp b/Swiften/JID/UnitTest/JIDTest.cpp index 619df7f..5bb2b7a 100644 --- a/Swiften/JID/UnitTest/JIDTest.cpp +++ b/Swiften/JID/UnitTest/JIDTest.cpp @@ -52,7 +52,9 @@ class JIDTest : public CppUnit::TestFixture CPPUNIT_TEST(testHasResource); CPPUNIT_TEST(testHasResource_NoResource); CPPUNIT_TEST(testGetEscapedNode); + CPPUNIT_TEST(testGetEscapedNode_XEP106Examples); CPPUNIT_TEST(testGetUnescapedNode); + CPPUNIT_TEST(testGetUnescapedNode_XEP106Examples); CPPUNIT_TEST_SUITE_END(); public: @@ -319,7 +321,22 @@ class JIDTest : public CppUnit::TestFixture CPPUNIT_ASSERT_EQUAL(std::string("alice\\40wonderland.lit"), escaped); escaped = JID::getEscapedNode("\\& \" ' / <\\\\> @ :\\3a\\40"); - CPPUNIT_ASSERT_EQUAL(std::string("\\5c\\26\\20\\22\\20\\27\\20\\2f\\20\\3c\\5c\\5c\\3e\\20\\40\\20\\3a\\5c3a\\5c40"), escaped); + CPPUNIT_ASSERT_EQUAL(std::string("\\\\26\\20\\22\\20\\27\\20\\2f\\20\\3c\\\\\\3e\\20\\40\\20\\3a\\5c3a\\5c40"), escaped); + } + + void testGetEscapedNode_XEP106Examples() { + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("space cadet"), std::string("space\\20cadet")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("call me \"ishmael\""), std::string("call\\20me\\20\\22ishmael\\22")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("at&t guy"), std::string("at\\26t\\20guy")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("d'artagnan"), std::string("d\\27artagnan")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("/.fanboy"), std::string("\\2f.fanboy")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("::foo::"), std::string("\\3a\\3afoo\\3a\\3a")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("<foo>"), std::string("\\3cfoo\\3e")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("user@host"), std::string("user\\40host")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("c:\\net"), std::string("c\\3a\\net")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("c:\\\\net"), std::string("c\\3a\\\\net")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("c:\\cool stuff"), std::string("c\\3a\\cool\\20stuff")); + CPPUNIT_ASSERT_EQUAL(JID::getEscapedNode("c:\\5commas"), std::string("c\\3a\\5c5commas")); } void testGetUnescapedNode() { @@ -328,6 +345,21 @@ class JIDTest : public CppUnit::TestFixture CPPUNIT_ASSERT(testling.isValid()); CPPUNIT_ASSERT_EQUAL(input, testling.getUnescapedNode()); } + + void testGetUnescapedNode_XEP106Examples() { + CPPUNIT_ASSERT_EQUAL(std::string("space cadet"), JID("space\\20cadet@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("call me \"ishmael\""), JID("call\\20me\\20\\22ishmael\\22@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("at&t guy"), JID("at\\26t\\20guy@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("d'artagnan"), JID("d\\27artagnan@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("/.fanboy"), JID("\\2f.fanboy@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("::foo::"), JID("\\3a\\3afoo\\3a\\3a@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("<foo>"), JID("\\3cfoo\\3e@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("user@host"), JID("user\\40host@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("c:\\net"), JID("c\\3a\\net@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("c:\\\\net"), JID("c\\3a\\\\net@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("c:\\cool stuff"), JID("c\\3a\\cool\\20stuff@example.com").getUnescapedNode()); + CPPUNIT_ASSERT_EQUAL(std::string("c:\\5commas"), JID("c\\3a\\5c5commas@example.com").getUnescapedNode()); + } }; CPPUNIT_TEST_SUITE_REGISTRATION(JIDTest); -- cgit v0.10.2-6-g49f6