From 8e1bdd6727a2e0d4e5b0894a83905c275348b037 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Remko=20Tron=C3=A7on?= <git@el-tramo.be>
Date: Fri, 15 Apr 2011 22:53:27 +0200
Subject: JID escaping algorithm refactoring.

Made algorithm more efficient.
Fixed some incorrect escaping.
Added more tests from the XEP.

diff --git a/SwifTools/URIHandler/XMPPURI.cpp b/SwifTools/URIHandler/XMPPURI.cpp
index 496871c..de83ac2 100644
--- a/SwifTools/URIHandler/XMPPURI.cpp
+++ b/SwifTools/URIHandler/XMPPURI.cpp
@@ -48,7 +48,8 @@ struct PercentUnencodeFormatter {
 		if (s.fail() || s.bad()) {
 			throw std::runtime_error("Invalid escape character");
 		}
-		return std::string(reinterpret_cast<const char*>(&value), 1);
+		unsigned char charValue = static_cast<unsigned char>(value);
+		return std::string(reinterpret_cast<const char*>(&charValue), 1);
 	}
 };
 
diff --git a/Swiften/JID/JID.cpp b/Swiften/JID/JID.cpp
index 5fd7524..00adf34 100644
--- a/Swiften/JID/JID.cpp
+++ b/Swiften/JID/JID.cpp
@@ -7,13 +7,17 @@
 #define SWIFTEN_CACHE_JID_PREP
 
 #include <vector>
+#include <list>
 #include <iostream>
 
 #include <string>
 #ifdef SWIFTEN_CACHE_JID_PREP
 #include <boost/unordered_map.hpp>
 #endif
-#include <boost/algorithm/string/replace.hpp>
+#include <boost/assign/list_of.hpp>
+#include <boost/algorithm/string/find_format.hpp>
+#include <boost/algorithm/string/finder.hpp>
+#include <sstream>
 #include <stringprep.h>
 
 #include <Swiften/Base/String.h>
@@ -28,6 +32,75 @@ static PrepCache domainPrepCache;
 static PrepCache resourcePrepCache;
 #endif
 
+static const std::list<char> escapedChars = boost::assign::list_of(' ')('"')('&')('\'')('/')('<')('>')('@')(':'); // Characters the escaper always rewrites as backslash + hex code; the backslash itself (0x5C) is special-cased by the code below.
+
+bool getEscapeSequenceValue(const std::string& sequence, unsigned char& value) { // Parses `sequence` as hexadecimal into `value`; returns true only if the result is 0x5C or a member of escapedChars.
+	std::stringstream s;
+	unsigned int v = 0; // Pre-C++11 streams leave v untouched when extraction fails, so give it a defined value before it is read below.
+	s << std::hex << sequence; // std::hex stays set on s, so the extraction on the next line is hexadecimal as well.
+	s >> v;
+	value = static_cast<unsigned char>(v); // Assigned even when the function goes on to return false.
+	return (!s.fail() && !s.bad() && (value == 0x5C || std::find(escapedChars.begin(), escapedChars.end(), value) != escapedChars.end()));
+}
+
+struct UnescapedCharacterFinder { // Finder passed to boost::find_format_all_copy by getEscapedNode: locates the next single character that has to be escaped.
+	template<typename Iterator>	boost::iterator_range<Iterator> operator()(Iterator begin, Iterator end) {
+		for (; begin != end; ++begin) {
+			if (std::find(escapedChars.begin(), escapedChars.end(), *begin) != escapedChars.end()) {
+				return boost::iterator_range<Iterator>(begin, begin + 1);
+			}
+			else if (*begin == '\\') {
+				// A backslash is escaped only when the two characters after it would otherwise read as a valid escape sequence
+				Iterator innerBegin = begin + 1;
+				if (innerBegin != end && innerBegin + 1 != end) {
+					Iterator innerEnd = innerBegin + 2;
+					unsigned char value;
+					if (getEscapeSequenceValue(std::string(innerBegin, innerEnd), value)) {
+						return boost::iterator_range<Iterator>(begin, begin + 1);
+					}
+				}
+			}
+		}
+		return boost::iterator_range<Iterator>(end, end); // Empty range at end: nothing left to escape.
+	}
+};
+
+struct UnescapedCharacterFormatter { // Formatter paired with UnescapedCharacterFinder: renders the matched character as its escape sequence.
+	template<typename FindResult>	std::string operator()(const FindResult& match) const {
+		std::ostringstream s;
+		s << '\\' << std::hex << static_cast<int>(*match.begin()); // Backslash followed by the character code in hex; no zero padding is applied.
+		return s.str();
+	}
+};
+
+struct EscapedCharacterFinder { // Finder passed to boost::find_format_all_copy by getUnescapedNode: locates the next backslash that starts a recognised escape sequence.
+	template<typename Iterator>	boost::iterator_range<Iterator> operator()(Iterator begin, Iterator end) {
+		for (; begin != end; ++begin) {
+			if (*begin == '\\') {
+				Iterator innerEnd = begin + 1;
+				for (size_t i = 0; i < 2 && innerEnd != end; ++i, ++innerEnd) { // Empty body on purpose: advances innerEnd by up to two characters without passing end.
+				}
+				unsigned char value;
+				if (getEscapeSequenceValue(std::string(begin + 1, innerEnd), value)) {
+					return boost::iterator_range<Iterator>(begin, innerEnd); // Match spans the backslash and the characters after it.
+				}
+			}
+		}
+		return boost::iterator_range<Iterator>(end, end); // Empty range at end: no further escape sequences.
+	}
+};
+
+struct EscapedCharacterFormatter { // Formatter paired with EscapedCharacterFinder: replaces a matched escape sequence with the character it encodes.
+	template<typename FindResult>	std::string operator()(const FindResult& match) const {
+		unsigned char value;
+		if (getEscapeSequenceValue(std::string(match.begin() + 1, match.end()), value)) { // match.begin() + 1 skips the leading backslash.
+			return std::string(reinterpret_cast<const char*>(&value), 1);
+		}
+		return boost::copy_range<std::string>(match); // Not a recognised escape: keep the matched text unchanged.
+	}
+};
+
+
 namespace Swift {
 
 JID::JID(const char* jid) {
@@ -128,38 +201,11 @@ int JID::compare(const Swift::JID& o, CompareType compareType) const {
 }
 
 std::string JID::getEscapedNode(const std::string& node) {
-	std::string escaped = node;
-	
-	boost::algorithm::replace_all(escaped, "\\", "\\5c");
-	boost::algorithm::replace_all(escaped, " ", "\\20");
-	boost::algorithm::replace_all(escaped, "\"", "\\22");
-	boost::algorithm::replace_all(escaped, "&", "\\26");
-	boost::algorithm::replace_all(escaped, "'", "\\27");
-	boost::algorithm::replace_all(escaped, "/", "\\2f");
-	boost::algorithm::replace_all(escaped, "<", "\\3c");
-	boost::algorithm::replace_all(escaped, ">", "\\3e");
-	boost::algorithm::replace_all(escaped, "@", "\\40");
-	boost::algorithm::replace_all(escaped, ":", "\\3a");
-
-	return escaped;
+	return boost::find_format_all_copy(node, UnescapedCharacterFinder(), UnescapedCharacterFormatter());
 }
 
 std::string JID::getUnescapedNode() const {
-	std::string unescaped = node_;
-
-	boost::algorithm::replace_all(unescaped, "\\20", " ");
-	boost::algorithm::replace_all(unescaped, "\\22", "\"");
-	boost::algorithm::replace_all(unescaped, "\\26", "&");
-	boost::algorithm::replace_all(unescaped, "\\27", "'");
-	boost::algorithm::replace_all(unescaped, "\\2f", "/");
-	boost::algorithm::replace_all(unescaped, "\\3c", "<");
-	boost::algorithm::replace_all(unescaped, "\\3e", ">");
-	boost::algorithm::replace_all(unescaped, "\\40", "@");
-	boost::algorithm::replace_all(unescaped, "\\3a", ":");
-	boost::algorithm::replace_all(unescaped, "\\5c", "\\");
-	
-
-	return unescaped;
+	return boost::find_format_all_copy(node_, EscapedCharacterFinder(), EscapedCharacterFormatter());
 }
 
 } // namespace Swift
diff --git a/Swiften/JID/JID.h b/Swiften/JID/JID.h
index f2a95be..98b42da 100644
--- a/Swiften/JID/JID.h
+++ b/Swiften/JID/JID.h
@@ -38,8 +38,16 @@ namespace Swift {
 				return !hasResource_;
 			}
 
+			/**
+			 * Returns the given node, escaped according to XEP-0106.
+			 * The resulting node is a valid node for a JID, whereas the input value can contain characters
+			 * that are not allowed.
+			 */
 			static std::string getEscapedNode(const std::string& node);
 
+			/**
+			 * Returns the node of the current JID, unescaped according to XEP-0106.
+			 */
 			std::string getUnescapedNode() const;
 
 			JID toBare() const {
diff --git a/Swiften/JID/UnitTest/JIDTest.cpp b/Swiften/JID/UnitTest/JIDTest.cpp
index 619df7f..5bb2b7a 100644
--- a/Swiften/JID/UnitTest/JIDTest.cpp
+++ b/Swiften/JID/UnitTest/JIDTest.cpp
@@ -52,7 +52,9 @@ class JIDTest : public CppUnit::TestFixture
 		CPPUNIT_TEST(testHasResource);
 		CPPUNIT_TEST(testHasResource_NoResource);
 		CPPUNIT_TEST(testGetEscapedNode);
+		CPPUNIT_TEST(testGetEscapedNode_XEP106Examples);
 		CPPUNIT_TEST(testGetUnescapedNode);
+		CPPUNIT_TEST(testGetUnescapedNode_XEP106Examples);
 		CPPUNIT_TEST_SUITE_END();
 
 	public:
@@ -319,7 +321,22 @@ class JIDTest : public CppUnit::TestFixture
 			CPPUNIT_ASSERT_EQUAL(std::string("alice\\40wonderland.lit"), escaped);
 
 			escaped = JID::getEscapedNode("\\& \" ' / <\\\\> @ :\\3a\\40");
-			CPPUNIT_ASSERT_EQUAL(std::string("\\5c\\26\\20\\22\\20\\27\\20\\2f\\20\\3c\\5c\\5c\\3e\\20\\40\\20\\3a\\5c3a\\5c40"), escaped);
+			CPPUNIT_ASSERT_EQUAL(std::string("\\\\26\\20\\22\\20\\27\\20\\2f\\20\\3c\\\\\\3e\\20\\40\\20\\3a\\5c3a\\5c40"), escaped);
+		}
+
+		void testGetEscapedNode_XEP106Examples() { // Arguments are (expected, actual), the order CPPUNIT_ASSERT_EQUAL reports them in.
+			CPPUNIT_ASSERT_EQUAL(std::string("space\\20cadet"), JID::getEscapedNode("space cadet"));
+			CPPUNIT_ASSERT_EQUAL(std::string("call\\20me\\20\\22ishmael\\22"), JID::getEscapedNode("call me \"ishmael\""));
+			CPPUNIT_ASSERT_EQUAL(std::string("at\\26t\\20guy"), JID::getEscapedNode("at&t guy"));
+			CPPUNIT_ASSERT_EQUAL(std::string("d\\27artagnan"), JID::getEscapedNode("d'artagnan"));
+			CPPUNIT_ASSERT_EQUAL(std::string("\\2f.fanboy"), JID::getEscapedNode("/.fanboy"));
+			CPPUNIT_ASSERT_EQUAL(std::string("\\3a\\3afoo\\3a\\3a"), JID::getEscapedNode("::foo::"));
+			CPPUNIT_ASSERT_EQUAL(std::string("\\3cfoo\\3e"), JID::getEscapedNode("<foo>"));
+			CPPUNIT_ASSERT_EQUAL(std::string("user\\40host"), JID::getEscapedNode("user@host"));
+			CPPUNIT_ASSERT_EQUAL(std::string("c\\3a\\net"), JID::getEscapedNode("c:\\net"));
+			CPPUNIT_ASSERT_EQUAL(std::string("c\\3a\\\\net"), JID::getEscapedNode("c:\\\\net"));
+			CPPUNIT_ASSERT_EQUAL(std::string("c\\3a\\cool\\20stuff"), JID::getEscapedNode("c:\\cool stuff"));
+			CPPUNIT_ASSERT_EQUAL(std::string("c\\3a\\5c5commas"), JID::getEscapedNode("c:\\5commas"));
 		}
 
 		void testGetUnescapedNode() {
@@ -328,6 +345,21 @@ class JIDTest : public CppUnit::TestFixture
 			CPPUNIT_ASSERT(testling.isValid());
 			CPPUNIT_ASSERT_EQUAL(input, testling.getUnescapedNode());
 		}
+
+		void testGetUnescapedNode_XEP106Examples() { // Each escaped node is parsed as part of a full JID and then unescaped.
+			CPPUNIT_ASSERT_EQUAL(std::string("space cadet"), JID("space\\20cadet@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("call me \"ishmael\""), JID("call\\20me\\20\\22ishmael\\22@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("at&t guy"), JID("at\\26t\\20guy@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("d'artagnan"), JID("d\\27artagnan@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("/.fanboy"), JID("\\2f.fanboy@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("::foo::"), JID("\\3a\\3afoo\\3a\\3a@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("<foo>"), JID("\\3cfoo\\3e@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("user@host"), JID("user\\40host@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("c:\\net"), JID("c\\3a\\net@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("c:\\\\net"), JID("c\\3a\\\\net@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("c:\\cool stuff"), JID("c\\3a\\cool\\20stuff@example.com").getUnescapedNode());
+			CPPUNIT_ASSERT_EQUAL(std::string("c:\\5commas"), JID("c\\3a\\5c5commas@example.com").getUnescapedNode());
+		}
 };
 
 CPPUNIT_TEST_SUITE_REGISTRATION(JIDTest);
-- 
cgit v0.10.2-6-g49f6