From ab402e9e044e6f141a318c9b082671c828b915f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Remko=20Tron=C3=A7on?= <git@el-tramo.be>
Date: Sat, 15 Sep 2012 13:10:20 +0200
Subject: Added URL parser.


diff --git a/SwifTools/URIHandler/XMPPURI.cpp b/SwifTools/URIHandler/XMPPURI.cpp
index cb81391..cf99ae6 100644
--- a/SwifTools/URIHandler/XMPPURI.cpp
+++ b/SwifTools/URIHandler/XMPPURI.cpp
@@ -6,6 +6,7 @@
 
 #include <SwifTools/URIHandler/XMPPURI.h>
 
+#include <Swiften/Base/URL.h>
 #include <boost/algorithm/string/predicate.hpp>
 #include <boost/algorithm/string/find_format.hpp>
 #include <boost/algorithm/string/formatter.hpp>
@@ -18,83 +19,6 @@
 
 using namespace Swift;
 
-// Disabling this code for now, since GCC4.5+boost1.42 (on ubuntu) seems to
-// result in a bug. Replacing it with naive code.
-#if 0
-// Should be in anonymous namespace, but older GCCs complain if we do that
-struct PercentEncodedCharacterFinder {
-	template<typename Iterator>
-	boost::iterator_range<Iterator> operator()(Iterator begin, Iterator end) {
-		boost::iterator_range<Iterator> r = boost::first_finder("%")(begin, end);
-		if (r.end() == end) {
-			return r;
-		}
-		else {
-			if (r.end() + 1 == end || r.end() + 2 == end) {
-				throw std::runtime_error("Incomplete escape character");
-			}
-			else {
-				r.advance_end(2);
-				return r;
-			}
-		}
-	}
-};
-
-struct PercentUnencodeFormatter {
-	template<typename FindResult>
-	std::string operator()(const FindResult& match) const {
-		std::stringstream s;
-		s << std::hex << std::string(match.begin() + 1, match.end());
-		unsigned int value;
-		s >> value;
-		if (s.fail() || s.bad()) {
-			throw std::runtime_error("Invalid escape character");
-		}
-		unsigned char charValue = static_cast<unsigned char>(value);
-		return std::string(reinterpret_cast<const char*>(&charValue), 1);
-	}
-};
-
-namespace {
-	std::string unescape(const std::string& s) {
-		try {
-			return boost::find_format_all_copy(s, PercentEncodedCharacterFinder(), PercentUnencodeFormatter());
-		}
-		catch (const std::exception&) {
-			return "";
-		}
-	}
-}
-#endif
-namespace {
-	std::string unescape(const std::string& str) {
-		std::string result;
-		for (size_t i = 0; i < str.size(); ++i) {
-			if (str[i] == '%') {
-				if (i + 3 < str.size()) {
-					std::stringstream s;
-					s << std::hex << str.substr(i+1, 2);
-					unsigned int value;
-					s >> value;
-					if (s.fail() || s.bad()) {
-						return "";
-					}
-					unsigned char charValue = static_cast<unsigned char>(value);
-					result += std::string(reinterpret_cast<const char*>(&charValue), 1);
-					i += 2;
-				}
-				else {
-					return "";
-				}
-			}
-			else {
-				result += str[i];
-			}
-		}
-		return result;
-	}
-}
 
 XMPPURI::XMPPURI() {
 }
@@ -110,7 +34,7 @@ XMPPURI XMPPURI::fromString(const std::string& s) {
 		// Parse authority
 		if (boost::starts_with(uri, "//")) {
 			size_t i = uri.find_first_of("/#?", 2);
-			result.setAuthority(JID(unescape(uri.substr(2, i - 2))));
+			result.setAuthority(JID(URL::unescape(uri.substr(2, i - 2))));
 			if (i == uri.npos) {
 				uri = "";
 				parsePath = parseQuery = parseFragment = false;
@@ -129,7 +53,7 @@ XMPPURI XMPPURI::fromString(const std::string& s) {
 		// Parse path
 		if (parsePath) {
 			size_t i = uri.find_first_of("#?");
-			result.setPath(JID(unescape(uri.substr(0, i))));
+			result.setPath(JID(URL::unescape(uri.substr(0, i))));
 			if (i == uri.npos) {
 				uri = "";
 				parseQuery = parseFragment = false;
@@ -153,14 +77,14 @@ XMPPURI XMPPURI::fromString(const std::string& s) {
 	    		std::vector<std::string> keyValue;
 	    		boost::split(keyValue, *it, boost::is_any_of("="));
 	    		if (keyValue.size() == 1) {
-	    			result.addQueryParameter(unescape(keyValue[0]), "");
+	    			result.addQueryParameter(URL::unescape(keyValue[0]), "");
 	    		}
 	    		else if (keyValue.size() >= 2) {
-	    			result.addQueryParameter(unescape(keyValue[0]), unescape(keyValue[1]));
+	    			result.addQueryParameter(URL::unescape(keyValue[0]), URL::unescape(keyValue[1]));
 	    		}
 	    	}
 	    	else {
-	    		result.setQueryType(unescape(boost::copy_range<std::string>(*it)));
+	    		result.setQueryType(URL::unescape(boost::copy_range<std::string>(*it)));
 	    		haveType = true;
 	    	}
 	    }
@@ -169,7 +93,7 @@ XMPPURI XMPPURI::fromString(const std::string& s) {
 
 		// Parse fragment
 		if (parseFragment) {
-			result.setFragment(unescape(uri));
+			result.setFragment(URL::unescape(uri));
 		}
 	}
 	return result;
diff --git a/SwifTools/URIHandler/XMPPURI.h b/SwifTools/URIHandler/XMPPURI.h
index 266b79b..36bfc41 100644
--- a/SwifTools/URIHandler/XMPPURI.h
+++ b/SwifTools/URIHandler/XMPPURI.h
@@ -12,6 +12,7 @@
 #include <Swiften/JID/JID.h>
 
 namespace Swift {
+	// TODO: Implement using Base/URL
 	class XMPPURI {
 		public:
 			XMPPURI();
diff --git a/Swift/QtUI/QtConnectionSettingsWindow.cpp b/Swift/QtUI/QtConnectionSettingsWindow.cpp
index b5afe50..56ac65f 100644
--- a/Swift/QtUI/QtConnectionSettingsWindow.cpp
+++ b/Swift/QtUI/QtConnectionSettingsWindow.cpp
@@ -48,8 +48,7 @@ QtConnectionSettingsWindow::QtConnectionSettingsWindow(const ClientOptions& opti
 	ui.manual_proxyType->setCurrentIndex(0);
 
 	ClientOptions defaults;
-	if (options.boshURL.empty()) {
-		int i = 0;
+	if (options.boshURL.isEmpty()) {
 		bool isDefault = options.useStreamCompression == defaults.useStreamCompression;
 		isDefault &= options.useTLS == defaults.useTLS;
 		isDefault &= options.allowPLAINWithoutTLS == defaults.allowPLAINWithoutTLS;
@@ -83,7 +82,7 @@ QtConnectionSettingsWindow::QtConnectionSettingsWindow(const ClientOptions& opti
 	} else {
 		ui.connectionMethod->setCurrentIndex(2);
 		ui.bosh_uri->setText(P2QSTRING(options.boshURL.toString()));
-		if (!options.boshHTTPConnectProxyURL.empty()) {
+		if (!options.boshHTTPConnectProxyURL.isEmpty()) {
 			ui.bosh_manualProxy->setChecked(true);
 			ui.bosh_manualProxyHost->setText(P2QSTRING(options.boshHTTPConnectProxyURL.getHost()));
 			ui.bosh_manualProxyPort->setText(P2QSTRING(boost::lexical_cast<std::string>(options.boshHTTPConnectProxyURL.getPort())));
@@ -125,7 +124,7 @@ ClientOptions QtConnectionSettingsWindow::getOptions() {
 		}
 		else {
 			/* BOSH */
-			options.boshURL = URL(Q2PSTRING(ui.bosh_uri->text()));
+			options.boshURL = URL::fromString(Q2PSTRING(ui.bosh_uri->text()));
 			if (ui.bosh_manualProxy->isChecked()) {
 				std::string host = Q2PSTRING(ui.bosh_manualProxyHost->text());
 				int port = 80;
diff --git a/Swiften/Base/SConscript b/Swiften/Base/SConscript
index a5f3592..754164b 100644
--- a/Swiften/Base/SConscript
+++ b/Swiften/Base/SConscript
@@ -13,5 +13,6 @@ objects = swiften_env.SwiftenObject([
 			"RandomGenerator.cpp",
 			"BoostRandomGenerator.cpp",
 			"sleep.cpp",
+			"URL.cpp",
 		])
 swiften_env.Append(SWIFTEN_OBJECTS = [objects])
diff --git a/Swiften/Base/URL.cpp b/Swiften/Base/URL.cpp
new file mode 100644
index 0000000..c36863f
--- /dev/null
+++ b/Swiften/Base/URL.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2010 Remko Tronçon
+ * Licensed under the GNU General Public License v3.
+ * See Documentation/Licenses/GPLv3.txt for more information.
+ */
+
+#include <Swiften/Base/URL.h>
+
+namespace Swift {
+
+// Explicit port if set; else 443 for https, 80 otherwise (unknown schemes are logged).
+int URL::getPortOrDefaultPort(const URL& url) {
+	const boost::optional<int> explicitPort = url.getPort();
+	if (explicitPort) {
+		return *explicitPort;
+	}
+	const std::string scheme = url.getScheme();
+	if (scheme == "https") {
+		return 443;
+	}
+	if (scheme != "http") {
+		std::cerr << "Unknown scheme: " + scheme << std::endl;
+	}
+	return 80;
+}
+
+/**
+ * Parses a URL of the form scheme://[userinfo@]host[:port][/path].
+ *
+ * The host and the path are percent-decoded with unescape(). Userinfo is
+ * recognized but dropped: neither URL constructor used here accepts it.
+ *
+ * Returns an empty URL when urlString has no ':' after the scheme, has no
+ * "//" authority, or has a port that is not a number in 0-65535.
+ */
+URL URL::fromString(const std::string& urlString) {
+	const size_t schemeEnd = urlString.find(':');
+	if (schemeEnd == std::string::npos) {
+		return URL();
+	}
+	const std::string scheme = urlString.substr(0, schemeEnd);
+
+	// We don't support URLs without authorities yet
+	if (urlString.compare(schemeEnd + 1, 2, "//") != 0) {
+		return URL();
+	}
+
+	// The authority runs up to the first '/' after "//"; the rest is the path
+	const size_t authorityBegin = schemeEnd + 3;
+	const size_t pathBegin = urlString.find('/', authorityBegin);
+	std::string authority;
+	std::string path;
+	if (pathBegin == std::string::npos) {
+		authority = urlString.substr(authorityBegin);
+	}
+	else {
+		authority = urlString.substr(authorityBegin, pathBegin - authorityBegin);
+		path = unescape(urlString.substr(pathBegin));
+	}
+
+	// Skip the userinfo ("user:pass@"), so that its ':' is not mistaken for
+	// the port separator
+	const size_t atIndex = authority.find('@');
+	std::string hostAndPort = authority;
+	if (atIndex != std::string::npos) {
+		hostAndPort = authority.substr(atIndex + 1);
+	}
+
+	const size_t portSeparator = hostAndPort.find(':');
+	if (portSeparator == std::string::npos) {
+		return URL(scheme, unescape(hostAndPort), path);
+	}
+
+	int port = 0;
+	try {
+		port = boost::lexical_cast<int>(hostAndPort.substr(portSeparator + 1));
+	}
+	catch (const boost::bad_lexical_cast&) {
+		return URL();
+	}
+	// lexical_cast also accepts values such as "-1" or "99999", which are
+	// not valid port numbers
+	if (port < 0 || port > 65535) {
+		return URL();
+	}
+
+	return URL(scheme, unescape(hostAndPort.substr(0, portSeparator)), port, path);
+}
+
+// Disabling this code for now, since GCC4.5+boost1.42 (on ubuntu) seems to
+// result in a bug. Replacing it with naive code (see URL::unescape below).
+#if 0
+// Should be in anonymous namespace, but older GCCs complain if we do that
+struct PercentEncodedCharacterFinder { // finder functor: locates "%" plus the two characters after it
+template<typename Iterator>
+boost::iterator_range<Iterator> operator()(Iterator begin, Iterator end) {
+	boost::iterator_range<Iterator> r = boost::first_finder("%")(begin, end); // search [begin, end) for "%"
+	if (r.end() == end) { // the result already ends at the end of the input: nothing to extend
+		return r;
+	}
+	else {
+		if (r.end() + 1 == end || r.end() + 2 == end) { // NOTE(review): "+ 2 == end" means exactly two characters remain, i.e. a complete escape at the end of the input - looks off by one, confirm
+			throw std::runtime_error("Incomplete escape character");
+		}
+		else {
+			r.advance_end(2); // widen the match to cover the two characters following "%"
+			return r;
+		}
+	}
+}
+};
+
+struct PercentUnencodeFormatter { // formatter functor: turns one matched escape into the byte it encodes
+template<typename FindResult>
+std::string operator()(const FindResult& match) const {
+	std::stringstream s;
+	s << std::hex << std::string(match.begin() + 1, match.end()); // skip the leading "%"
+	unsigned int value;
+	s >> value; // read the remaining characters as a hexadecimal number
+	if (s.fail() || s.bad()) {
+		throw std::runtime_error("Invalid escape character");
+	}
+	unsigned char charValue = static_cast<unsigned char>(value);
+	return std::string(reinterpret_cast<const char*>(&charValue), 1); // one-byte replacement string
+}
+};
+
+std::string unescape(const std::string& s) { // NOTE(review): free function, whereas the live code below is URL::unescape - reconcile before re-enabling
+	try {
+		return boost::find_format_all_copy(s, PercentEncodedCharacterFinder(), PercentUnencodeFormatter());
+	}
+	catch (const std::exception&) { // any error thrown by the finder or the formatter yields an empty result
+		return "";
+	}
+}
+#endif
+
+// Returns the value of the hexadecimal digit c, or -1 if c is not one.
+static int hexDigitValue(char c) {
+	if (c >= '0' && c <= '9') { return c - '0'; }
+	if (c >= 'a' && c <= 'f') { return c - 'a' + 10; }
+	if (c >= 'A' && c <= 'F') { return c - 'A' + 10; }
+	return -1;
+}
+
+// Decodes %XX escapes in str; returns "" on a truncated or non-hex escape.
+std::string URL::unescape(const std::string& str) {
+	std::string result;
+	for (size_t i = 0; i < str.size(); ++i) {
+		if (str[i] != '%') {
+			result += str[i];
+			continue;
+		}
+		// Both digits must exist; the escape may be the last 3 characters
+		if (i + 2 >= str.size()) { return ""; }
+		const int high = hexDigitValue(str[i + 1]);
+		const int low = hexDigitValue(str[i + 2]);
+		if (high < 0 || low < 0) { return ""; }
+		result += static_cast<char>(static_cast<unsigned char>(high * 16 + low));
+		i += 2;
+	}
+	return result;
+}
+
+}
diff --git a/Swiften/Base/URL.h b/Swiften/Base/URL.h
index 94dc4cb..9821ed5 100644
--- a/Swiften/Base/URL.h
+++ b/Swiften/Base/URL.h
@@ -8,32 +8,27 @@
 
 #include <string>
 #include <boost/lexical_cast.hpp>
+#include <boost/optional.hpp>
 
 namespace Swift {
 
 class URL {
 	public:
 
-		URL() : scheme(""), user(""), password(""), host(""), port(-1), path(""), isEmpty(true) {
+		URL() : scheme(""), user(""), password(""), host(""), path(""), empty(true) {
 		}
 
-		URL(const std::string& urlString) {
-			host = urlString;
-			port = 80;
-			scheme = "http";
-			isEmpty = false;
-			//FIXME
-		}
-
-		URL(const std::string& scheme, const std::string& host, int port, const std::string& path) : scheme(scheme), user(), password(), host(host), port(port), path(path), isEmpty(false) {
+		URL(const std::string& scheme, const std::string& host, int port, const std::string& path) : scheme(scheme), user(), password(), host(host), port(port), path(path), empty(false) {
+		}	
 
+		URL(const std::string& scheme, const std::string& host, const std::string& path) : scheme(scheme), user(), password(), host(host), path(path), empty(false) {
 		}	
 
 		/**
 		 * Whether the URL is empty.
 		 */
-		bool empty() const {
-			return isEmpty;
+		bool isEmpty() const {
+			return empty;
 		}
 
 		/**
@@ -53,7 +48,7 @@ class URL {
 		/**
 		 * Port number
 		 */
-		int getPort() const {
+		boost::optional<int> getPort() const {
 			return port;
 		}
 
@@ -65,7 +60,7 @@ class URL {
 		}
 
 		const std::string toString() const {
-			if (isEmpty) {
+			if (empty) {
 				return "";
 			}
 			std::string result = scheme + "://";
@@ -86,13 +81,18 @@ class URL {
 			return result;
 		}
 
+		static int getPortOrDefaultPort(const URL& url);
+		static URL fromString(const std::string&);
+		static std::string unescape(const std::string&);
+
+
 	private:
 		std::string scheme;
 		std::string user;
 		std::string password;
 		std::string host;
-		int port;
+		boost::optional<int> port;
 		std::string path;
-		bool isEmpty;
+		bool empty;
 	};
 }
diff --git a/Swiften/Base/UnitTest/URLTest.cpp b/Swiften/Base/UnitTest/URLTest.cpp
new file mode 100644
index 0000000..4de1d33
--- /dev/null
+++ b/Swiften/Base/UnitTest/URLTest.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2012 Remko Tronçon
+ * Licensed under the GNU General Public License v3.
+ * See Documentation/Licenses/GPLv3.txt for more information.
+ */
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/extensions/TestFactoryRegistry.h>
+
+#include <Swiften/Base/URL.h>
+#include <boost/lexical_cast.hpp>
+
+using namespace Swift;
+
+// Checks how URL::fromString() splits a URL into scheme, host, port and path.
+class URLTest : public CppUnit::TestFixture {
+		CPPUNIT_TEST_SUITE(URLTest);
+		CPPUNIT_TEST(testFromString);
+		CPPUNIT_TEST(testFromString_WithoutPath);
+		CPPUNIT_TEST(testFromString_WithPort);
+		CPPUNIT_TEST(testFromString_WithPortWithoutPath);
+		CPPUNIT_TEST(testFromString_WithUserInfo);
+		CPPUNIT_TEST(testFromString_NonASCIIHost);
+		CPPUNIT_TEST(testFromString_NonASCIIPath);
+		CPPUNIT_TEST_SUITE_END();
+
+	public:
+		void testFromString() {
+			URL url = URL::fromString("http://foo.bar/baz/bam");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+			CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+			CPPUNIT_ASSERT(!url.getPort());
+			CPPUNIT_ASSERT_EQUAL(std::string("/baz/bam"), url.getPath());
+		}
+
+		void testFromString_WithoutPath() {
+			URL url = URL::fromString("http://foo.bar");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+			CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+			CPPUNIT_ASSERT(!url.getPort());
+			CPPUNIT_ASSERT_EQUAL(std::string(""), url.getPath());
+		}
+
+		void testFromString_WithPort() {
+			URL url = URL::fromString("http://foo.bar:1234/baz/bam");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+			CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+			CPPUNIT_ASSERT_EQUAL(1234, *url.getPort());
+			CPPUNIT_ASSERT_EQUAL(std::string("/baz/bam"), url.getPath());
+		}
+
+		void testFromString_WithPortWithoutPath() {
+			URL url = URL::fromString("http://foo.bar:1234");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+			CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+			CPPUNIT_ASSERT_EQUAL(1234, *url.getPort());
+			CPPUNIT_ASSERT_EQUAL(std::string(""), url.getPath());
+		}
+
+		void testFromString_WithUserInfo() {
+			URL url = URL::fromString("http://user:pass@foo.bar/baz/bam");
+			// The ':' inside the userinfo must not be taken for a port separator
+			CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+			CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+			CPPUNIT_ASSERT(!url.getPort());
+			CPPUNIT_ASSERT_EQUAL(std::string("/baz/bam"), url.getPath());
+		}
+
+		void testFromString_NonASCIIHost() {
+			URL url = URL::fromString("http://www.tron%C3%A7on.be/baz/bam");
+			CPPUNIT_ASSERT_EQUAL(std::string("www.tron\xc3\xa7on.be"), url.getHost());
+		}
+
+		void testFromString_NonASCIIPath() {
+			URL url = URL::fromString("http://foo.bar/baz/tron%C3%A7on/bam");
+			CPPUNIT_ASSERT_EQUAL(std::string("/baz/tron\xc3\xa7on/bam"), url.getPath());
+		}
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(URLTest);
diff --git a/Swiften/Client/CoreClient.cpp b/Swiften/Client/CoreClient.cpp
index 485cd4a..5e19b4b 100644
--- a/Swiften/Client/CoreClient.cpp
+++ b/Swiften/Client/CoreClient.cpp
@@ -109,7 +109,7 @@ void CoreClient::connect(const ClientOptions& o) {
 	std::string host = o.manualHostname.empty() ?  jid_.getDomain() : o.manualHostname;
 	int port = o.manualPort;
 	assert(!connector_);
-	if (options.boshURL.empty()) {
+	if (options.boshURL.isEmpty()) {
 		connector_ = boost::make_shared<ChainedConnector>(host, port, o.manualHostname.empty(), networkFactories->getDomainNameResolver(), connectionFactories, networkFactories->getTimerFactory());
 		connector_->onConnectFinished.connect(boost::bind(&CoreClient::handleConnectorFinished, this, _1, _2));
 		connector_->setTimeoutMilliseconds(2*60*1000);
diff --git a/Swiften/Network/BOSHConnectionPool.cpp b/Swiften/Network/BOSHConnectionPool.cpp
index d0aa622..83310fb 100644
--- a/Swiften/Network/BOSHConnectionPool.cpp
+++ b/Swiften/Network/BOSHConnectionPool.cpp
@@ -29,12 +29,12 @@ BOSHConnectionPool::BOSHConnectionPool(const URL& boshURL, DomainNameResolver* r
 		restartCount(0),
 		pendingRestart(false) {
 
-	if (!boshHTTPConnectProxyURL.empty()) {
+	if (!boshHTTPConnectProxyURL.isEmpty()) {
 		if (boshHTTPConnectProxyURL.getScheme() == "https") {
 			connectionFactory = new TLSConnectionFactory(tlsFactory, connectionFactory);
 			myConnectionFactories.push_back(connectionFactory);
 		}
-		connectionFactory = new HTTPConnectProxiedConnectionFactory(realResolver, connectionFactory, timerFactory, boshHTTPConnectProxyURL.getHost(), boshHTTPConnectProxyURL.getPort(), boshHTTPConnectProxyAuthID, boshHTTPConnectProxyAuthPassword);
+		connectionFactory = new HTTPConnectProxiedConnectionFactory(realResolver, connectionFactory, timerFactory, boshHTTPConnectProxyURL.getHost(), URL::getPortOrDefaultPort(boshHTTPConnectProxyURL), boshHTTPConnectProxyAuthID, boshHTTPConnectProxyAuthPassword);
 	}
 	if (boshURL.getScheme() == "https") {
 		connectionFactory = new TLSConnectionFactory(tlsFactory, connectionFactory);
@@ -210,7 +210,7 @@ void BOSHConnectionPool::handleConnectionDisconnected(bool error, BOSHConnection
 }
 
 boost::shared_ptr<BOSHConnection> BOSHConnectionPool::createConnection() {
-	Connector::ref connector = Connector::create(boshURL.getHost(), boshURL.getPort(), false, resolver, connectionFactory, timerFactory);
+	Connector::ref connector = Connector::create(boshURL.getHost(), URL::getPortOrDefaultPort(boshURL), false, resolver, connectionFactory, timerFactory);
 	BOSHConnection::ref connection = BOSHConnection::create(boshURL, connector, xmlParserFactory);
 	connection->onXMPPDataRead.connect(boost::bind(&BOSHConnectionPool::handleDataRead, this, _1));
 	connection->onSessionStarted.connect(boost::bind(&BOSHConnectionPool::handleSessionStarted, this, _1, _2));
diff --git a/Swiften/SConscript b/Swiften/SConscript
index 0d14f77..7ad2a38 100644
--- a/Swiften/SConscript
+++ b/Swiften/SConscript
@@ -279,6 +279,7 @@ if env["SCONS_STAGE"] == "build" :
 			File("Base/UnitTest/StringTest.cpp"),
 			File("Base/UnitTest/DateTimeTest.cpp"),
 			File("Base/UnitTest/ByteArrayTest.cpp"),
+			File("Base/UnitTest/URLTest.cpp"),
 			File("Chat/UnitTest/ChatStateNotifierTest.cpp"),
 #		File("Chat/UnitTest/ChatStateTrackerTest.cpp"),
 			File("Client/UnitTest/ClientSessionTest.cpp"),
-- 
cgit v0.10.2-6-g49f6