summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRemko Tronçon <git@el-tramo.be>2012-09-15 11:10:20 (GMT)
committerRemko Tronçon <git@el-tramo.be>2012-09-15 12:05:19 (GMT)
commitab402e9e044e6f141a318c9b082671c828b915f3 (patch)
tree06194776a0102007b9a5fa5a6650f60b648d813b /Swiften/Base
parente94541a7156f4ccceaf1a3f7135b9c89c067883b (diff)
downloadswift-ab402e9e044e6f141a318c9b082671c828b915f3.zip
swift-ab402e9e044e6f141a318c9b082671c828b915f3.tar.bz2
Added URL parser.
Diffstat (limited to 'Swiften/Base')
-rw-r--r--Swiften/Base/SConscript1
-rw-r--r--Swiften/Base/URL.cpp165
-rw-r--r--Swiften/Base/URL.h32
-rw-r--r--Swiften/Base/UnitTest/URLTest.cpp84
4 files changed, 266 insertions, 16 deletions
diff --git a/Swiften/Base/SConscript b/Swiften/Base/SConscript
index a5f3592..754164b 100644
--- a/Swiften/Base/SConscript
+++ b/Swiften/Base/SConscript
@@ -13,5 +13,6 @@ objects = swiften_env.SwiftenObject([
"RandomGenerator.cpp",
"BoostRandomGenerator.cpp",
"sleep.cpp",
+ "URL.cpp",
])
swiften_env.Append(SWIFTEN_OBJECTS = [objects])
diff --git a/Swiften/Base/URL.cpp b/Swiften/Base/URL.cpp
new file mode 100644
index 0000000..c36863f
--- /dev/null
+++ b/Swiften/Base/URL.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2010 Remko Tronçon
+ * Licensed under the GNU General Public License v3.
+ * See Documentation/Licenses/GPLv3.txt for more information.
+ */
+
+#include <Swiften/Base/URL.h>
+
+namespace Swift {
+
+int URL::getPortOrDefaultPort(const URL& url) {
+ if (url.getPort()) {
+ return *url.getPort();
+ }
+ else if (url.getScheme() == "http") {
+ return 80;
+ }
+ else if (url.getScheme() == "https") {
+ return 443;
+ }
+ else {
+ std::cerr << "Unknown scheme: " + url.getScheme() << std::endl;
+ return 80;
+ }
+}
+
+URL URL::fromString(const std::string& urlString) {
+ size_t colonIndex = urlString.find(':');
+ if (colonIndex == std::string::npos) {
+ return URL();
+ }
+ std::string scheme = urlString.substr(0, colonIndex);
+
+ // Authority
+ if (urlString.size() > colonIndex + 2 && urlString[colonIndex+1] == '/' && urlString[colonIndex+2] == '/') {
+ size_t authorityIndex = colonIndex + 3;
+ size_t slashIndex = urlString.find('/', authorityIndex);
+ std::string authority;
+ std::string path;
+ if (slashIndex == std::string::npos) {
+ authority = urlString.substr(authorityIndex);
+ path = "";
+ }
+ else {
+ authority = urlString.substr(authorityIndex, slashIndex - authorityIndex);
+ path = unescape(urlString.substr(slashIndex));
+ }
+
+ size_t atIndex = authority.find('@');
+ std::string userInfo;
+ std::string hostAndPort;
+ if (atIndex != std::string::npos) {
+ userInfo = authority.substr(0, atIndex);
+ hostAndPort = authority.substr(atIndex + 1);
+ }
+ else {
+ userInfo = "";
+ hostAndPort = authority;
+ }
+
+ std::string host;
+ boost::optional<int> port;
+ colonIndex = hostAndPort.find(':');
+ if (colonIndex != std::string::npos) {
+ host = unescape(hostAndPort.substr(0, colonIndex));
+ try {
+ port = boost::lexical_cast<int>(hostAndPort.substr(colonIndex + 1));
+ }
+ catch (const boost::bad_lexical_cast&) {
+ return URL();
+ }
+ }
+ else {
+ host = unescape(hostAndPort);
+ }
+
+ if (port) {
+ return URL(scheme, host, *port, path);
+ }
+ else {
+ return URL(scheme, host, path);
+ }
+ }
+ else {
+ // We don't support URLs without authorities yet
+ return URL();
+ }
+}
+
+// Disabling this code for now, since GCC4.5+boost1.42 (on ubuntu) seems to
+// result in a bug. Replacing it with naive code.
+#if 0
+// Should be in anonymous namespace, but older GCCs complain if we do that
+struct PercentEncodedCharacterFinder {
+template<typename Iterator>
+boost::iterator_range<Iterator> operator()(Iterator begin, Iterator end) {
+ boost::iterator_range<Iterator> r = boost::first_finder("%")(begin, end);
+ if (r.end() == end) {
+ return r;
+ }
+ else {
+ if (r.end() + 1 == end || r.end() + 2 == end) {
+ throw std::runtime_error("Incomplete escape character");
+ }
+ else {
+ r.advance_end(2);
+ return r;
+ }
+ }
+}
+};
+
+struct PercentUnencodeFormatter {
+template<typename FindResult>
+std::string operator()(const FindResult& match) const {
+ std::stringstream s;
+ s << std::hex << std::string(match.begin() + 1, match.end());
+ unsigned int value;
+ s >> value;
+ if (s.fail() || s.bad()) {
+ throw std::runtime_error("Invalid escape character");
+ }
+ unsigned char charValue = static_cast<unsigned char>(value);
+ return std::string(reinterpret_cast<const char*>(&charValue), 1);
+}
+};
+
+std::string unescape(const std::string& s) {
+ try {
+ return boost::find_format_all_copy(s, PercentEncodedCharacterFinder(), PercentUnencodeFormatter());
+ }
+ catch (const std::exception&) {
+ return "";
+ }
+}
+#endif
+
+std::string URL::unescape(const std::string& str) {
+ std::string result;
+ for (size_t i = 0; i < str.size(); ++i) {
+ if (str[i] == '%') {
+ if (i + 3 < str.size()) {
+ std::stringstream s;
+ s << std::hex << str.substr(i+1, 2);
+ unsigned int value;
+ s >> value;
+ if (s.fail() || s.bad()) {
+ return "";
+ }
+ unsigned char charValue = static_cast<unsigned char>(value);
+ result += std::string(reinterpret_cast<const char*>(&charValue), 1);
+ i += 2;
+ }
+ else {
+ return "";
+ }
+ }
+ else {
+ result += str[i];
+ }
+ }
+ return result;
+}
+
+}
diff --git a/Swiften/Base/URL.h b/Swiften/Base/URL.h
index 94dc4cb..9821ed5 100644
--- a/Swiften/Base/URL.h
+++ b/Swiften/Base/URL.h
@@ -8,32 +8,27 @@
#include <string>
#include <boost/lexical_cast.hpp>
+#include <boost/optional.hpp>
namespace Swift {
class URL {
public:
- URL() : scheme(""), user(""), password(""), host(""), port(-1), path(""), isEmpty(true) {
+ URL() : scheme(""), user(""), password(""), host(""), path(""), empty(true) {
}
- URL(const std::string& urlString) {
- host = urlString;
- port = 80;
- scheme = "http";
- isEmpty = false;
- //FIXME
- }
-
- URL(const std::string& scheme, const std::string& host, int port, const std::string& path) : scheme(scheme), user(), password(), host(host), port(port), path(path), isEmpty(false) {
+ URL(const std::string& scheme, const std::string& host, int port, const std::string& path) : scheme(scheme), user(), password(), host(host), port(port), path(path), empty(false) {
+ }
+ URL(const std::string& scheme, const std::string& host, const std::string& path) : scheme(scheme), user(), password(), host(host), path(path), empty(false) {
}
/**
* Whether the URL is empty.
*/
- bool empty() const {
- return isEmpty;
+ bool isEmpty() const {
+ return empty;
}
/**
@@ -53,7 +48,7 @@ class URL {
/**
* Port number
*/
- int getPort() const {
+ boost::optional<int> getPort() const {
return port;
}
@@ -65,7 +60,7 @@ class URL {
}
const std::string toString() const {
- if (isEmpty) {
+ if (empty) {
return "";
}
std::string result = scheme + "://";
@@ -86,13 +81,18 @@ class URL {
return result;
}
+ static int getPortOrDefaultPort(const URL& url);
+ static URL fromString(const std::string&);
+ static std::string unescape(const std::string&);
+
+
private:
std::string scheme;
std::string user;
std::string password;
std::string host;
- int port;
+ boost::optional<int> port;
std::string path;
- bool isEmpty;
+ bool empty;
};
}
diff --git a/Swiften/Base/UnitTest/URLTest.cpp b/Swiften/Base/UnitTest/URLTest.cpp
new file mode 100644
index 0000000..4de1d33
--- /dev/null
+++ b/Swiften/Base/UnitTest/URLTest.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2012 Remko Tronçon
+ * Licensed under the GNU General Public License v3.
+ * See Documentation/Licenses/GPLv3.txt for more information.
+ */
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/extensions/TestFactoryRegistry.h>
+
+#include <Swiften/Base/URL.h>
+#include <boost/lexical_cast.hpp>
+
+using namespace Swift;
+
+class URLTest : public CppUnit::TestFixture {
+ CPPUNIT_TEST_SUITE(URLTest);
+ CPPUNIT_TEST(testFromString);
+ CPPUNIT_TEST(testFromString_WithoutPath);
+ CPPUNIT_TEST(testFromString_WithPort);
+ CPPUNIT_TEST(testFromString_WithPortWithoutPath);
+ CPPUNIT_TEST(testFromString_WithUserInfo);
+ CPPUNIT_TEST(testFromString_NonASCIIHost);
+ CPPUNIT_TEST(testFromString_NonASCIIPath);
+ CPPUNIT_TEST_SUITE_END();
+
+ public:
+ void testFromString() {
+ URL url = URL::fromString("http://foo.bar/baz/bam");
+
+ CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+ CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+ CPPUNIT_ASSERT(!url.getPort());
+ CPPUNIT_ASSERT_EQUAL(std::string("/baz/bam"), url.getPath());
+ }
+
+ void testFromString_WithoutPath() {
+ URL url = URL::fromString("http://foo.bar");
+
+ CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+ CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+ CPPUNIT_ASSERT(!url.getPort());
+ CPPUNIT_ASSERT_EQUAL(std::string(""), url.getPath());
+ }
+
+ void testFromString_WithPort() {
+ URL url = URL::fromString("http://foo.bar:1234/baz/bam");
+
+ CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+ CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+ CPPUNIT_ASSERT_EQUAL(1234, *url.getPort());
+ CPPUNIT_ASSERT_EQUAL(std::string("/baz/bam"), url.getPath());
+ }
+
+ void testFromString_WithPortWithoutPath() {
+ URL url = URL::fromString("http://foo.bar:1234");
+
+ CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+ CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+ CPPUNIT_ASSERT_EQUAL(1234, *url.getPort());
+ CPPUNIT_ASSERT_EQUAL(std::string(""), url.getPath());
+ }
+
+ void testFromString_WithUserInfo() {
+ URL url = URL::fromString("http://user:pass@foo.bar/baz/bam");
+
+ CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+ CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+ CPPUNIT_ASSERT_EQUAL(std::string("/baz/bam"), url.getPath());
+ }
+
+ void testFromString_NonASCIIHost() {
+ URL url = URL::fromString("http://www.tron%C3%A7on.be/baz/bam");
+
+ CPPUNIT_ASSERT_EQUAL(std::string("www.tron\xc3\xa7on.be"), url.getHost());
+ }
+
+ void testFromString_NonASCIIPath() {
+ URL url = URL::fromString("http://foo.bar/baz/tron%C3%A7on/bam");
+
+ CPPUNIT_ASSERT_EQUAL(std::string("/baz/tron\xc3\xa7on/bam"), url.getPath());
+ }
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(URLTest);