diff options
author | Remko Tronçon <git@el-tramo.be> | 2012-09-15 11:10:20 (GMT) |
---|---|---|
committer | Remko Tronçon <git@el-tramo.be> | 2012-09-15 12:05:19 (GMT) |
commit | ab402e9e044e6f141a318c9b082671c828b915f3 (patch) | |
tree | 06194776a0102007b9a5fa5a6650f60b648d813b /Swiften/Base | |
parent | e94541a7156f4ccceaf1a3f7135b9c89c067883b (diff) | |
download | swift-contrib-ab402e9e044e6f141a318c9b082671c828b915f3.zip swift-contrib-ab402e9e044e6f141a318c9b082671c828b915f3.tar.bz2 |
Added URL parser.
Diffstat (limited to 'Swiften/Base')
-rw-r--r-- | Swiften/Base/SConscript | 1 | ||||
-rw-r--r-- | Swiften/Base/URL.cpp | 165 | ||||
-rw-r--r-- | Swiften/Base/URL.h | 32 | ||||
-rw-r--r-- | Swiften/Base/UnitTest/URLTest.cpp | 84 |
4 files changed, 266 insertions, 16 deletions
diff --git a/Swiften/Base/SConscript b/Swiften/Base/SConscript
index a5f3592..754164b 100644
--- a/Swiften/Base/SConscript
+++ b/Swiften/Base/SConscript
@@ -13,5 +13,6 @@ objects = swiften_env.SwiftenObject([
 			"RandomGenerator.cpp",
 			"BoostRandomGenerator.cpp",
 			"sleep.cpp",
+			"URL.cpp",
 		])
 swiften_env.Append(SWIFTEN_OBJECTS = [objects])
diff --git a/Swiften/Base/URL.cpp b/Swiften/Base/URL.cpp
new file mode 100644
index 0000000..c36863f
--- /dev/null
+++ b/Swiften/Base/URL.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2010 Remko Tronçon
+ * Licensed under the GNU General Public License v3.
+ * See Documentation/Licenses/GPLv3.txt for more information.
+ */
+
+#include <Swiften/Base/URL.h>
+
+namespace Swift {
+
+int URL::getPortOrDefaultPort(const URL& url) { // explicit port if set, else 80 for http / 443 for https
+	if (url.getPort()) {
+		return *url.getPort();
+	}
+	else if (url.getScheme() == "http") {
+		return 80;
+	}
+	else if (url.getScheme() == "https") {
+		return 443;
+	}
+	else {
+		std::cerr << "Unknown scheme: " + url.getScheme() << std::endl;
+		return 80; // unknown schemes fall back to the http port after logging
+	}
+}
+
+URL URL::fromString(const std::string& urlString) { // parses scheme://[userinfo@]host[:port][/path]; returns an empty URL() on failure
+	size_t colonIndex = urlString.find(':');
+	if (colonIndex == std::string::npos) {
+		return URL();
+	}
+	std::string scheme = urlString.substr(0, colonIndex);
+
+	// Authority
+	if (urlString.size() > colonIndex + 2 && urlString[colonIndex+1] == '/' && urlString[colonIndex+2] == '/') {
+		size_t authorityIndex = colonIndex + 3;
+		size_t slashIndex = urlString.find('/', authorityIndex);
+		std::string authority;
+		std::string path;
+		if (slashIndex == std::string::npos) {
+			authority = urlString.substr(authorityIndex);
+			path = "";
+		}
+		else {
+			authority = urlString.substr(authorityIndex, slashIndex - authorityIndex);
+			path = unescape(urlString.substr(slashIndex));
+		}
+
+		size_t atIndex = authority.find('@');
+		std::string userInfo; // split off below, but not passed on to the returned URL
+		std::string hostAndPort;
+		if (atIndex != std::string::npos) {
+			userInfo = authority.substr(0, atIndex);
+			hostAndPort = authority.substr(atIndex + 1);
+		}
+		else {
+			userInfo = "";
+			hostAndPort = authority;
+		}
+
+		std::string host;
+		boost::optional<int> port;
+		colonIndex = hostAndPort.find(':');
+		if (colonIndex != std::string::npos) {
+			host = unescape(hostAndPort.substr(0, colonIndex));
+			try {
+				port = boost::lexical_cast<int>(hostAndPort.substr(colonIndex + 1));
+			}
+			catch (const boost::bad_lexical_cast&) {
+				return URL(); // a non-numeric port makes the whole URL invalid
+			}
+		}
+		else {
+			host = unescape(hostAndPort);
+		}
+
+		if (port) {
+			return URL(scheme, host, *port, path);
+		}
+		else {
+			return URL(scheme, host, path);
+		}
+	}
+	else {
+		// We don't support URLs without authorities yet
+		return URL();
+	}
+}
+
+// Disabling this code for now, since GCC4.5+boost1.42 (on ubuntu) seems to
+// result in a bug. Replacing it with naive code.
+#if 0
+// Should be in anonymous namespace, but older GCCs complain if we do that
+struct PercentEncodedCharacterFinder {
+template<typename Iterator>
+boost::iterator_range<Iterator> operator()(Iterator begin, Iterator end) {
+	boost::iterator_range<Iterator> r = boost::first_finder("%")(begin, end);
+	if (r.end() == end) {
+		return r;
+	}
+	else {
+		if (r.end() + 1 == end || r.end() + 2 == end) {
+			throw std::runtime_error("Incomplete escape character");
+		}
+		else {
+			r.advance_end(2);
+			return r;
+		}
+	}
+}
+};
+
+struct PercentUnencodeFormatter {
+template<typename FindResult>
+std::string operator()(const FindResult& match) const {
+	std::stringstream s;
+	s << std::hex << std::string(match.begin() + 1, match.end());
+	unsigned int value;
+	s >> value;
+	if (s.fail() || s.bad()) {
+		throw std::runtime_error("Invalid escape character");
+	}
+	unsigned char charValue = static_cast<unsigned char>(value);
+	return std::string(reinterpret_cast<const char*>(&charValue), 1);
+}
+};
+
+std::string unescape(const std::string& s) {
+	try {
+		return boost::find_format_all_copy(s, PercentEncodedCharacterFinder(), PercentUnencodeFormatter());
+	}
+	catch (const std::exception&) {
+		return "";
+	}
+}
+#endif
+
+std::string URL::unescape(const std::string& str) { // percent-decodes str; returns "" if any escape is truncated or unparsable
+	std::string result;
+	for (size_t i = 0; i < str.size(); ++i) {
+		if (str[i] == '%') {
+			if (i + 2 < str.size()) { // only the two hex digits at i+1 and i+2 are needed, so an escape ending the string is valid
+				std::stringstream s;
+				s << std::hex << str.substr(i+1, 2);
+				unsigned int value;
+				s >> value;
+				if (s.fail() || s.bad()) {
+					return "";
+				}
+				unsigned char charValue = static_cast<unsigned char>(value);
+				result += std::string(reinterpret_cast<const char*>(&charValue), 1);
+				i += 2; // skip the two hex digits; the loop increment skips the '%'
+			}
+			else {
+				return "";
+			}
+		}
+		else {
+			result += str[i];
+		}
+	}
+	return result;
+}
+
+}
diff --git a/Swiften/Base/URL.h b/Swiften/Base/URL.h
index 94dc4cb..9821ed5 100644
--- a/Swiften/Base/URL.h
+++ b/Swiften/Base/URL.h
@@ -8,32 +8,27 @@
 
 #include <string>
 #include <boost/lexical_cast.hpp>
+#include <boost/optional.hpp>
 
 namespace Swift {
 
 class URL {
 	public:
 
-		URL() : scheme(""), user(""), password(""), host(""), port(-1), path(""), isEmpty(true) {
+		URL() : scheme(""), user(""), password(""), host(""), path(""), empty(true) {
 		}
 
-		URL(const std::string& urlString) {
-			host = urlString;
-			port = 80;
-			scheme = "http";
-			isEmpty = false;
-			//FIXME
-		}
-
-		URL(const std::string& scheme, const std::string& host, int port, const std::string& path) : scheme(scheme), user(), password(), host(host), port(port), path(path), isEmpty(false) {
+		URL(const std::string& scheme, const std::string& host, int port, const std::string& path) : scheme(scheme), user(), password(), host(host), port(port), path(path), empty(false) {
+		}
 
+		URL(const std::string& scheme, const std::string& host, const std::string& path) : scheme(scheme), user(), password(), host(host), path(path), empty(false) {
 		}
 
 		/**
 		 * Whether the URL is empty.
 		 */
-		bool empty() const {
-			return isEmpty;
+		bool isEmpty() const {
+			return empty;
 		}
 
 		/**
@@ -53,7 +48,7 @@ class URL {
 		/**
 		 * Port number
 		 */
-		int getPort() const {
+		boost::optional<int> getPort() const {
 			return port;
 		}
 
@@ -65,7 +60,7 @@ class URL {
 		}
 
 		const std::string toString() const {
-			if (isEmpty) {
+			if (empty) {
 				return "";
 			}
 			std::string result = scheme + "://";
@@ -86,13 +81,18 @@ class URL {
 			return result;
 		}
 
+		static int getPortOrDefaultPort(const URL& url);
+		static URL fromString(const std::string&);
+		static std::string unescape(const std::string&);
+
+
 	private:
 		std::string scheme;
 		std::string user;
 		std::string password;
 		std::string host;
-		int port;
+		boost::optional<int> port;
 		std::string path;
-		bool isEmpty;
+		bool empty;
 };
 }
diff --git a/Swiften/Base/UnitTest/URLTest.cpp b/Swiften/Base/UnitTest/URLTest.cpp
new file mode 100644
index 0000000..4de1d33
--- /dev/null
+++ b/Swiften/Base/UnitTest/URLTest.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2012 Remko Tronçon
+ * Licensed under the GNU General Public License v3.
+ * See Documentation/Licenses/GPLv3.txt for more information.
+ */
+
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/extensions/TestFactoryRegistry.h>
+
+#include <Swiften/Base/URL.h>
+#include <boost/lexical_cast.hpp>
+
+using namespace Swift;
+
+class URLTest : public CppUnit::TestFixture {
+		CPPUNIT_TEST_SUITE(URLTest);
+		CPPUNIT_TEST(testFromString);
+		CPPUNIT_TEST(testFromString_WithoutPath);
+		CPPUNIT_TEST(testFromString_WithPort);
+		CPPUNIT_TEST(testFromString_WithPortWithoutPath);
+		CPPUNIT_TEST(testFromString_WithUserInfo);
+		CPPUNIT_TEST(testFromString_NonASCIIHost);
+		CPPUNIT_TEST(testFromString_NonASCIIPath);
+		CPPUNIT_TEST_SUITE_END();
+
+	public:
+		void testFromString() {
+			URL url = URL::fromString("http://foo.bar/baz/bam");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+			CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+			CPPUNIT_ASSERT(!url.getPort());
+			CPPUNIT_ASSERT_EQUAL(std::string("/baz/bam"), url.getPath());
+		}
+
+		void testFromString_WithoutPath() {
+			URL url = URL::fromString("http://foo.bar");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+			CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+			CPPUNIT_ASSERT(!url.getPort());
+			CPPUNIT_ASSERT_EQUAL(std::string(""), url.getPath());
+		}
+
+		void testFromString_WithPort() {
+			URL url = URL::fromString("http://foo.bar:1234/baz/bam");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+			CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+			CPPUNIT_ASSERT_EQUAL(1234, *url.getPort());
+			CPPUNIT_ASSERT_EQUAL(std::string("/baz/bam"), url.getPath());
+		}
+
+		void testFromString_WithPortWithoutPath() {
+			URL url = URL::fromString("http://foo.bar:1234");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+			CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+			CPPUNIT_ASSERT_EQUAL(1234, *url.getPort());
+			CPPUNIT_ASSERT_EQUAL(std::string(""), url.getPath());
+		}
+
+		void testFromString_WithUserInfo() {
+			URL url = URL::fromString("http://user:pass@foo.bar/baz/bam");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("http"), url.getScheme());
+			CPPUNIT_ASSERT_EQUAL(std::string("foo.bar"), url.getHost());
+			CPPUNIT_ASSERT_EQUAL(std::string("/baz/bam"), url.getPath());
+		}
+
+		void testFromString_NonASCIIHost() {
+			URL url = URL::fromString("http://www.tron%C3%A7on.be/baz/bam");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("www.tron\xc3\xa7on.be"), url.getHost());
+		}
+
+		void testFromString_NonASCIIPath() {
+			URL url = URL::fromString("http://foo.bar/baz/tron%C3%A7on/bam");
+
+			CPPUNIT_ASSERT_EQUAL(std::string("/baz/tron\xc3\xa7on/bam"), url.getPath());
+		}
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(URLTest);