diff options
author | Remko Tronçon <git@el-tramo.be> | 2012-09-15 11:10:20 (GMT) |
---|---|---|
committer | Remko Tronçon <git@el-tramo.be> | 2012-09-15 12:05:19 (GMT) |
commit | ab402e9e044e6f141a318c9b082671c828b915f3 (patch) | |
tree | 06194776a0102007b9a5fa5a6650f60b648d813b /Swiften/Base/URL.cpp | |
parent | e94541a7156f4ccceaf1a3f7135b9c89c067883b (diff) | |
download | swift-contrib-ab402e9e044e6f141a318c9b082671c828b915f3.zip swift-contrib-ab402e9e044e6f141a318c9b082671c828b915f3.tar.bz2 |
Added URL parser.
Diffstat (limited to 'Swiften/Base/URL.cpp')
-rw-r--r-- | Swiften/Base/URL.cpp | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/Swiften/Base/URL.cpp b/Swiften/Base/URL.cpp new file mode 100644 index 0000000..c36863f --- /dev/null +++ b/Swiften/Base/URL.cpp @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2010 Remko Tronçon + * Licensed under the GNU General Public License v3. + * See Documentation/Licenses/GPLv3.txt for more information. + */ + +#include <Swiften/Base/URL.h> + +namespace Swift { + +int URL::getPortOrDefaultPort(const URL& url) { + if (url.getPort()) { + return *url.getPort(); + } + else if (url.getScheme() == "http") { + return 80; + } + else if (url.getScheme() == "https") { + return 443; + } + else { + std::cerr << "Unknown scheme: " + url.getScheme() << std::endl; + return 80; + } +} + +URL URL::fromString(const std::string& urlString) { + size_t colonIndex = urlString.find(':'); + if (colonIndex == std::string::npos) { + return URL(); + } + std::string scheme = urlString.substr(0, colonIndex); + + // Authority + if (urlString.size() > colonIndex + 2 && urlString[colonIndex+1] == '/' && urlString[colonIndex+2] == '/') { + size_t authorityIndex = colonIndex + 3; + size_t slashIndex = urlString.find('/', authorityIndex); + std::string authority; + std::string path; + if (slashIndex == std::string::npos) { + authority = urlString.substr(authorityIndex); + path = ""; + } + else { + authority = urlString.substr(authorityIndex, slashIndex - authorityIndex); + path = unescape(urlString.substr(slashIndex)); + } + + size_t atIndex = authority.find('@'); + std::string userInfo; + std::string hostAndPort; + if (atIndex != std::string::npos) { + userInfo = authority.substr(0, atIndex); + hostAndPort = authority.substr(atIndex + 1); + } + else { + userInfo = ""; + hostAndPort = authority; + } + + std::string host; + boost::optional<int> port; + colonIndex = hostAndPort.find(':'); + if (colonIndex != std::string::npos) { + host = unescape(hostAndPort.substr(0, colonIndex)); + try { + port = boost::lexical_cast<int>(hostAndPort.substr(colonIndex + 1)); + } + catch (const boost::bad_lexical_cast&) { + return URL(); + } + } + else { + host = unescape(hostAndPort); + } + + if (port) { + return URL(scheme, host, *port, path); + } + else { + return URL(scheme, host, path); + } + } + else { + // We don't support URLs without authorities yet + return URL(); + } +} + +// Disabling this code for now, since GCC4.5+boost1.42 (on ubuntu) seems to +// result in a bug. Replacing it with naive code. +#if 0 +// Should be in anonymous namespace, but older GCCs complain if we do that +struct PercentEncodedCharacterFinder { +template<typename Iterator> +boost::iterator_range<Iterator> operator()(Iterator begin, Iterator end) { + boost::iterator_range<Iterator> r = boost::first_finder("%")(begin, end); + if (r.end() == end) { + return r; + } + else { + if (r.end() + 1 == end || r.end() + 2 == end) { + throw std::runtime_error("Incomplete escape character"); + } + else { + r.advance_end(2); + return r; + } + } +} +}; + +struct PercentUnencodeFormatter { +template<typename FindResult> +std::string operator()(const FindResult& match) const { + std::stringstream s; + s << std::hex << std::string(match.begin() + 1, match.end()); + unsigned int value; + s >> value; + if (s.fail() || s.bad()) { + throw std::runtime_error("Invalid escape character"); + } + unsigned char charValue = static_cast<unsigned char>(value); + return std::string(reinterpret_cast<const char*>(&charValue), 1); +} +}; + +std::string unescape(const std::string& s) { + try { + return boost::find_format_all_copy(s, PercentEncodedCharacterFinder(), PercentUnencodeFormatter()); + } + catch (const std::exception&) { + return ""; + } +} +#endif + +std::string URL::unescape(const std::string& str) { + std::string result; + for (size_t i = 0; i < str.size(); ++i) { + if (str[i] == '%') { + if (i + 3 < str.size()) { + std::stringstream s; + s << std::hex << str.substr(i+1, 2); + unsigned int value; + s >> value; + if (s.fail() || s.bad()) { + return ""; + } + unsigned char charValue = static_cast<unsigned char>(value); + result += std::string(reinterpret_cast<const char*>(&charValue), 1); + i += 2; + } + else { + return ""; + } + } + else { + result += str[i]; + } + } + return result; +} + +} |