summaryrefslogtreecommitdiffstats
blob: c36863f2b2017782dba06b57c8189f8f0d287d4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
/*
 * Copyright (c) 2010 Remko Tronçon
 * Licensed under the GNU General Public License v3.
 * See Documentation/Licenses/GPLv3.txt for more information.
 */

#include <Swiften/Base/URL.h>

namespace Swift {

// Determines the port number to use for url.
//
// A port stored in the URL itself takes precedence. When none is set, the
// scheme selects the result: 443 for "https", 80 for "http". For any other
// scheme a diagnostic line is written to std::cerr and 80 is returned.
int URL::getPortOrDefaultPort(const URL& url) {
	// An explicitly specified port always wins over a scheme default.
	if (url.getPort()) {
		return *url.getPort();
	}

	if (url.getScheme() == "https") {
		return 443;
	}

	// Everything that is left maps to 80; only non-"http" schemes are reported.
	if (url.getScheme() != "http") {
		std::cerr << "Unknown scheme: " << url.getScheme() << std::endl;
	}
	return 80;
}

// Parses urlString into a URL.
//
// Only URLs that carry an authority component are handled, i.e. strings of
// the form
//
//     scheme://[userinfo@]host[:port][/path]
//
// - The scheme is everything before the first ':'.
// - The authority runs from after "//" up to the next '/' (or the end of the
//   string). Everything from that '/' on is the path, passed through
//   unescape().
// - Any user info in front of '@' is dropped; it is not stored in the result.
// - The host is passed through unescape(). If the host/port part contains a
//   ':', the text after the first ':' must be a decimal port in the range
//   0-65535.
//
// Returns a default-constructed URL when there is no ':' in urlString, when
// the scheme is not followed by "//", or when the port is not a valid number
// in range.
//
// NOTE(review): the host/port split uses the first ':' in the authority, so
// bracketed IPv6 literals such as "[::1]:80" are not parsed correctly.
URL URL::fromString(const std::string& urlString) {
	size_t colonIndex = urlString.find(':');
	if (colonIndex == std::string::npos) {
		return URL();
	}
	std::string scheme = urlString.substr(0, colonIndex);

	// Authority
	if (urlString.size() <= colonIndex + 2 || urlString[colonIndex+1] != '/' || urlString[colonIndex+2] != '/') {
		// We don't support URLs without authorities yet
		return URL();
	}

	size_t authorityIndex = colonIndex + 3;
	size_t slashIndex = urlString.find('/', authorityIndex);
	std::string authority;
	std::string path;
	if (slashIndex == std::string::npos) {
		// No path at all: the authority extends to the end of the string.
		authority = urlString.substr(authorityIndex);
	}
	else {
		authority = urlString.substr(authorityIndex, slashIndex - authorityIndex);
		path = unescape(urlString.substr(slashIndex));
	}

	// Strip the user info ("user:password@"); it is not part of the result.
	size_t atIndex = authority.find('@');
	std::string hostAndPort;
	if (atIndex != std::string::npos) {
		hostAndPort = authority.substr(atIndex + 1);
	}
	else {
		hostAndPort = authority;
	}

	colonIndex = hostAndPort.find(':');
	if (colonIndex == std::string::npos) {
		// No explicit port.
		return URL(scheme, unescape(hostAndPort), path);
	}

	std::string host = unescape(hostAndPort.substr(0, colonIndex));
	int port = 0;
	try {
		port = boost::lexical_cast<int>(hostAndPort.substr(colonIndex + 1));
	}
	catch (const boost::bad_lexical_cast&) {
		return URL();
	}
	// lexical_cast<int> accepts any value that fits an int, including negative
	// numbers; only 0-65535 can be a port.
	if (port < 0 || port > 65535) {
		return URL();
	}
	return URL(scheme, host, port, path);
}

// Disabling this code for now, since GCC4.5+boost1.42 (on ubuntu) seems to
// result in a bug. Replacing it with naive code.
//
// Everything between "#if 0" and "#endif" is compiled out. It is a
// Boost.StringAlgo based implementation of percent-unescaping; the active
// implementation is URL::unescape() below. Note that the unescape() defined
// here is a free function, not a member of URL.
#if 0
// Should be in anonymous namespace, but older GCCs complain if we do that

// Finder functor for boost::find_format_all_copy(): locates the next '%' in
// [begin, end) and widens the match by the two characters that follow it.
struct PercentEncodedCharacterFinder {
template<typename Iterator>
boost::iterator_range<Iterator> operator()(Iterator begin, Iterator end) {
	boost::iterator_range<Iterator> r = boost::first_finder("%")(begin, end);
	// The result of first_finder is returned unchanged when it ends at the
	// end of the input.
	// NOTE(review): presumably this covers both "no '%' found" and "'%' is
	// the last character" - confirm against the Boost finder contract.
	if (r.end() == end) {
		return r;
	}
	else {
		// NOTE(review): r.end() is the position right after the '%', so
		// "r.end() + 2 == end" is the case where exactly two characters
		// follow the '%'. Throwing here rejects a complete escape at the very
		// end of the input; this looks like an off-by-one - confirm before
		// re-enabling this code.
		if (r.end() + 1 == end || r.end() + 2 == end) {
			throw std::runtime_error("Incomplete escape character");
		}
		else {
			// Extend the match from "%" to "%XX".
			r.advance_end(2);
			return r;
		}
	}
}
};

// Formatter functor for boost::find_format_all_copy(): turns a match into the
// one-character string whose value is the hexadecimal number that follows the
// first character of the match.
struct PercentUnencodeFormatter {
template<typename FindResult>
std::string operator()(const FindResult& match) const {
	std::stringstream s;
	// Skip the leading '%'; std::hex makes the extraction below parse
	// hexadecimal.
	s << std::hex << std::string(match.begin() + 1, match.end());
	unsigned int value;
	s >> value;
	if (s.fail() || s.bad()) {
		throw std::runtime_error("Invalid escape character");
	}
	unsigned char charValue = static_cast<unsigned char>(value);
	return std::string(reinterpret_cast<const char*>(&charValue), 1);
}
};

// Returns a copy of s with every match of the finder replaced by the
// formatter's output. Any std::exception thrown while doing so (including the
// runtime_errors thrown above) is turned into an empty result.
std::string unescape(const std::string& s) {
	try {
		return boost::find_format_all_copy(s, PercentEncodedCharacterFinder(), PercentUnencodeFormatter());
	}
	catch (const std::exception&) {
		return "";
	}
}
#endif

// Decodes the percent-escapes in str.
//
// Every "%XX" sequence, where XX are exactly two hexadecimal digits (upper or
// lower case), is replaced by the single character with that value. All other
// characters are copied through unchanged.
//
// Returns the empty string if str contains a '%' that is not followed by two
// hexadecimal digits (truncated or malformed escape).
std::string URL::unescape(const std::string& str) {
	std::string result;
	result.reserve(str.size());
	for (size_t i = 0; i < str.size(); ++i) {
		if (str[i] != '%') {
			result += str[i];
			continue;
		}

		// Two more characters are needed after the '%'. An escape that ends
		// exactly at the end of the string (e.g. "a%20") is complete.
		if (i + 2 >= str.size()) {
			return "";
		}

		// Decode the two digits by hand: this rejects anything that is not a
		// hexadecimal digit (whitespace, signs, a single digit followed by
		// another character, ...).
		unsigned int value = 0;
		for (size_t j = i + 1; j <= i + 2; ++j) {
			const char c = str[j];
			unsigned int digit = 0;
			if (c >= '0' && c <= '9') {
				digit = static_cast<unsigned int>(c - '0');
			}
			else if (c >= 'a' && c <= 'f') {
				digit = static_cast<unsigned int>(c - 'a') + 10;
			}
			else if (c >= 'A' && c <= 'F') {
				digit = static_cast<unsigned int>(c - 'A') + 10;
			}
			else {
				return "";
			}
			value = value * 16 + digit;
		}
		result += static_cast<char>(static_cast<unsigned char>(value));

		// Skip the two digits; the loop increment skips the '%'.
		i += 2;
	}
	return result;
}

}