Blame - url/gurl.cc - mojo-tools

blob: 46ca408da9c9c55f1e919365dab03db5038aedff [file] [log] [blame]

James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	1	// Copyright 2013 The Chromium Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#ifdef WIN32
				6	#include <windows.h>
				7	#else
				8	#include <pthread.h>
				9	#endif
				10
				11	#include <algorithm>
				12	#include <ostream>
				13
				14	#include "url/gurl.h"
				15
				16	#include "base/logging.h"
				17	#include "url/url_canon_stdstring.h"
				18	#include "url/url_util.h"
				19
				20	namespace {
				21
				22	static std::string* empty_string = NULL;
				23	static GURL* empty_gurl = NULL;
				24
				25	#ifdef WIN32
				26
				27	// Returns a static reference to an empty string for returning a reference
				28	// when there is no underlying string.
				29	const std::string& EmptyStringForGURL() {
				30	// Avoid static object construction/destruction on startup/shutdown.
				31	if (!empty_string) {
				32	// Create the string. Be careful that we don't break in the case that this
				33	// is being called from multiple threads. Statics are not threadsafe.
				34	std::string* new_empty_string = new std::string;
				35	if (InterlockedCompareExchangePointer(
				36	reinterpret_cast<PVOID*>(&empty_string), new_empty_string, NULL)) {
				37	// The old value was non-NULL, so no replacement was done. Another
				38	// thread did the initialization out from under us.
				39	delete new_empty_string;
				40	}
				41	}
				42	return *empty_string;
				43	}
				44
				45	#else
				46
				47	static pthread_once_t empty_string_once = PTHREAD_ONCE_INIT;
				48	static pthread_once_t empty_gurl_once = PTHREAD_ONCE_INIT;
				49
				50	void EmptyStringForGURLOnce(void) {
				51	empty_string = new std::string;
				52	}
				53
				54	const std::string& EmptyStringForGURL() {
				55	// Avoid static object construction/destruction on startup/shutdown.
				56	pthread_once(&empty_string_once, EmptyStringForGURLOnce);
				57	return *empty_string;
				58	}
				59
				60	#endif // WIN32
				61
				62	} // namespace
				63
				64	GURL::GURL() : is_valid_(false) {
				65	}
				66
				67	GURL::GURL(const GURL& other)
				68	: spec_(other.spec_),
				69	is_valid_(other.is_valid_),
				70	parsed_(other.parsed_) {
				71	if (other.inner_url_)
				72	inner_url_.reset(new GURL(*other.inner_url_));
				73	// Valid filesystem urls should always have an inner_url_.
				74	DCHECK(!is_valid_ \|\| !SchemeIsFileSystem() \|\| inner_url_);
				75	}
				76
				77	GURL::GURL(const std::string& url_string) {
				78	InitCanonical(url_string, true);
				79	}
				80
				81	GURL::GURL(const base::string16& url_string) {
				82	InitCanonical(url_string, true);
				83	}
				84
				85	GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) {
				86	InitCanonical(url_string, false);
				87	}
				88
				89	GURL::GURL(const char* canonical_spec,
				90	size_t canonical_spec_len,
				91	const url::Parsed& parsed,
				92	bool is_valid)
				93	: spec_(canonical_spec, canonical_spec_len),
				94	is_valid_(is_valid),
				95	parsed_(parsed) {
				96	InitializeFromCanonicalSpec();
				97	}
				98
				99	GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid)
				100	: is_valid_(is_valid),
				101	parsed_(parsed) {
				102	spec_.swap(canonical_spec);
				103	InitializeFromCanonicalSpec();
				104	}
				105
				106	template<typename STR>
				107	void GURL::InitCanonical(const STR& input_spec, bool trim_path_end) {
				108	// Reserve enough room in the output for the input, plus some extra so that
				109	// we have room if we have to escape a few things without reallocating.
				110	spec_.reserve(input_spec.size() + 32);
				111	url::StdStringCanonOutput output(&spec_);
				112	is_valid_ = url::Canonicalize(
				113	input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
				114	NULL, &output, &parsed_);
				115
				116	output.Complete(); // Must be done before using string.
				117	if (is_valid_ && SchemeIsFileSystem()) {
				118	inner_url_.reset(new GURL(spec_.data(), parsed_.Length(),
				119	*parsed_.inner_parsed(), true));
				120	}
				121	}
				122
				123	void GURL::InitializeFromCanonicalSpec() {
				124	if (is_valid_ && SchemeIsFileSystem()) {
				125	inner_url_.reset(
				126	new GURL(spec_.data(), parsed_.Length(),
				127	*parsed_.inner_parsed(), true));
				128	}
				129
				130	#ifndef NDEBUG
				131	// For testing purposes, check that the parsed canonical URL is identical to
				132	// what we would have produced. Skip checking for invalid URLs have no meaning
				133	// and we can't always canonicalize then reproducabely.
				134	if (is_valid_) {
				135	url::Component scheme;
				136	// We can't do this check on the inner_url of a filesystem URL, as
				137	// canonical_spec actually points to the start of the outer URL, so we'd
				138	// end up with infinite recursion in this constructor.
				139	if (!url::FindAndCompareScheme(spec_.data(), spec_.length(),
				140	url::kFileSystemScheme, &scheme) \|\|
				141	scheme.begin == parsed_.scheme.begin) {
				142	// We need to retain trailing whitespace on path URLs, as the \|parsed_\|
				143	// spec we originally received may legitimately contain trailing white-
				144	// space on the path or components e.g. if the #ref has been
				145	// removed from a "foo:hello #ref" URL (see http://crbug.com/291747).
				146	GURL test_url(spec_, RETAIN_TRAILING_PATH_WHITEPACE);
				147
				148	DCHECK(test_url.is_valid_ == is_valid_);
				149	DCHECK(test_url.spec_ == spec_);
				150
				151	DCHECK(test_url.parsed_.scheme == parsed_.scheme);
				152	DCHECK(test_url.parsed_.username == parsed_.username);
				153	DCHECK(test_url.parsed_.password == parsed_.password);
				154	DCHECK(test_url.parsed_.host == parsed_.host);
				155	DCHECK(test_url.parsed_.port == parsed_.port);
				156	DCHECK(test_url.parsed_.path == parsed_.path);
				157	DCHECK(test_url.parsed_.query == parsed_.query);
				158	DCHECK(test_url.parsed_.ref == parsed_.ref);
				159	}
				160	}
				161	#endif
				162	}
				163
				164	GURL::~GURL() {
				165	}
				166
				167	GURL& GURL::operator=(GURL other) {
				168	Swap(&other);
				169	return *this;
				170	}
				171
				172	const std::string& GURL::spec() const {
				173	if (is_valid_ \|\| spec_.empty())
				174	return spec_;
				175
				176	DCHECK(false) << "Trying to get the spec of an invalid URL!";
				177	return EmptyStringForGURL();
				178	}
				179
James Robinson	d2015d9	2014-12-08 13:45:40 -0800	[diff] [blame]	180	bool GURL::operator==(const GURL& other) const {
				181	return spec_ == other.spec_;
				182	}
				183
				184	bool GURL::operator!=(const GURL& other) const {
				185	return spec_ != other.spec_;
				186	}
				187
				188	bool GURL::operator<(const GURL& other) const {
				189	return spec_ < other.spec_;
				190	}
				191
				192	bool GURL::operator>(const GURL& other) const {
				193	return spec_ > other.spec_;
				194	}
				195
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	196	GURL GURL::Resolve(const std::string& relative) const {
				197	return ResolveWithCharsetConverter(relative, NULL);
				198	}
				199	GURL GURL::Resolve(const base::string16& relative) const {
				200	return ResolveWithCharsetConverter(relative, NULL);
				201	}
				202
				203	// Note: code duplicated below (it's inconvenient to use a template here).
				204	GURL GURL::ResolveWithCharsetConverter(
				205	const std::string& relative,
				206	url::CharsetConverter* charset_converter) const {
				207	// Not allowed for invalid URLs.
				208	if (!is_valid_)
				209	return GURL();
				210
				211	GURL result;
				212
				213	// Reserve enough room in the output for the input, plus some extra so that
				214	// we have room if we have to escape a few things without reallocating.
				215	result.spec_.reserve(spec_.size() + 32);
				216	url::StdStringCanonOutput output(&result.spec_);
				217
				218	if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
				219	parsed_, relative.data(),
				220	static_cast<int>(relative.length()),
				221	charset_converter, &output, &result.parsed_)) {
				222	// Error resolving, return an empty URL.
				223	return GURL();
				224	}
				225
				226	output.Complete();
				227	result.is_valid_ = true;
				228	if (result.SchemeIsFileSystem()) {
				229	result.inner_url_.reset(
				230	new GURL(result.spec_.data(), result.parsed_.Length(),
				231	*result.parsed_.inner_parsed(), true));
				232	}
				233	return result;
				234	}
				235
				236	// Note: code duplicated above (it's inconvenient to use a template here).
				237	GURL GURL::ResolveWithCharsetConverter(
				238	const base::string16& relative,
				239	url::CharsetConverter* charset_converter) const {
				240	// Not allowed for invalid URLs.
				241	if (!is_valid_)
				242	return GURL();
				243
				244	GURL result;
				245
				246	// Reserve enough room in the output for the input, plus some extra so that
				247	// we have room if we have to escape a few things without reallocating.
				248	result.spec_.reserve(spec_.size() + 32);
				249	url::StdStringCanonOutput output(&result.spec_);
				250
				251	if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
				252	parsed_, relative.data(),
				253	static_cast<int>(relative.length()),
				254	charset_converter, &output, &result.parsed_)) {
				255	// Error resolving, return an empty URL.
				256	return GURL();
				257	}
				258
				259	output.Complete();
				260	result.is_valid_ = true;
				261	if (result.SchemeIsFileSystem()) {
				262	result.inner_url_.reset(
				263	new GURL(result.spec_.data(), result.parsed_.Length(),
				264	*result.parsed_.inner_parsed(), true));
				265	}
				266	return result;
				267	}
				268
				269	// Note: code duplicated below (it's inconvenient to use a template here).
				270	GURL GURL::ReplaceComponents(
				271	const url::Replacements<char>& replacements) const {
				272	GURL result;
				273
				274	// Not allowed for invalid URLs.
				275	if (!is_valid_)
				276	return GURL();
				277
				278	// Reserve enough room in the output for the input, plus some extra so that
				279	// we have room if we have to escape a few things without reallocating.
				280	result.spec_.reserve(spec_.size() + 32);
				281	url::StdStringCanonOutput output(&result.spec_);
				282
				283	result.is_valid_ = url::ReplaceComponents(
				284	spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
				285	NULL, &output, &result.parsed_);
				286
				287	output.Complete();
				288	if (result.is_valid_ && result.SchemeIsFileSystem()) {
				289	result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(),
				290	*result.parsed_.inner_parsed(), true));
				291	}
				292	return result;
				293	}
				294
				295	// Note: code duplicated above (it's inconvenient to use a template here).
				296	GURL GURL::ReplaceComponents(
				297	const url::Replacements<base::char16>& replacements) const {
				298	GURL result;
				299
				300	// Not allowed for invalid URLs.
				301	if (!is_valid_)
				302	return GURL();
				303
				304	// Reserve enough room in the output for the input, plus some extra so that
				305	// we have room if we have to escape a few things without reallocating.
				306	result.spec_.reserve(spec_.size() + 32);
				307	url::StdStringCanonOutput output(&result.spec_);
				308
				309	result.is_valid_ = url::ReplaceComponents(
				310	spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
				311	NULL, &output, &result.parsed_);
				312
				313	output.Complete();
				314	if (result.is_valid_ && result.SchemeIsFileSystem()) {
				315	result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(),
				316	*result.parsed_.inner_parsed(), true));
				317	}
				318	return result;
				319	}
				320
				321	GURL GURL::GetOrigin() const {
				322	// This doesn't make sense for invalid or nonstandard URLs, so return
				323	// the empty URL
				324	if (!is_valid_ \|\| !IsStandard())
				325	return GURL();
				326
				327	if (SchemeIsFileSystem())
				328	return inner_url_->GetOrigin();
				329
				330	url::Replacements<char> replacements;
				331	replacements.ClearUsername();
				332	replacements.ClearPassword();
				333	replacements.ClearPath();
				334	replacements.ClearQuery();
				335	replacements.ClearRef();
				336
				337	return ReplaceComponents(replacements);
				338	}
				339
				340	GURL GURL::GetAsReferrer() const {
James Robinson	6a64b81	2014-12-03 13:38:42 -0800	[diff] [blame]	341	if (!is_valid_ \|\| !SchemeIsHTTPOrHTTPS())
				342	return GURL();
				343
				344	if (!has_ref() && !has_username() && !has_password())
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	345	return GURL(*this);
				346
				347	url::Replacements<char> replacements;
				348	replacements.ClearRef();
				349	replacements.ClearUsername();
				350	replacements.ClearPassword();
				351	return ReplaceComponents(replacements);
				352	}
				353
				354	GURL GURL::GetWithEmptyPath() const {
				355	// This doesn't make sense for invalid or nonstandard URLs, so return
				356	// the empty URL.
				357	if (!is_valid_ \|\| !IsStandard())
				358	return GURL();
				359
				360	// We could optimize this since we know that the URL is canonical, and we are
				361	// appending a canonical path, so avoiding re-parsing.
				362	GURL other(*this);
				363	if (parsed_.path.len == 0)
				364	return other;
				365
				366	// Clear everything after the path.
				367	other.parsed_.query.reset();
				368	other.parsed_.ref.reset();
				369
				370	// Set the path, since the path is longer than one, we can just set the
				371	// first character and resize.
				372	other.spec_[other.parsed_.path.begin] = '/';
				373	other.parsed_.path.len = 1;
				374	other.spec_.resize(other.parsed_.path.begin + 1);
				375	return other;
				376	}
				377
				378	bool GURL::IsStandard() const {
				379	return url::IsStandard(spec_.data(), parsed_.scheme);
				380	}
				381
				382	bool GURL::SchemeIs(const char* lower_ascii_scheme) const {
				383	if (parsed_.scheme.len <= 0)
				384	return lower_ascii_scheme == NULL;
				385	return url::LowerCaseEqualsASCII(spec_.data() + parsed_.scheme.begin,
				386	spec_.data() + parsed_.scheme.end(),
				387	lower_ascii_scheme);
				388	}
				389
				390	bool GURL::SchemeIsHTTPOrHTTPS() const {
				391	return SchemeIs(url::kHttpScheme) \|\| SchemeIs(url::kHttpsScheme);
				392	}
				393
				394	bool GURL::SchemeIsWSOrWSS() const {
				395	return SchemeIs(url::kWsScheme) \|\| SchemeIs(url::kWssScheme);
				396	}
				397
				398	int GURL::IntPort() const {
				399	if (parsed_.port.is_nonempty())
				400	return url::ParsePort(spec_.data(), parsed_.port);
				401	return url::PORT_UNSPECIFIED;
				402	}
				403
				404	int GURL::EffectiveIntPort() const {
				405	int int_port = IntPort();
				406	if (int_port == url::PORT_UNSPECIFIED && IsStandard())
				407	return url::DefaultPortForScheme(spec_.data() + parsed_.scheme.begin,
				408	parsed_.scheme.len);
				409	return int_port;
				410	}
				411
				412	std::string GURL::ExtractFileName() const {
				413	url::Component file_component;
				414	url::ExtractFileName(spec_.data(), parsed_.path, &file_component);
				415	return ComponentString(file_component);
				416	}
				417
				418	std::string GURL::PathForRequest() const {
				419	DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty";
				420	if (parsed_.ref.len >= 0) {
				421	// Clip off the reference when it exists. The reference starts after the #
				422	// sign, so we have to subtract one to also remove it.
				423	return std::string(spec_, parsed_.path.begin,
				424	parsed_.ref.begin - parsed_.path.begin - 1);
				425	}
				426	// Compute the actual path length, rather than depending on the spec's
				427	// terminator. If we're an inner_url, our spec continues on into our outer
				428	// url's path/query/ref.
				429	int path_len = parsed_.path.len;
				430	if (parsed_.query.is_valid())
				431	path_len = parsed_.query.end() - parsed_.path.begin;
				432
				433	return std::string(spec_, parsed_.path.begin, path_len);
				434	}
				435
				436	std::string GURL::HostNoBrackets() const {
				437	// If host looks like an IPv6 literal, strip the square brackets.
				438	url::Component h(parsed_.host);
				439	if (h.len >= 2 && spec_[h.begin] == '[' && spec_[h.end() - 1] == ']') {
				440	h.begin++;
				441	h.len -= 2;
				442	}
				443	return ComponentString(h);
				444	}
				445
				446	std::string GURL::GetContent() const {
				447	return is_valid_ ? ComponentString(parsed_.GetContent()) : std::string();
				448	}
				449
				450	bool GURL::HostIsIPAddress() const {
				451	if (!is_valid_ \|\| spec_.empty())
				452	return false;
				453
				454	url::RawCanonOutputT<char, 128> ignored_output;
				455	url::CanonHostInfo host_info;
				456	url::CanonicalizeIPAddress(spec_.c_str(), parsed_.host, &ignored_output,
				457	&host_info);
				458	return host_info.IsIPAddress();
				459	}
				460
				461	#ifdef WIN32
				462
				463	const GURL& GURL::EmptyGURL() {
				464	// Avoid static object construction/destruction on startup/shutdown.
				465	if (!empty_gurl) {
				466	// Create the string. Be careful that we don't break in the case that this
				467	// is being called from multiple threads.
				468	GURL* new_empty_gurl = new GURL;
				469	if (InterlockedCompareExchangePointer(
				470	reinterpret_cast<PVOID*>(&empty_gurl), new_empty_gurl, NULL)) {
				471	// The old value was non-NULL, so no replacement was done. Another
				472	// thread did the initialization out from under us.
				473	delete new_empty_gurl;
				474	}
				475	}
				476	return *empty_gurl;
				477	}
				478
				479	#else
				480
				481	void EmptyGURLOnce(void) {
				482	empty_gurl = new GURL;
				483	}
				484
				485	const GURL& GURL::EmptyGURL() {
				486	// Avoid static object construction/destruction on startup/shutdown.
				487	pthread_once(&empty_gurl_once, EmptyGURLOnce);
				488	return *empty_gurl;
				489	}
				490
				491	#endif // WIN32
				492
				493	bool GURL::DomainIs(const char* lower_ascii_domain,
				494	int domain_len) const {
				495	// Return false if this URL is not valid or domain is empty.
				496	if (!is_valid_ \|\| !domain_len)
				497	return false;
				498
				499	// FileSystem URLs have empty parsed_.host, so check this first.
				500	if (SchemeIsFileSystem() && inner_url_)
				501	return inner_url_->DomainIs(lower_ascii_domain, domain_len);
				502
				503	if (!parsed_.host.is_nonempty())
				504	return false;
				505
				506	// Check whether the host name is end with a dot. If yes, treat it
				507	// the same as no-dot unless the input comparison domain is end
				508	// with dot.
				509	const char* last_pos = spec_.data() + parsed_.host.end() - 1;
				510	int host_len = parsed_.host.len;
				511	if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
				512	last_pos--;
				513	host_len--;
				514	}
				515
				516	// Return false if host's length is less than domain's length.
				517	if (host_len < domain_len)
				518	return false;
				519
				520	// Compare this url whether belong specific domain.
				521	const char* start_pos = spec_.data() + parsed_.host.begin +
				522	host_len - domain_len;
				523
				524	if (!url::LowerCaseEqualsASCII(start_pos,
				525	last_pos + 1,
				526	lower_ascii_domain,
				527	lower_ascii_domain + domain_len))
				528	return false;
				529
				530	// Check whether host has right domain start with dot, make sure we got
				531	// right domain range. For example www.google.com has domain
				532	// "google.com" but www.iamnotgoogle.com does not.
				533	if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
				534	'.' != *(start_pos - 1))
				535	return false;
				536
				537	return true;
				538	}
				539
				540	void GURL::Swap(GURL* other) {
				541	spec_.swap(other->spec_);
				542	std::swap(is_valid_, other->is_valid_);
				543	std::swap(parsed_, other->parsed_);
				544	inner_url_.swap(other->inner_url_);
				545	}
				546
				547	std::ostream& operator<<(std::ostream& out, const GURL& url) {
				548	return out << url.possibly_invalid_spec();
				549	}