Update to Chromium //url at Chromium commit 79dc59ac7602413181079ecb463873e29a1d7d0a. I think the most significant change is that url::Origin is now actually an origin. TBR=jamesr@chromium.org Review URL: https://codereview.chromium.org/2029803003 .

commit: 718eee97ed6a4df41d14726eb2eddc871d9eaaa3 [log] [tgz]
author: Viet-Trung Luu <viettrungluu@chromium.org> Wed Jun 01 15:20:18 2016 -0700
committer: Viet-Trung Luu <viettrungluu@chromium.org> Wed Jun 01 15:20:18 2016 -0700
tree: fa12a283240821c434f7a0d3c6679e2b96b5aa81
parent: dc748045a3e7e6d56999ec8d5de148dd7901159e [diff]
diff --git a/BUILD.gn b/BUILD.gn
index 4f6b637..34dff8a 100644
--- a/BUILD.gn
+++ b/BUILD.gn

@@ -24,6 +24,8 @@
     "gurl.h",
     "origin.cc",
     "origin.h",
+    "scheme_host_port.cc",
+    "scheme_host_port.h",
     "third_party/mozilla/url_parse.cc",
     "third_party/mozilla/url_parse.h",
     "url_canon.h",
@@ -91,10 +93,20 @@
 
 # TODO(dpranke): crbug.com/360936. Get this to build and run on Android.
 if (!is_android) {
+  # TODO(GYP): Delete this after we've converted everything to GN.
+  # The _run targets exist only for compatibility w/ GYP.
+  group("url_unittests_run") {
+    testonly = true
+    deps = [
+      ":url_unittests",
+    ]
+  }
+
   test("url_unittests") {
     sources = [
       "gurl_unittest.cc",
       "origin_unittest.cc",
+      "scheme_host_port_unittest.cc",
       "url_canon_icu_unittest.cc",
       "url_canon_unittest.cc",
       "url_parse_unittest.cc",

diff --git a/android/java/src/org/chromium/url/IDNStringUtil.java b/android/java/src/org/chromium/url/IDNStringUtil.java
index 32000fd..37d77dc 100644
--- a/android/java/src/org/chromium/url/IDNStringUtil.java
+++ b/android/java/src/org/chromium/url/IDNStringUtil.java

@@ -4,8 +4,8 @@
 
 package org.chromium.url;
 
-import org.chromium.base.CalledByNative;
-import org.chromium.base.JNINamespace;
+import org.chromium.base.annotations.CalledByNative;
+import org.chromium.base.annotations.JNINamespace;
 
 import java.net.IDN;
 

diff --git a/gurl.cc b/gurl.cc
index 46ca408..c22236f 100644
--- a/gurl.cc
+++ b/gurl.cc

@@ -14,6 +14,8 @@
 #include "url/gurl.h"
 
 #include "base/logging.h"
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
 #include "url/url_canon_stdstring.h"
 #include "url/url_util.h"
 
@@ -59,7 +61,7 @@
 
 #endif  // WIN32
 
-} // namespace
+}  // namespace
 
 GURL::GURL() : is_valid_(false) {
 }
@@ -130,7 +132,7 @@
 #ifndef NDEBUG
   // For testing purposes, check that the parsed canonical URL is identical to
   // what we would have produced. Skip checking for invalid URLs have no meaning
-  // and we can't always canonicalize then reproducabely.
+  // and we can't always canonicalize then reproducibly.
   if (is_valid_) {
     url::Component scheme;
     // We can't do this check on the inner_url of a filesystem URL, as
@@ -193,17 +195,8 @@
   return spec_ > other.spec_;
 }
 
-GURL GURL::Resolve(const std::string& relative) const {
-  return ResolveWithCharsetConverter(relative, NULL);
-}
-GURL GURL::Resolve(const base::string16& relative) const {
-  return ResolveWithCharsetConverter(relative, NULL);
-}
-
 // Note: code duplicated below (it's inconvenient to use a template here).
-GURL GURL::ResolveWithCharsetConverter(
-    const std::string& relative,
-    url::CharsetConverter* charset_converter) const {
+GURL GURL::Resolve(const std::string& relative) const {
   // Not allowed for invalid URLs.
   if (!is_valid_)
     return GURL();
@@ -218,7 +211,7 @@
   if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
                             parsed_, relative.data(),
                             static_cast<int>(relative.length()),
-                            charset_converter, &output, &result.parsed_)) {
+                            nullptr, &output, &result.parsed_)) {
     // Error resolving, return an empty URL.
     return GURL();
   }
@@ -234,9 +227,7 @@
 }
 
 // Note: code duplicated above (it's inconvenient to use a template here).
-GURL GURL::ResolveWithCharsetConverter(
-    const base::string16& relative,
-    url::CharsetConverter* charset_converter) const {
+GURL GURL::Resolve(const base::string16& relative) const {
   // Not allowed for invalid URLs.
   if (!is_valid_)
     return GURL();
@@ -251,7 +242,7 @@
   if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()),
                             parsed_, relative.data(),
                             static_cast<int>(relative.length()),
-                            charset_converter, &output, &result.parsed_)) {
+                            nullptr, &output, &result.parsed_)) {
     // Error resolving, return an empty URL.
     return GURL();
   }
@@ -320,7 +311,7 @@
 
 GURL GURL::GetOrigin() const {
   // This doesn't make sense for invalid or nonstandard URLs, so return
-  // the empty URL
+  // the empty URL.
   if (!is_valid_ || !IsStandard())
     return GURL();
 
@@ -382,9 +373,10 @@
 bool GURL::SchemeIs(const char* lower_ascii_scheme) const {
   if (parsed_.scheme.len <= 0)
     return lower_ascii_scheme == NULL;
-  return url::LowerCaseEqualsASCII(spec_.data() + parsed_.scheme.begin,
-                                   spec_.data() + parsed_.scheme.end(),
-                                   lower_ascii_scheme);
+  return base::LowerCaseEqualsASCII(
+      base::StringPiece(spec_.data() + parsed_.scheme.begin,
+                        parsed_.scheme.len),
+      lower_ascii_scheme);
 }
 
 bool GURL::SchemeIsHTTPOrHTTPS() const {
@@ -416,16 +408,17 @@
 }
 
 std::string GURL::PathForRequest() const {
-  DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty";
+  DCHECK(parsed_.path.len > 0)
+      << "Canonical path for requests should be non-empty";
   if (parsed_.ref.len >= 0) {
-    // Clip off the reference when it exists. The reference starts after the #
-    // sign, so we have to subtract one to also remove it.
+    // Clip off the reference when it exists. The reference starts after the
+    // #-sign, so we have to subtract one to also remove it.
     return std::string(spec_, parsed_.path.begin,
                        parsed_.ref.begin - parsed_.path.begin - 1);
   }
   // Compute the actual path length, rather than depending on the spec's
-  // terminator.  If we're an inner_url, our spec continues on into our outer
-  // url's path/query/ref.
+  // terminator. If we're an inner_url, our spec continues on into our outer
+  // URL's path/query/ref.
   int path_len = parsed_.path.len;
   if (parsed_.query.is_valid())
     path_len = parsed_.query.end() - parsed_.path.begin;
@@ -490,48 +483,45 @@
 
 #endif  // WIN32
 
-bool GURL::DomainIs(const char* lower_ascii_domain,
-                    int domain_len) const {
-  // Return false if this URL is not valid or domain is empty.
-  if (!is_valid_ || !domain_len)
+bool GURL::DomainIs(base::StringPiece lower_ascii_domain) const {
+  if (!is_valid_ || lower_ascii_domain.empty())
     return false;
 
   // FileSystem URLs have empty parsed_.host, so check this first.
   if (SchemeIsFileSystem() && inner_url_)
-    return inner_url_->DomainIs(lower_ascii_domain, domain_len);
+    return inner_url_->DomainIs(lower_ascii_domain);
 
   if (!parsed_.host.is_nonempty())
     return false;
 
-  // Check whether the host name is end with a dot. If yes, treat it
-  // the same as no-dot unless the input comparison domain is end
-  // with dot.
-  const char* last_pos = spec_.data() + parsed_.host.end() - 1;
+  // If the host name ends with a dot but the input domain doesn't,
+  // then we ignore the dot in the host name.
+  const char* host_last_pos = spec_.data() + parsed_.host.end() - 1;
   int host_len = parsed_.host.len;
-  if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
-    last_pos--;
+  int domain_len = lower_ascii_domain.length();
+  if ('.' == *host_last_pos && '.' != lower_ascii_domain[domain_len - 1]) {
+    host_last_pos--;
     host_len--;
   }
 
-  // Return false if host's length is less than domain's length.
   if (host_len < domain_len)
     return false;
 
-  // Compare this url whether belong specific domain.
-  const char* start_pos = spec_.data() + parsed_.host.begin +
-                          host_len - domain_len;
+  // |host_first_pos| is the start of the compared part of the host name, not
+  // the start of the whole host name.
+  const char* host_first_pos = spec_.data() + parsed_.host.begin +
+                               host_len - domain_len;
 
-  if (!url::LowerCaseEqualsASCII(start_pos,
-                                 last_pos + 1,
-                                 lower_ascii_domain,
-                                 lower_ascii_domain + domain_len))
+  if (!base::LowerCaseEqualsASCII(
+           base::StringPiece(host_first_pos, domain_len), lower_ascii_domain))
     return false;
 
-  // Check whether host has right domain start with dot, make sure we got
-  // right domain range. For example www.google.com has domain
-  // "google.com" but www.iamnotgoogle.com does not.
+  // Make sure there aren't extra characters in host before the compared part;
+  // if the host name is longer than the input domain name, then the character
+  // immediately before the compared part should be a dot. For example,
+  // www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
   if ('.' != lower_ascii_domain[0] && host_len > domain_len &&
-      '.' != *(start_pos - 1))
+      '.' != *(host_first_pos - 1))
     return false;
 
   return true;

diff --git a/gurl.h b/gurl.h
index 566fc5e..dccfec4 100644
--- a/gurl.h
+++ b/gurl.h

@@ -10,11 +10,12 @@
 
 #include "base/memory/scoped_ptr.h"
 #include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_canon_stdstring.h"
 #include "url/url_constants.h"
 #include "url/url_export.h"
-#include "url/url_parse.h"
 
 class URL_EXPORT GURL {
  public:
@@ -91,7 +92,7 @@
 
   // Returns the potentially invalid spec for a the URL. This spec MUST NOT be
   // modified or sent over the network. It is designed to be displayed in error
-  // messages to the user, as the apperance of the spec may explain the error.
+  // messages to the user, as the appearance of the spec may explain the error.
   // If the spec is valid, the valid spec will be returned.
   //
   // The returned string is guaranteed to be valid UTF-8.
@@ -124,9 +125,8 @@
   // pages.
   //
   // It may be impossible to resolve the URLs properly. If the input is not
-  // "standard" (SchemeIsStandard() == false) and the input looks relative, we
-  // can't resolve it. In these cases, the result will be an empty, invalid
-  // GURL.
+  // "standard" (IsStandard() == false) and the input looks relative, we can't
+  // resolve it. In these cases, the result will be an empty, invalid GURL.
   //
   // The result may also be a nonempty, invalid URL if the input has some kind
   // of encoding error. In these cases, we will try to construct a "good" URL
@@ -137,20 +137,6 @@
   GURL Resolve(const std::string& relative) const;
   GURL Resolve(const base::string16& relative) const;
 
-  // Like Resolve() above but takes a character set encoder which will be used
-  // for any query text specified in the input. The charset converter parameter
-  // may be NULL, in which case it will be treated as UTF-8.
-  //
-  // TODO(brettw): These should be replaced with versions that take something
-  // more friendly than a raw CharsetConverter (maybe like an ICU character set
-  // name).
-  GURL ResolveWithCharsetConverter(
-      const std::string& relative,
-      url::CharsetConverter* charset_converter) const;
-  GURL ResolveWithCharsetConverter(
-      const base::string16& relative,
-      url::CharsetConverter* charset_converter) const;
-
   // Creates a new GURL by replacing the current URL's components with the
   // supplied versions. See the Replacements class in url_canon.h for more.
   //
@@ -194,10 +180,11 @@
   // returned.
   GURL GetAsReferrer() const;
 
-  // Returns true if the scheme for the current URL is a known "standard"
-  // scheme. Standard schemes have an authority and a path section. This
-  // includes file: and filesystem:, which some callers may want to filter out
-  // explicitly by calling SchemeIsFile[System].
+  // Returns true if the scheme for the current URL is a known "standard-format"
+  // scheme. A standard-format scheme adheres to what RFC 3986 calls "generic
+  // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3). This includes
+  // file: and filesystem:, which some callers may want to filter out explicitly
+  // by calling SchemeIsFile[System].
   bool IsStandard() const;
 
   // Returns true if the given parameter (should be lower-case ASCII to match
@@ -223,10 +210,32 @@
     return SchemeIs(url::kFileSystemScheme);
   }
 
-  // If the scheme indicates a secure connection
+  // Returns true if the scheme indicates a secure connection.
+  //
+  // NOTE: This function is deprecated. You probably want
+  // |SchemeIsCryptographic| (if you just want to know if a scheme uses TLS for
+  // network transport) or Chromium's |IsOriginSecure| for a higher-level test
+  // about an origin's security. See those functions' documentation for more
+  // detail.
+  //
+  // TODO(palmer): Audit callers and change them to |SchemeIsCryptographic| or
+  // |IsOriginSecure|, as appropriate. Then remove |SchemeIsSecure|.
+  // crbug.com/362214
   bool SchemeIsSecure() const {
     return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme) ||
-        (SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure());
+           (SchemeIsFileSystem() && inner_url() &&
+            inner_url()->SchemeIsSecure());
+  }
+
+  // Returns true if the scheme indicates a network connection that uses TLS or
+  // some other cryptographic protocol (e.g. QUIC) for security.
+  //
+  // This function is not a complete test of whether or not an origin's code
+  // is minimally trustworthy. For that, see Chromium's |IsOriginSecure|, which
+  // has higher-level and more complete semantics. See that function's
+  // documentation for more detail.
+  bool SchemeIsCryptographic() const {
+    return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme);
   }
 
   // Returns true if the scheme is "blob".
@@ -235,13 +244,12 @@
   }
 
   // The "content" of the URL is everything after the scheme (skipping the
-  // scheme delimiting colon). It is an error to get the origin of an invalid
-  // URL. The result will be an empty string.
+  // scheme delimiting colon). It is an error to get the content of an invalid
+  // URL: the result will be an empty string.
   std::string GetContent() const;
 
   // Returns true if the hostname is an IP address. Note: this function isn't
   // as cheap as a simple getter because it re-parses the hostname to verify.
-  // This currently identifies only IPv4 addresses (bug 822685).
   bool HostIsIPAddress() const;
 
   // Getters for various components of the URL. The returned string will be
@@ -274,8 +282,8 @@
     return ComponentString(parsed_.ref);
   }
 
-  // Existance querying. These functions will return true if the corresponding
-  // URL component exists in this URL. Note that existance is different than
+  // Existence querying. These functions will return true if the corresponding
+  // URL component exists in this URL. Note that existence is different than
   // being nonempty. http://www.google.com/? has a query that just happens to
   // be empty, and has_query() will return true.
   bool has_scheme() const {
@@ -288,7 +296,7 @@
     return parsed_.password.len >= 0;
   }
   bool has_host() const {
-    // Note that hosts are special, absense of host means length 0.
+    // Note that hosts are special, absence of host means length 0.
     return parsed_.host.len > 0;
   }
   bool has_port() const {
@@ -310,7 +318,7 @@
   // values defined in Parsed for ExtractPort.
   int IntPort() const;
 
-  // Returns the port number of the url, or the default port number.
+  // Returns the port number of the URL, or the default port number.
   // If the scheme has no concept of port (or unknown default) returns
   // PORT_UNSPECIFIED.
   int EffectiveIntPort() const;
@@ -324,29 +332,21 @@
   std::string PathForRequest() const;
 
   // Returns the host, excluding the square brackets surrounding IPv6 address
-  // literals.  This can be useful for passing to getaddrinfo().
+  // literals. This can be useful for passing to getaddrinfo().
   std::string HostNoBrackets() const;
 
   // Returns true if this URL's host matches or is in the same domain as
-  // the given input string. For example if this URL was "www.google.com",
-  // this would match "com", "google.com", and "www.google.com
-  // (input domain should be lower-case ASCII to match the canonicalized
-  // scheme). This call is more efficient than getting the host and check
+  // the given input string. For example, if the hostname of the URL is
+  // "www.google.com", this will return true for "com", "google.com", and
+  // "www.google.com".
+  //
+  // The input domain should be lower-case ASCII to match the canonicalized
+  // host. This call is more efficient than getting the host and checking
   // whether host has the specific domain or not because no copies or
   // object constructions are done.
-  //
-  // If function DomainIs has parameter domain_len, which means the parameter
-  // lower_ascii_domain does not gurantee to terminate with NULL character.
-  bool DomainIs(const char* lower_ascii_domain, int domain_len) const;
+  bool DomainIs(base::StringPiece lower_ascii_domain) const;
 
-  // If function DomainIs only has parameter lower_ascii_domain, which means
-  // domain string should be terminate with NULL character.
-  bool DomainIs(const char* lower_ascii_domain) const {
-    return DomainIs(lower_ascii_domain,
-                    static_cast<int>(strlen(lower_ascii_domain)));
-  }
-
-  // Swaps the contents of this GURL object with the argument without doing
+  // Swaps the contents of this GURL object with |other|, without doing
   // any memory allocations.
   void Swap(GURL* other);
 
@@ -363,8 +363,8 @@
 
  private:
   // Variant of the string parsing constructor that allows the caller to elect
-  // retain trailing whitespace, if any, on the passed URL spec but only  if the
-  // scheme is one that allows trailing whitespace. The primary use-case is
+  // to retain trailing whitespace, if any, on the passed URL spec, but only if
+  // the scheme is one that allows trailing whitespace. The primary use-case is
   // for data: URLs. In most cases, you want to use the single parameter
   // constructor above.
   enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };

diff --git a/gurl_unittest.cc b/gurl_unittest.cc
index bea1a0c..18aa2ae 100644
--- a/gurl_unittest.cc
+++ b/gurl_unittest.cc

@@ -45,14 +45,15 @@
   EXPECT_EQ("something:///HOSTNAME.com/",
             TypesTestCase("something:///HOSTNAME.com/"));
 
-  // In the reverse, known schemes should always trigger standard URL handling.
+  // Conversely, URLs with known schemes should always trigger standard URL
+  // handling.
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
 
 #ifdef WIN32
-  // URLs that look like absolute Windows drive specs.
+  // URLs that look like Windows absolute path specs.
   EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
   EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
   EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
@@ -60,7 +61,7 @@
 #endif
 }
 
-// Test the basic creation and querying of components in a GURL. We assume
+// Test the basic creation and querying of components in a GURL. We assume that
 // the parser is already tested and works, so we are mostly interested if the
 // object does the right thing with the results.
 TEST(GURLTest, Components) {
@@ -175,7 +176,7 @@
   EXPECT_EQ("", invalid2.ref());
 }
 
-// This is a regression test for http://crbug.com/309975 .
+// This is a regression test for http://crbug.com/309975.
 TEST(GURLTest, SelfAssign) {
   GURL a("filesystem:http://example.com/temporary/");
   // This should not crash.
@@ -245,9 +246,9 @@
 }
 
 TEST(GURLTest, ExtraSlashesBeforeAuthority) {
-  // According to RFC3986, the hier-part for URI with an authority must use only
-  // two slashes, GURL intentionally just ignores slashes more than 2 and parses
-  // the following part as an authority.
+  // According to RFC3986, the hierarchical part for URI with an authority
+  // must use only two slashes; GURL intentionally just ignores extra slashes
+  // if there are more than 2, and parses the following part as an authority.
   GURL url("http:///host");
   EXPECT_EQ("host", url.host());
   EXPECT_EQ("/", url.path());
@@ -378,7 +379,7 @@
 }
 
 TEST(GURLTest, Replacements) {
-  // The url canonicalizer replacement test will handle most of these case.
+  // The URL canonicalizer replacement test will handle most of these cases.
   // The most important thing to do here is to check that the proper
   // canonicalizer gets called based on the scheme of the input.
   struct ReplaceCase {
@@ -395,7 +396,7 @@
   } replace_cases[] = {
     {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"},
     {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"},
-    {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"},
+    {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo", "search", "ref", "http://www.google.com:99/foo?search#ref"},
 #ifdef WIN32
     {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"},
 #endif
@@ -435,7 +436,7 @@
 
   EXPECT_EQ("data: one ? two ", url_no_ref.spec());
 
-  // Importing a parsed url via this constructor overload will retain trailing
+  // Importing a parsed URL via this constructor overload will retain trailing
   // whitespace.
   GURL import_url(url_no_ref.spec(),
                   url_no_ref.parsed_for_possibly_invalid_spec(),
@@ -561,43 +562,56 @@
 }
 
 TEST(GURLTest, DomainIs) {
-  const char google_domain[] = "google.com";
+  GURL url_1("http://google.com/foo");
+  EXPECT_TRUE(url_1.DomainIs("google.com"));
 
-  GURL url_1("http://www.google.com:99/foo");
-  EXPECT_TRUE(url_1.DomainIs(google_domain));
+  // Subdomain and port are ignored.
+  GURL url_2("http://www.google.com:99/foo");
+  EXPECT_TRUE(url_2.DomainIs("google.com"));
 
-  GURL url_2("http://google.com:99/foo");
-  EXPECT_TRUE(url_2.DomainIs(google_domain));
+  // Different top-level domain.
+  GURL url_3("http://www.google.com.cn/foo");
+  EXPECT_FALSE(url_3.DomainIs("google.com"));
 
-  GURL url_3("http://google.com./foo");
-  EXPECT_TRUE(url_3.DomainIs(google_domain));
+  // Different host name.
+  GURL url_4("http://www.iamnotgoogle.com/foo");
+  EXPECT_FALSE(url_4.DomainIs("google.com"));
 
-  GURL url_4("http://google.com/foo");
-  EXPECT_FALSE(url_4.DomainIs("google.com."));
+  // The input must be lower-cased; otherwise DomainIs returns false.
+  GURL url_5("http://www.google.com/foo");
+  EXPECT_FALSE(url_5.DomainIs("Google.com"));
 
-  GURL url_5("http://google.com./foo");
-  EXPECT_TRUE(url_5.DomainIs("google.com."));
+  // If the URL is invalid, DomainIs returns false.
+  GURL invalid_url("google.com");
+  EXPECT_FALSE(invalid_url.is_valid());
+  EXPECT_FALSE(invalid_url.DomainIs("google.com"));
+}
 
-  GURL url_6("http://www.google.com./foo");
-  EXPECT_TRUE(url_6.DomainIs(".com."));
+TEST(GURLTest, DomainIsTerminatingDotBehavior) {
+  // If the host part ends with a dot, it matches input domains
+  // with or without a dot.
+  GURL url_with_dot("http://www.google.com./foo");
+  EXPECT_TRUE(url_with_dot.DomainIs("google.com"));
+  EXPECT_TRUE(url_with_dot.DomainIs("google.com."));
+  EXPECT_TRUE(url_with_dot.DomainIs(".com"));
+  EXPECT_TRUE(url_with_dot.DomainIs(".com."));
 
-  GURL url_7("http://www.balabala.com/foo");
-  EXPECT_FALSE(url_7.DomainIs(google_domain));
+  // But, if the host name doesn't end with a dot and the input
+  // domain does, then it's considered to not match.
+  GURL url_without_dot("http://google.com/foo");
+  EXPECT_FALSE(url_without_dot.DomainIs("google.com."));
 
-  GURL url_8("http://www.google.com.cn/foo");
-  EXPECT_FALSE(url_8.DomainIs(google_domain));
+  // If the host name ends with two dots, it doesn't match.
+  GURL url_with_two_dots("http://www.google.com../foo");
+  EXPECT_FALSE(url_with_two_dots.DomainIs("google.com"));
+}
 
-  GURL url_9("http://www.iamnotgoogle.com/foo");
-  EXPECT_FALSE(url_9.DomainIs(google_domain));
+TEST(GURLTest, DomainIsWithFilesystemScheme) {
+  GURL url_1("filesystem:http://www.google.com:99/foo/");
+  EXPECT_TRUE(url_1.DomainIs("google.com"));
 
-  GURL url_10("http://www.iamnotgoogle.com../foo");
-  EXPECT_FALSE(url_10.DomainIs(".com"));
-
-  GURL url_11("filesystem:http://www.google.com:99/foo/");
-  EXPECT_TRUE(url_11.DomainIs(google_domain));
-
-  GURL url_12("filesystem:http://www.iamnotgoogle.com/foo/");
-  EXPECT_FALSE(url_12.DomainIs(google_domain));
+  GURL url_2("filesystem:http://www.iamnotgoogle.com/foo/");
+  EXPECT_FALSE(url_2.DomainIs("google.com"));
 }
 
 // Newlines should be stripped from inputs.
@@ -642,4 +656,29 @@
   EXPECT_FALSE(GURL("http://bar/").SchemeIsBlob());
 }
 
+TEST(GURLTest, ContentAndPathForNonStandardURLs) {
+  struct TestCase {
+    const char* url;
+    const char* expected;
+  } cases[] = {
+      {"null", ""},
+      {"not-a-standard-scheme:this is arbitrary content",
+       "this is arbitrary content"},
+      {"view-source:http://example.com/path", "http://example.com/path"},
+      {"blob:http://example.com/GUID", "http://example.com/GUID"},
+      {"blob://http://example.com/GUID", "//http://example.com/GUID"},
+      {"blob:http://user:password@example.com/GUID",
+       "http://user:password@example.com/GUID"},
+
+      // TODO(mkwst): This seems like a bug. https://crbug.com/513600
+      {"filesystem:http://example.com/path", "/"},
+  };
+
+  for (const auto& test : cases) {
+    GURL url(test.url);
+    EXPECT_EQ(test.expected, url.path()) << test.url;
+    EXPECT_EQ(test.expected, url.GetContent()) << test.url;
+  }
+}
+
 }  // namespace url

diff --git a/origin.cc b/origin.cc
index cebf5dd..9d0c4f0 100644
--- a/origin.cc
+++ b/origin.cc

@@ -1,20 +1,82 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #include "url/origin.h"
 
+#include <string.h>
+
 #include "base/logging.h"
-#include "base/strings/pattern.h"
+#include "base/strings/string_number_conversions.h"
+#include "url/gurl.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_constants.h"
+#include "url/url_util.h"
 
 namespace url {
 
-Origin::Origin() : string_("null") {}
+Origin::Origin() : unique_(true) {
+}
 
-Origin::Origin(const std::string& origin) : string_(origin) {
-  DCHECK(origin == "null" || base::MatchPattern(origin, "?*://?*"));
-  DCHECK_GT(origin.size(), 0u);
-  DCHECK(origin == "file://" || origin[origin.size() - 1] != '/');
+Origin::Origin(const GURL& url) : unique_(true) {
+  if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob()))
+    return;
+
+  if (url.SchemeIsFileSystem()) {
+    tuple_ = SchemeHostPort(*url.inner_url());
+  } else if (url.SchemeIsBlob()) {
+    // If we're dealing with a 'blob:' URL, https://url.spec.whatwg.org/#origin
+    // defines the origin as the origin of the URL which results from parsing
+    // the "path", which boils down to everything after the scheme. GURL's
+    // 'GetContent()' gives us exactly that.
+    tuple_ = SchemeHostPort(GURL(url.GetContent()));
+  } else {
+    tuple_ = SchemeHostPort(url);
+  }
+
+  unique_ = tuple_.IsInvalid();
+}
+
+Origin::Origin(base::StringPiece scheme, base::StringPiece host, uint16 port)
+    : tuple_(scheme, host, port) {
+  unique_ = tuple_.IsInvalid();
+}
+
+Origin::~Origin() {
+}
+
+// static
+Origin Origin::UnsafelyCreateOriginWithoutNormalization(
+    base::StringPiece scheme,
+    base::StringPiece host,
+    uint16 port) {
+  return Origin(scheme, host, port);
+}
+
+std::string Origin::Serialize() const {
+  if (unique())
+    return "null";
+
+  if (scheme() == kFileScheme)
+    return "file://";
+
+  return tuple_.Serialize();
+}
+
+bool Origin::IsSameOriginWith(const Origin& other) const {
+  if (unique_ || other.unique_)
+    return false;
+
+  return tuple_.Equals(other.tuple_);
+}
+
+bool Origin::operator<(const Origin& other) const {
+  return tuple_ < other.tuple_;
+}
+
+std::ostream& operator<<(std::ostream& out, const url::Origin& origin) {
+  return out << origin.Serialize();
 }
 
 }  // namespace url

diff --git a/origin.h b/origin.h
index 777e4e1..c94c38c 100644
--- a/origin.h
+++ b/origin.h

@@ -1,4 +1,4 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -7,27 +7,130 @@
 
 #include <string>
 
+#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
+#include "url/scheme_host_port.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon.h"
+#include "url/url_constants.h"
 #include "url/url_export.h"
 
+class GURL;
+
 namespace url {
 
-// Origin represents a Web Origin serialized to a string.
-// See RFC6454 for details.
+// An Origin is a tuple of (scheme, host, port), as described in RFC 6454.
+//
+// TL;DR: If you need to make a security-relevant decision, use 'url::Origin'.
+// If you only need to extract the bits of a URL which are relevant for a
+// network connection, use 'url::SchemeHostPort'.
+//
+// STL;SDR: If you aren't making actual network connections, use 'url::Origin'.
+//
+// 'Origin', like 'SchemeHostPort', is composed of a tuple of (scheme, host,
+// port), but contains a number of additional concepts which make it appropriate
+// for use as a security boundary and access control mechanism between contexts.
+//
+// This class ought to be used when code needs to determine if two resources
+// are "same-origin", and when a canonical serialization of an origin is
+// required. Note that some origins are "unique", meaning that they are not
+// same-origin with any other origin (including themselves).
+//
+// There are a few subtleties to note:
+//
+// * Invalid and non-standard GURLs are parsed as unique origins. This includes
+//   non-hierarchical URLs like 'data:text/html,...' and 'javascript:alert(1)'.
+//
+// * GURLs with schemes of 'filesystem' or 'blob' parse the origin out of the
+//   internals of the URL. That is, 'filesystem:https://example.com/temporary/f'
+//   is parsed as ('https', 'example.com', 443).
+//
+// * Unique origins all serialize to the string "null"; this means that the
+//   serializations of two unique origins are identical to each other, though
+//   the origins themselves are not "the same". This means that origins'
+//   serializations must not be relied upon for security checks.
+//
+// * GURLs with a 'file' scheme are tricky. They are parsed as ('file', host,
+//   0), but their behavior may differ from embedder to embedder.
+//
+// * The host component of an IPv6 address includes brackets, just like the URL
+//   representation.
+//
+// Usage:
+//
+// * Origins are generally constructed from an already-canonicalized GURL:
+//
+//     GURL url("https://example.com/");
+//     url::Origin origin(url);
+//     origin.scheme(); // "https"
+//     origin.host(); // "example.com"
+//     origin.port(); // 443
+//     origin.unique(); // false
+//
+// * To answer the question "Are |this| and |that| "same-origin" with each
+//   other?", use |Origin::IsSameOriginWith|:
+//
+//     if (this.IsSameOriginWith(that)) {
+//       // Amazingness goes here.
+//     }
 class URL_EXPORT Origin {
  public:
+  // Creates a unique Origin.
   Origin();
-  explicit Origin(const std::string& origin);
 
-  const std::string& string() const { return string_; }
+  // Creates an Origin from |url|, as described at
+  // https://url.spec.whatwg.org/#origin, with the following additions:
+  //
+  // 1. If |url| is invalid or non-standard, a unique Origin is constructed.
+  // 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed
+  //    out of everything in the URL which follows the scheme).
+  // 3. 'file' URLs parse as ("file", host, 0), where the host may be empty.
+  explicit Origin(const GURL& url);
 
-  bool IsSameAs(const Origin& that) const {
-    return string_ == that.string_;
-  }
+  // Creates an Origin from a |scheme|, |host|, and |port|. All the parameters
+  // must be valid and canonicalized. In particular, note that this cannot be
+  // used to create unique origins; 'url::Origin()' is the right way to do that.
+  //
+  // This function should be used in order to pass 'Origin' objects back and
+  // forth over IPC (as transitioning through GURL would risk potentially
+  // dangerous recanonicalization); other potential callers should prefer the
+  // 'GURL'-based constructor.
+  static Origin UnsafelyCreateOriginWithoutNormalization(
+      base::StringPiece scheme,
+      base::StringPiece host,
+      uint16 port);
+
+  ~Origin();
+
+  // For unique origins, these return ("", "", 0).
+  const std::string& scheme() const { return tuple_.scheme(); }
+  const std::string& host() const { return tuple_.host(); }
+  uint16 port() const { return tuple_.port(); }
+
+  bool unique() const { return unique_; }
+
+  // An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with
+  // the addition that all Origins with a 'file' scheme serialize to "file://".
+  std::string Serialize() const;
+
+  // Two Origins are "same-origin" if their schemes, hosts, and ports are exact
+  // matches; and neither is unique.
+  bool IsSameOriginWith(const Origin& other) const;
+
+  // Allows Origin to be used as a key in STL containers (for example, a
+  // std::set or std::map).
+  bool operator<(const Origin& other) const;
 
  private:
-  std::string string_;
+  Origin(base::StringPiece scheme, base::StringPiece host, uint16 port);
+
+  SchemeHostPort tuple_;
+  bool unique_;
 };
 
+URL_EXPORT std::ostream& operator<<(std::ostream& out,
+                                    const Origin& origin);
+
 }  // namespace url
 
 #endif  // URL_ORIGIN_H_

diff --git a/origin_unittest.cc b/origin_unittest.cc
index c094ee6..ec4ec65 100644
--- a/origin_unittest.cc
+++ b/origin_unittest.cc

@@ -1,41 +1,251 @@
-// Copyright 2014 The Chromium Authors. All rights reserved.
+// Copyright 2015 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include "testing/gtest/include/gtest/gtest.h"
+#include "base/logging.h"
 #include "url/origin.h"
-
-namespace url {
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
 
 namespace {
 
-// Each test examines the Origin is constructed correctly without
-// violating DCHECKs.
-TEST(OriginTest, constructEmpty) {
-  Origin origin;
-  EXPECT_EQ("null", origin.string());
+TEST(OriginTest, UniqueOriginComparison) {
+  url::Origin unique_origin;
+  EXPECT_EQ("", unique_origin.scheme());
+  EXPECT_EQ("", unique_origin.host());
+  EXPECT_EQ(0, unique_origin.port());
+  EXPECT_TRUE(unique_origin.unique());
+  EXPECT_FALSE(unique_origin.IsSameOriginWith(unique_origin));
+
+  const char* const urls[] = {"data:text/html,Hello!",
+                              "javascript:alert(1)",
+                              "file://example.com:443/etc/passwd",
+                              "yay",
+                              "http::///invalid.example.com/"};
+
+  for (const auto& test_url : urls) {
+    SCOPED_TRACE(test_url);
+    GURL url(test_url);
+    url::Origin origin(url);
+    EXPECT_EQ("", origin.scheme());
+    EXPECT_EQ("", origin.host());
+    EXPECT_EQ(0, origin.port());
+    EXPECT_TRUE(origin.unique());
+    EXPECT_FALSE(origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(unique_origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(origin.IsSameOriginWith(unique_origin));
+  }
 }
 
-TEST(OriginTest, constructNull) {
-  Origin origin("null");
-  EXPECT_EQ("null", origin.string());
+TEST(OriginTest, ConstructFromGURL) {
+  url::Origin different_origin(GURL("https://not-in-the-list.test/"));
+
+  struct TestCases {
+    const char* const url;
+    const char* const expected_scheme;
+    const char* const expected_host;
+    const uint16 expected_port;
+  } cases[] = {
+      // IP Addresses
+      {"http://192.168.9.1/", "http", "192.168.9.1", 80},
+      {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80},
+
+      // Punycode
+      {"http://☃.net/", "http", "xn--n3h.net", 80},
+      {"blob:http://☃.net/", "http", "xn--n3h.net", 80},
+
+      // Generic URLs
+      {"http://example.com/", "http", "example.com", 80},
+      {"http://example.com:123/", "http", "example.com", 123},
+      {"https://example.com/", "https", "example.com", 443},
+      {"https://example.com:123/", "https", "example.com", 123},
+      {"http://user:pass@example.com/", "http", "example.com", 80},
+      {"http://example.com:123/?query", "http", "example.com", 123},
+      {"https://example.com/#1234", "https", "example.com", 443},
+      {"https://u:p@example.com:123/?query#1234", "https", "example.com", 123},
+
+      // Registered URLs
+      {"ftp://example.com/", "ftp", "example.com", 21},
+      {"gopher://example.com/", "gopher", "example.com", 70},
+      {"ws://example.com/", "ws", "example.com", 80},
+      {"wss://example.com/", "wss", "example.com", 443},
+
+      // file: URLs
+      {"file:///etc/passwd", "file", "", 0},
+      {"file://example.com/etc/passwd", "file", "example.com", 0},
+
+      // Filesystem:
+      {"filesystem:http://example.com/type/", "http", "example.com", 80},
+      {"filesystem:http://example.com:123/type/", "http", "example.com", 123},
+      {"filesystem:https://example.com/type/", "https", "example.com", 443},
+      {"filesystem:https://example.com:123/type/", "https", "example.com", 123},
+
+      // Blob:
+      {"blob:http://example.com/guid-goes-here", "http", "example.com", 80},
+      {"blob:http://example.com:123/guid-goes-here", "http", "example.com", 123},
+      {"blob:https://example.com/guid-goes-here", "https", "example.com", 443},
+      {"blob:http://u:p@example.com/guid-goes-here", "http", "example.com", 80},
+  };
+
+  for (const auto& test_case : cases) {
+    SCOPED_TRACE(test_case.url);
+    GURL url(test_case.url);
+    EXPECT_TRUE(url.is_valid());
+    url::Origin origin(url);
+    EXPECT_EQ(test_case.expected_scheme, origin.scheme());
+    EXPECT_EQ(test_case.expected_host, origin.host());
+    EXPECT_EQ(test_case.expected_port, origin.port());
+    EXPECT_FALSE(origin.unique());
+    EXPECT_TRUE(origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(different_origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(origin.IsSameOriginWith(different_origin));
+  }
 }
 
-TEST(OriginTest, constructValidOrigin) {
-  Origin origin("http://example.com:8080");
-  EXPECT_EQ("http://example.com:8080", origin.string());
+TEST(OriginTest, Serialization) {
+  struct TestCases {
+    const char* const url;
+    const char* const expected;
+  } cases[] = {
+      {"http://192.168.9.1/", "http://192.168.9.1"},
+      {"http://[2001:db8::1]/", "http://[2001:db8::1]"},
+      {"http://☃.net/", "http://xn--n3h.net"},
+      {"http://example.com/", "http://example.com"},
+      {"http://example.com:123/", "http://example.com:123"},
+      {"https://example.com/", "https://example.com"},
+      {"https://example.com:123/", "https://example.com:123"},
+      {"file:///etc/passwd", "file://"},
+      {"file://example.com/etc/passwd", "file://"},
+  };
+
+  for (const auto& test_case : cases) {
+    SCOPED_TRACE(test_case.url);
+    GURL url(test_case.url);
+    EXPECT_TRUE(url.is_valid());
+    url::Origin origin(url);
+    EXPECT_EQ(test_case.expected, origin.Serialize());
+
+    // The '<<' operator should produce the same serialization as Serialize().
+    std::stringstream out;
+    out << origin;
+    EXPECT_EQ(test_case.expected, out.str());
+  }
 }
 
-TEST(OriginTest, constructValidFileOrigin) {
-  Origin origin("file://");
-  EXPECT_EQ("file://", origin.string());
+TEST(OriginTest, Comparison) {
+  // These URLs are arranged in increasing order:
+  const char* const urls[] = {
+      "data:uniqueness",
+      "http://a:80",
+      "http://b:80",
+      "https://a:80",
+      "https://b:80",
+      "http://a:81",
+      "http://b:81",
+      "https://a:81",
+      "https://b:81",
+  };
+
+  for (size_t i = 0; i < arraysize(urls); i++) {
+    GURL current_url(urls[i]);
+    url::Origin current(current_url);
+    for (size_t j = i; j < arraysize(urls); j++) {
+      GURL compare_url(urls[j]);
+      url::Origin to_compare(compare_url);
+      EXPECT_EQ(i < j, current < to_compare) << i << " < " << j;
+      EXPECT_EQ(j < i, to_compare < current) << j << " < " << i;
+    }
+  }
 }
 
-TEST(OriginTest, constructValidOriginWithoutPort) {
-  Origin origin("wss://example2.com");
-  EXPECT_EQ("wss://example2.com", origin.string());
+TEST(OriginTest, UnsafelyCreate) {
+  struct TestCase {
+    const char* scheme;
+    const char* host;
+    uint16 port;
+  } cases[] = {
+      {"http", "example.com", 80},
+      {"http", "example.com", 123},
+      {"https", "example.com", 443},
+      {"https", "example.com", 123},
+      {"file", "", 0},
+      {"file", "example.com", 0},
+  };
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization(
+        test.scheme, test.host, test.port);
+    EXPECT_EQ(test.scheme, origin.scheme());
+    EXPECT_EQ(test.host, origin.host());
+    EXPECT_EQ(test.port, origin.port());
+    EXPECT_FALSE(origin.unique());
+    EXPECT_TRUE(origin.IsSameOriginWith(origin));
+  }
 }
 
-}  // namespace
+TEST(OriginTest, UnsafelyCreateUniqueOnInvalidInput) {
+  struct TestCases {
+    const char* scheme;
+    const char* host;
+    uint16 port;
+  } cases[] = {{"", "", 0},
+               {"data", "", 0},
+               {"blob", "", 0},
+               {"filesystem", "", 0},
+               {"data", "example.com", 80},
+               {"http", "☃.net", 80},
+               {"http\nmore", "example.com", 80},
+               {"http\rmore", "example.com", 80},
+               {"http\n", "example.com", 80},
+               {"http\r", "example.com", 80},
+               {"http", "example.com\nnot-example.com", 80},
+               {"http", "example.com\rnot-example.com", 80},
+               {"http", "example.com\n", 80},
+               {"http", "example.com\r", 80},
+               {"http", "example.com", 0},
+               {"file", "", 80}};
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization(
+        test.scheme, test.host, test.port);
+    EXPECT_EQ("", origin.scheme());
+    EXPECT_EQ("", origin.host());
+    EXPECT_EQ(0, origin.port());
+    EXPECT_TRUE(origin.unique());
+    EXPECT_FALSE(origin.IsSameOriginWith(origin));
+  }
+}
+
+TEST(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) {
+  struct TestCases {
+    const char* scheme;
+    size_t scheme_length;
+    const char* host;
+    size_t host_length;
+    uint16 port;
+  } cases[] = {{"http\0more", 9, "example.com", 11, 80},
+               {"http\0", 5, "example.com", 11, 80},
+               {"\0http", 5, "example.com", 11, 80},
+               {"http", 4, "example.com\0not-example.com", 27, 80},
+               {"http", 4, "example.com\0", 12, 80},
+               {"http", 4, "\0example.com", 12, 80}};
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization(
+        std::string(test.scheme, test.scheme_length),
+        std::string(test.host, test.host_length), test.port);
+    EXPECT_EQ("", origin.scheme());
+    EXPECT_EQ("", origin.host());
+    EXPECT_EQ(0, origin.port());
+    EXPECT_TRUE(origin.unique());
+    EXPECT_FALSE(origin.IsSameOriginWith(origin));
+  }
+}
 
 }  // namespace url

diff --git a/scheme_host_port.cc b/scheme_host_port.cc
new file mode 100644
index 0000000..c2fe830
--- /dev/null
+++ b/scheme_host_port.cc

@@ -0,0 +1,129 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/scheme_host_port.h"
+
+#include <string.h>
+
+#include "base/logging.h"
+#include "base/strings/string_number_conversions.h"
+#include "url/gurl.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_constants.h"
+#include "url/url_util.h"
+
+namespace url {
+
+SchemeHostPort::SchemeHostPort() : port_(0) {
+}
+
+SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
+                               base::StringPiece host,
+                               uint16 port)
+    : scheme_(scheme.data(), scheme.length()),
+      host_(host.data(), host.length()),
+      port_(port) {
+  // Try to canonicalize the host (copy/pasted from net/base. :( ).
+  const url::Component raw_host_component(0, static_cast<int>(host.length()));
+  std::string canon_host;
+  url::StdStringCanonOutput canon_host_output(&canon_host);
+  url::CanonHostInfo host_info;
+  url::CanonicalizeHostVerbose(host.data(), raw_host_component,
+                               &canon_host_output, &host_info);
+
+  if (host_info.out_host.is_nonempty() &&
+      host_info.family != url::CanonHostInfo::BROKEN) {
+    // Success!  Assert that there's no extra garbage.
+    canon_host_output.Complete();
+    DCHECK_EQ(host_info.out_host.len, static_cast<int>(canon_host.length()));
+  } else {
+    // Empty host, or canonicalization failed.
+    canon_host.clear();
+  }
+
+  // Return an invalid SchemeHostPort object if any of the following conditions
+  // hold:
+  //
+  // 1. The provided scheme is non-standard, 'blob:', or 'filesystem:'.
+  // 2. The provided host is non-canonical.
+  // 3. The scheme is 'file' and the port is non-zero.
+  // 4. The scheme is not 'file', and the port is zero or the host is empty.
+  bool isUnsupportedScheme =
+      !url::IsStandard(scheme.data(),
+                       url::Component(0, static_cast<int>(scheme.length()))) ||
+      scheme == kFileSystemScheme || scheme == kBlobScheme;
+  bool isNoncanonicalHost = host != canon_host;
+  bool isFileSchemeWithPort = scheme == kFileScheme && port != 0;
+  bool isNonFileSchemeWithoutPortOrHost =
+      scheme != kFileScheme && (port == 0 || host.empty());
+  if (isUnsupportedScheme || isNoncanonicalHost || isFileSchemeWithPort ||
+      isNonFileSchemeWithoutPortOrHost) {
+    scheme_.clear();
+    host_.clear();
+    port_ = 0;
+  }
+}
+
+SchemeHostPort::SchemeHostPort(const GURL& url) : port_(0) {
+  if (!url.is_valid() || !url.IsStandard())
+    return;
+
+  // These schemes do not follow the generic URL syntax, so we treat them as
+  // invalid (scheme, host, port) tuples (even though such URLs' _Origin_ might
+  // have a (scheme, host, port) tuple, they themselves do not).
+  if (url.SchemeIsBlob() || url.SchemeIsFileSystem())
+    return;
+
+  scheme_ = url.scheme();
+  host_ = url.host();
+  port_ = url.EffectiveIntPort() == url::PORT_UNSPECIFIED
+              ? 0
+              : url.EffectiveIntPort();
+}
+
+SchemeHostPort::~SchemeHostPort() {
+}
+
+bool SchemeHostPort::IsInvalid() const {
+  return scheme_.empty() && host_.empty() && !port_;
+}
+
+std::string SchemeHostPort::Serialize() const {
+  std::string result;
+  if (IsInvalid())
+    return result;
+
+  bool is_default_port =
+      port_ == url::DefaultPortForScheme(scheme_.data(),
+                                         static_cast<int>(scheme_.length()));
+
+  result.append(scheme_);
+  result.append(kStandardSchemeSeparator);
+  result.append(host_);
+
+  if (scheme_ != kFileScheme && !is_default_port) {
+    result.push_back(':');
+    result.append(base::IntToString(port_));
+  }
+
+  return result;
+}
+
+bool SchemeHostPort::Equals(const SchemeHostPort& other) const {
+  return port_ == other.port() && scheme_ == other.scheme() &&
+         host_ == other.host();
+}
+
+bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
+  if (port_ != other.port_)
+    return port_ < other.port_;
+  if (scheme_ != other.scheme_)
+    return scheme_ < other.scheme_;
+  if (host_ != other.host_)
+    return host_ < other.host_;
+  return false;
+}
+
+}  // namespace url

diff --git a/scheme_host_port.h b/scheme_host_port.h
new file mode 100644
index 0000000..2cc9e07
--- /dev/null
+++ b/scheme_host_port.h

@@ -0,0 +1,132 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_SCHEME_HOST_PORT_H_
+#define URL_SCHEME_HOST_PORT_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/strings/string_piece.h"
+#include "url/url_export.h"
+
+class GURL;
+
+namespace url {
+
+// This class represents a (scheme, host, port) tuple extracted from a URL.
+//
+// The primary purpose of this class is to represent relevant network-authority
+// information for a URL. It is _not_ an Origin, as described in RFC 6454. In
+// particular, it is generally NOT the right thing to use for security
+// decisions.
+//
+// Instead, this class is a mechanism for simplifying URLs with standard schemes
+// (that is, those which follow the generic syntax of RFC 3986) down to the
+// uniquely identifying information necessary for network fetches. This makes it
+// suitable as a cache key for a collection of active connections, for instance.
+// It may, however, be inappropriate to use as a cache key for persistent
+// storage associated with a host.
+//
+// In particular, note that:
+//
+// * SchemeHostPort can only represent schemes which follow the RFC 3986 syntax
+//   (e.g. those registered with GURL as "standard schemes"). Non-standard
+//   schemes such as "blob", "filesystem", "data", and "javascript" can only be
+//   represented as invalid SchemeHostPort objects.
+//
+// * The "file" scheme follows the standard syntax, but it is important to note
+//   that the authority portion (host, port) is optional. URLs without an
+//   authority portion will be represented with an empty string for the host,
+//   and a port of 0 (e.g. "file:///etc/hosts" => ("file", "", 0)), and URLs
+//   with a host-only authority portion will be represented with a port of 0
+//   (e.g. "file://example.com/etc/hosts" => ("file", "example.com", 0)). See
+//   Section 3 of RFC 3986 to better understand these constructs.
+//
+// * SchemeHostPort has no notion of the Origin concept (RFC 6454), and in
+//   particular, it has no notion of a "unique" Origin. If you need to take
+//   uniqueness into account (and, if you're making security-relevant decisions
+//   then you absolutely do), please use 'url::Origin' instead[1].
+//
+// [1]: // TODO(mkwst): Land 'url::Origin'. :)
+//
+// Usage:
+//
+// * SchemeHostPort objects are commonly created from GURL objects:
+//
+//     GURL url("https://example.com/");
+//     url::SchemeHostPort tuple(url);
+//     tuple.scheme(); // "https"
+//     tuple.host(); // "example.com"
+//     tuple.port(); // 443
+//
+// * Objects may also be explicitly created and compared:
+//
+//     url::SchemeHostPort tuple(url::kHttpsScheme, "example.com", 443);
+//     tuple.scheme(); // "https"
+//     tuple.host(); // "example.com"
+//     tuple.port(); // 443
+//
+//     GURL url("https://example.com/");
+//     tuple.Equals(url::SchemeHostPort(url)); // true
+class URL_EXPORT SchemeHostPort {
+ public:
+  // Creates an invalid (scheme, host, port) tuple, which represents an invalid
+  // or non-standard URL.
+  SchemeHostPort();
+
+  // Creates a (scheme, host, port) tuple. |host| must be a canonicalized
+  // A-label (that is, '☃.net' must be provided as 'xn--n3h.net'). |scheme|
+  // must be a standard scheme. |port| must not be 0, unless |scheme| does not
+  // support ports (e.g. 'file'). In that case, |port| must be 0.
+  //
+  // Copies the data in |scheme| and |host|.
+  SchemeHostPort(base::StringPiece scheme, base::StringPiece host, uint16 port);
+
+  // Creates a (scheme, host, port) tuple from |url|, as described at
+  // https://tools.ietf.org/html/rfc6454#section-4
+  //
+  // If |url| is invalid or non-standard, the result will be an invalid
+  // SchemeHostPort object.
+  explicit SchemeHostPort(const GURL& url);
+
+  ~SchemeHostPort();
+
+  // Returns the host component, in URL form. That is, all IDN domain names will
+  // be expressed as A-Labels ('☃.net' will be returned as 'xn--n3h.net'), and
+  // all IPv6 addresses will be enclosed in brackets ("[2001:db8::1]").
+  const std::string& host() const { return host_; }
+  const std::string& scheme() const { return scheme_; }
+  uint16 port() const { return port_; }
+  bool IsInvalid() const;
+
+  // Serializes the SchemeHostPort tuple to a canonical form.
+  //
+  // While this string form resembles the Origin serialization specified in
+  // Section 6.2 of RFC 6454, it is important to note that invalid
+  // SchemeHostPort tuples serialize to the empty string, rather than being
+  // serialized as a unique Origin.
+  std::string Serialize() const;
+
+  // Two SchemeHostPort objects are "equal" iff their schemes, hosts, and ports
+  // are exact matches.
+  //
+  // Note that this comparison is _not_ the same as an origin-based comparison.
+  // In particular, invalid SchemeHostPort objects match each other (and
+  // themselves). Unique origins, on the other hand, would not.
+  bool Equals(const SchemeHostPort& other) const;
+
+  // Allows SchemeHostPort to be used as a key in STL containers (for example,
+  // a std::set or std::map).
+  bool operator<(const SchemeHostPort& other) const;
+
+ private:
+  std::string scheme_;
+  std::string host_;
+  uint16 port_;
+};
+
+}  // namespace url
+
+#endif  // URL_SCHEME_HOST_PORT_H_

diff --git a/scheme_host_port_unittest.cc b/scheme_host_port_unittest.cc
new file mode 100644
index 0000000..817631d
--- /dev/null
+++ b/scheme_host_port_unittest.cc

@@ -0,0 +1,215 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+#include "url/scheme_host_port.h"
+
+namespace {
+
+TEST(SchemeHostPortTest, Invalid) {
+  url::SchemeHostPort invalid;
+  EXPECT_EQ("", invalid.scheme());
+  EXPECT_EQ("", invalid.host());
+  EXPECT_EQ(0, invalid.port());
+  EXPECT_TRUE(invalid.IsInvalid());
+  EXPECT_TRUE(invalid.Equals(invalid));
+
+  const char* urls[] = {"data:text/html,Hello!",
+                        "javascript:alert(1)",
+                        "file://example.com:443/etc/passwd",
+                        "blob:https://example.com/uuid-goes-here",
+                        "filesystem:https://example.com/temporary/yay.png"};
+
+  for (const auto& test : urls) {
+    SCOPED_TRACE(test);
+    GURL url(test);
+    url::SchemeHostPort tuple(url);
+    EXPECT_EQ("", tuple.scheme());
+    EXPECT_EQ("", tuple.host());
+    EXPECT_EQ(0, tuple.port());
+    EXPECT_TRUE(tuple.IsInvalid());
+    EXPECT_TRUE(tuple.Equals(tuple));
+    EXPECT_TRUE(tuple.Equals(invalid));
+    EXPECT_TRUE(invalid.Equals(tuple));
+  }
+}
+
+TEST(SchemeHostPortTest, ExplicitConstruction) {
+  struct TestCases {
+    const char* scheme;
+    const char* host;
+    uint16 port;
+  } cases[] = {
+      {"http", "example.com", 80},
+      {"http", "example.com", 123},
+      {"https", "example.com", 443},
+      {"https", "example.com", 123},
+      {"file", "", 0},
+      {"file", "example.com", 0},
+  };
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::SchemeHostPort tuple(test.scheme, test.host, test.port);
+    EXPECT_EQ(test.scheme, tuple.scheme());
+    EXPECT_EQ(test.host, tuple.host());
+    EXPECT_EQ(test.port, tuple.port());
+    EXPECT_FALSE(tuple.IsInvalid());
+    EXPECT_TRUE(tuple.Equals(tuple));
+  }
+}
+
+TEST(SchemeHostPortTest, InvalidConstruction) {
+  struct TestCases {
+    const char* scheme;
+    const char* host;
+    uint16 port;
+  } cases[] = {{"", "", 0},
+               {"data", "", 0},
+               {"blob", "", 0},
+               {"filesystem", "", 0},
+               {"http", "", 80},
+               {"data", "example.com", 80},
+               {"http", "☃.net", 80},
+               {"http\nmore", "example.com", 80},
+               {"http\rmore", "example.com", 80},
+               {"http\n", "example.com", 80},
+               {"http\r", "example.com", 80},
+               {"http", "example.com\nnot-example.com", 80},
+               {"http", "example.com\rnot-example.com", 80},
+               {"http", "example.com\n", 80},
+               {"http", "example.com\r", 80},
+               {"http", "example.com", 0},
+               {"file", "", 80}};
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::SchemeHostPort tuple(test.scheme, test.host, test.port);
+    EXPECT_EQ("", tuple.scheme());
+    EXPECT_EQ("", tuple.host());
+    EXPECT_EQ(0, tuple.port());
+    EXPECT_TRUE(tuple.IsInvalid());
+    EXPECT_TRUE(tuple.Equals(tuple));
+  }
+}
+
+TEST(SchemeHostPortTest, InvalidConstructionWithEmbeddedNulls) {
+  struct TestCases {
+    const char* scheme;
+    size_t scheme_length;
+    const char* host;
+    size_t host_length;
+    uint16 port;
+  } cases[] = {{"http\0more", 9, "example.com", 11, 80},
+               {"http\0", 5, "example.com", 11, 80},
+               {"\0http", 5, "example.com", 11, 80},
+               {"http", 4, "example.com\0not-example.com", 27, 80},
+               {"http", 4, "example.com\0", 12, 80},
+               {"http", 4, "\0example.com", 12, 80}};
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::SchemeHostPort tuple(std::string(test.scheme, test.scheme_length),
+                              std::string(test.host, test.host_length),
+                              test.port);
+    EXPECT_EQ("", tuple.scheme());
+    EXPECT_EQ("", tuple.host());
+    EXPECT_EQ(0, tuple.port());
+    EXPECT_TRUE(tuple.IsInvalid());
+  }
+}
+
+TEST(SchemeHostPortTest, GURLConstruction) {
+  struct TestCases {
+    const char* url;
+    const char* scheme;
+    const char* host;
+    uint16 port;
+  } cases[] = {
+      {"http://192.168.9.1/", "http", "192.168.9.1", 80},
+      {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80},
+      {"http://☃.net/", "http", "xn--n3h.net", 80},
+      {"http://example.com/", "http", "example.com", 80},
+      {"http://example.com:123/", "http", "example.com", 123},
+      {"https://example.com/", "https", "example.com", 443},
+      {"https://example.com:123/", "https", "example.com", 123},
+      {"file:///etc/passwd", "file", "", 0},
+      {"file://example.com/etc/passwd", "file", "example.com", 0},
+      {"http://u:p@example.com/", "http", "example.com", 80},
+      {"http://u:p@example.com/path", "http", "example.com", 80},
+      {"http://u:p@example.com/path?123", "http", "example.com", 80},
+      {"http://u:p@example.com/path?123#hash", "http", "example.com", 80},
+  };
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(test.url);
+    GURL url(test.url);
+    EXPECT_TRUE(url.is_valid());
+    url::SchemeHostPort tuple(url);
+    EXPECT_EQ(test.scheme, tuple.scheme());
+    EXPECT_EQ(test.host, tuple.host());
+    EXPECT_EQ(test.port, tuple.port());
+    EXPECT_FALSE(tuple.IsInvalid());
+    EXPECT_TRUE(tuple.Equals(tuple));
+  }
+}
+
+TEST(SchemeHostPortTest, Serialization) {
+  struct TestCases {
+    const char* url;
+    const char* expected;
+  } cases[] = {
+      {"http://192.168.9.1/", "http://192.168.9.1"},
+      {"http://[2001:db8::1]/", "http://[2001:db8::1]"},
+      {"http://☃.net/", "http://xn--n3h.net"},
+      {"http://example.com/", "http://example.com"},
+      {"http://example.com:123/", "http://example.com:123"},
+      {"https://example.com/", "https://example.com"},
+      {"https://example.com:123/", "https://example.com:123"},
+      {"file:///etc/passwd", "file://"},
+      {"file://example.com/etc/passwd", "file://example.com"},
+  };
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(test.url);
+    GURL url(test.url);
+    url::SchemeHostPort tuple(url);
+    EXPECT_EQ(test.expected, tuple.Serialize());
+  }
+}
+
+TEST(SchemeHostPortTest, Comparison) {
+  // These tuples are arranged in increasing order:
+  struct SchemeHostPorts {
+    const char* scheme;
+    const char* host;
+    uint16 port;
+  } tuples[] = {
+      {"http", "a", 80},
+      {"http", "b", 80},
+      {"https", "a", 80},
+      {"https", "b", 80},
+      {"http", "a", 81},
+      {"http", "b", 81},
+      {"https", "a", 81},
+      {"https", "b", 81},
+  };
+
+  for (size_t i = 0; i < arraysize(tuples); i++) {
+    url::SchemeHostPort current(tuples[i].scheme, tuples[i].host,
+                                tuples[i].port);
+    for (size_t j = i; j < arraysize(tuples); j++) {
+      url::SchemeHostPort to_compare(tuples[j].scheme, tuples[j].host,
+                                     tuples[j].port);
+      EXPECT_EQ(i < j, current < to_compare) << i << " < " << j;
+      EXPECT_EQ(j < i, to_compare < current) << j << " < " << i;
+    }
+  }
+}
+
+}  // namespace

diff --git a/third_party/mozilla/url_parse.h b/third_party/mozilla/url_parse.h
index 71dbb78..7bfcdc8 100644
--- a/third_party/mozilla/url_parse.h
+++ b/third_party/mozilla/url_parse.h

@@ -5,9 +5,6 @@
 #ifndef URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
 #define URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
 
-#include <string>
-
-#include "base/basictypes.h"
 #include "base/strings/string16.h"
 #include "url/url_export.h"
 

diff --git a/url_canon.h b/url_canon.h
index 432f291..95d5345 100644
--- a/url_canon.h
+++ b/url_canon.h

@@ -9,8 +9,8 @@
 #include <string.h>
 
 #include "base/strings/string16.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_export.h"
-#include "url/url_parse.h"
 
 namespace url {
 
@@ -285,7 +285,7 @@
 // User info: username/password. If present, this will add the delimiters so
 // the output will be "<username>:<password>@" or "<username>@". Empty
 // username/password pairs, or empty passwords, will get converted to
-// nonexistant in the canonical version.
+// nonexistent in the canonical version.
 //
 // The components for the username and password refer to ranges in the
 // respective source strings. Usually, these will be the same string, which
@@ -317,13 +317,13 @@
 
   // This field summarizes how the input was classified by the canonicalizer.
   enum Family {
-    NEUTRAL,   // - Doesn't resemble an IP address.  As far as the IP
+    NEUTRAL,   // - Doesn't resemble an IP address. As far as the IP
                //   canonicalizer is concerned, it should be treated as a
                //   hostname.
-    BROKEN,    // - Almost an IP, but was not canonicalized.  This could be an
+    BROKEN,    // - Almost an IP, but was not canonicalized. This could be an
                //   IPv4 address where truncation occurred, or something
                //   containing the special characters :[] which did not parse
-               //   as an IPv6 address.  Never attempt to connect to this
+               //   as an IPv6 address. Never attempt to connect to this
                //   address, because it might actually succeed!
     IPV4,      // - Successfully canonicalized as an IPv4 address.
     IPV6,      // - Successfully canonicalized as an IPv6 address.
@@ -331,7 +331,7 @@
   Family family;
 
   // If |family| is IPV4, then this is the number of nonempty dot-separated
-  // components in the input text, from 1 to 4.  If |family| is not IPV4,
+  // components in the input text, from 1 to 4. If |family| is not IPV4,
   // this value is undefined.
   int num_ipv4_components;
 
@@ -355,7 +355,7 @@
 
 // Host.
 //
-// The 8-bit version requires UTF-8 encoding.  Use this version when you only
+// The 8-bit version requires UTF-8 encoding. Use this version when you only
 // need to know whether canonicalization succeeded.
 URL_EXPORT bool CanonicalizeHost(const char* spec,
                                  const Component& host,
@@ -368,7 +368,7 @@
 
 // Extended version of CanonicalizeHost, which returns additional information.
 // Use this when you need to know whether the hostname was an IP address.
-// A successful return is indicated by host_info->family != BROKEN.  See the
+// A successful return is indicated by host_info->family != BROKEN. See the
 // definition of CanonHostInfo above for details.
 URL_EXPORT void CanonicalizeHostVerbose(const char* spec,
                                         const Component& host,
@@ -554,7 +554,7 @@
                                     CanonOutput* output,
                                     Parsed* new_parsed);
 
-// Use for mailto URLs. This "canonicalizes" the url into a path and query
+// Use for mailto URLs. This "canonicalizes" the URL into a path and query
 // component. It does not attempt to merge "to" fields. It uses UTF-8 for
 // the query encoding if there is a query. This is because a mailto URL is
 // really intended for an external mail program, and the encoding of a page,
@@ -578,9 +578,9 @@
 // treated on the same code path as regular canonicalization (the same string
 // for each component).
 //
-// A Parsed structure usually goes along with this. Those
-// components identify offsets within these strings, so that they can all be
-// in the same string, or spread arbitrarily across different ones.
+// A Parsed structure usually goes along with this. Those components identify
+// offsets within these strings, so that they can all be in the same string,
+// or spread arbitrarily across different ones.
 //
 // This structures does not own any data. It is the caller's responsibility to
 // ensure that the data the pointers point to stays in scope and is not
@@ -725,7 +725,7 @@
   }
   bool IsRefOverridden() const { return sources_.ref != NULL; }
 
-  // Getters for the itnernal data. See the variables below for how the
+  // Getters for the internal data. See the variables below for how the
   // information is encoded.
   const URLComponentSource<CHAR>& sources() const { return sources_; }
   const Parsed& components() const { return components_; }
@@ -863,7 +863,7 @@
 // The base URL should be canonical and have a host (may be empty for file
 // URLs) and a path. If it doesn't have these, we can't resolve relative
 // URLs off of it and will return the base as the output with an error flag.
-// Becausee it is canonical is should also be ASCII.
+// Because it is canonical it should also be ASCII.
 //
 // The query charset converter follows the same rules as CanonicalizeQuery.
 //

diff --git a/url_canon_etc.cc b/url_canon_etc.cc
index 7409efd..e9da94c 100644
--- a/url_canon_etc.cc
+++ b/url_canon_etc.cc

@@ -95,9 +95,9 @@
   // The output scheme starts from the current position.
   out_scheme->begin = output->length();
 
-  // Danger: it's important that this code does not strip any characters: it
-  // only emits the canonical version (be it valid or escaped) of each of
-  // the input characters. Stripping would put it out of sync with
+  // Danger: it's important that this code does not strip any characters;
+  // it only emits the canonical version (be it valid or escaped) for each
+  // of the input characters. Stripping would put it out of sync with
   // FindAndCompareScheme, which could cause some security checks on
   // schemes to be incorrect.
   bool success = true;
@@ -218,7 +218,7 @@
   char buf[buf_size];
   WritePortInt(buf, buf_size, port_num);
 
-  // Append the port number to the output, preceeded by a colon.
+  // Append the port number to the output, preceded by a colon.
   output->push_back(':');
   out_port->begin = output->length();
   for (int i = 0; i < buf_size && buf[i]; i++)

diff --git a/url_canon_host.cc b/url_canon_host.cc
index 513248a..fce4d3a 100644
--- a/url_canon_host.cc
+++ b/url_canon_host.cc

@@ -34,7 +34,7 @@
 // NOTE: I didn't actually test all the control characters. Some may be
 // disallowed in the input, but they are all accepted escaped except for 0.
 // I also didn't test if characters affecting HTML parsing are allowed
-// unescaped, eg. (") or (#), which would indicate the beginning of the path.
+// unescaped, e.g. (") or (#), which would indicate the beginning of the path.
 // Surprisingly, space is accepted in the input and always escaped.
 
 // This table lists the canonical version of all characters we allow in the
@@ -316,11 +316,11 @@
   }
 
   if (!success) {
-    // Canonicalization failed.  Set BROKEN to notify the caller.
+    // Canonicalization failed. Set BROKEN to notify the caller.
     host_info->family = CanonHostInfo::BROKEN;
   } else {
     // After all the other canonicalization, check if we ended up with an IP
-    // address.  IP addresses are small, so writing into this temporary buffer
+    // address. IP addresses are small, so writing into this temporary buffer
     // should not cause an allocation.
     RawCanonOutput<64> canon_ip;
     CanonicalizeIPAddress(output->data(),
@@ -328,7 +328,7 @@
                           &canon_ip, host_info);
 
     // If we got an IPv4/IPv6 address, copy the canonical form back to the
-    // real buffer.  Otherwise, it's a hostname or broken IP, in which case
+    // real buffer. Otherwise, it's a hostname or broken IP, in which case
     // we just leave it in place.
     if (host_info->IsIPAddress()) {
       output->set_length(output_begin);

diff --git a/url_canon_icu.cc b/url_canon_icu.cc
index 741bed2..8a80d71 100644
--- a/url_canon_icu.cc
+++ b/url_canon_icu.cc

@@ -99,8 +99,10 @@
     // TODO(jungshik): Change options as different parties (browsers,
     // registrars, search engines) converge toward a consensus.
     value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
-    if (U_FAILURE(err))
+    if (U_FAILURE(err)) {
+      CHECK(false) << "failed to open UTS46 data with error: " << err;
       value = NULL;
+    }
   }
 
   UIDNA* value;

diff --git a/url_canon_internal.cc b/url_canon_internal.cc
index 1554814..164c6cf 100644
--- a/url_canon_internal.cc
+++ b/url_canon_internal.cc

@@ -249,9 +249,9 @@
 
 bool ReadUTFChar(const char* str, int* begin, int length,
                  unsigned* code_point_out) {
-  // This depends on ints and int32s being the same thing.  If they're not, it
+  // This depends on ints and int32s being the same thing. If they're not, it
   // will fail to compile.
-  // TODO(mmenke):  This should probably be fixed.
+  // TODO(mmenke): This should probably be fixed.
   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
       !base::IsValidCharacter(*code_point_out)) {
     *code_point_out = kUnicodeReplacementCharacter;
@@ -262,9 +262,9 @@
 
 bool ReadUTFChar(const base::char16* str, int* begin, int length,
                  unsigned* code_point_out) {
-  // This depends on ints and int32s being the same thing.  If they're not, it
+  // This depends on ints and int32s being the same thing. If they're not, it
   // will fail to compile.
-  // TODO(mmenke):  This should probably be fixed.
+  // TODO(mmenke): This should probably be fixed.
   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
       !base::IsValidCharacter(*code_point_out)) {
     *code_point_out = kUnicodeReplacementCharacter;

diff --git a/url_canon_internal.h b/url_canon_internal.h
index 71bfc40..8a926b6 100644
--- a/url_canon_internal.h
+++ b/url_canon_internal.h

@@ -7,7 +7,7 @@
 
 // This file is intended to be included in another C++ file where the character
 // types are defined. This allows us to write mostly generic code, but not have
-// templace bloat because everything is inlined when anybody calls any of our
+// template bloat because everything is inlined when anybody calls any of our
 // functions.
 
 #include <stdlib.h>
@@ -41,7 +41,7 @@
   // Valid in an ASCII-representation of an octal digit.
   CHAR_OCT = 32,
 
-  // Characters that do not require escaping in encodeURIComponent.  Characters
+  // Characters that do not require escaping in encodeURIComponent. Characters
   // that do not have this flag will be escaped; see url_util.cc.
   CHAR_COMPONENT = 64,
 };
@@ -175,7 +175,7 @@
              output);
     Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
              output);
-  } else if (char_value <= 0x10FFFF) {  // Max unicode code point.
+  } else if (char_value <= 0x10FFFF) {  // Max Unicode code point.
     // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
              output);
@@ -199,7 +199,7 @@
 }
 
 // Writes the given character to the output as UTF-8. This does NO checking
-// of the validity of the unicode characters; the caller should ensure that
+// of the validity of the Unicode characters; the caller should ensure that
 // the value it is appending is valid to append.
 inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {
   DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output);
@@ -207,7 +207,7 @@
 
 // Writes the given character to the output as UTF-8, escaping ALL
 // characters (even when they are ASCII). This does NO checking of the
-// validity of the unicode characters; the caller should ensure that the value
+// validity of the Unicode characters; the caller should ensure that the value
 // it is appending is valid to append.
 inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {
   DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output);
@@ -260,7 +260,7 @@
 // that any following characters are.
 inline bool AppendUTF8EscapedChar(const base::char16* str, int* begin,
                                   int length, CanonOutput* output) {
-  // UTF-16 input. Readchar16 will handle invalid characters for us and give
+  // UTF-16 input. ReadUTFChar will handle invalid characters for us and give
   // us the kUnicodeReplacementCharacter, so we don't have to do special
   // checking after failure, just pass through the failure to the caller.
   unsigned char_value;

diff --git a/url_canon_internal_file.h b/url_canon_internal_file.h
index 6903098..26a3eae 100644
--- a/url_canon_internal_file.h
+++ b/url_canon_internal_file.h

@@ -113,15 +113,15 @@
   new_parsed->path.begin = output->length();
   output->push_back('/');
 
-  // Copies and normalizes the "c:" at the beginning, if present.
+  // Copy and normalize the "c:" at the beginning, if present.
   int after_drive = FileDoDriveSpec(source.path, parsed.path.begin,
                                     parsed.path.end(), output);
 
-  // Copies the rest of the path
+  // Copy the rest of the path.
   FileDoPath<CHAR, UCHAR>(source.path, after_drive, parsed.path.end(), output);
   new_parsed->path.len = output->length() - new_parsed->path.begin;
 
-  // Things following the path we can use the standard canonicalizers for.
+  // For things following the path, we can use the standard canonicalizers.
   success &= URLCanonInternal<CHAR, UCHAR>::DoQuery(
       source.query, parsed.query, output, &new_parsed->query);
   success &= URLCanonInternal<CHAR, UCHAR>::DoRef(

diff --git a/url_canon_ip.cc b/url_canon_ip.cc
index 45f95de..87c30c7 100644
--- a/url_canon_ip.cc
+++ b/url_canon_ip.cc

@@ -4,9 +4,10 @@
 
 #include "url/url_canon_ip.h"
 
+#include <stdint.h>
 #include <stdlib.h>
+#include <limits>
 
-#include "base/basictypes.h"
 #include "base/logging.h"
 #include "url/url_canon_internal.h"
 
@@ -92,7 +93,7 @@
 template<typename CHAR>
 CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,
                                             const Component& component,
-                                            uint32* number) {
+                                            uint32_t* number) {
   // Figure out the base
   SharedCharTypes base;
   int base_prefix_len = 0;  // Size of the prefix for this base.
@@ -118,7 +119,7 @@
     base_prefix_len++;
 
   // Put the component, minus any base prefix, into a NULL-terminated buffer so
-  // we can call the standard library.  Because leading zeros have already been
+  // we can call the standard library. Because leading zeros have already been
   // discarded, filling the entire buffer is guaranteed to trigger the 32-bit
   // overflow check.
   const int kMaxComponentLen = 16;
@@ -133,7 +134,7 @@
     if (!IsCharOfType(input, base))
       return CanonHostInfo::NEUTRAL;
 
-    // Fill the buffer, if there's space remaining.  This check allows us to
+    // Fill the buffer, if there's space remaining. This check allows us to
     // verify that all characters are numeric, even those that don't fit.
     if (dest_i < kMaxComponentLen)
       buf[dest_i++] = input;
@@ -143,14 +144,14 @@
 
   // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal
   // number can overflow a 64-bit number in <= 16 characters).
-  uint64 num = _strtoui64(buf, NULL, BaseForType(base));
+  uint64_t num = _strtoui64(buf, NULL, BaseForType(base));
 
   // Check for 32-bit overflow.
-  if (num > kuint32max)
+  if (num > std::numeric_limits<uint32_t>::max())
     return CanonHostInfo::BROKEN;
 
-  // No overflow.  Success!
-  *number = static_cast<uint32>(num);
+  // No overflow. Success!
+  *number = static_cast<uint32_t>(num);
   return CanonHostInfo::IPV4;
 }
 
@@ -167,10 +168,10 @@
 
   // Convert existing components to digits. Values up to
   // |existing_components| will be valid.
-  uint32 component_values[4];
+  uint32_t component_values[4];
   int existing_components = 0;
 
-  // Set to true if one or more components are BROKEN.  BROKEN is only
+  // Set to true if one or more components are BROKEN. BROKEN is only
   // returned if all components are IPV4 or BROKEN, so, for example,
   // 12345678912345.de returns NEUTRAL rather than broken.
   bool broken = false;
@@ -198,7 +199,7 @@
   // First, process all components but the last, while making sure each fits
   // within an 8-bit field.
   for (int i = 0; i < existing_components - 1; i++) {
-    if (component_values[i] > kuint8max)
+    if (component_values[i] > std::numeric_limits<uint8_t>::max())
       return CanonHostInfo::BROKEN;
     address[i] = static_cast<unsigned char>(component_values[i]);
   }
@@ -209,7 +210,7 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"
 #endif
-  uint32 last_value = component_values[existing_components - 1];
+  uint32_t last_value = component_values[existing_components - 1];
 #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4)
 #pragma GCC diagnostic pop
 #endif
@@ -440,11 +441,12 @@
   return true;
 }
 
-// Converts a hex comonent into a number. This cannot fail since the caller has
+// Converts a hex component into a number. This cannot fail since the caller has
 // already verified that each character in the string was a hex digit, and
 // that there were no more than 4 characters.
-template<typename CHAR>
-uint16 IPv6HexComponentToNumber(const CHAR* spec, const Component& component) {
+template <typename CHAR>
+uint16_t IPv6HexComponentToNumber(const CHAR* spec,
+                                  const Component& component) {
   DCHECK(component.len <= 4);
 
   // Copy the hex string into a C-string.
@@ -455,7 +457,7 @@
 
   // Convert it to a number (overflow is not possible, since with 4 hex
   // characters we can at most have a 16 bit number).
-  return static_cast<uint16>(_strtoui64(buf, NULL, 16));
+  return static_cast<uint16_t>(_strtoui64(buf, NULL, 16));
 }
 
 // Converts an IPv6 address to a 128-bit number (network byte order), returning
@@ -497,7 +499,7 @@
     // Append the hex component's value.
     if (i != ipv6_parsed.num_hex_components) {
       // Get the 16-bit value for this hex component.
-      uint16 number = IPv6HexComponentToNumber<CHAR>(
+      uint16_t number = IPv6HexComponentToNumber<CHAR>(
           spec, ipv6_parsed.hex_components[i]);
       // Append to |address|, in network byte order.
       address[cur_index_in_address++] = (number & 0xFF00) >> 8;
@@ -576,7 +578,7 @@
       }
     }
 
-    // No invalid characters.  Could still be IPv4 or a hostname.
+    // No invalid characters. Could still be IPv4 or a hostname.
     host_info->family = CanonHostInfo::NEUTRAL;
     return false;
   }

diff --git a/url_canon_ip.h b/url_canon_ip.h
index 19ecfdb..937bd46 100644
--- a/url_canon_ip.h
+++ b/url_canon_ip.h

@@ -6,9 +6,9 @@
 #define URL_URL_CANON_IP_H_
 
 #include "base/strings/string16.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_export.h"
-#include "url/url_parse.h"
 
 namespace url {
 
@@ -30,14 +30,14 @@
 // Not all components may exist. If there are only 3 components, for example,
 // the last one will have a length of -1 or 0 to indicate it does not exist.
 //
-// Note that many platform's inet_addr will ignore everything after a space
-// in certain curcumstances if the stuff before the space looks like an IP
+// Note that many platforms' inet_addr will ignore everything after a space
+// in certain circumstances if the stuff before the space looks like an IP
 // address. IE6 is included in this. We do NOT handle this case. In many cases,
 // the browser's canonicalization will get run before this which converts
-// spaces to %20 (in the case of IE7) or rejects them (in the case of
-// Mozilla), so this code path never gets hit. Our host canonicalization will
-// notice these spaces and escape them, which will make IP address finding
-// fail. This seems like better behavior than stripping after a space.
+// spaces to %20 (in the case of IE7) or rejects them (in the case of Mozilla),
+// so this code path never gets hit. Our host canonicalization will notice
+// these spaces and escape them, which will make IP address finding fail. This
+// seems like better behavior than stripping after a space.
 URL_EXPORT bool FindIPv4Components(const char* spec,
                                    const Component& host,
                                    Component components[4]);

diff --git a/url_canon_mailtourl.cc b/url_canon_mailtourl.cc
index 7c48b95..fb6bc9a 100644
--- a/url_canon_mailtourl.cc
+++ b/url_canon_mailtourl.cc

@@ -55,7 +55,7 @@
     new_parsed->path.reset();
   }
 
-  // Query -- always use the default utf8 charset converter.
+  // Query -- always use the default UTF-8 charset converter.
   CanonicalizeQuery(source.query, parsed.query, NULL,
                     output, &new_parsed->query);
 

diff --git a/url_canon_path.cc b/url_canon_path.cc
index ceff689..ee1cd96 100644
--- a/url_canon_path.cc
+++ b/url_canon_path.cc

@@ -173,7 +173,7 @@
 // copied to the output.
 //
 // We do not collapse multiple slashes in a row to a single slash. It seems
-// no web browsers do this, and we don't want incompababilities, even though
+// no web browsers do this, and we don't want incompatibilities, even though
 // it would be correct for most systems.
 template<typename CHAR, typename UCHAR>
 bool DoPartialPath(const CHAR* spec,
@@ -200,7 +200,7 @@
         // Needs special handling of some sort.
         int dotlen;
         if ((dotlen = IsDot(spec, i, end)) > 0) {
-          // See if this dot was preceeded by a slash in the output. We
+          // See if this dot was preceded by a slash in the output. We
           // assume that when canonicalizing paths, they will always
           // start with a slash and not a dot, so we don't have to
           // bounds check the output.
@@ -230,7 +230,7 @@
                 break;
             }
           } else {
-            // This dot is not preceeded by a slash, it is just part of some
+            // This dot is not preceded by a slash, it is just part of some
             // file name.
             output->push_back('.');
             i += dotlen - 1;

diff --git a/url_canon_pathurl.cc b/url_canon_pathurl.cc
index 0d23ccb..494fbda 100644
--- a/url_canon_pathurl.cc
+++ b/url_canon_pathurl.cc

@@ -14,7 +14,7 @@
 namespace {
 
 // Canonicalize the given |component| from |source| into |output| and
-// |new_component|. If |separator| is non-zero, it is pre-pended to |ouput|
+// |new_component|. If |separator| is non-zero, it is pre-pended to |output|
 // prior to the canonicalized component; i.e. for the '?' or '#' characters.
 template<typename CHAR, typename UCHAR>
 bool DoCanonicalizePathComponent(const CHAR* source,

diff --git a/url_canon_query.cc b/url_canon_query.cc
index 5494ddf..bf59d10 100644
--- a/url_canon_query.cc
+++ b/url_canon_query.cc

@@ -80,7 +80,7 @@
 }
 
 // Runs the converter with the given UTF-16 input. We don't have to do
-// anything, but this overriddden function allows us to use the same code
+// anything, but this overridden function allows us to use the same code
 // for both UTF-8 and UTF-16 input.
 void RunConverter(const base::char16* spec,
                   const Component& query,

diff --git a/url_canon_relative.cc b/url_canon_relative.cc
index 06ca99c..c2e94e4 100644
--- a/url_canon_relative.cc
+++ b/url_canon_relative.cc

@@ -17,14 +17,14 @@
 namespace {
 
 // Firefox does a case-sensitive compare (which is probably wrong--Mozilla bug
-// 379034), whereas IE is case-insensetive.
+// 379034), whereas IE is case-insensitive.
 //
 // We choose to be more permissive like IE. We don't need to worry about
 // unescaping or anything here: neither IE or Firefox allow this. We also
 // don't have to worry about invalid scheme characters since we are comparing
 // against the canonical scheme of the base.
 //
-// The base URL should always be canonical, therefore is ASCII.
+// The base URL should always be canonical, therefore it should be ASCII.
 template<typename CHAR>
 bool AreSchemesEqual(const char* base,
                      const Component& base_scheme,
@@ -82,7 +82,7 @@
 
 #ifdef WIN32
   // We special case paths like "C:\foo" so they can link directly to the
-  // file on Windows (IE compatability). The security domain stuff should
+  // file on Windows (IE compatibility). The security domain stuff should
   // prevent a link like this from actually being followed if its on a
   // web page.
   //
@@ -91,22 +91,22 @@
   // is a file and the answer will still be correct.
   //
   // We require strict backslashes when detecting UNC since two forward
-  // shashes should be treated a a relative URL with a hostname.
+  // slashes should be treated as a relative URL with a hostname.
   if (DoesBeginWindowsDriveSpec(url, begin, url_len) ||
       DoesBeginUNCPath(url, begin, url_len, true))
     return true;
 #endif  // WIN32
 
   // See if we've got a scheme, if not, we know this is a relative URL.
-  // BUT: Just because we have a scheme, doesn't make it absolute.
+  // BUT, having a scheme does not by itself make the URL absolute.
   // "http:foo.html" is a relative URL with path "foo.html". If the scheme is
-  // empty, we treat it as relative (":foo") like IE does.
+  // empty, we treat it as relative (":foo"), like IE does.
   Component scheme;
   const bool scheme_is_empty =
       !ExtractScheme(url, url_len, &scheme) || scheme.len == 0;
   if (scheme_is_empty) {
     if (url[begin] == '#') {
-      // |url| is a bare fragement (e.g. "#foo"). This can be resolved against
+      // |url| is a bare fragment (e.g. "#foo"). This can be resolved against
       // any base. Fall-through.
     } else if (!is_base_hierarchical) {
       // Don't allow relative URLs if the base scheme doesn't support it.
@@ -145,7 +145,7 @@
   int colon_offset = scheme.end();
 
   // If it's a filesystem URL, the only valid way to make it relative is not to
-  // supply a scheme.  There's no equivalent to e.g. http:index.html.
+  // supply a scheme. There's no equivalent to e.g. http:index.html.
   if (CompareSchemeComponent(url, scheme, kFileSystemScheme))
     return true;
 
@@ -394,7 +394,7 @@
                             query_converter, output, out_parsed);
 }
 
-// Resolves a relative URL that happens to be an absolute file path.  Examples
+// Resolves a relative URL that happens to be an absolute file path. Examples
 // include: "//hostname/path", "/c:/foo", and "//hostname/c:/foo".
 template<typename CHAR>
 bool DoResolveAbsoluteFile(const CHAR* relative_url,
@@ -460,7 +460,7 @@
   // how strict the UNC finder is).
   //
   // We also allow Windows absolute drive specs on any scheme (for example
-  // "c:\foo") like IE does. There must be no preceeding slashes in this
+  // "c:\foo") like IE does. There must be no preceding slashes in this
   // case (we reject anything like "/c:/foo") because that should be treated
   // as a path. For file URLs, we allow any number of slashes since that would
   // be setting the path.

diff --git a/url_canon_stdurl.cc b/url_canon_stdurl.cc
index 7a61de8..7d1758b 100644
--- a/url_canon_stdurl.cc
+++ b/url_canon_stdurl.cc

@@ -169,7 +169,7 @@
 }
 
 // For 16-bit replacements, we turn all the replacements into UTF-8 so the
-// regular codepath can be used.
+// regular code path can be used.
 bool ReplaceStandardURL(const char* base,
                         const Parsed& base_parsed,
                         const Replacements<base::char16>& replacements,

diff --git a/url_canon_unittest.cc b/url_canon_unittest.cc
index 3ab8710..0ccd6c9 100644
--- a/url_canon_unittest.cc
+++ b/url_canon_unittest.cc

@@ -6,10 +6,10 @@
 
 #include "base/macros.h"
 #include "testing/gtest/include/gtest/gtest.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_canon_internal.h"
 #include "url/url_canon_stdstring.h"
-#include "url/url_parse.h"
 #include "url/url_test_utils.h"
 
 namespace url {
@@ -38,7 +38,7 @@
   bool expected_success;
 };
 
-// Test cases for CanonicalizeIPAddress().  The inputs are identical to
+// Test cases for CanonicalizeIPAddress(). The inputs are identical to
 // DualComponentCase, but the output has extra CanonHostInfo fields.
 struct IPAddressCase {
   const char* input8;
@@ -127,7 +127,7 @@
 
 #if defined(GTEST_HAS_DEATH_TEST)
 // TODO(mattm): Can't run this in debug mode for now, since the DCHECK will
-// cause the Chromium stacktrace dialog to appear and hang the test.
+// cause the Chromium stack trace dialog to appear and hang the test.
 // See http://crbug.com/49580.
 #if defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON)
 #define MAYBE_DoAppendUTF8Invalid DoAppendUTF8Invalid
@@ -157,10 +157,10 @@
   } utf_cases[] = {
       // Valid canonical input should get passed through & escaped.
     {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"},
-      // Test a characer that takes > 16 bits (U+10300 = old italic letter A)
+      // Test a character that takes > 16 bits (U+10300 = old italic letter A)
     {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"},
-      // Non-shortest-form UTF-8 are invalid. The bad char should be replaced
-      // with the invalid character (EF BF DB in UTF-8).
+      // Non-shortest-form UTF-8 characters are invalid. The bad character
+      // should be replaced with the invalid character (EF BF BD in UTF-8).
     {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", NULL, false, "%EF%BF%BD%E5%A5%BD"},
       // Invalid UTF-8 sequences should be marked as invalid (the first
       // sequence is truncated).
@@ -259,7 +259,7 @@
     EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
     EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
 
-    // Now try the wide version
+    // Now try the wide version.
     out_str.clear();
     StdStringCanonOutput output2(&out_str);
 
@@ -275,7 +275,7 @@
     EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
   }
 
-  // Test the case where the scheme is declared nonexistant, it should be
+  // Test the case where the scheme is declared nonexistent; it should be
   // converted into an empty scheme.
   Component out_comp;
   out_str.clear();
@@ -638,7 +638,7 @@
     {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"},
     {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"},
     {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"},
-      // Old trunctations tests.  They're all "BROKEN" now.
+      // Old truncation tests. They're all "BROKEN" now.
     {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", Component(), CanonHostInfo::BROKEN, -1, ""},
     {"192.168.0.257", L"192.168.0.257", "", Component(), CanonHostInfo::BROKEN, -1, ""},
     {"192.168.0xa20001", L"192.168.0xa20001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
@@ -754,16 +754,17 @@
 
     {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", Component(0,13), CanonHostInfo::IPV6, -1, "20010DB8000000000000000000000001"},
 
-      // Can only have one "::" contraction in an IPv6 string literal.
+    // Can only have one "::" contraction in an IPv6 string literal.
     {"[2001::db8::1]", L"[2001::db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-      // No more than 2 consecutive ':'s.
+    // No more than 2 consecutive ':'s.
     {"[2001:db8:::1]", L"[2001:db8:::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
     {"[:::]", L"[:::]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-      // Non-IP addresses due to invalid characters.
+    // Non-IP addresses due to invalid characters.
     {"[2001::.com]", L"[2001::.com]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
-      // If there are not enough components, the last one should fill them out.
+    // If there are not enough components, the last one should fill them out.
     // ... omitted at this time ...
-      // Too many components means not an IP address.  Similarly with too few if using IPv4 compat or mapped addresses.
+    // Too many components means not an IP address. Similarly, with too few
+    // if using IPv4 compat or mapped addresses.
     {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
     {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
     {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
@@ -887,7 +888,7 @@
     {"http://user:pass@/", "user:pass@", Component(0, 4), Component(5, 4), true},
     {"http://%2540:bar@domain.com/", "%2540:bar@", Component(0, 5), Component(6, 3), true },
 
-      // IE7 compatability: old versions allowed backslashes in usernames, but
+      // IE7 compatibility: old versions allowed backslashes in usernames, but
       // IE7 does not. We disallow it as well.
     {"ftp://me\\mydomain:pass@foo.com/", "", Component(0, -1), Component(0, -1), true},
   };
@@ -943,7 +944,7 @@
   // buffer. The parser unit tests will test scanning the number correctly.
   //
   // Note that the CanonicalizePort will always prepend a colon to the output
-  // to separate it from the colon that it assumes preceeds it.
+  // to separate it from the colon that it assumes precedes it.
   struct PortCase {
     const char* input;
     int default_port;
@@ -1329,7 +1330,7 @@
     {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw", "host.com", "99", "/path", "query", "ref", "https://me:pw@host.com:99/path?query#ref"},
       // Replace nothing
     {"http://a:b@google.com:22/foo?baz@cat", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "http://a:b@google.com:22/foo?baz@cat"},
-      // Replace scheme with filesystem.  The result is garbage, but you asked
+      // Replace scheme with filesystem. The result is garbage, but you asked
       // for it.
     {"http://a:b@google.com:22/foo?baz@cat", "filesystem", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem://a:b@google.com:22/foo?baz@cat"},
   };
@@ -1594,7 +1595,7 @@
     {"file:", "file:///", true, Component(), Component(7, 1)},
     {"file:UNChost/path", "file://unchost/path", true, Component(7, 7), Component(14, 5)},
       // CanonicalizeFileURL supports absolute Windows style paths for IE
-      // compatability. Note that the caller must decide that this is a file
+      // compatibility. Note that the caller must decide that this is a file
       // URL itself so it can call the file canonicalizer. This is usually
       // done automatically as part of relative URL resolving.
     {"c:\\foo\\bar", "file:///C:/foo/bar", true, Component(), Component(7, 11)},
@@ -1605,7 +1606,7 @@
     {"\\\\server\\file", "file://server/file", true, Component(7, 6), Component(13, 5)},
     {"/\\server/file", "file://server/file", true, Component(7, 6), Component(13, 5)},
       // We should preserve the number of slashes after the colon for IE
-      // compatability, except when there is none, in which case we should
+      // compatibility, except when there is none, in which case we should
       // add one.
     {"file:c:foo/bar.html", "file:///C:/foo/bar.html", true, Component(), Component(7, 16)},
     {"file:/\\/\\C:\\\\//foo\\bar.html", "file:///C:////foo/bar.html", true, Component(), Component(7, 19)},
@@ -1807,7 +1808,7 @@
 
 TEST(URLCanonTest, _itoa_s) {
   // We fill the buffer with 0xff to ensure that it's getting properly
-  // null-terminated.  We also allocate one byte more than what we tell
+  // null-terminated. We also allocate one byte more than what we tell
   // _itoa_s about, and ensure that the extra byte is untouched.
   char buf[6];
   memset(buf, 0xff, sizeof(buf));
@@ -1846,7 +1847,7 @@
 
 TEST(URLCanonTest, _itow_s) {
   // We fill the buffer with 0xff to ensure that it's getting properly
-  // null-terminated.  We also allocate one byte more than what we tell
+  // null-terminated. We also allocate one byte more than what we tell
   // _itoa_s about, and ensure that the extra byte is untouched.
   base::char16 buf[6];
   const char fill_mem = 0xff;
@@ -2022,7 +2023,7 @@
       // which is what is required.
     {"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false, "file://host:80/bar.txt"},
       // Filesystem URL tests; filesystem URLs are only valid and relative if
-      // they have no scheme, e.g. "./index.html".  There's no valid equivalent
+      // they have no scheme, e.g. "./index.html". There's no valid equivalent
       // to http:index.html.
     {"filesystem:http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
     {"filesystem:http://host/t/path", true, false, "filesystem:https://host/t/path2", true, false, false, NULL},
@@ -2090,10 +2091,10 @@
   }
 }
 
-// It used to be when we did a replacement with a long buffer of UTF-16
-// characters, we would get invalid data in the URL. This is because the buffer
-// it used to hold the UTF-8 data was resized, while some pointers were still
-// kept to the old buffer that was removed.
+// It used to be the case that when we did a replacement with a long buffer of
+// UTF-16 characters, we would get invalid data in the URL. This is because the
+// buffer that it used to hold the UTF-8 data was resized, while some pointers
+// were still kept to the old buffer that was removed.
 TEST(URLCanonTest, ReplacementOverflow) {
   const char src[] = "file:///C:/foo/bar";
   int src_len = static_cast<int>(strlen(src));
@@ -2101,7 +2102,7 @@
   ParseFileURL(src, src_len, &parsed);
 
   // Override two components, the path with something short, and the query with
-  // sonething long enough to trigger the bug.
+  // something long enough to trigger the bug.
   Replacements<base::char16> repl;
   base::string16 new_query;
   for (int i = 0; i < 4800; i++)

diff --git a/url_parse.h b/url_parse.h
deleted file mode 100644
index 3b9c546..0000000
--- a/url_parse.h
+++ /dev/null

@@ -1,11 +0,0 @@
-// Copyright 2013 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef URL_URL_PARSE_H_
-#define URL_URL_PARSE_H_
-
-// TODO(tfarina): Remove this file when the callers are updated.
-#include "url/third_party/mozilla/url_parse.h"
-
-#endif  // URL_URL_PARSE_H_

diff --git a/url_parse_file.cc b/url_parse_file.cc
index c08ddc6..fcbb12d 100644
--- a/url_parse_file.cc
+++ b/url_parse_file.cc

@@ -3,8 +3,8 @@
 // found in the LICENSE file.
 
 #include "base/logging.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_file.h"
-#include "url/url_parse.h"
 #include "url/url_parse_internal.h"
 
 // Interesting IE file:isms...

diff --git a/url_parse_internal.h b/url_parse_internal.h
index 4070b7e..7630878 100644
--- a/url_parse_internal.h
+++ b/url_parse_internal.h

@@ -7,11 +7,11 @@
 
 // Contains common inline helper functions used by the URL parsing routines.
 
-#include "url/url_parse.h"
+#include "url/third_party/mozilla/url_parse.h"
 
 namespace url {
 
-// We treat slashes and backslashes the same for IE compatability.
+// We treat slashes and backslashes the same for IE compatibility.
 inline bool IsURLSlash(base::char16 ch) {
   return ch == '/' || ch == '\\';
 }

diff --git a/url_parse_unittest.cc b/url_parse_unittest.cc
index 71b2438..6bf536e 100644
--- a/url_parse_unittest.cc
+++ b/url_parse_unittest.cc

@@ -2,11 +2,11 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include "url/url_parse.h"
+#include "url/third_party/mozilla/url_parse.h"
 
 #include "base/macros.h"
 #include "testing/gtest/include/gtest/gtest.h"
-#include "url/url_parse.h"
+#include "url/third_party/mozilla/url_parse.h"
 
 // Interesting IE file:isms...
 //
@@ -90,13 +90,13 @@
 bool ComponentMatches(const char* input,
                       const char* reference,
                       const Component& component) {
-  // If the component is nonexistant (length == -1), it should begin at 0.
+  // If the component is nonexistent (length == -1), it should begin at 0.
   EXPECT_TRUE(component.len >= 0 || component.len == -1);
 
   // Begin should be valid.
   EXPECT_LE(0, component.begin);
 
-  // A NULL reference means the component should be nonexistant.
+  // A NULL reference means the component should be nonexistent.
   if (!reference)
     return component.len == -1;
   if (component.len < 0)
@@ -345,7 +345,7 @@
 
 TEST(URLParser, PathURL) {
   // Declared outside for loop to try to catch cases in init() where we forget
-  // to reset something that is reset by the construtor.
+  // to reset something that is reset by the constructor.
   Parsed parsed;
   for (size_t i = 0; i < arraysize(path_cases); i++) {
     const char* url = path_cases[i].input;
@@ -356,7 +356,7 @@
     EXPECT_TRUE(ComponentMatches(url, path_cases[i].path, parsed.GetContent()))
         << i;
 
-    // The remaining components are never used for path urls.
+    // The remaining components are never used for path URLs.
     ExpectInvalidComponent(parsed.username);
     ExpectInvalidComponent(parsed.password);
     ExpectInvalidComponent(parsed.host);
@@ -537,7 +537,7 @@
     Component key, value;
     if (!ExtractQueryKeyValue(url, &query, &key, &value)) {
       if (parameter >= i && !expected_key)
-        return true;  // Expected nonexistant key, got one.
+        return true;  // Expected nonexistent key, got one.
       return false;  // Not enough keys.
     }
 
@@ -613,7 +613,7 @@
 
 TEST(URLParser, MailtoUrl) {
   // Declared outside for loop to try to catch cases in init() where we forget
-  // to reset something that is reset by the construtor.
+  // to reset something that is reset by the constructor.
   Parsed parsed;
   for (size_t i = 0; i < arraysize(mailto_cases); ++i) {
     const char* url = mailto_cases[i].input;
@@ -625,7 +625,7 @@
     EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].query, parsed.query));
     EXPECT_EQ(PORT_UNSPECIFIED, port);
 
-    // The remaining components are never used for mailto urls.
+    // The remaining components are never used for mailto URLs.
     ExpectInvalidComponent(parsed.username);
     ExpectInvalidComponent(parsed.password);
     ExpectInvalidComponent(parsed.port);
@@ -645,7 +645,7 @@
 
 TEST(URLParser, FileSystemURL) {
   // Declared outside for loop to try to catch cases in init() where we forget
-  // to reset something that is reset by the construtor.
+  // to reset something that is reset by the constructor.
   Parsed parsed;
   for (size_t i = 0; i < arraysize(filesystem_cases); i++) {
     const FileSystemURLParseCase* parsecase = &filesystem_cases[i];
@@ -667,7 +667,7 @@
       int port = ParsePort(url, parsed.inner_parsed()->port);
       EXPECT_EQ(parsecase->inner_port, port);
 
-      // The remaining components are never used for filesystem urls.
+      // The remaining components are never used for filesystem URLs.
       ExpectInvalidComponent(parsed.inner_parsed()->query);
       ExpectInvalidComponent(parsed.inner_parsed()->ref);
     }
@@ -676,7 +676,7 @@
     EXPECT_TRUE(ComponentMatches(url, parsecase->query, parsed.query));
     EXPECT_TRUE(ComponentMatches(url, parsecase->ref, parsed.ref));
 
-    // The remaining components are never used for filesystem urls.
+    // The remaining components are never used for filesystem URLs.
     ExpectInvalidComponent(parsed.username);
     ExpectInvalidComponent(parsed.password);
     ExpectInvalidComponent(parsed.host);

diff --git a/url_test_utils.h b/url_test_utils.h
index 6400bac..156c428 100644
--- a/url_test_utils.h
+++ b/url_test_utils.h

@@ -19,7 +19,7 @@
 namespace test_utils {
 
 // Converts a UTF-16 string from native wchar_t format to char16, by
-// truncating the high 32 bits.  This is not meant to handle true UTF-32
+// truncating the high 32 bits. This is not meant to handle true UTF-32
 // encoded strings.
 inline base::string16 WStringToUTF16(const wchar_t* src) {
   base::string16 str;
@@ -30,7 +30,7 @@
   return str;
 }
 
-// Converts a string from UTF-8 to UTF-16
+// Converts a string from UTF-8 to UTF-16.
 inline base::string16 ConvertUTF8ToUTF16(const std::string& src) {
   int length = static_cast<int>(src.length());
   EXPECT_LT(length, 1024);
@@ -39,7 +39,7 @@
   return base::string16(output.data(), output.length());
 }
 
-// Converts a string from UTF-16 to UTF-8
+// Converts a string from UTF-16 to UTF-8.
 inline std::string ConvertUTF16ToUTF8(const base::string16& src) {
   std::string str;
   StdStringCanonOutput output(&str);

diff --git a/url_util.cc b/url_util.cc
index 008a5e4..279ab7e 100644
--- a/url_util.cc
+++ b/url_util.cc

@@ -9,6 +9,7 @@
 
 #include "base/debug/leak_annotations.h"
 #include "base/logging.h"
+#include "base/strings/string_util.h"
 #include "url/url_canon_internal.h"
 #include "url/url_file.h"
 #include "url/url_util_internal.h"
@@ -17,28 +18,11 @@
 
 namespace {
 
-// ASCII-specific tolower.  The standard library's tolower is locale sensitive,
-// so we don't want to use it here.
-template<class Char>
-inline Char ToLowerASCII(Char c) {
-  return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
-}
-
-// Backend for LowerCaseEqualsASCII.
-template<typename Iter>
-inline bool DoLowerCaseEqualsASCII(Iter a_begin, Iter a_end, const char* b) {
-  for (Iter it = a_begin; it != a_end; ++it, ++b) {
-    if (!*b || ToLowerASCII(*it) != *b)
-      return false;
-  }
-  return *b == 0;
-}
-
 const int kNumStandardURLSchemes = 8;
 const char* kStandardURLSchemes[kNumStandardURLSchemes] = {
   kHttpScheme,
   kHttpsScheme,
-  kFileScheme,  // Yes, file urls can have a hostname!
+  kFileScheme,  // Yes, file URLs can have a hostname!
   kFtpScheme,
   kGopherScheme,
   kWsScheme,    // WebSocket.
@@ -54,6 +38,17 @@
 // See the LockStandardSchemes declaration in the header.
 bool standard_schemes_locked = false;
 
+// This template converts a given character type to the corresponding
+// StringPiece type.
+template<typename CHAR> struct CharToStringPiece {
+};
+template<> struct CharToStringPiece<char> {
+  typedef base::StringPiece Piece;
+};
+template<> struct CharToStringPiece<base::char16> {
+  typedef base::StringPiece16 Piece;
+};
+
 // Ensures that the standard_schemes list is initialized, does nothing if it
 // already has values.
 void InitStandardSchemes() {
@@ -72,9 +67,10 @@
                                      const char* compare_to) {
   if (!component.is_nonempty())
     return compare_to[0] == 0;  // When component is empty, match empty scheme.
-  return LowerCaseEqualsASCII(&spec[component.begin],
-                              &spec[component.end()],
-                              compare_to);
+  return base::LowerCaseEqualsASCII(
+      typename CharToStringPiece<CHAR>::Piece(
+          &spec[component.begin], component.len),
+      compare_to);
 }
 
 // Returns true if the given scheme identified by |scheme| within |spec| is one
@@ -86,8 +82,10 @@
 
   InitStandardSchemes();
   for (size_t i = 0; i < standard_schemes->size(); i++) {
-    if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()],
-                             standard_schemes->at(i)))
+    if (base::LowerCaseEqualsASCII(
+            typename CharToStringPiece<CHAR>::Piece(
+                &spec[scheme.begin], scheme.len),
+            standard_schemes->at(i)))
       return true;
   }
   return false;
@@ -134,7 +132,7 @@
   Parsed parsed_input;
 #ifdef WIN32
   // For Windows, we allow things that look like absolute Windows paths to be
-  // fixed up magically to file URLs. This is done for IE compatability. For
+  // fixed up magically to file URLs. This is done for IE compatibility. For
   // example, this will change "c:/foo" into a file URL rather than treating
   // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").
   // There is similar logic in url_canon_relative.cc for
@@ -177,13 +175,14 @@
                                       charset_converter, output, output_parsed);
 
   } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) {
-    // Mailto are treated like a standard url with only a scheme, path, query
+    // Mailto URLs are treated like standard URLs, with only a scheme, path,
+    // and query.
     ParseMailtoURL(spec, spec_len, &parsed_input);
     success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output,
                                     output_parsed);
 
   } else {
-    // "Weird" URLs like data: and javascript:
+    // "Weird" URLs like data: and javascript:.
     ParsePathURL(spec, spec_len, trim_path_end, &parsed_input);
     success = CanonicalizePathURL(spec, spec_len, parsed_input, output,
                                   output_parsed);
@@ -273,7 +272,7 @@
                          CanonOutput* output,
                          Parsed* out_parsed) {
   // If the scheme is overridden, just do a simple string substitution and
-  // reparse the whole thing. There are lots of edge cases that we really don't
+  // re-parse the whole thing. There are lots of edge cases that we really don't
   // want to deal with. Like what happens if I replace "http://e:8080/foo"
   // with a file. Does it become "file:///E:/8080/foo" where the port number
   // becomes part of the path? Parsing that string as a file URL says "yes"
@@ -320,7 +319,7 @@
     // getting replaced here. If ReplaceComponents didn't re-check everything,
     // we wouldn't know if something *not* getting replaced is a problem.
     // If the scheme-specific replacers are made more intelligent so they don't
-    // re-check everything, we should instead recanonicalize the whole thing
+    // re-check everything, we should instead re-canonicalize the whole thing
     // after this call to check validity (this assumes replacing the scheme is
     // much much less common than other types of replacements, like clearing the
     // ref).
@@ -373,7 +372,7 @@
   //
   // This normally means you're trying to set up a new standard scheme too late
   // in your application's init process. Locate where your app does this
-  // initialization and calls LockStandardScheme, and add your new standard
+  // initialization and calls LockStandardSchemes, and add your new standard
   // scheme there.
   DCHECK(!standard_schemes_locked) <<
       "Trying to add a standard scheme after the list has been locked.";
@@ -382,7 +381,7 @@
   if (scheme_len == 0)
     return;
 
-  // Dulicate the scheme into a new buffer and add it to the list of standard
+  // Duplicate the scheme into a new buffer and add it to the list of standard
   // schemes. This pointer will be leaked on shutdown.
   char* dup_scheme = new char[scheme_len + 1];
   ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme);
@@ -486,31 +485,6 @@
                              charset_converter, output, out_parsed);
 }
 
-// Front-ends for LowerCaseEqualsASCII.
-bool LowerCaseEqualsASCII(const char* a_begin,
-                          const char* a_end,
-                          const char* b) {
-  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
-}
-
-bool LowerCaseEqualsASCII(const char* a_begin,
-                          const char* a_end,
-                          const char* b_begin,
-                          const char* b_end) {
-  while (a_begin != a_end && b_begin != b_end &&
-         ToLowerASCII(*a_begin) == *b_begin) {
-    a_begin++;
-    b_begin++;
-  }
-  return a_begin == a_end && b_begin == b_end;
-}
-
-bool LowerCaseEqualsASCII(const base::char16* a_begin,
-                          const base::char16* a_end,
-                          const char* b) {
-  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
-}
-
 void DecodeURLEscapeSequences(const char* input,
                               int length,
                               CanonOutputW* output) {

diff --git a/url_util.h b/url_util.h
index 458d1e8..5817044 100644
--- a/url_util.h
+++ b/url_util.h

@@ -8,10 +8,10 @@
 #include <string>
 
 #include "base/strings/string16.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_constants.h"
 #include "url/url_export.h"
-#include "url/url_parse.h"
 
 namespace url {
 
@@ -20,14 +20,13 @@
 // Initialization is NOT required, it will be implicitly initialized when first
 // used. However, this implicit initialization is NOT threadsafe. If you are
 // using this library in a threaded environment and don't have a consistent
-// "first call" (an example might be calling "AddStandardScheme" with your
-// special application-specific schemes) then you will want to call initialize
-// before spawning any threads.
+// "first call" (an example might be calling AddStandardScheme with your special
+// application-specific schemes) then you will want to call Initialize before
+// spawning any threads.
 //
-// It is OK to call this function more than once, subsequent calls will simply
-// "noop", unless Shutdown() was called in the mean time. This will also be a
-// "noop" if other calls to the library have forced an initialization
-// beforehand.
+// It is OK to call this function more than once; subsequent calls will be
+// no-ops, unless Shutdown was called in the meantime. This will also be a
+// no-op if other calls to the library have forced an initialization beforehand.
 URL_EXPORT void Initialize();
 
 // Cleanup is not required, except some strings may leak. For most user
@@ -38,10 +37,13 @@
 
 // Schemes --------------------------------------------------------------------
 
-// Adds an application-defined scheme to the internal list of "standard" URL
-// schemes. This function is not threadsafe and can not be called concurrently
-// with any other url_util function. It will assert if the list of standard
-// schemes has been locked (see LockStandardSchemes).
+// Adds an application-defined scheme to the internal list of "standard-format"
+// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
+// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
+//
+// This function is not threadsafe and cannot be called concurrently with any
+// other url_util function. It will assert if the list of standard schemes has
+// been locked (see LockStandardSchemes).
 URL_EXPORT void AddStandardScheme(const char* new_scheme);
 
 // Sets a flag to prevent future calls to AddStandardScheme from succeeding.
@@ -85,19 +87,11 @@
                               compare, found_scheme);
 }
 
-// Returns true if the given string represents a standard URL. This means that
-// either the scheme is in the list of known standard schemes.
+// Returns true if the given string represents a URL whose scheme is in the list
+// of known standard-format schemes (see AddStandardScheme).
 URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);
 URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);
 
-// TODO(brettw) remove this. This is a temporary compatibility hack to avoid
-// breaking the WebKit build when this version is synced via Chrome.
-inline bool IsStandard(const char* spec,
-                       int spec_len,
-                       const Component& scheme) {
-  return IsStandard(spec, scheme);
-}
-
 // URL library wrappers -------------------------------------------------------
 
 // Parses the given spec according to the extracted scheme type. Normal users
@@ -150,7 +144,7 @@
                                 CanonOutput* output,
                                 Parsed* output_parsed);
 
-// Replaces components in the given VALID input url. The new canonical URL info
+// Replaces components in the given VALID input URL. The new canonical URL info
 // is written to output and out_parsed.
 //
 // Returns true if the resulting URL is valid.
@@ -172,29 +166,12 @@
 
 // String helper functions ----------------------------------------------------
 
-// Compare the lower-case form of the given string against the given ASCII
-// string.  This is useful for doing checking if an input string matches some
-// token, and it is optimized to avoid intermediate string copies.
-//
-// The versions of this function that don't take a b_end assume that the b
-// string is NULL terminated.
-URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
-                                     const char* a_end,
-                                     const char* b);
-URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
-                                     const char* a_end,
-                                     const char* b_begin,
-                                     const char* b_end);
-URL_EXPORT bool LowerCaseEqualsASCII(const base::char16* a_begin,
-                                     const base::char16* a_end,
-                                     const char* b);
-
 // Unescapes the given string using URL escaping rules.
 URL_EXPORT void DecodeURLEscapeSequences(const char* input,
                                          int length,
                                          CanonOutputW* output);
 
-// Escapes the given string as defined by the JS method encodeURIComponent.  See
+// Escapes the given string as defined by the JS method encodeURIComponent. See
 // https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
 URL_EXPORT void EncodeURIComponent(const char* input,
                                    int length,

diff --git a/url_util_internal.h b/url_util_internal.h
index c72598f..756c736 100644
--- a/url_util_internal.h
+++ b/url_util_internal.h

@@ -8,7 +8,7 @@
 #include <string>
 
 #include "base/strings/string16.h"
-#include "url/url_parse.h"
+#include "url/third_party/mozilla/url_parse.h"
 
 namespace url {
 

diff --git a/url_util_unittest.cc b/url_util_unittest.cc
index 73ff93b..9297765 100644
--- a/url_util_unittest.cc
+++ b/url_util_unittest.cc

@@ -4,9 +4,9 @@
 
 #include "base/macros.h"
 #include "testing/gtest/include/gtest/gtest.h"
+#include "url/third_party/mozilla/url_parse.h"
 #include "url/url_canon.h"
 #include "url/url_canon_stdstring.h"
-#include "url/url_parse.h"
 #include "url/url_test_utils.h"
 #include "url/url_util.h"
 
@@ -44,7 +44,7 @@
   EXPECT_FALSE(FindAndCompareScheme("", 0, "", &found_scheme));
   EXPECT_TRUE(found_scheme == Component());
 
-  // When there is a whitespace char in scheme, it should canonicalize the url
+  // When there is a whitespace char in scheme, it should canonicalize the URL
   // before comparison.
   const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)";
   EXPECT_TRUE(FindAndCompareScheme(whtspc_str,
@@ -305,8 +305,8 @@
 }
 
 TEST(URLUtilTest, TestNoRefComponent) {
-  // The hash-mark must be ignored when mailto: scheme is
-  // parsed, even if the url has a base and relative part.
+  // The hash-mark must be ignored when mailto: scheme is parsed,
+  // even if the URL has a base and relative part.
   const char* base = "mailto://to/";
   const char* rel = "any#body";
commit	718eee97ed6a4df41d14726eb2eddc871d9eaaa3	[log] [tgz]
author	Viet-Trung Luu <viettrungluu@chromium.org>	Wed Jun 01 15:20:18 2016 -0700
committer	Viet-Trung Luu <viettrungluu@chromium.org>	Wed Jun 01 15:20:18 2016 -0700
tree	fa12a283240821c434f7a0d3c6679e2b96b5aa81
parent	dc748045a3e7e6d56999ec8d5de148dd7901159e [diff]