| // Copyright 2013 The Chromium Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style license that can be | 
 | // found in the LICENSE file. | 
 |  | 
 | // Functions for canonicalizing "path" URLs. Not to be confused with the path | 
 | // of a URL, these are URLs that have no authority section, only a path. For | 
 | // example, "javascript:" and "data:". | 
 |  | 
 | #include "url/url_canon.h" | 
 | #include "url/url_canon_internal.h" | 
 |  | 
 | namespace url { | 
 |  | 
 | namespace { | 
 |  | 
 | // Canonicalize the given |component| from |source| into |output| and | 
 | // |new_component|. If |separator| is non-zero, it is pre-pended to |ouput| | 
 | // prior to the canonicalized component; i.e. for the '?' or '#' characters. | 
 | template<typename CHAR, typename UCHAR> | 
 | bool DoCanonicalizePathComponent(const CHAR* source, | 
 |                                  const Component& component, | 
 |                                  char separator, | 
 |                                  CanonOutput* output, | 
 |                                  Component* new_component) { | 
 |   bool success = true; | 
 |   if (component.is_valid()) { | 
 |     if (separator) | 
 |       output->push_back(separator); | 
 |     // Copy the path using path URL's more lax escaping rules (think for | 
 |     // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all | 
 |     // ASCII characters alone. This helps readability of JavaStript. | 
 |     new_component->begin = output->length(); | 
 |     int end = component.end(); | 
 |     for (int i = component.begin; i < end; i++) { | 
 |       UCHAR uch = static_cast<UCHAR>(source[i]); | 
 |       if (uch < 0x20 || uch >= 0x80) | 
 |         success &= AppendUTF8EscapedChar(source, &i, end, output); | 
 |       else | 
 |         output->push_back(static_cast<char>(uch)); | 
 |     } | 
 |     new_component->len = output->length() - new_component->begin; | 
 |   } else { | 
 |     // Empty part. | 
 |     new_component->reset(); | 
 |   } | 
 |   return success; | 
 | } | 
 |  | 
 | template <typename CHAR, typename UCHAR> | 
 | bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, | 
 |                            const Parsed& parsed, | 
 |                            CanonOutput* output, | 
 |                            Parsed* new_parsed) { | 
 |   // Scheme: this will append the colon. | 
 |   bool success = CanonicalizeScheme(source.scheme, parsed.scheme, | 
 |                                     output, &new_parsed->scheme); | 
 |  | 
 |   // We assume there's no authority for path URLs. Note that hosts should never | 
 |   // have -1 length. | 
 |   new_parsed->username.reset(); | 
 |   new_parsed->password.reset(); | 
 |   new_parsed->host.reset(); | 
 |   new_parsed->port.reset(); | 
 |   // We allow path URLs to have the path, query and fragment components, but we | 
 |   // will canonicalize each of the via the weaker path URL rules. | 
 |   success &= DoCanonicalizePathComponent<CHAR, UCHAR>( | 
 |       source.path, parsed.path, '\0', output, &new_parsed->path); | 
 |   success &= DoCanonicalizePathComponent<CHAR, UCHAR>( | 
 |       source.query, parsed.query, '?', output, &new_parsed->query); | 
 |   success &= DoCanonicalizePathComponent<CHAR, UCHAR>( | 
 |       source.ref, parsed.ref, '#', output, &new_parsed->ref); | 
 |  | 
 |   return success; | 
 | } | 
 |  | 
 | }  // namespace | 
 |  | 
 | bool CanonicalizePathURL(const char* spec, | 
 |                          int spec_len, | 
 |                          const Parsed& parsed, | 
 |                          CanonOutput* output, | 
 |                          Parsed* new_parsed) { | 
 |   return DoCanonicalizePathURL<char, unsigned char>( | 
 |       URLComponentSource<char>(spec), parsed, output, new_parsed); | 
 | } | 
 |  | 
 | bool CanonicalizePathURL(const base::char16* spec, | 
 |                          int spec_len, | 
 |                          const Parsed& parsed, | 
 |                          CanonOutput* output, | 
 |                          Parsed* new_parsed) { | 
 |   return DoCanonicalizePathURL<base::char16, base::char16>( | 
 |       URLComponentSource<base::char16>(spec), parsed, output, new_parsed); | 
 | } | 
 |  | 
 | bool ReplacePathURL(const char* base, | 
 |                     const Parsed& base_parsed, | 
 |                     const Replacements<char>& replacements, | 
 |                     CanonOutput* output, | 
 |                     Parsed* new_parsed) { | 
 |   URLComponentSource<char> source(base); | 
 |   Parsed parsed(base_parsed); | 
 |   SetupOverrideComponents(base, replacements, &source, &parsed); | 
 |   return DoCanonicalizePathURL<char, unsigned char>( | 
 |       source, parsed, output, new_parsed); | 
 | } | 
 |  | 
 | bool ReplacePathURL(const char* base, | 
 |                     const Parsed& base_parsed, | 
 |                     const Replacements<base::char16>& replacements, | 
 |                     CanonOutput* output, | 
 |                     Parsed* new_parsed) { | 
 |   RawCanonOutput<1024> utf8; | 
 |   URLComponentSource<char> source(base); | 
 |   Parsed parsed(base_parsed); | 
 |   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); | 
 |   return DoCanonicalizePathURL<char, unsigned char>( | 
 |       source, parsed, output, new_parsed); | 
 | } | 
 |  | 
 | }  // namespace url |