// blob: c34bb2c5fdb3505434d510a6f1281fb878cedb02 [file] [log] [blame]
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/cert/internal/verify_name_match.h"
#include <string.h>
#include "base/stl_util.h"
#include "base/strings/string16.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "base/strings/utf_string_conversions.h"
#include "base/sys_byteorder.h"
#include "base/third_party/icu/icu_utf.h"
#include "base/tuple.h"
#include "net/der/input.h"
#include "net/der/parser.h"
#include "net/der/tag.h"
namespace net {
namespace {
// Types of character set checking that NormalizeDirectoryString can perform.
// The check is applied to every character that is not a space and not an
// upper-case ASCII letter (those are handled before the check is reached).
enum CharsetEnforcement {
// No per-character validation; every byte is accepted.
NO_ENFORCEMENT,
// Reject any character outside the PrintableString repertoire accepted by
// NormalizeDirectoryString (which additionally tolerates '*').
ENFORCE_PRINTABLE_STRING,
// Reject any byte greater than 0x7F.
ENFORCE_ASCII,
};
// Normalizes the UTF-8 encoded string |output| in place, treating it as though
// it were made up purely of ASCII characters.
//
// Background: RFC 5280, Section 7.1 says UTF8String and PrintableString values
// should be compared using the LDAP StringPrep profile of RFC 4518 (case
// folding plus whitespace compression). That behaviour is optional for
// RFC 2459/3280 implementations, and the StringPrep tables are large, so this
// function implements an ASCII-only approximation instead:
//   * only the ' ' character is treated as whitespace (leading and trailing
//     runs are dropped, interior runs collapse to a single ' ');
//   * only A-Z is case folded (to a-z); no locale-aware folding is done.
//
// This is a partial subset of the RFC 5280 rules and remains compatible with
// RFC 2459/3280. It does better than rejecting mismatched encodings outright
// or doing a strict binary comparison (the minimum RFC 3280 requires), and it
// is sufficient for publicly deployed certificates.
//
// When |charset_enforcement| is anything other than NO_ENFORCEMENT, returns
// false as soon as a character outside the selected charset is encountered.
// Otherwise returns true.
//
// NOTE: |output| will be modified regardless of the return. On a false return
// it is left partially rewritten and is not truncated.
WARN_UNUSED_RESULT bool NormalizeDirectoryString(
    CharsetEnforcement charset_enforcement,
    std::string* output) {
  // The normalized text is never longer than the input, so it is written over
  // the front of the same buffer and the leftover tail is dropped at the end.
  std::string& text = *output;
  const std::string::size_type length = text.size();

  // Skip leading whitespace.
  std::string::size_type src = 0;
  while (src < length && text[src] == ' ')
    ++src;

  std::string::size_type dst = 0;
  for (; src < length; ++src) {
    const unsigned char ch = text[src];

    if (ch == ' ') {
      // Emit a space only for the last space of a run, and only when a
      // non-space character follows. This collapses interior runs to one
      // space and discards trailing whitespace entirely.
      const bool followed_by_non_space =
          src + 1 < length && text[src + 1] != ' ';
      if (followed_by_non_space)
        text[dst++] = ' ';
      continue;
    }

    if (ch >= 'A' && ch <= 'Z') {
      // Fold case.
      text[dst++] = ch + ('a' - 'A');
      continue;
    }

    // The checks below only see characters not consumed above, so they rely
    // on ' ' and A-Z being valid members of every enforced charset.
    if (charset_enforcement == ENFORCE_PRINTABLE_STRING) {
      // The accepted characters are listed in the comment on
      // NormalizePrintableStringValue. The '\'' .. ':' range covers
      // ' ( ) * + , - . / 0-9 and ':'.
      const bool allowed = (ch >= 'a' && ch <= 'z') ||
                           (ch >= '\'' && ch <= ':') || ch == '=' ||
                           ch == '?';
      if (!allowed)
        return false;
    } else if (charset_enforcement == ENFORCE_ASCII) {
      if (ch > 0x7F)
        return false;
    }
    text[dst++] = ch;
  }

  // Drop whatever is left of the original text past the write position.
  if (dst != length)
    text.erase(dst);
  return true;
}
// Normalizes the DER-encoded PrintableString value |in| following
// RFC 2459, Section 4.1.2.4.
//
// Normalization strips leading and trailing whitespace, collapses each run of
// whitespace to a single whitespace character, and folds case (to lowercase
// in this implementation).
//
// The value is validated at the same time: it must be restricted to the
// PrintableString character set from X.680 (2008), Section 41.4, Table 10,
// namely
//   a-z A-Z 0-9 (space) ' ( ) + , - . / : = ?
//
// One deliberate exception: because of an old OpenSSL encoding bug a number
// of certificates contain '*' in PrintableStrings. Implementations have
// historically accepted it, so it is accepted here as well.
//
// Returns true and stores the case folded, normalized value in |output| when
// |in| is valid; returns false when |in| is invalid.
// NOTE: |output| will be modified regardless of the return.
WARN_UNUSED_RESULT bool NormalizePrintableStringValue(const der::Input& in,
                                                      std::string* output) {
  *output = in.AsString();
  const bool is_valid =
      NormalizeDirectoryString(ENFORCE_PRINTABLE_STRING, output);
  return is_valid;
}
// Normalizes a UTF8String value. The normalization rules are described in the
// comment for NormalizeDirectoryString; no character set is enforced.
//
// Returns true and stores the case folded, normalized value in |output| when
// |in| can be normalized; returns false when |in| is invalid.
// NOTE: |output| will be modified regardless of the return.
WARN_UNUSED_RESULT bool NormalizeUtf8StringValue(const der::Input& in,
                                                 std::string* output) {
  *output = in.AsString();
  const bool is_valid = NormalizeDirectoryString(NO_ENFORCEMENT, output);
  return is_valid;
}
// Normalizes an IA5String value.
//
// IA5String is ISO/IEC Registrations 1 and 6 from the ISO "International
// Register of Coded Character Sets to be used with Escape Sequences", plus
// space and delete. In plain terms that is 0x00 - 0x7F, i.e. ASCII (formally
// ISO/IEC 646), so any byte above 0x7F makes the value invalid.
//
// Returns true and stores the case folded, normalized value in |output| when
// |in| can be normalized; returns false when |in| is invalid.
// NOTE: |output| will be modified regardless of the return.
WARN_UNUSED_RESULT bool NormalizeIA5StringValue(const der::Input& in,
                                                std::string* output) {
  *output = in.AsString();
  const bool is_valid = NormalizeDirectoryString(ENFORCE_ASCII, output);
  return is_valid;
}
// Converts a BMPString value to UTF-8 and then normalizes it as described in
// the comment for NormalizeDirectoryString.
//
// The value is rejected when its length is not a whole number of 16-bit
// units, when it contains a surrogate code unit, or when the UTF-16 to UTF-8
// conversion reports failure.
//
// Returns true and stores the case folded, normalized value in |output| when
// |in| can be normalized; returns false when |in| is invalid.
// NOTE: |output| will be modified regardless of the return.
WARN_UNUSED_RESULT bool NormalizeBmpStringValue(const der::Input& in,
                                                std::string* output) {
  const auto byte_count = in.Length();
  if (byte_count % 2 != 0)
    return false;

  // Copy the raw bytes into a 16-bit string; the extra unit requested from
  // WriteInto is for the terminating null.
  base::string16 code_units;
  if (byte_count) {
    base::char16* destination =
        base::WriteInto(&code_units, byte_count / 2 + 1);
    memcpy(destination, in.UnsafeData(), byte_count);
  }

  for (base::string16::iterator unit = code_units.begin();
       unit != code_units.end(); ++unit) {
    // BMPString is UCS-2 in big-endian order.
    *unit = base::NetToHost16(*unit);

    // BMPString only supports codepoints in the Basic Multilingual Plane;
    // surrogates are not allowed.
    if (CBU_IS_SURROGATE(*unit))
      return false;
  }

  const bool converted =
      base::UTF16ToUTF8(code_units.data(), code_units.size(), output);
  if (!converted)
    return false;
  return NormalizeDirectoryString(NO_ENFORCEMENT, output);
}
// Converts UniversalString value to UTF-8 and then normalizes it. See the
// comment for NormalizeDirectoryString for details.
//
// The value is rejected when its length is not a multiple of four bytes or
// when any 32-bit unit is not a valid Unicode character.
//
// If |in| can be normalized, returns true and sets |output| to the case folded,
// normalized value. If |in| is invalid, returns false.
// NOTE: |output| will be modified regardless of the return. Any previous
// contents of |output| are discarded.
WARN_UNUSED_RESULT bool NormalizeUniversalStringValue(const der::Input& in,
                                                      std::string* output) {
  // The conversion loop below builds the result by adding one code point at a
  // time to |output|, so it has to start from an empty string. Without this, a
  // caller passing a non-empty |output| would get its old contents followed by
  // the converted text, unlike the other Normalize*Value functions, which
  // overwrite |output|.
  output->clear();

  if (in.Length() % 4 != 0)
    return false;

  // Copy the raw bytes into 32-bit units. The memcpy is skipped for empty
  // input so that no pointer is taken into an empty vector.
  std::vector<uint32_t> in_32bit(in.Length() / 4);
  if (in.Length())
    memcpy(vector_as_array(&in_32bit), in.UnsafeData(), in.Length());

  for (const uint32_t c : in_32bit) {
    // UniversalString is UCS-4 in big-endian order.
    uint32_t codepoint = base::NetToHost32(c);
    if (!CBU_IS_UNICODE_CHAR(codepoint))
      return false;
    base::WriteUnicodeCharacter(codepoint, output);
  }
  return NormalizeDirectoryString(NO_ENFORCEMENT, output);
}
// Converts the string |value| to UTF-8, normalizes it, and stores the result
// in |output|, dispatching on |tag| to the matching Normalize*Value helper.
// |tag| must be one of the types for which IsNormalizableDirectoryString is
// true; any other tag hits NOTREACHED() and yields false.
//
// Returns true and sets |output| to the case folded, normalized value when
// |value| can be normalized; returns false when |value| is invalid.
// NOTE: |output| will be modified regardless of the return.
WARN_UNUSED_RESULT bool NormalizeValue(const der::Tag tag,
                                       const der::Input& value,
                                       std::string* output) {
  if (tag == der::kPrintableString)
    return NormalizePrintableStringValue(value, output);
  if (tag == der::kUtf8String)
    return NormalizeUtf8StringValue(value, output);
  if (tag == der::kIA5String)
    return NormalizeIA5StringValue(value, output);
  if (tag == der::kUniversalString)
    return NormalizeUniversalStringValue(value, output);
  if (tag == der::kBmpString)
    return NormalizeBmpStringValue(value, output);

  // Callers are expected to have filtered out every other tag.
  NOTREACHED();
  return false;
}
// Returns true if |tag| is a string type that NormalizeValue can handle:
// PrintableString, UTF8String, IA5String, UniversalString or BMPString.
//
// RFC 5280 only requires handling IA5String for comparing domainComponent
// values, but accepting it here avoids the need to special case anything.
//
// TeletexString is deliberately not normalized. Section 8 of RFC 5280 briefly
// describes the historical confusion between treating TeletexString as
// Latin1String vs T.61, and there are even incompatibilities within T.61
// implementations. As this type is virtually unused, it is simply compared
// with a binary comparison, as permitted by RFC 3280/5280.
bool IsNormalizableDirectoryString(der::Tag tag) {
  return tag == der::kPrintableString || tag == der::kUtf8String ||
         tag == der::kIA5String || tag == der::kUniversalString ||
         tag == der::kBmpString;
}
// Returns true if the AttributeValue (|a_tag|, |a_value|) matches (|b_tag|,
// |b_value|).
//
// When both tags are normalizable directory string types the values are
// compared after normalization, so differently encoded but equivalent
// strings match. A value that fails normalization never matches. In every
// other case the tags must be identical and the values byte-for-byte equal.
bool VerifyValueMatch(const der::Tag a_tag,
                      const der::Input& a_value,
                      const der::Tag b_tag,
                      const der::Input& b_value) {
  const bool both_normalizable = IsNormalizableDirectoryString(a_tag) &&
                                 IsNormalizableDirectoryString(b_tag);
  if (!both_normalizable) {
    // Attributes encoded with different types may be assumed to be unequal;
    // all other types use binary comparison.
    return a_tag == b_tag && a_value.Equals(b_value);
  }

  std::string a_normalized;
  if (!NormalizeValue(a_tag, a_value, &a_normalized))
    return false;

  std::string b_normalized;
  if (!NormalizeValue(b_tag, b_value, &b_normalized))
    return false;

  return a_normalized == b_normalized;
}
struct AttributeTypeAndValue {
AttributeTypeAndValue(der::Input in_type,
der::Tag in_value_tag,
der::Input in_value)
: type(in_type), value_tag(in_value_tag), value(in_value) {}
der::Input type;
der::Tag value_tag;
der::Input value;
};
// Reads every ASN.1 AttributeTypeAndValue element remaining in |parser| and
// appends each one to |out| as an AttributeTypeAndValue object.
//
// AttributeTypeAndValue is defined in RFC 5280 section 4.1.2.4:
//
// AttributeTypeAndValue ::= SEQUENCE {
//   type     AttributeType,
//   value    AttributeValue }
//
// AttributeType ::= OBJECT IDENTIFIER
//
// AttributeValue ::= ANY -- DEFINED BY AttributeType
//
// DirectoryString ::= CHOICE {
//   teletexString    TeletexString (SIZE (1..MAX)),
//   printableString  PrintableString (SIZE (1..MAX)),
//   universalString  UniversalString (SIZE (1..MAX)),
//   utf8String       UTF8String (SIZE (1..MAX)),
//   bmpString        BMPString (SIZE (1..MAX)) }
//
// The type of the component AttributeValue is determined by the
// AttributeType; in general it will be a DirectoryString.
//
// Returns false as soon as an element fails to parse; elements appended to
// |out| before the failure are left in place. Returns true once |parser| is
// exhausted.
WARN_UNUSED_RESULT bool ReadRdn(der::Parser* parser,
                                std::vector<AttributeTypeAndValue>* out) {
  while (parser->HasMore()) {
    der::Parser element;
    der::Input attribute_type;
    der::Tag value_tag;
    der::Input attribute_value;

    // Each element must be a SEQUENCE holding exactly an OBJECT IDENTIFIER
    // (the attribute type) followed by one value of any tag, with nothing
    // after it. The steps are chained so the first failure stops parsing.
    const bool well_formed =
        parser->ReadSequence(&element) &&
        element.ReadTag(der::kOid, &attribute_type) &&
        element.ReadTagAndValue(&value_tag, &attribute_value) &&
        !element.HasMore();
    if (!well_formed)
      return false;

    out->emplace_back(attribute_type, value_tag, attribute_value);
  }
  return true;
}
// Verifies that |a_parser| and |b_parser| hold the same number of
// AttributeTypeAndValue elements and that the elements can be paired up
// one-to-one, each element of |a_parser| with a distinct matching element of
// |b_parser|. Two elements match when their types are equal and their values
// match according to VerifyValueMatch.
//
// Returns false if either input fails to parse, if either is empty, if the
// sizes differ, or if some element has no unused counterpart.
bool VerifyRdnMatch(der::Parser* a_parser, der::Parser* b_parser) {
  std::vector<AttributeTypeAndValue> a_type_and_values, b_type_and_values;
  if (!ReadRdn(a_parser, &a_type_and_values) ||
      !ReadRdn(b_parser, &b_type_and_values))
    return false;

  // RFC 5280 section 4.1.2.4
  // RelativeDistinguishedName ::= SET SIZE (1..MAX) OF AttributeTypeAndValue
  if (a_type_and_values.empty() || b_type_and_values.empty())
    return false;

  // RFC 5280 section 7.1:
  // Two relative distinguished names RDN1 and RDN2 match if they have the same
  // number of naming attributes and for each naming attribute in RDN1 there is
  // a matching naming attribute in RDN2.
  if (a_type_and_values.size() != b_type_and_values.size())
    return false;

  // The ordering of elements may differ due to denormalized values sorting
  // differently in the DER encoding. Since the number of elements should be
  // small, a naive linear search for each element should be fine. (Hostile
  // certificates already have ways to provoke pathological behavior.)
  for (const auto& a : a_type_and_values) {
    auto b_iter = b_type_and_values.begin();
    for (; b_iter != b_type_and_values.end(); ++b_iter) {
      if (a.type.Equals(b_iter->type) &&
          VerifyValueMatch(a.value_tag, a.value, b_iter->value_tag,
                           b_iter->value)) {
        break;
      }
    }
    if (b_iter == b_type_and_values.end())
      return false;

    // Consume the matched element so that duplicate elements in
    // |a_type_and_values| cannot all match the same element in
    // |b_type_and_values|. Otherwise {X, X} would be reported as matching
    // {X, Y} even though Y has no counterpart.
    b_type_and_values.erase(b_iter);
  }

  // Every element in |a_type_and_values| was paired with a distinct element
  // of |b_type_and_values|; since the sizes were equal, nothing is left over.
  return true;
}
} // namespace
// Returns true if the distinguished names |a| and |b| match.
//
// |a| and |b| are ASN.1 RDNSequence values (not including the Sequence tag),
// defined in RFC 5280 section 4.1.2.4:
//
// Name ::= CHOICE { -- only one possibility for now --
//   rdnSequence  RDNSequence }
//
// RDNSequence ::= SEQUENCE OF RelativeDistinguishedName
//
// RelativeDistinguishedName ::=
//   SET SIZE (1..MAX) OF AttributeTypeAndValue
//
// Empty Names are allowed. RFC 5280 section 4.1.2.4 requires "The issuer
// field MUST contain a non-empty distinguished name (DN)", while section
// 4.1.2.6 allows for the Subject to be empty in certain cases. The caller is
// assumed to have verified those conditions.
//
// RFC 5280 section 7.1:
// Two distinguished names DN1 and DN2 match if they have the same number of
// RDNs, for each RDN in DN1 there is a matching RDN in DN2, and the matching
// RDNs appear in the same order in both DNs.
bool VerifyNameMatch(const der::Input& a, const der::Input& b) {
  // Pass 1: walk both inputs in lock step, skipping one SET at a time, to
  // check that they hold the same number of RDNs before comparing contents.
  {
    der::Parser a_counter(a);
    der::Parser b_counter(b);
    while (a_counter.HasMore() && b_counter.HasMore()) {
      const bool skipped_both =
          a_counter.SkipTag(der::kSet) && b_counter.SkipTag(der::kSet);
      if (!skipped_both)
        return false;
    }
    // Anything left over on either side means the RDN counts differ.
    if (a_counter.HasMore() || b_counter.HasMore())
      return false;
  }

  // Pass 2: same number of RDNs, so compare them pairwise in order.
  der::Parser a_rdns(a);
  der::Parser b_rdns(b);
  while (a_rdns.HasMore() && b_rdns.HasMore()) {
    der::Parser a_rdn;
    der::Parser b_rdn;
    const bool read_both = a_rdns.ReadConstructed(der::kSet, &a_rdn) &&
                           b_rdns.ReadConstructed(der::kSet, &b_rdn);
    if (!read_both)
      return false;
    if (!VerifyRdnMatch(&a_rdn, &b_rdn))
      return false;
  }
  return true;
}
} // namespace net