| // Copyright 2014 The Chromium Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style license that can be | 
 | // found in the LICENSE file. | 
 |  | 
// A streaming validator for UTF-8. Validation is based on the definition in
// RFC-3629. In particular, it accepts characters that base::IsStringUTF8()
// rejects as invalid.
//
// The implementation detects errors on the first possible byte.
 |  | 
 | #ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ | 
 | #define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ | 
 |  | 
 | #include <string> | 
 |  | 
 | #include "base/basictypes.h" | 
 | #include "base/i18n/base_i18n_export.h" | 
 |  | 
 | namespace base { | 
 |  | 
 | class BASE_I18N_EXPORT StreamingUtf8Validator { | 
 |  public: | 
 |   // The validator exposes 3 states. It starts in state VALID_ENDPOINT. As it | 
 |   // processes characters it alternates between VALID_ENDPOINT and | 
 |   // VALID_MIDPOINT. If it encounters an invalid byte or UTF-8 sequence the | 
 |   // state changes permanently to INVALID. | 
 |   enum State { | 
 |     VALID_ENDPOINT, | 
 |     VALID_MIDPOINT, | 
 |     INVALID | 
 |   }; | 
 |  | 
 |   StreamingUtf8Validator() : state_(0u) {} | 
 |   // Trivial destructor intentionally omitted. | 
 |  | 
 |   // Validate |size| bytes starting at |data|. If the concatenation of all calls | 
 |   // to AddBytes() since this object was constructed or reset is a valid UTF-8 | 
 |   // string, returns VALID_ENDPOINT. If it could be the prefix of a valid UTF-8 | 
 |   // string, returns VALID_MIDPOINT. If an invalid byte or UTF-8 sequence was | 
 |   // present, returns INVALID. | 
 |   State AddBytes(const char* data, size_t size); | 
 |  | 
 |   // Return the object to a freshly-constructed state so that it can be re-used. | 
 |   void Reset(); | 
 |  | 
 |   // Validate a complete string using the same criteria. Returns true if the | 
 |   // string only contains complete, valid UTF-8 codepoints. | 
 |   static bool Validate(const std::string& string); | 
 |  | 
 |  private: | 
 |   // The current state of the validator. Value 0 is the initial/valid state. | 
 |   // The state is stored as an offset into |kUtf8ValidatorTables|. The special | 
 |   // state |kUtf8InvalidState| is invalid. | 
 |   uint8 state_; | 
 |  | 
 |   // This type could be made copyable but there is currently no use-case for | 
 |   // it. | 
 |   DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator); | 
 | }; | 
 |  | 
 | }  // namespace base | 
 |  | 
 | #endif  // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ |