Mojom lexer.
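
This patch adds a standalone C++ lexer for mojom source files: a TokenType
enum, a Token struct carrying each token's text and position, and a
Tokenize() entry point that returns the ordered list of tokens, stopping
after the first error token.

Minimal usage sketch (illustrative only; CheckMojomSource is a hypothetical
helper, not part of this patch):

  #include <string>
  #include <vector>

  #include "mojom/lexer.h"

  bool CheckMojomSource(const std::string& source) {
    std::vector<mojo::mojom::Token> tokens = mojo::mojom::Tokenize(source);
    // Tokenize() emits at most one error token, always as the last element.
    if (!tokens.empty() && tokens.back().error()) {
      // tokens.back().line_no and line_pos locate the error (zero-based).
      return false;
    }
    return true;
  }
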
R=viettrungluu@chromium.org
Review URL: https://codereview.chromium.org/1034083003
diff --git a/mojom/BUILD.gn b/mojom/BUILD.gn
new file mode 100644
index 0000000..d26d44c
--- /dev/null
+++ b/mojom/BUILD.gn
@@ -0,0 +1,43 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//testing/test.gni")
+
+group("mojom") {
+ testonly = true
+ deps = [
+ ":tests",
+ ]
+}
+
+group("tests") {
+ testonly = true
+ deps = [
+ ":lexer_unittest",
+ ]
+}
+
+test("lexer_unittest") {
+ sources = [
+ "lexer_unittest.cc",
+ ]
+
+ deps = [
+ "//base",
+ "//base/test:run_all_unittests",
+ "//testing/gtest",
+ ":lexer",
+ ]
+}
+
+source_set("lexer") {
+ sources = [
+ "lexer.cc",
+ "lexer.h",
+ ]
+
+ deps = [
+ "//base",
+ ]
+}
diff --git a/mojom/lexer.cc b/mojom/lexer.cc
new file mode 100644
index 0000000..e55e2fb
--- /dev/null
+++ b/mojom/lexer.cc
@@ -0,0 +1,420 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "mojom/lexer.h"
+
+#include <map>
+#include <string>
+
+#include "base/lazy_instance.h"
+
+namespace mojo {
+namespace mojom {
+
+namespace {
+
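+// Maps mojom keyword text to its TokenType. Built lazily on first use via
+// Keywords().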
+class KeywordsDict {
+ public:
+ KeywordsDict();
+
+ private:
+ std::map<std::string, mojom::TokenType> keywords_;
+ friend std::map<std::string, mojom::TokenType>& Keywords();
+
+ DISALLOW_COPY_AND_ASSIGN(KeywordsDict);
+};
+static base::LazyInstance<KeywordsDict> g_keywords = LAZY_INSTANCE_INITIALIZER;
+
+std::map<std::string, mojom::TokenType>& Keywords() {
+ return g_keywords.Get().keywords_;
+}
+
+KeywordsDict::KeywordsDict() {
+ keywords_["import"] = TokenType::IMPORT;
+ keywords_["module"] = TokenType::MODULE;
+ keywords_["struct"] = TokenType::STRUCT;
+ keywords_["union"] = TokenType::UNION;
+ keywords_["interface"] = TokenType::INTERFACE;
+ keywords_["enum"] = TokenType::ENUM;
+ keywords_["const"] = TokenType::CONST;
+ keywords_["true"] = TokenType::TRUE;
+ keywords_["false"] = TokenType::FALSE;
+ keywords_["default"] = TokenType::DEFAULT;
+}
+
+// Non-localized version of isalpha.
+bool IsAlpha(char c) {
+ return (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
+}
+
+// Non-localized version of isdigit.
+bool IsDigit(char c) {
+ return ('0' <= c && c <= '9');
+}
+
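+// Non-localized version of isxdigit.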
+bool IsHexDigit(char c) {
+ return (IsDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'));
+}
+
+// Non-localized version of isalnum.
+bool IsAlnum(char c) {
+ return IsAlpha(c) || IsDigit(c);
+}
+
+// MojomLexer tokenizes a mojom source file. It is NOT thread-safe.
+class MojomLexer {
+ public:
+ explicit MojomLexer(const std::string& source);
+ ~MojomLexer();
+
+ // Returns the list of tokens in the source file.
+ std::vector<Token> Tokenize();
+
+ private:
+ // The GetNextToken.* functions all return true if they could find a token
+ // (even an error token) and false otherwise.
+ bool GetNextToken(Token* result);
+ bool GetNextTokenSingleChar(Token* result);
+ bool GetNextTokenEqualsOrResponse(Token* result);
+ bool GetNextTokenIdentifier(Token* result);
+ bool GetNextTokenDecConst(Token* result);
+ bool GetNextTokenHexConst(Token* result);
+ bool GetNextTokenOrdinal(Token* result);
+ bool GetNextTokenStringLiteral(Token* result);
+
+ void ConsumeSkippable();
+ void ConsumeDigits();
+ void ConsumeEol();
+ void Consume(size_t num);
+
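+  // Returns true when offset_ + offset_plus is at or past the end of source_.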
+ bool eos(size_t offset_plus) {
+ return offset_ + offset_plus >= source_.size();
+ }
+
+ const std::string source_;
+ size_t offset_;
+ size_t line_no_;
+ size_t offset_in_line_;
+
+ DISALLOW_COPY_AND_ASSIGN(MojomLexer);
+};
+
+std::vector<Token> MojomLexer::Tokenize() {
+ offset_ = 0;
+ line_no_ = 0;
+ offset_in_line_ = 0;
+
+ std::vector<Token> result;
+ Token cur;
+ while (GetNextToken(&cur)) {
+ result.push_back(cur);
+
+ // As soon as an error token is found, stop tokenizing.
+ if (cur.error()) {
+ break;
+ }
+ }
+
+ return result;
+}
+
+bool MojomLexer::GetNextToken(Token* result) {
+  // Skip any whitespace that may precede the next token.
+ ConsumeSkippable();
+
+  // If we have reached the end of the source, signal it by returning false.
+ if (eos(0))
+ return false;
+
+ // Save the current position in the source code.
+ result->char_pos = offset_;
+ result->line_no = line_no_;
+ result->line_pos = offset_in_line_;
+
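+  // The matchers below are tried in order. Hex constants must be tried before
+  // decimal constants so that the leading '0' of "0x..." is not consumed as a
+  // decimal zero.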
+  if (GetNextTokenSingleChar(result) || GetNextTokenEqualsOrResponse(result) ||
+      GetNextTokenIdentifier(result) || GetNextTokenHexConst(result) ||
+      GetNextTokenDecConst(result) || GetNextTokenOrdinal(result) ||
+      GetNextTokenStringLiteral(result))
+ return true;
+
+ result->token = source_.substr(offset_, 1);
+ result->token_type = TokenType::ERROR_ILLEGAL_CHAR;
+ return true;
+}
+
+void MojomLexer::ConsumeSkippable() {
+ if (eos(0))
+ return;
+
+ bool found_non_space = false;
+ while (!found_non_space && !eos(0)) {
+ switch (source_[offset_]) {
+ case ' ':
+ case '\t':
+ case '\r':
+ Consume(1);
+ break;
+ case '\n':
+ ConsumeEol();
+ break;
+ default:
+ found_non_space = true;
+ break;
+ }
+ }
+}
+
+// Finds all single-character tokens except for '='.
+bool MojomLexer::GetNextTokenSingleChar(Token* result) {
+ switch (source_[offset_]) {
+ case '(':
+ result->token_type = TokenType::LPAREN;
+ break;
+ case ')':
+ result->token_type = TokenType::RPAREN;
+ break;
+ case '[':
+ result->token_type = TokenType::LBRACKET;
+ break;
+ case ']':
+ result->token_type = TokenType::RBRACKET;
+ break;
+ case '{':
+ result->token_type = TokenType::LBRACE;
+ break;
+ case '}':
+ result->token_type = TokenType::RBRACE;
+ break;
+ case '<':
+ result->token_type = TokenType::LANGLE;
+ break;
+ case '>':
+ result->token_type = TokenType::RANGLE;
+ break;
+ case ';':
+ result->token_type = TokenType::SEMI;
+ break;
+ case ',':
+ result->token_type = TokenType::COMMA;
+ break;
+ case '.':
+ result->token_type = TokenType::DOT;
+ break;
+ case '-':
+ result->token_type = TokenType::MINUS;
+ break;
+ case '+':
+ result->token_type = TokenType::PLUS;
+ break;
+ case '&':
+ result->token_type = TokenType::AMP;
+ break;
+ case '?':
+ result->token_type = TokenType::QSTN;
+ break;
+ default:
+ return false;
+ break;
+ }
+
+ result->token = source_.substr(offset_, 1);
+ Consume(1);
+ return true;
+}
+
+// Finds '=' or '=>'.
+bool MojomLexer::GetNextTokenEqualsOrResponse(Token* result) {
+ if (source_[offset_] != '=')
+ return false;
+ Consume(1);
+
+ if (eos(0) || source_[offset_] != '>') {
+ result->token_type = TokenType::EQUALS;
+ result->token = "=";
+ } else {
+ result->token_type = TokenType::RESPONSE;
+ result->token = "=>";
+ Consume(1);
+ }
+ return true;
+}
+
+// valid C identifiers (K&R2: A.2.3)
+bool MojomLexer::GetNextTokenIdentifier(Token* result) {
+ char c = source_[offset_];
+
+ // Identifiers start with a letter or underscore.
+ if (!(IsAlpha(c) || c == '_'))
+ return false;
+ size_t start_offset = offset_;
+
+  // Identifiers contain letters, digits and underscores.
+  while (!eos(0) && (IsAlnum(source_[offset_]) || source_[offset_] == '_'))
+ Consume(1);
+
+ result->token = source_.substr(start_offset, offset_ - start_offset);
+ result->token_type = TokenType::IDENTIFIER;
+
+ if (Keywords().count(result->token))
+ result->token_type = Keywords()[result->token];
+
+ return true;
+}
+
+// integer constants (K&R2: A.2.5.1) dec
+// floating constants (K&R2: A.2.5.3)
+bool MojomLexer::GetNextTokenDecConst(Token* result) {
+ if (!IsDigit(source_[offset_]))
+ return false;
+
+ result->token_type = TokenType::INT_CONST_DEC;
+ // If the number starts with a zero and is not a floating point number.
+  if (source_[offset_] == '0' &&
+      (eos(1) || (source_[offset_ + 1] != 'e' && source_[offset_ + 1] != 'E' &&
+                  source_[offset_ + 1] != '.'))) {
+ // TODO(azani): Catch and error on octal.
+ result->token = "0";
+ Consume(1);
+ return true;
+ }
+
+ size_t start_offset = offset_;
+
+ // First, we consume all the digits.
+ ConsumeDigits();
+
+ // If there is a fractional part, we consume the . and the following digits.
+ if (!eos(0) && source_[offset_] == '.') {
+ result->token_type = TokenType::FLOAT_CONST;
+ Consume(1);
+ ConsumeDigits();
+ }
+
+  // If there is an exponent part, consume the e/E, any sign and the digits.
+  if (!eos(0) && (source_[offset_] == 'e' || source_[offset_] == 'E')) {
+    if (!eos(2) && IsDigit(source_[offset_ + 2]) &&
+        (source_[offset_ + 1] == '-' || source_[offset_ + 1] == '+')) {
+ result->token_type = TokenType::FLOAT_CONST;
+ Consume(2); // Consume e/E and +/-
+ ConsumeDigits();
+ } else if (!eos(1) && IsDigit(source_[offset_ + 1])) {
+ result->token_type = TokenType::FLOAT_CONST;
+ Consume(1); // Consume e/E
+ ConsumeDigits();
+ }
+ }
+
+ result->token = source_.substr(start_offset, offset_ - start_offset);
+ return true;
+}
+
+// integer constants (K&R2: A.2.5.1) hex
+bool MojomLexer::GetNextTokenHexConst(Token* result) {
+  // Hex constants start with '0', 'x' or 'X', and at least one hex digit.
+ if (eos(2) || source_[offset_] != '0' ||
+ (source_[offset_ + 1] != 'x' && source_[offset_ + 1] != 'X') ||
+ !IsHexDigit(source_[offset_ + 2]))
+ return false;
+
+ result->token_type = TokenType::INT_CONST_HEX;
+ size_t start_offset = offset_;
+ Consume(2);
+
+  while (!eos(0) && IsHexDigit(source_[offset_]))
+ Consume(1);
+
+ result->token = source_.substr(start_offset, offset_ - start_offset);
+ return true;
+}
+
+bool MojomLexer::GetNextTokenOrdinal(Token* result) {
+ // Ordinals start with '@' and then some digit.
+ if (eos(1) || source_[offset_] != '@' || !IsDigit(source_[offset_ + 1]))
+ return false;
+ size_t start_offset = offset_;
+ // Consumes '@'.
+ Consume(1);
+
+ result->token_type = TokenType::ORDINAL;
+ ConsumeDigits();
+
+ result->token = source_.substr(start_offset, offset_ - start_offset);
+ return true;
+}
+
+bool MojomLexer::GetNextTokenStringLiteral(Token* result) {
+  // String literals start with '"' and end with an unescaped '"'.
+ if (source_[offset_] != '"')
+ return false;
+
+ size_t start_offset = offset_;
+ // Consumes '"'.
+ Consume(1);
+
+ while (source_[offset_] != '"') {
+ if (source_[offset_] == '\n' || eos(0)) {
+ result->token_type = TokenType::ERROR_UNTERMINATED_STRING_LITERAL;
+ result->token = source_.substr(start_offset, offset_ - start_offset);
+ return true;
+ }
+
+ // This block will be skipped if the backslash is at the end of the source.
+ if (source_[offset_] == '\\' && !eos(1)) {
+ // Consume the backslash. This will ensure \" is consumed.
+ Consume(1);
+ }
+ Consume(1);
+ }
+  // Consume the closing double quote.
+ Consume(1);
+
+ result->token_type = TokenType::STRING_LITERAL;
+
+ result->token = source_.substr(start_offset, offset_ - start_offset);
+ return true;
+}
+
+void MojomLexer::ConsumeDigits() {
+ while (!eos(0) && IsDigit(source_[offset_]))
+ Consume(1);
+}
+
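+// Consumes an end-of-line character and updates the line bookkeeping.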
+void MojomLexer::ConsumeEol() {
+ ++offset_;
+ ++line_no_;
+ offset_in_line_ = 0;
+}
+
+void MojomLexer::Consume(size_t num) {
+ offset_ += num;
+ offset_in_line_ += num;
+}
+
+MojomLexer::MojomLexer(const std::string& source)
+ : source_(source), offset_(0), line_no_(0), offset_in_line_(0) {
+}
+
+MojomLexer::~MojomLexer() {
+}
+
+} // namespace
+
+Token::Token()
+ : token_type(TokenType::ERROR_UNKNOWN),
+ char_pos(0),
+ line_no(0),
+ line_pos(0) {
+}
+
+Token::~Token() {
+}
+
+// Accepts the text of a mojom file and returns the ordered list of tokens
+// found in the file.
+std::vector<Token> Tokenize(const std::string& source) {
+ return MojomLexer(source).Tokenize();
+}
+
+} // namespace mojom
+} // namespace mojo
diff --git a/mojom/lexer.h b/mojom/lexer.h
new file mode 100644
index 0000000..b477a37
--- /dev/null
+++ b/mojom/lexer.h
@@ -0,0 +1,92 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MOJO_PUBLIC_TOOLS_BINDINGS_MOJOM_CPP_LEXER_H_
+#define MOJO_PUBLIC_TOOLS_BINDINGS_MOJOM_CPP_LEXER_H_
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+
+namespace mojo {
+namespace mojom {
+
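+// All token kinds the lexer can produce, including error tokens.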
+enum class TokenType {
+ // Errors
+ ERROR_UNKNOWN,
+ ERROR_ILLEGAL_CHAR,
+ ERROR_UNTERMINATED_STRING_LITERAL,
+
+ // Punctuators and Separators
+ LPAREN,
+ RPAREN,
+ LBRACKET,
+ RBRACKET,
+ LBRACE,
+ RBRACE,
+ LANGLE,
+ RANGLE,
+ SEMI,
+ COMMA,
+ DOT,
+ MINUS,
+ PLUS,
+ AMP,
+ QSTN,
+ EQUALS,
+ RESPONSE,
+
+ // Identifiers
+ IDENTIFIER,
+
+ // Keywords
+ IMPORT,
+ MODULE,
+ STRUCT,
+ UNION,
+ INTERFACE,
+ ENUM,
+ CONST,
+ TRUE,
+ FALSE,
+ DEFAULT,
+
+ // Constants
+ INT_CONST_DEC,
+ INT_CONST_HEX,
+ FLOAT_CONST,
+ ORDINAL,
+ STRING_LITERAL,
+
+ // TODO(azani): Check that all tokens were implemented.
+ // TODO(azani): Error out on octal.
+};
+
+struct Token {
+ Token();
+ ~Token();
+
+ bool error() const {
+ return (token_type == TokenType::ERROR_ILLEGAL_CHAR ||
+ token_type == TokenType::ERROR_UNTERMINATED_STRING_LITERAL ||
+ token_type == TokenType::ERROR_UNKNOWN);
+ }
+
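+  // token holds the token's text. char_pos is the token's offset from the
+  // start of the source; line_no and line_pos are the zero-based line number
+  // and the offset within that line.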
+ TokenType token_type;
+ std::string token;
+ size_t char_pos;
+ size_t line_no;
+ size_t line_pos;
+};
+
+// Accepts the text of a mojom file and returns the ordered list of tokens
+// found in the file.
+std::vector<Token> Tokenize(const std::string& source);
+
+} // namespace mojom
+} // namespace mojo
+
+#endif // MOJO_PUBLIC_TOOLS_BINDINGS_MOJOM_CPP_LEXER_H_
diff --git a/mojom/lexer_unittest.cc b/mojom/lexer_unittest.cc
new file mode 100644
index 0000000..f4db79a
--- /dev/null
+++ b/mojom/lexer_unittest.cc
@@ -0,0 +1,162 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/logging.h"
+#include "mojom/lexer.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace mojo {
+namespace mojom {
+namespace {
+
+TEST(LexerTest, AllNonErrorTokens) {
+ const struct TestData {
+ const char* name;
+ const char* source;
+ mojom::TokenType expected_token;
+ } test_data[] = {
+ {"LPAREN", "(", mojom::TokenType::LPAREN},
+ {"RPAREN", ")", mojom::TokenType::RPAREN},
+ {"LBRACKET", "[", mojom::TokenType::LBRACKET},
+ {"RBRACKET", "]", mojom::TokenType::RBRACKET},
+ {"LBRACE", "{", mojom::TokenType::LBRACE},
+ {"RBRACE", "}", mojom::TokenType::RBRACE},
+ {"LANGLE", "<", mojom::TokenType::LANGLE},
+ {"RANGLE", ">", mojom::TokenType::RANGLE},
+ {"SEMI", ";", mojom::TokenType::SEMI},
+ {"COMMA", ",", mojom::TokenType::COMMA},
+ {"DOT", ".", mojom::TokenType::DOT},
+ {"MINUS", "-", mojom::TokenType::MINUS},
+ {"PLUS", "+", mojom::TokenType::PLUS},
+ {"AMP", "&", mojom::TokenType::AMP},
+ {"QSTN", "?", mojom::TokenType::QSTN},
+ {"EQUALS", "=", mojom::TokenType::EQUALS},
+ {"RESPONSE", "=>", mojom::TokenType::RESPONSE},
+ {"IDENTIFIER", "something", mojom::TokenType::IDENTIFIER},
+ {"IMPORT", "import", mojom::TokenType::IMPORT},
+ {"MODULE", "module", mojom::TokenType::MODULE},
+ {"STRUCT", "struct", mojom::TokenType::STRUCT},
+ {"UNION", "union", mojom::TokenType::UNION},
+ {"INTERFACE", "interface", mojom::TokenType::INTERFACE},
+ {"ENUM", "enum", mojom::TokenType::ENUM},
+ {"CONST", "const", mojom::TokenType::CONST},
+ {"TRUE", "true", mojom::TokenType::TRUE},
+ {"FALSE", "false", mojom::TokenType::FALSE},
+ {"DEFAULT", "default", mojom::TokenType::DEFAULT},
+ {"INT_CONST_DEC", "10", mojom::TokenType::INT_CONST_DEC},
+ {"INT_CONST_DEC_0", "0", mojom::TokenType::INT_CONST_DEC},
+ {"FLOAT_CONST", "10.5", mojom::TokenType::FLOAT_CONST},
+ {"FLOAT_CONST_E", "10e5", mojom::TokenType::FLOAT_CONST},
+ {"FLOAT_CONST_ZERO", "0.5", mojom::TokenType::FLOAT_CONST},
+ {"FLOAT_CONST_E_ZERO", "0e5", mojom::TokenType::FLOAT_CONST},
+ {"FLOAT_CONST_E_PLUS", "10e+5", mojom::TokenType::FLOAT_CONST},
+ {"FLOAT_CONST_E_MINUS", "10e-5", mojom::TokenType::FLOAT_CONST},
+ {"INT_CONST_HEX", "0x10A", mojom::TokenType::INT_CONST_HEX},
+ {"ORDINAL", "@10", mojom::TokenType::ORDINAL},
+ {"STRING_LITERAL", "\"hello world\"", mojom::TokenType::STRING_LITERAL},
+ {"STRING_LITERAL_ESCAPE",
+ "\"hello \\\"world\\\"\"",
+ mojom::TokenType::STRING_LITERAL},
+ {"STRING_LITERAL_HEX_ESCAPE",
+ "\"hello \\x23 world\"",
+ mojom::TokenType::STRING_LITERAL},
+ };
+ for (size_t i = 0; i < arraysize(test_data); i++) {
+ const char* test_name = test_data[i].name;
+ const char* source = test_data[i].source;
+ const mojom::TokenType expected_token = test_data[i].expected_token;
+ std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+ DCHECK(tokens.size() >= 1) << "Failure to tokenize at all: " << test_name;
+ const mojom::Token token = tokens[0];
+ EXPECT_EQ(expected_token, token.token_type)
+ << "Wrong token type: " << test_name;
+ EXPECT_EQ(source, token.token) << "Wrong token value: " << test_name;
+ }
+}
+
+TEST(LexerTest, TokenPosition) {
+  std::string source("  \n  .");
+ std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+ const mojom::Token token = tokens[0];
+ EXPECT_EQ(mojom::TokenType::DOT, token.token_type);
+ EXPECT_EQ(".", token.token);
+ EXPECT_EQ(5U, token.char_pos);
+ EXPECT_EQ(1U, token.line_no);
+ EXPECT_EQ(2U, token.line_pos);
+}
+
+TEST(LexerTest, ExhaustedTokens) {
+ std::string source("");
+ std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+ EXPECT_EQ(0U, tokens.size());
+}
+
+TEST(LexerTest, SkipSkippable) {
+ std::string source(" \t \r \n .");
+ std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+ const mojom::Token token = tokens[0];
+ EXPECT_EQ(mojom::TokenType::DOT, token.token_type);
+ EXPECT_EQ(".", token.token);
+}
+
+TEST(LexerTest, SkipToTheEnd) {
+ std::string source(" \t \r \n ");
+ std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+ EXPECT_EQ(0U, tokens.size());
+}
+
+TEST(LexerTest, TokenizeMoreThanOne) {
+ std::string source("()");
+ std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+
+ EXPECT_EQ(mojom::TokenType::LPAREN, tokens[0].token_type);
+ EXPECT_EQ(mojom::TokenType::RPAREN, tokens[1].token_type);
+ EXPECT_EQ(2U, tokens.size());
+}
+
+TEST(LexerTest, ERROR_ILLEGAL_CHAR) {
+ std::string source("#");
+ std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+ const mojom::Token token = tokens[0];
+ EXPECT_EQ(mojom::TokenType::ERROR_ILLEGAL_CHAR, token.token_type);
+ EXPECT_EQ("#", token.token);
+ EXPECT_TRUE(token.error());
+}
+
+TEST(LexerTest, ERROR_UNTERMINATED_STRING_LITERAL_EOL) {
+ std::string source("\"Hello \n World\"");
+ std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+ const mojom::Token token = tokens[0];
+ EXPECT_EQ(mojom::TokenType::ERROR_UNTERMINATED_STRING_LITERAL,
+ token.token_type);
+ EXPECT_EQ("\"Hello ", token.token);
+ EXPECT_EQ(0U, token.char_pos);
+ EXPECT_TRUE(token.error());
+}
+
+TEST(LexerTest, ERROR_UNTERMINATED_STRING_LITERAL_EOF) {
+ std::string source("\"Hello ");
+ std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+ const mojom::Token token = tokens[0];
+ EXPECT_EQ(mojom::TokenType::ERROR_UNTERMINATED_STRING_LITERAL,
+ token.token_type);
+ EXPECT_EQ("\"Hello ", token.token);
+ EXPECT_EQ(0U, token.char_pos);
+ EXPECT_TRUE(token.error());
+}
+
+TEST(LexerTest, ERROR_UNTERMINATED_STRING_LITERAL_ESC_EOF) {
+ std::string source("\"Hello \\");
+ std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+ const mojom::Token token = tokens[0];
+ EXPECT_EQ(mojom::TokenType::ERROR_UNTERMINATED_STRING_LITERAL,
+ token.token_type);
+ EXPECT_EQ("\"Hello \\", token.token);
+ EXPECT_EQ(0U, token.char_pos);
+ EXPECT_TRUE(token.error());
+}
+
+} // namespace
+} // namespace mojom
+} // namespace mojo