Mojom lexer.

Adds a standalone C++ lexer that turns mojom source text into an ordered
list of tokens, along with a GN build target and unit tests.

R=viettrungluu@chromium.org

Review URL: https://codereview.chromium.org/1034083003
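
Example usage (a minimal sketch for reviewers; the sample mojom text and the
main() harness below are illustrative only and not part of this CL):

  #include <iostream>
  #include <string>
  #include <vector>

  #include "mojom/lexer.h"

  int main() {
    const std::string source = "interface Foo { Frob(int32 x) => (bool ok); };";
    std::vector<mojo::mojom::Token> tokens = mojo::mojom::Tokenize(source);
    for (const mojo::mojom::Token& token : tokens) {
      // Every token carries its text and its position within the source.
      std::cout << token.token << " (line " << token.line_no << ", column "
                << token.line_pos << ")\n";
      // Tokenize() stops after the first error token, so stop printing too.
      if (token.error())
        break;
    }
    return 0;
  }

Tokenize() returns every token up to and including the first error token, so
callers can report the error location from the returned Token's position
fields.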
diff --git a/mojom/BUILD.gn b/mojom/BUILD.gn
new file mode 100644
index 0000000..d26d44c
--- /dev/null
+++ b/mojom/BUILD.gn
@@ -0,0 +1,43 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//testing/test.gni")
+
+group("mojom") {
+  testonly = true
+  deps = [
+    ":tests",
+  ]
+}
+
+group("tests") {
+  testonly = true
+  deps = [
+    ":lexer_unittest",
+  ]
+}
+
+test("lexer_unittest") {
+  sources = [
+    "lexer_unittest.cc",
+  ]
+
+  deps = [
+    "//base",
+    "//base/test:run_all_unittests",
+    "//testing/gtest",
+    ":lexer",
+  ]
+}
+
+source_set("lexer") {
+  sources = [
+    "lexer.cc",
+    "lexer.h",
+  ]
+
+  deps = [
+    "//base",
+  ]
+}
diff --git a/mojom/lexer.cc b/mojom/lexer.cc
new file mode 100644
index 0000000..e55e2fb
--- /dev/null
+++ b/mojom/lexer.cc
@@ -0,0 +1,420 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "mojom/lexer.h"
+
+#include <map>
+#include <string>
+
+#include "base/lazy_instance.h"
+
+namespace mojo {
+namespace mojom {
+
+namespace {
+
+class KeywordsDict {
+ public:
+  KeywordsDict();
+
+ private:
+  std::map<std::string, mojom::TokenType> keywords_;
+  friend std::map<std::string, mojom::TokenType>& Keywords();
+
+  DISALLOW_COPY_AND_ASSIGN(KeywordsDict);
+};
+static base::LazyInstance<KeywordsDict> g_keywords = LAZY_INSTANCE_INITIALIZER;
+
+std::map<std::string, mojom::TokenType>& Keywords() {
+  return g_keywords.Get().keywords_;
+}
+
+KeywordsDict::KeywordsDict() {
+  keywords_["import"] = TokenType::IMPORT;
+  keywords_["module"] = TokenType::MODULE;
+  keywords_["struct"] = TokenType::STRUCT;
+  keywords_["union"] = TokenType::UNION;
+  keywords_["interface"] = TokenType::INTERFACE;
+  keywords_["enum"] = TokenType::ENUM;
+  keywords_["const"] = TokenType::CONST;
+  keywords_["true"] = TokenType::TRUE;
+  keywords_["false"] = TokenType::FALSE;
+  keywords_["default"] = TokenType::DEFAULT;
+}
+
+// Non-localized version of isalpha.
+bool IsAlpha(char c) {
+  return (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'));
+}
+
+// Non-localized version of isdigit.
+bool IsDigit(char c) {
+  return ('0' <= c && c <= '9');
+}
+
+bool IsHexDigit(char c) {
+  return (IsDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'));
+}
+
+// Non-localized version of isalnum.
+bool IsAlnum(char c) {
+  return IsAlpha(c) || IsDigit(c);
+}
+
+// MojomLexer tokenizes a mojom source file. It is NOT thread-safe.
+class MojomLexer {
+ public:
+  explicit MojomLexer(const std::string& source);
+  ~MojomLexer();
+
+  // Returns the list of tokens in the source file.
+  std::vector<Token> Tokenize();
+
+ private:
+  // The GetNextToken.* functions all return true if they could find a token
+  // (even an error token) and false otherwise.
+  bool GetNextToken(Token* result);
+  bool GetNextTokenSingleChar(Token* result);
+  bool GetNextTokenEqualsOrResponse(Token* result);
+  bool GetNextTokenIdentifier(Token* result);
+  bool GetNextTokenDecConst(Token* result);
+  bool GetNextTokenHexConst(Token* result);
+  bool GetNextTokenOrdinal(Token* result);
+  bool GetNextTokenStringLiteral(Token* result);
+
+  void ConsumeSkippable();
+  void ConsumeDigits();
+  void ConsumeEol();
+  void Consume(size_t num);
+
+  bool eos(size_t offset_plus) {
+    return offset_ + offset_plus >= source_.size();
+  }
+
+  const std::string source_;
+  size_t offset_;
+  size_t line_no_;
+  size_t offset_in_line_;
+
+  DISALLOW_COPY_AND_ASSIGN(MojomLexer);
+};
+
+std::vector<Token> MojomLexer::Tokenize() {
+  offset_ = 0;
+  line_no_ = 0;
+  offset_in_line_ = 0;
+
+  std::vector<Token> result;
+  Token cur;
+  while (GetNextToken(&cur)) {
+    result.push_back(cur);
+
+    // As soon as an error token is found, stop tokenizing.
+    if (cur.error()) {
+      break;
+    }
+  }
+
+  return result;
+}
+
+bool MojomLexer::GetNextToken(Token* result) {
+  // Skip all spaces which may be in front of the next token.
+  ConsumeSkippable();
+
+  // If we have reached the end of the source, say so.
+  if (eos(0))
+    return false;
+
+  // Save the current position in the source code.
+  result->char_pos = offset_;
+  result->line_no = line_no_;
+  result->line_pos = offset_in_line_;
+
+  if (GetNextTokenSingleChar(result) || GetNextTokenEqualsOrResponse(result) ||
+      GetNextTokenIdentifier(result) || GetNextTokenHexConst(result) ||
+      GetNextTokenDecConst(result) || GetNextTokenOrdinal(result) ||
+      GetNextTokenStringLiteral(result))
+    return true;
+
+  result->token = source_.substr(offset_, 1);
+  result->token_type = TokenType::ERROR_ILLEGAL_CHAR;
+  return true;
+}
+
+void MojomLexer::ConsumeSkippable() {
+  if (eos(0))
+    return;
+
+  bool found_non_space = false;
+  while (!found_non_space && !eos(0)) {
+    switch (source_[offset_]) {
+      case ' ':
+      case '\t':
+      case '\r':
+        Consume(1);
+        break;
+      case '\n':
+        ConsumeEol();
+        break;
+      default:
+        found_non_space = true;
+        break;
+    }
+  }
+}
+
+// Finds all single-character tokens except for '='.
+bool MojomLexer::GetNextTokenSingleChar(Token* result) {
+  switch (source_[offset_]) {
+    case '(':
+      result->token_type = TokenType::LPAREN;
+      break;
+    case ')':
+      result->token_type = TokenType::RPAREN;
+      break;
+    case '[':
+      result->token_type = TokenType::LBRACKET;
+      break;
+    case ']':
+      result->token_type = TokenType::RBRACKET;
+      break;
+    case '{':
+      result->token_type = TokenType::LBRACE;
+      break;
+    case '}':
+      result->token_type = TokenType::RBRACE;
+      break;
+    case '<':
+      result->token_type = TokenType::LANGLE;
+      break;
+    case '>':
+      result->token_type = TokenType::RANGLE;
+      break;
+    case ';':
+      result->token_type = TokenType::SEMI;
+      break;
+    case ',':
+      result->token_type = TokenType::COMMA;
+      break;
+    case '.':
+      result->token_type = TokenType::DOT;
+      break;
+    case '-':
+      result->token_type = TokenType::MINUS;
+      break;
+    case '+':
+      result->token_type = TokenType::PLUS;
+      break;
+    case '&':
+      result->token_type = TokenType::AMP;
+      break;
+    case '?':
+      result->token_type = TokenType::QSTN;
+      break;
+    default:
+      return false;
+  }
+
+  result->token = source_.substr(offset_, 1);
+  Consume(1);
+  return true;
+}
+
+// Finds '=' or '=>'.
+bool MojomLexer::GetNextTokenEqualsOrResponse(Token* result) {
+  if (source_[offset_] != '=')
+    return false;
+  Consume(1);
+
+  if (eos(0) || source_[offset_] != '>') {
+    result->token_type = TokenType::EQUALS;
+    result->token = "=";
+  } else {
+    result->token_type = TokenType::RESPONSE;
+    result->token = "=>";
+    Consume(1);
+  }
+  return true;
+}
+
+// valid C identifiers (K&R2: A.2.3)
+bool MojomLexer::GetNextTokenIdentifier(Token* result) {
+  char c = source_[offset_];
+
+  // Identifiers start with a letter or underscore.
+  if (!(IsAlpha(c) || c == '_'))
+    return false;
+  size_t start_offset = offset_;
+
+  // Identifiers contain letters, digits and underscores.
+  while (!eos(0) && (IsAlnum(source_[offset_]) || source_[offset_] == '_'))
+    Consume(1);
+
+  result->token = source_.substr(start_offset, offset_ - start_offset);
+  result->token_type = TokenType::IDENTIFIER;
+
+  if (Keywords().count(result->token))
+    result->token_type = Keywords()[result->token];
+
+  return true;
+}
+
+// integer constants (K&R2: A.2.5.1) dec
+// floating constants (K&R2: A.2.5.3)
+bool MojomLexer::GetNextTokenDecConst(Token* result) {
+  if (!IsDigit(source_[offset_]))
+    return false;
+
+  result->token_type = TokenType::INT_CONST_DEC;
+  // If the number starts with a zero and is not a floating-point number, the
+  // token is just zero.
+  if (source_[offset_] == '0' &&
+      (eos(1) || (source_[offset_ + 1] != 'e' && source_[offset_ + 1] != 'E' &&
+                  source_[offset_ + 1] != '.'))) {
+    // TODO(azani): Catch and error on octal.
+    result->token = "0";
+    Consume(1);
+    return true;
+  }
+
+  size_t start_offset = offset_;
+
+  // First, we consume all the digits.
+  ConsumeDigits();
+
+  // If there is a fractional part, we consume the . and the following digits.
+  if (!eos(0) && source_[offset_] == '.') {
+    result->token_type = TokenType::FLOAT_CONST;
+    Consume(1);
+    ConsumeDigits();
+  }
+
+  // If there is an exponent, we consume the e/E, the optional sign and the
+  // following digits.
+  if (!eos(0) && (source_[offset_] == 'e' || source_[offset_] == 'E')) {
+    if (!eos(2) &&
+        (source_[offset_ + 1] == '-' || source_[offset_ + 1] == '+') &&
+        IsDigit(source_[offset_ + 2])) {
+      result->token_type = TokenType::FLOAT_CONST;
+      Consume(2);  // Consume e/E and +/-
+      ConsumeDigits();
+    } else if (!eos(1) && IsDigit(source_[offset_ + 1])) {
+      result->token_type = TokenType::FLOAT_CONST;
+      Consume(1);  // Consume e/E
+      ConsumeDigits();
+    }
+  }
+
+  result->token = source_.substr(start_offset, offset_ - start_offset);
+  return true;
+}
+
+// integer constants (K&R2: A.2.5.1) hex
+bool MojomLexer::GetNextTokenHexConst(Token* result) {
+  // Hex numbers start with '0', 'x' or 'X', and at least one hex digit.
+  if (eos(2) || source_[offset_] != '0' ||
+      (source_[offset_ + 1] != 'x' && source_[offset_ + 1] != 'X') ||
+      !IsHexDigit(source_[offset_ + 2]))
+    return false;
+
+  result->token_type = TokenType::INT_CONST_HEX;
+  size_t start_offset = offset_;
+  Consume(2);
+
+  while (!eos(0) && IsHexDigit(source_[offset_]))
+    Consume(1);
+
+  result->token = source_.substr(start_offset, offset_ - start_offset);
+  return true;
+}
+
+bool MojomLexer::GetNextTokenOrdinal(Token* result) {
+  // Ordinals start with '@' and then some digit.
+  if (eos(1) || source_[offset_] != '@' || !IsDigit(source_[offset_ + 1]))
+    return false;
+  size_t start_offset = offset_;
+  // Consumes '@'.
+  Consume(1);
+
+  result->token_type = TokenType::ORDINAL;
+  ConsumeDigits();
+
+  result->token = source_.substr(start_offset, offset_ - start_offset);
+  return true;
+}
+
+bool MojomLexer::GetNextTokenStringLiteral(Token* result) {
+  // String literals start with a double quote.
+  if (source_[offset_] != '"')
+    return false;
+
+  size_t start_offset = offset_;
+  // Consumes '"'.
+  Consume(1);
+
+  while (source_[offset_] != '"') {
+    if (source_[offset_] == '\n' || eos(0)) {
+      result->token_type = TokenType::ERROR_UNTERMINATED_STRING_LITERAL;
+      result->token = source_.substr(start_offset, offset_ - start_offset);
+      return true;
+    }
+
+    // This block will be skipped if the backslash is at the end of the source.
+    if (source_[offset_] == '\\' && !eos(1)) {
+      // Consume the backslash. This will ensure \" is consumed.
+      Consume(1);
+    }
+    Consume(1);
+  }
+  // Consume the closing doublequotes.
+  Consume(1);
+
+  result->token_type = TokenType::STRING_LITERAL;
+
+  result->token = source_.substr(start_offset, offset_ - start_offset);
+  return true;
+}
+
+void MojomLexer::ConsumeDigits() {
+  while (!eos(0) && IsDigit(source_[offset_]))
+    Consume(1);
+}
+
+void MojomLexer::ConsumeEol() {
+  ++offset_;
+  ++line_no_;
+  offset_in_line_ = 0;
+}
+
+void MojomLexer::Consume(size_t num) {
+  offset_ += num;
+  offset_in_line_ += num;
+}
+
+MojomLexer::MojomLexer(const std::string& source)
+    : source_(source), offset_(0), line_no_(0), offset_in_line_(0) {
+}
+
+MojomLexer::~MojomLexer() {
+}
+
+}  // namespace
+
+Token::Token()
+    : token_type(TokenType::ERROR_UNKNOWN),
+      char_pos(0),
+      line_no(0),
+      line_pos(0) {
+}
+
+Token::~Token() {
+}
+
+// Accepts the text of a mojom file and returns the ordered list of tokens
+// found in the file.
+std::vector<Token> Tokenize(const std::string& source) {
+  return MojomLexer(source).Tokenize();
+}
+
+}  // namespace mojom
+}  // namespace mojo
diff --git a/mojom/lexer.h b/mojom/lexer.h
new file mode 100644
index 0000000..b477a37
--- /dev/null
+++ b/mojom/lexer.h
@@ -0,0 +1,92 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MOJO_PUBLIC_TOOLS_BINDINGS_MOJOM_CPP_LEXER_H_
+#define MOJO_PUBLIC_TOOLS_BINDINGS_MOJOM_CPP_LEXER_H_
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+
+namespace mojo {
+namespace mojom {
+
+enum class TokenType {
+  // Errors
+  ERROR_UNKNOWN,
+  ERROR_ILLEGAL_CHAR,
+  ERROR_UNTERMINATED_STRING_LITERAL,
+
+  // Punctuators and Separators
+  LPAREN,
+  RPAREN,
+  LBRACKET,
+  RBRACKET,
+  LBRACE,
+  RBRACE,
+  LANGLE,
+  RANGLE,
+  SEMI,
+  COMMA,
+  DOT,
+  MINUS,
+  PLUS,
+  AMP,
+  QSTN,
+  EQUALS,
+  RESPONSE,
+
+  // Identifiers
+  IDENTIFIER,
+
+  // Keywords
+  IMPORT,
+  MODULE,
+  STRUCT,
+  UNION,
+  INTERFACE,
+  ENUM,
+  CONST,
+  TRUE,
+  FALSE,
+  DEFAULT,
+
+  // Constants
+  INT_CONST_DEC,
+  INT_CONST_HEX,
+  FLOAT_CONST,
+  ORDINAL,
+  STRING_LITERAL,
+
+  // TODO(azani): Check that all tokens were implemented.
+  // TODO(azani): Error out on octal.
+};
+
+struct Token {
+  Token();
+  ~Token();
+
+  bool error() const {
+    return (token_type == TokenType::ERROR_ILLEGAL_CHAR ||
+            token_type == TokenType::ERROR_UNTERMINATED_STRING_LITERAL ||
+            token_type == TokenType::ERROR_UNKNOWN);
+  }
+
+  TokenType token_type;
+  std::string token;
+  size_t char_pos;
+  size_t line_no;
+  size_t line_pos;
+};
+
+// Accepts the text of a mojom file and returns the ordered list of tokens
+// found in the file.
+std::vector<Token> Tokenize(const std::string& source);
+
+}  // namespace mojom
+}  // namespace mojo
+
+#endif  // MOJO_PUBLIC_TOOLS_BINDINGS_MOJOM_CPP_LEXER_H_
diff --git a/mojom/lexer_unittest.cc b/mojom/lexer_unittest.cc
new file mode 100644
index 0000000..f4db79a
--- /dev/null
+++ b/mojom/lexer_unittest.cc
@@ -0,0 +1,162 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/logging.h"
+#include "mojom/lexer.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace mojo {
+namespace mojom {
+namespace {
+
+TEST(LexerTest, AllNonErrorTokens) {
+  const struct TestData {
+    const char* name;
+    const char* source;
+    mojom::TokenType expected_token;
+  } test_data[] = {
+      {"LPAREN", "(", mojom::TokenType::LPAREN},
+      {"RPAREN", ")", mojom::TokenType::RPAREN},
+      {"LBRACKET", "[", mojom::TokenType::LBRACKET},
+      {"RBRACKET", "]", mojom::TokenType::RBRACKET},
+      {"LBRACE", "{", mojom::TokenType::LBRACE},
+      {"RBRACE", "}", mojom::TokenType::RBRACE},
+      {"LANGLE", "<", mojom::TokenType::LANGLE},
+      {"RANGLE", ">", mojom::TokenType::RANGLE},
+      {"SEMI", ";", mojom::TokenType::SEMI},
+      {"COMMA", ",", mojom::TokenType::COMMA},
+      {"DOT", ".", mojom::TokenType::DOT},
+      {"MINUS", "-", mojom::TokenType::MINUS},
+      {"PLUS", "+", mojom::TokenType::PLUS},
+      {"AMP", "&", mojom::TokenType::AMP},
+      {"QSTN", "?", mojom::TokenType::QSTN},
+      {"EQUALS", "=", mojom::TokenType::EQUALS},
+      {"RESPONSE", "=>", mojom::TokenType::RESPONSE},
+      {"IDENTIFIER", "something", mojom::TokenType::IDENTIFIER},
+      {"IMPORT", "import", mojom::TokenType::IMPORT},
+      {"MODULE", "module", mojom::TokenType::MODULE},
+      {"STRUCT", "struct", mojom::TokenType::STRUCT},
+      {"UNION", "union", mojom::TokenType::UNION},
+      {"INTERFACE", "interface", mojom::TokenType::INTERFACE},
+      {"ENUM", "enum", mojom::TokenType::ENUM},
+      {"CONST", "const", mojom::TokenType::CONST},
+      {"TRUE", "true", mojom::TokenType::TRUE},
+      {"FALSE", "false", mojom::TokenType::FALSE},
+      {"DEFAULT", "default", mojom::TokenType::DEFAULT},
+      {"INT_CONST_DEC", "10", mojom::TokenType::INT_CONST_DEC},
+      {"INT_CONST_DEC_0", "0", mojom::TokenType::INT_CONST_DEC},
+      {"FLOAT_CONST", "10.5", mojom::TokenType::FLOAT_CONST},
+      {"FLOAT_CONST_E", "10e5", mojom::TokenType::FLOAT_CONST},
+      {"FLOAT_CONST_ZERO", "0.5", mojom::TokenType::FLOAT_CONST},
+      {"FLOAT_CONST_E_ZERO", "0e5", mojom::TokenType::FLOAT_CONST},
+      {"FLOAT_CONST_E_PLUS", "10e+5", mojom::TokenType::FLOAT_CONST},
+      {"FLOAT_CONST_E_MINUS", "10e-5", mojom::TokenType::FLOAT_CONST},
+      {"INT_CONST_HEX", "0x10A", mojom::TokenType::INT_CONST_HEX},
+      {"ORDINAL", "@10", mojom::TokenType::ORDINAL},
+      {"STRING_LITERAL", "\"hello world\"", mojom::TokenType::STRING_LITERAL},
+      {"STRING_LITERAL_ESCAPE",
+       "\"hello \\\"world\\\"\"",
+       mojom::TokenType::STRING_LITERAL},
+      {"STRING_LITERAL_HEX_ESCAPE",
+       "\"hello \\x23 world\"",
+       mojom::TokenType::STRING_LITERAL},
+  };
+  for (size_t i = 0; i < arraysize(test_data); i++) {
+    const char* test_name = test_data[i].name;
+    const char* source = test_data[i].source;
+    const mojom::TokenType expected_token = test_data[i].expected_token;
+    std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+    ASSERT_GE(tokens.size(), 1u)
+        << "Failure to tokenize at all: " << test_name;
+    const mojom::Token token = tokens[0];
+    EXPECT_EQ(expected_token, token.token_type)
+        << "Wrong token type: " << test_name;
+    EXPECT_EQ(source, token.token) << "Wrong token value: " << test_name;
+  }
+}
+
+TEST(LexerTest, TokenPosition) {
+  std::string source("  \n  .");
+  std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+  const mojom::Token token = tokens[0];
+  EXPECT_EQ(mojom::TokenType::DOT, token.token_type);
+  EXPECT_EQ(".", token.token);
+  EXPECT_EQ(5U, token.char_pos);
+  EXPECT_EQ(1U, token.line_no);
+  EXPECT_EQ(2U, token.line_pos);
+}
+
+TEST(LexerTest, ExhaustedTokens) {
+  std::string source("");
+  std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+  EXPECT_EQ(0U, tokens.size());
+}
+
+TEST(LexerTest, SkipSkippable) {
+  std::string source("  \t  \r \n .");
+  std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+  const mojom::Token token = tokens[0];
+  EXPECT_EQ(mojom::TokenType::DOT, token.token_type);
+  EXPECT_EQ(".", token.token);
+}
+
+TEST(LexerTest, SkipToTheEnd) {
+  std::string source("  \t  \r \n ");
+  std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+  EXPECT_EQ(0U, tokens.size());
+}
+
+TEST(LexerTest, TokenizeMoreThanOne) {
+  std::string source("()");
+  std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+
+  ASSERT_EQ(2U, tokens.size());
+  EXPECT_EQ(mojom::TokenType::LPAREN, tokens[0].token_type);
+  EXPECT_EQ(mojom::TokenType::RPAREN, tokens[1].token_type);
+}
+
+TEST(LexerTest, ERROR_ILLEGAL_CHAR) {
+  std::string source("#");
+  std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+  const mojom::Token token = tokens[0];
+  EXPECT_EQ(mojom::TokenType::ERROR_ILLEGAL_CHAR, token.token_type);
+  EXPECT_EQ("#", token.token);
+  EXPECT_TRUE(token.error());
+}
+
+TEST(LexerTest, ERROR_UNTERMINATED_STRING_LITERAL_EOL) {
+  std::string source("\"Hello \n World\"");
+  std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+  const mojom::Token token = tokens[0];
+  EXPECT_EQ(mojom::TokenType::ERROR_UNTERMINATED_STRING_LITERAL,
+            token.token_type);
+  EXPECT_EQ("\"Hello ", token.token);
+  EXPECT_EQ(0U, token.char_pos);
+  EXPECT_TRUE(token.error());
+}
+
+TEST(LexerTest, ERROR_UNTERMINATED_STRING_LITERAL_EOF) {
+  std::string source("\"Hello ");
+  std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+  const mojom::Token token = tokens[0];
+  EXPECT_EQ(mojom::TokenType::ERROR_UNTERMINATED_STRING_LITERAL,
+            token.token_type);
+  EXPECT_EQ("\"Hello ", token.token);
+  EXPECT_EQ(0U, token.char_pos);
+  EXPECT_TRUE(token.error());
+}
+
+TEST(LexerTest, ERROR_UNTERMINATED_STRING_LITERAL_ESC_EOF) {
+  std::string source("\"Hello \\");
+  std::vector<mojom::Token> tokens = mojom::Tokenize(source);
+  const mojom::Token token = tokens[0];
+  EXPECT_EQ(mojom::TokenType::ERROR_UNTERMINATED_STRING_LITERAL,
+            token.token_type);
+  EXPECT_EQ("\"Hello \\", token.token);
+  EXPECT_EQ(0U, token.char_pos);
+  EXPECT_TRUE(token.error());
+}
+
+}  // namespace
+}  // namespace mojom
+}  // namespace mojo