// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package lexer

import (
	"strings"
	"testing"
)

// checkEq fails the test if expected and actual are not equal.
func checkEq(t *testing.T, expected, actual interface{}) {
	if expected != actual {
		t.Fatalf("Failed check: Expected (%v), Actual (%v)", expected, actual)
	}
}

// pumpTokens pumps all the tokens from a channel into a slice.
func pumpTokens(tokensChan chan Token) []Token {
	tokens := []Token{}
	for token := range tokensChan {
		tokens = append(tokens, token)
	}
	return tokens
}

// TestAllSingleTokens tests for each token that a valid string is accepted as
// the correct token.
func TestAllSingleTokens(t *testing.T) {
	testData := []struct {
		source string
		token  TokenKind
	}{
		{"(", LParen},
		{")", RParen},
		{"[", LBracket},
		{"]", RBracket},
		{"{", LBrace},
		{"}", RBrace},
		{"<", LAngle},
		{">", RAngle},
		{";", Semi},
		{",", Comma},
		{".", Dot},
		{"-", Minus},
		{"+", Plus},
		{"&", Amp},
		{"?", Qstn},
		{"=", Equals},
		{"=>", Response},
		{"somet_hi3ng", Name},
		{"import", Import},
		{"module", Module},
		{"struct", Struct},
		{"union", Union},
		{"interface", Interface},
		{"enum", Enum},
		{"const", Const},
		{"true", True},
		{"false", False},
		{"default", Default},
		{"@10", Ordinal},
		{"10", IntConstDec},
		{"0", IntConstDec},
		{"0xA10", IntConstHex},
		{"0xa10", IntConstHex},
		{"0XA10", IntConstHex},
		{"0Xa10", IntConstHex},
		{"10.5", FloatConst},
		{"10e5", FloatConst},
		{"0.5", FloatConst},
		{"0e5", FloatConst},
		{"10e+5", FloatConst},
		{"10e-5", FloatConst},
		{"3.14E5", FloatConst},
		{"3.14e5", FloatConst},
		{"3.14E+55", FloatConst},
		{"3.14e+55", FloatConst},
		{"3.14E-55", FloatConst},
		{"3.14e-55", FloatConst},
		{"\"hello world\"", StringLiteral},
		{"\"hello \\\"real\\\" world\"", StringLiteral},
		{"// hello comment world", SingleLineComment},
		{"/* hello \n comment */", MultiLineComment},
	}

	for i := range testData {
		l := lexer{source: testData[i].source, tokens: make(chan Token)}
		go l.run()
		tokens := pumpTokens(l.tokens)
		if len(tokens) != 1 {
			t.Fatalf("Source('%v'): Expected 1 token but got %v instead: %v",
				testData[i].source, len(tokens), tokens)
		}
		checkEq(t, testData[i].source, tokens[0].Text)
		checkEq(t, testData[i].token, tokens[0].Kind)
	}
}

// TestTokenPosition tests that a lexed token's position in the source string,
// its line number, and its position within the line are correctly computed.
func TestTokenPosition(t *testing.T) {
	source := "  \n  ."
	l := lexer{source: source, tokens: make(chan Token)}
	go l.run()
	tokens := pumpTokens(l.tokens)
	token := tokens[0]
	checkEq(t, 5, token.SourcePos)
	checkEq(t, 1, token.LineNo)
	checkEq(t, 2, token.LinePos)
}

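// TestTokenPositionSecondToken is a supplementary sketch, not part of the
// original suite: assuming the zero-based, rune-counted position conventions
// verified above, it checks that they also hold for a token that is not the
// first one produced by the lexer.
func TestTokenPositionSecondToken(t *testing.T) {
	source := "(\n  )"
	ts := Tokenize(source)
	checkEq(t, LParen, ts.PeekNext().Kind)
	ts.ConsumeNext()
	token := ts.PeekNext()
	checkEq(t, RParen, token.Kind)
	checkEq(t, 4, token.SourcePos) // Rune index of ')' in the whole source.
	checkEq(t, 1, token.LineNo)    // Zero-based: the second line.
	checkEq(t, 2, token.LinePos)   // Zero-based position within that line.
}
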
// TestTokenPositionChineseString tests that SourcePos is expressed as a number
// of runes and that SourcePosBytes is expressed as a number of bytes.
func TestTokenPositionChineseString(t *testing.T) {
	source := "\"您好\" is"
	ts := Tokenize(source)
	checkEq(t, StringLiteral, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, 5, ts.PeekNext().SourcePos)
	checkEq(t, 9, ts.PeekNext().SourcePosBytes)
}

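// TestTokenPositionASCIIBytes is a supplementary sketch, not part of the
// original suite: in pure-ASCII source every rune occupies exactly one byte,
// so SourcePos and SourcePosBytes should agree, given the rune/byte semantics
// exercised by TestTokenPositionChineseString above.
func TestTokenPositionASCIIBytes(t *testing.T) {
	source := "\"hi\" is"
	ts := Tokenize(source)
	checkEq(t, StringLiteral, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, 5, ts.PeekNext().SourcePos)      // "is" starts at rune index 5...
	checkEq(t, 5, ts.PeekNext().SourcePosBytes) // ...which is also byte index 5.
}
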
// TestSkipSkippable tests that all skippable characters are skipped.
func TestSkipSkippable(t *testing.T) {
	source := " \t \r \n ."
	l := lexer{source: source, tokens: make(chan Token)}
	go l.run()
	tokens := pumpTokens(l.tokens)
	checkEq(t, Dot, tokens[0].Kind)
}

// TestTokenize tests that a single token embedded in a larger string is
// correctly lexed.
func TestTokenize(t *testing.T) {
	ts := Tokenize(" \t . ")
	token := ts.PeekNext()
	checkEq(t, Dot, token.Kind)
	ts.ConsumeNext()
	token = ts.PeekNext()
	checkEq(t, EOF, token.Kind)
}

// TestTokenizeBadUTF8String tests that an invalid UTF-8 string is handled.
func TestTokenizeBadUTF8String(t *testing.T) {
	ts := Tokenize("\xF0")
	checkEq(t, ErrorIllegalChar, ts.PeekNext().Kind)
}

// TestTokenizeEmptyString tests that empty strings are handled correctly.
func TestTokenizeEmptyString(t *testing.T) {
	ts := Tokenize("")
	checkEq(t, EOF, ts.PeekNext().Kind)
}

// TestTokenizeMoreThanOne tests that more than one token is correctly lexed.
func TestTokenizeMoreThanOne(t *testing.T) {
	ts := Tokenize("()")
	checkEq(t, LParen, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, RParen, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, EOF, ts.PeekNext().Kind)
}

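// TestPeekNextIsIdempotent is a supplementary sketch, not part of the
// original suite: it makes explicit the assumption, relied on by the
// comment-lexing tests below, that PeekNext does not advance the stream, so
// repeated peeks return the same token until ConsumeNext is called.
func TestPeekNextIsIdempotent(t *testing.T) {
	ts := Tokenize("()")
	checkEq(t, LParen, ts.PeekNext().Kind)
	checkEq(t, LParen, ts.PeekNext().Kind) // A second peek yields the same token.
	ts.ConsumeNext()
	checkEq(t, RParen, ts.PeekNext().Kind)
}
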
// TestIllegalChar tests that an illegal character is correctly spotted.
func TestIllegalChar(t *testing.T) {
	ts := Tokenize(" \t $ ")
	checkEq(t, ErrorIllegalChar, ts.PeekNext().Kind)
}

// TestUnterminatedStringLiteralEos tests that the correct error is emitted if
// a quoted string is never closed.
func TestUnterminatedStringLiteralEos(t *testing.T) {
	ts := Tokenize("\"hello world")
	checkEq(t, ErrorUnterminatedStringLiteral, ts.PeekNext().Kind)
}

// TestUnterminatedStringLiteralEol tests that the correct error is emitted if
// a quoted string is closed on a subsequent line.
func TestUnterminatedStringLiteralEol(t *testing.T) {
	ts := Tokenize("\"hello\n world\"")
	checkEq(t, ErrorUnterminatedStringLiteral, ts.PeekNext().Kind)
}

// TestSingleLineCommentNoSkip tests that single-line comments are correctly lexed.
func TestSingleLineCommentNoSkip(t *testing.T) {
	ts := tokenizeUnfiltered("( // some stuff\n)")
	checkEq(t, LParen, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, SingleLineComment, ts.PeekNext().Kind)
	checkEq(t, "// some stuff", ts.PeekNext().Text)
	ts.ConsumeNext()
	checkEq(t, RParen, ts.PeekNext().Kind)
}

// TestMultiLineCommentNoSkip tests that multi-line comments are correctly lexed.
func TestMultiLineCommentNoSkip(t *testing.T) {
	ts := tokenizeUnfiltered("( /* hello world/ * *\n */)")
	checkEq(t, LParen, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, MultiLineComment, ts.PeekNext().Kind)
	checkEq(t, "/* hello world/ * *\n */", ts.PeekNext().Text)
	ts.ConsumeNext()
	checkEq(t, RParen, ts.PeekNext().Kind)
}

// TestSingleLineComment tests that single-line comments are correctly skipped.
func TestSingleLineComment(t *testing.T) {
	ts := Tokenize("( // some stuff\n)")
	checkEq(t, LParen, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, RParen, ts.PeekNext().Kind)
}

// TestMultiLineComment tests that multi-line comments are correctly skipped.
func TestMultiLineComment(t *testing.T) {
	ts := Tokenize("( /* hello world/ * *\n */)")
	checkEq(t, LParen, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, RParen, ts.PeekNext().Kind)
}

// TestConsumeUnfilteredOnly tests that ConsumeNext consumes only unfiltered tokens.
func TestConsumeUnfilteredOnly(t *testing.T) {
	ts := Tokenize(" /* hello world*/ ( )")
	// ConsumeNext should consume the left parenthesis, not the filtered comment.
	ts.ConsumeNext()
	checkEq(t, RParen, ts.PeekNext().Kind)
}

// TestCommentsOnly tests that source made only of comments is correctly processed.
func TestCommentsOnly(t *testing.T) {
	ts := Tokenize("/* hello world */\n // hello world")
	checkEq(t, EOF, ts.PeekNext().Kind)
}

// TestUnterminatedMultiLineComment tests that unterminated multi-line comments
// emit the correct error.
func TestUnterminatedMultiLineComment(t *testing.T) {
	ts := Tokenize("( /* hello world/ * *\n )")
	checkEq(t, LParen, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, ErrorUnterminatedComment, ts.PeekNext().Kind)
}

// TestUnterminatedMultiLineCommentAtStar tests that if the string ends at a *
// (which could be the beginning of the close of a multi-line comment) the
// right error is emitted.
func TestUnterminatedMultiLineCommentAtStar(t *testing.T) {
	ts := Tokenize("( /* hello world/ *")
	checkEq(t, LParen, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, ErrorUnterminatedComment, ts.PeekNext().Kind)
}

// TestTokenSnippet tests snippet generation based on tokens.
func TestTokenSnippet(t *testing.T) {
	source := "\n hello world \n"
	ts := Tokenize(source)
	expected := " hello world \n"
	expected += " ^^^^^"
	checkEq(t, expected, ts.PeekNext().Snippet(source, false))
}

// TestTokenSnippetNoNewLines tests that the correct snippet is generated when
// the source does not have new lines.
func TestTokenSnippetNoNewLines(t *testing.T) {
	source := " hello world "
	ts := Tokenize(source)
	expected := " hello world \n"
	expected += " ^^^^^"
	checkEq(t, expected, ts.PeekNext().Snippet(source, false))
}

// TestTokenSnippetNotFirst tests snippet generation for a token that is not
// first on the line.
func TestTokenSnippetNotFirst(t *testing.T) {
	source := "hello world"
	ts := Tokenize(source)
	expected := "hello world\n"
	expected += "      ^^^^^"
	ts.ConsumeNext()
	checkEq(t, expected, ts.PeekNext().Snippet(source, false))
}

// TestTokenSnippetNotASCII tests snippet generation in the presence of
// non-ASCII characters.
func TestTokenSnippetNotASCII(t *testing.T) {
	source := "\"你好\" \"世界\""
	ts := Tokenize(source)
	expected := "\"你好\" \"世界\"\n"
	expected += "     ^^^^"
	ts.ConsumeNext()
	checkEq(t, expected, ts.PeekNext().Snippet(source, false))
}

// TestTokenSnippetLongPrelude tests snippet generation in the presence of a
// long line with many characters preceding the token.
func TestTokenSnippetLongPrelude(t *testing.T) {
	source := strings.Repeat(" ", 70) + "hello world"
	ts := Tokenize(source)
	expected := strings.Repeat(" ", 58) + "hello world\n"
	expected += strings.Repeat(" ", 58) + "^^^^^"
	checkEq(t, expected, ts.PeekNext().Snippet(source, false))
}

// TestTokenSnippetLongPreludeWithNewLine tests snippet generation in the
// presence of a long line with many characters preceding the token and then a
// newline after the token.
func TestTokenSnippetLongPreludeWithNewLine(t *testing.T) {
	source := strings.Repeat(" ", 70) + "hello world\n"
	ts := Tokenize(source)
	expected := strings.Repeat(" ", 58) + "hello world\n"
	expected += strings.Repeat(" ", 58) + "^^^^^"
	checkEq(t, expected, ts.PeekNext().Snippet(source, false))
}

// TestTokenSnippetLongPreludeWithLongSuffix tests snippet generation in the
// presence of a long line with many characters preceding the token and then
// some characters following it on the same line.
func TestTokenSnippetLongPreludeWithLongSuffix(t *testing.T) {
	source := strings.Repeat(" ", 70) + "hello world " + strings.Repeat("x", 10)
	ts := Tokenize(source)
	expected := strings.Repeat(" ", 58) + "hello world xxxxxxxx\n"
	expected += strings.Repeat(" ", 58) + "^^^^^"
	checkEq(t, expected, ts.PeekNext().Snippet(source, false))
}

// TestFilteredTokens tests that comment tokens filtered out of the stream are
// still recorded and can be retrieved with FilteredTokens.
func TestFilteredTokens(t *testing.T) {
	source := "/* filtered1 */ hello world // filtered2"
	ts := Tokenize(source)
	ts.ConsumeNext()
	ts.ConsumeNext()
	filtered := ts.(*FilteredTokenStream).FilteredTokens()
	checkEq(t, MultiLineComment, filtered[0].Kind)
	checkEq(t, SingleLineComment, filtered[1].Kind)
}

// TestEmptyLine tests that an unfiltered stream emits an EmptyLine token for
// a line with no content.
func TestEmptyLine(t *testing.T) {
	ts := tokenizeUnfiltered("\n")
	checkEq(t, EmptyLine, ts.PeekNext().Kind)
	ts.ConsumeNext()
	checkEq(t, EOF, ts.PeekNext().Kind)
}

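// TestTokenizeWalkthrough is a supplementary sketch, not part of the original
// suite: it strings the pieces above together into the typical peek/consume
// loop over a short declaration. It assumes that a single letter lexes as a
// Name, consistent with the identifier case in TestAllSingleTokens.
func TestTokenizeWalkthrough(t *testing.T) {
	ts := Tokenize("module m;")
	for _, kind := range []TokenKind{Module, Name, Semi} {
		checkEq(t, kind, ts.PeekNext().Kind)
		ts.ConsumeNext()
	}
	checkEq(t, EOF, ts.PeekNext().Kind)
}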