| // Copyright 2015 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
// To use the lexer, call Tokenize with the source string to obtain a
// TokenStream. The lexer runs concurrently in its own goroutine, so the
// TokenStream can be consumed before the lexer has finished with the source.
| // |
| // The lexer is implemented as a state machine. The states are represented |
| // by functions (the stateFn type) which accept a lexer and return the |
| // new state. |
| // |
// Most states also have an isFooStart function which determines whether a
// transition to state Foo is appropriate. Those functions accept a single
// rune as a parameter and return true if the state machine should
// transition to state Foo. Some states lack such a function because their
// transition condition is trivial.
| // |
| // The lexer implementation was inspired by |
| // http://cuddle.googlecode.com/hg/talk/lex.html |
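//
// A minimal usage sketch (illustrative: PeekNext and ConsumeNext are assumed
// names standing in for whatever accessors the TokenStream interface
// actually declares):
//
//	ts := Tokenize("module foo;")
//	t := ts.PeekNext() // t.Kind == Module, t.Text == "module"
//	ts.ConsumeNext()   // advance past the peeked token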
| |
| package lexer |
| |
| import ( |
| "unicode/utf8" |
| ) |
| |
| // Tokenize accepts a source string and parses it into a stream of tokens which |
// can be read from the returned TokenStream. Comment tokens are omitted from
| // the returned stream. |
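// For example, the source "module foo; // a comment" yields Module, Name and
// Semi tokens; the trailing SingleLineComment token is filtered out.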
| func Tokenize(source string) TokenStream { |
| return NewFilteredTokenStream( |
| tokenizeUnfiltered(source), |
| []TokenKind{SingleLineComment, MultiLineComment, EmptyLine}) |
| } |
| |
| // tokenizeUnfiltered returns a TokenStream which does not filter out any of the |
// tokens in the channel. It is used for testing and by Tokenize, which adds
// a filter on top of the stream returned by tokenizeUnfiltered.
| func tokenizeUnfiltered(source string) TokenStream { |
| tokens := make(chan Token) |
| l := lexer{source: source, tokens: tokens} |
| go l.run() |
| return &TokenChan{tokenChan: tokens} |
| } |
| |
| type lexer struct { |
| // source is the source code to be lexed. |
| source string |
| |
| // tokens is a channel to which the found tokens are emitted. |
| tokens chan Token |
| |
| // sourcePosBytes is the number of bytes that have been consumed. |
| sourcePosBytes int |
| |
| // sourcePos is the number of runes that have been consumed. |
| sourcePos int |
| |
	// lineNo is the current (zero-based) line number.
| lineNo int |
| |
| // linePos is how many runes have been consumed since the beginning of the |
| // line. |
| linePos int |
| |
| // linePosBytes is how many bytes have been consumed since the beginning of |
| // the line. |
| linePosBytes int |
| |
| // curTokenSourcePosBytes is the number of bytes consumed prior to the |
| // beginning of the current token. |
| curTokenSourcePosBytes int |
| |
| // curTokenSourcePos is the number of runes consumed prior to the beginning of |
| // the current token. |
| curTokenSourcePos int |
| |
| // curTokenLineNo is the line number on which the current token begins. |
| curTokenLineNo int |
| |
| // curTokenLinePos is the number of runes since the beginning of the line |
| // where the current token begins. |
| curTokenLinePos int |
| |
| // curTokenLinePosBytes is the number of bytes since the beginning of the line |
| // where the current token begins. |
| curTokenLinePosBytes int |
| } |
| |
| // CurText returns the consumed part of the current token. |
| func (l *lexer) CurText() string { |
| return l.source[l.curTokenSourcePosBytes:l.sourcePosBytes] |
| } |
| |
| // emitToken emits the current token and begins a new token. |
| func (l *lexer) emitToken(tokenType TokenKind) { |
| l.tokens <- Token{ |
| Kind: tokenType, |
| Text: l.source[l.curTokenSourcePosBytes:l.sourcePosBytes], |
| SourcePos: l.curTokenSourcePos, |
| LineNo: l.curTokenLineNo, |
| LinePos: l.curTokenLinePos, |
| LinePosBytes: l.curTokenLinePosBytes, |
| SourcePosBytes: l.curTokenSourcePosBytes} |
| l.beginToken() |
| } |
| |
// beginToken marks the current position as the beginning of a new token.
| func (l *lexer) beginToken() { |
| l.curTokenSourcePosBytes = l.sourcePosBytes |
| l.curTokenSourcePos = l.sourcePos |
| l.curTokenLineNo = l.lineNo |
| l.curTokenLinePos = l.linePos |
| l.curTokenLinePosBytes = l.linePosBytes |
| } |
| |
| // Consume consumes the next rune in the source. |
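// For example, consuming the 2-byte rune 'é' advances sourcePos and linePos
// by 1 but sourcePosBytes and linePosBytes by 2, while consuming '\n'
// increments lineNo and resets linePos and linePosBytes to 0.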
| func (l *lexer) Consume() { |
| if l.IsEos() { |
| return |
| } |
| |
| c, width := utf8.DecodeRuneInString(l.source[l.sourcePosBytes:]) |
| |
	if c == '\n' {
		l.lineNo++
		l.linePos = 0
		l.linePosBytes = 0
	} else {
		l.linePos++
		l.linePosBytes += width
	}
	l.sourcePosBytes += width
	l.sourcePos++
| } |
| |
| // Peek returns the next rune in the source. |
| func (l *lexer) Peek() rune { |
| // At the end of the string, there is no sane answer to Peek. |
| if l.IsEos() { |
| return utf8.RuneError |
| } |
| |
| // If RuneError is returned, it will be handled as any other rune, likely |
| // resulting in an ErrorIllegalChar token being emitted. |
| char, _ := utf8.DecodeRuneInString(l.source[l.sourcePosBytes:]) |
| return char |
| } |
| |
// IsEos returns true if the whole source has been consumed and false
// otherwise.
| func (l *lexer) IsEos() bool { |
| return l.sourcePosBytes >= len(l.source) |
| } |
| |
| // run is the lexer's main loop. |
| func (l *lexer) run() { |
| // We are implementing a state machine. |
| // lexRoot is the beginning state. |
| // nil is the end state. |
| // States are functions which are called on the lexer. They return the |
| // next state. |
| for state := lexRoot; state != nil; { |
| state = state(l) |
| } |
| close(l.tokens) |
| } |
| |
| // A stateFn represents a state in the lexer state machine. |
| type stateFn func(*lexer) stateFn |
| |
// lexRoot is the beginning state and also the state to which the machine
// returns after most tokens are emitted.
| func lexRoot(l *lexer) stateFn { |
| if l.IsEos() { |
| return nil |
| } |
| |
| switch c := l.Peek(); { |
| case isSingleCharTokens(c): |
| return lexSingleCharTokens |
| case isEqualsOrResponseStart(c): |
| return lexEqualsOrResponse |
| case isNameStart(c): |
| return lexName |
| case isOrdinalStart(c): |
| return lexOrdinal |
| case isNumberStart(c): |
| return lexNumber |
| case isStringStart(c): |
| return lexString |
| case isSkippable(c): |
| return lexSkip |
| case isMaybeComment(c): |
| return lexComment |
| } |
| |
| l.Consume() |
| l.emitToken(ErrorIllegalChar) |
| return nil |
| } |
| |
// isSkippable returns true if the rune is insignificant whitespace: a space,
// tab, carriage return or newline.
| func isSkippable(c rune) bool { |
| return c == ' ' || c == '\t' || c == '\r' || c == '\n' |
| } |
| |
| // lexSkip consumes skippable runes. |
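// For example, in the source "a\n\nb" the second '\n' is seen with linePos
// equal to 0, so a zero-width EmptyLine token is emitted between the two
// Name tokens (Tokenize filters it out; tokenizeUnfiltered keeps it).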
| func lexSkip(l *lexer) stateFn { |
| for isSkippable(l.Peek()) { |
| l.beginToken() |
| if l.Peek() == '\n' && l.linePos == 0 { |
| l.emitToken(EmptyLine) |
| } |
| l.Consume() |
| } |
| l.beginToken() |
| return lexRoot |
| } |
| |
| // singleCharTokens is a map of single-rune tokens. |
| var singleCharTokens = map[rune]TokenKind{ |
| '(': LParen, |
| ')': RParen, |
| '[': LBracket, |
| ']': RBracket, |
| '{': LBrace, |
| '}': RBrace, |
| '<': LAngle, |
| '>': RAngle, |
| ';': Semi, |
| ',': Comma, |
| '.': Dot, |
| '-': Minus, |
| '+': Plus, |
| '&': Amp, |
| '?': Qstn, |
| } |
| |
// isSingleCharTokens returns true if the rune is a single-character token.
| func isSingleCharTokens(c rune) bool { |
| _, ok := singleCharTokens[c] |
| return ok |
| } |
| |
| // lexSingleCharTokens lexes single character tokens. |
| func lexSingleCharTokens(l *lexer) stateFn { |
| c := l.Peek() |
| l.Consume() |
	t := singleCharTokens[c]
| l.emitToken(t) |
| |
| return lexRoot |
| } |
| |
// isEqualsOrResponseStart returns true if the rune corresponds to the
// beginning of either the '=' or the '=>' token.
| func isEqualsOrResponseStart(c rune) bool { |
| return c == '=' |
| } |
| |
| // lexEqualsOrResponse lexes the '=' or the '=>' token. |
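// For example, in a method declaration such as "Frobinate() => (bool ok);"
// the "=>" lexes as a single Response token rather than as Equals followed
// by RAngle.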
| func lexEqualsOrResponse(l *lexer) stateFn { |
| l.Consume() |
| |
| if l.Peek() == '>' { |
| l.Consume() |
| l.emitToken(Response) |
| } else { |
| l.emitToken(Equals) |
| } |
| |
| return lexRoot |
| } |
| |
| // isAlpha returns true if the rune is a letter of the alphabet. |
| func isAlpha(c rune) bool { |
| return (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) |
| } |
| |
| // isDigit returns true if the rune is a digit. |
| func isDigit(c rune) bool { |
| return ('0' <= c && c <= '9') |
| } |
| |
| // isHexDigit returns true if the rune is a hexadecimal digit. |
| func isHexDigit(c rune) bool { |
| return isDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') |
| } |
| |
| // isNameStart returns true if the rune is the beginning of a Name token. |
| func isNameStart(c rune) bool { |
| return isAlpha(c) || c == '_' |
| } |
| |
// keywordTokens maps keywords to their associated tokens.
| var keywordTokens = map[string]TokenKind{ |
| "import": Import, |
| "module": Module, |
| "struct": Struct, |
| "union": Union, |
| "interface": Interface, |
| "enum": Enum, |
| "const": Const, |
| "true": True, |
| "false": False, |
| "default": Default, |
| } |
| |
| // lexName lexes valid C identifiers. (K&R2: A.2.3) |
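// For example, "interface" emits the Interface keyword token, while
// "interfaces", which is not in keywordTokens, emits a Name token.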
| func lexName(l *lexer) stateFn { |
| l.Consume() |
| |
| // isNameRune returns true if the rune is valid in a Name token. |
| isNameRune := func(c rune) bool { |
| return isAlpha(c) || isDigit(c) || c == '_' |
| } |
| |
| for isNameRune(l.Peek()) { |
| l.Consume() |
| } |
| |
	// Emit the appropriate keyword token if the current text is a keyword;
	// otherwise emit a Name token.
| if token, found := keywordTokens[l.CurText()]; found { |
| l.emitToken(token) |
| } else { |
| l.emitToken(Name) |
| } |
| |
| return lexRoot |
| } |
| |
| // isOrdinalStart returns true if the rune is the beginning of an Ordinal |
| // token. |
| func isOrdinalStart(c rune) bool { |
	return c == '@'
| } |
| |
| // lexOrdinal lexes an Ordinal token. Ordinals are a '@' followed by one |
| // or more digits. |
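// For example, "@42" lexes as a single Ordinal token with Text "@42".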
| func lexOrdinal(l *lexer) stateFn { |
| // Consume the '@'. |
| l.Consume() |
| |
| for isDigit(l.Peek()) { |
| l.Consume() |
| } |
| |
| l.emitToken(Ordinal) |
| |
| return lexRoot |
| } |
| |
| // isNumberStart returns true if the rune is the beginning of a number. |
| func isNumberStart(c rune) bool { |
| // Even hexadecimals must begin with a digit (namely 0). |
| return isDigit(c) |
| } |
| |
| // lexNumber lexes a number token. |
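// For example, "42" is handled entirely by lexDec (IntConstDec), "0x2A"
// routes through lexNumberStartWithZero to lexHexNumber (IntConstHex), and
// "0.5e-3" routes through lexFractionPart and lexExponentPart (FloatConst).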
| func lexNumber(l *lexer) stateFn { |
	// A number that begins with 0 is a hexadecimal, a float or a plain 0,
	// not a multi-digit decimal integer.
| if l.Peek() == '0' { |
| return lexNumberStartWithZero |
| } |
| return lexDec |
| } |
| |
| // lexDec lexes a base-10 number. |
| func lexDec(l *lexer) stateFn { |
| for isDigit(l.Peek()) { |
| l.Consume() |
| } |
| |
| // Handle numbers with decimal points and numbers in scientific notation. |
| switch c := l.Peek(); { |
| case c == 'e' || c == 'E': |
| return lexExponentPart |
| case c == '.': |
| return lexFractionPart |
| } |
| |
| l.emitToken(IntConstDec) |
| |
| return lexRoot |
| } |
| |
// lexNumberStartWithZero lexes hexadecimal integers, floats that begin with
// 0, or a plain 0.
| func lexNumberStartWithZero(l *lexer) stateFn { |
| // Consume the leading 0 |
| l.Consume() |
| |
| // Here we check to see whether we are in the hexadecimal or floating |
| // point case. |
| switch c := l.Peek(); { |
| case c == 'x' || c == 'X': |
| return lexHexNumber |
| case c == '.': |
| return lexFractionPart |
| case c == 'e' || c == 'E': |
| return lexExponentPart |
| } |
| |
| // Found a naked 0. |
| l.emitToken(IntConstDec) |
| |
| return lexRoot |
| } |
| |
| // lexHexNumber lexes hexadecimal integers. |
| func lexHexNumber(l *lexer) stateFn { |
| // Consume the x or X |
| l.Consume() |
| |
| for isHexDigit(l.Peek()) { |
| l.Consume() |
| } |
| |
| l.emitToken(IntConstHex) |
| |
| return lexRoot |
| } |
| |
| // lexFractionPart lexes the part of a floating point number that comes |
| // after the decimal point. |
| func lexFractionPart(l *lexer) stateFn { |
| // Consume '.' |
| l.Consume() |
| |
| for isDigit(l.Peek()) { |
| l.Consume() |
| } |
| |
| if c := l.Peek(); c == 'e' || c == 'E' { |
| return lexExponentPart |
| } |
| |
| l.emitToken(FloatConst) |
| |
| return lexRoot |
| } |
| |
| // lexExponentPart lexes the part of a floating point number after E or e. |
| func lexExponentPart(l *lexer) stateFn { |
| // Consume 'e' or 'E' |
| l.Consume() |
| |
| if c := l.Peek(); c == '+' || c == '-' { |
| l.Consume() |
| } |
| |
| for isDigit(l.Peek()) { |
| l.Consume() |
| } |
| |
| l.emitToken(FloatConst) |
| |
| return lexRoot |
| } |
| |
| // isStringStart returns true if the rune represents the beginning of a string. |
| func isStringStart(c rune) bool { |
| return '"' == c |
| } |
| |
| // lexString lexes a quoted string. |
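// For example, the input "a\"b" (quotes included) lexes as a single
// StringLiteral token: the backslash causes the escaped quote to be consumed
// blindly rather than terminating the string.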
| func lexString(l *lexer) stateFn { |
	// Consume the opening quote.
| l.Consume() |
| |
| for !l.IsEos() && l.Peek() != '"' && l.Peek() != '\n' { |
| if l.Peek() == '\\' { |
| // If we see an escape character consume whatever follows blindly. |
| // TODO(azani): Consider parsing escape sequences. |
| l.Consume() |
| } |
| l.Consume() |
| } |
| |
| if l.IsEos() || l.Peek() == '\n' { |
| l.emitToken(ErrorUnterminatedStringLiteral) |
| return nil |
| } |
| |
	// Consume the closing quote.
| l.Consume() |
| |
| l.emitToken(StringLiteral) |
| |
| return lexRoot |
| } |
| |
| // isMaybeComment returns true if the rune may be the beginning of a |
| // comment. |
| func isMaybeComment(c rune) bool { |
| return c == '/' |
| } |
| |
| // lexComment consumes a single-line or multi-line comment. |
| func lexComment(l *lexer) stateFn { |
| // Consume the '/'. |
| l.Consume() |
| |
| switch l.Peek() { |
| case '/': |
| return lexSingleLineComment |
| case '*': |
| return lexMultiLineComment |
| } |
| |
| l.emitToken(ErrorIllegalChar) |
| return nil |
| } |
| |
| // lexSingleLineComment consumes a single line comment. |
| func lexSingleLineComment(l *lexer) stateFn { |
| // Consume the '/' |
| l.Consume() |
| |
| for !l.IsEos() && l.Peek() != '\n' { |
| l.Consume() |
| } |
| |
| l.emitToken(SingleLineComment) |
| return lexRoot |
| } |
| |
| // lexMultiLineComment consumes a multi-line comment. |
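// For example, "/***/" and "/* a **/" each lex as a single MultiLineComment
// token; runs of '*'s are resolved in lexPossibleEndOfComment below.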
| func lexMultiLineComment(l *lexer) stateFn { |
	// Consume the '*' that opened the comment or, when re-entered from
	// lexPossibleEndOfComment, the body rune that followed a run of '*'s.
	l.Consume()
| |
| for !l.IsEos() { |
| if l.Peek() == '*' { |
| return lexPossibleEndOfComment |
| } |
| l.Consume() |
| } |
| |
| l.emitToken(ErrorUnterminatedComment) |
| return nil |
| } |
| |
// lexPossibleEndOfComment consumes a run of '*'s inside a multi-line comment
// and determines whether the comment in fact ends there.
func lexPossibleEndOfComment(l *lexer) stateFn {
	// Only the last '*' in a run of '*'s can close the comment, so consume
	// the whole run before looking for the terminating '/'.
	for l.Peek() == '*' {
		l.Consume()
	}
| |
| if l.IsEos() { |
| l.emitToken(ErrorUnterminatedComment) |
| return nil |
| } |
| |
| if l.Peek() == '/' { |
| l.Consume() |
| l.emitToken(MultiLineComment) |
| return lexRoot |
| } |
| |
| return lexMultiLineComment |
| } |