| // Copyright 2015 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // TokenKinds is a type which describes the kinds of tokens which can be |
| // encountered in a mojom file. |
| |
| package lexer |
| |
| import ( |
| "bytes" |
| "fmt" |
| "strings" |
| "unicode/utf8" |
| ) |
| |
// TokenKind identifies the kind of a token which can be encountered in a
// mojom file. The values are assigned with iota in declaration order, starting
// at 0 with ErrorIllegalChar, so reordering the constants below changes their
// numeric values.
type TokenKind int

// TokenKinds
const (
	// A character was found which is not part of a valid token.
	ErrorIllegalChar TokenKind = iota
	// A quoted string was opened but not closed.
	ErrorUnterminatedStringLiteral
	// A multiline comment was opened but not closed.
	ErrorUnterminatedComment
	// Indicates the end of a stream of tokens.
	EOF

	// Punctuators and Separators
	LParen   // (
	RParen   // )
	LBracket // [
	RBracket // ]
	LBrace   // {
	RBrace   // }
	LAngle   // <
	RAngle   // >
	Semi     // ;
	Comma    // ,
	Dot      // .
	Minus    // -
	Plus     // +
	Amp      // &
	Qstn     // ?
	Equals   // =
	Response // =>

	// Names
	Name

	// Keywords
	Import    // import
	Module    // module
	Struct    // struct
	Union     // union
	Interface // interface
	Enum      // enum
	Const     // const
	True      // true
	False     // false
	Default   // default

	// Constants
	IntConstDec   // decimal integer literal
	IntConstHex   // hex integer literal
	FloatConst    // float literal
	Ordinal       // an ordinal
	StringLiteral // a string literal

	// Comments
	SingleLineComment
	MultiLineComment
)
| |
| // This method is used to generate user-facing strings in compilation error |
| // messages. For example for LBrace we produce the string "'{'". Notice the |
| // single-quotes. This will be used for example in an error message that looks |
| // like the following: |
| // Unexpected token at line 5, column 6: '###'. Expecting '{'. |
| func (tokenKind TokenKind) String() string { |
| switch tokenKind { |
| // Errors |
| case ErrorIllegalChar: |
| return "illegal token" |
| case ErrorUnterminatedStringLiteral: |
| return "unterminated string literal" |
| case ErrorUnterminatedComment: |
| return "unterminated comment" |
| |
| // End of file |
| case EOF: |
| return "eof" |
| |
| // Punctuators and Separators |
| case LParen: |
| return "'('" |
| case RParen: |
| return "')'" |
| case LBracket: |
| return "'['" |
| case RBracket: |
| return "']'" |
| case LBrace: |
| return "'{'" |
| case RBrace: |
| return "'}'" |
| case LAngle: |
| return "'<'" |
| case RAngle: |
| return "'>'" |
| case Semi: |
| return "';'" |
| case Comma: |
| return "','" |
| case Dot: |
| return "'.'" |
| case Minus: |
| return "'-'" |
| case Plus: |
| return "'+'" |
| case Amp: |
| return "'&'" |
| case Qstn: |
| return "'?'" |
| case Equals: |
| return "'='" |
| case Response: |
| return "'=>'" |
| |
| // Names |
| case Name: |
| return "a name" |
| |
| // Keywords |
| case Import: |
| return "'import'" |
| case Module: |
| return "'module'" |
| case Struct: |
| return "'struct'" |
| case Union: |
| return "'union'" |
| case Interface: |
| return "'interface'" |
| case Enum: |
| return "'enum'" |
| case Const: |
| return "'const'" |
| case True: |
| return "'true'" |
| case False: |
| return "'false'" |
| case Default: |
| return "'default'" |
| |
| // Constants |
| case IntConstDec: |
| return "decimal integer literal" |
| case IntConstHex: |
| return "hex integer literal" |
| case FloatConst: |
| return "float literal" |
| case Ordinal: |
| return "an ordinal" |
| case StringLiteral: |
| return "a string literal" |
| |
| case SingleLineComment: |
| return "single line comment" |
| case MultiLineComment: |
| return "multi line comment" |
| |
| default: |
| // Note(rudominer) It is important to use %d below so as to avoid |
| // re-invoking this method and causing an infinite recursion. |
| return fmt.Sprintf("%d", tokenKind) |
| } |
| } |
| |
// Token is a single token together with its position in the source it was
// found in. Positions are recorded both in runes and in bytes.
type Token struct {
	// Kind is the kind of the token.
	Kind TokenKind
	// Text is the text of the token. For a StringLiteral this includes the
	// delimiting quotation marks (see StringLiteralTokenToText).
	Text string
	// SourcePos is the number of runes preceding the token.
	SourcePos int
	// LinePos is the number of runes preceding the token on its line.
	LinePos int
	// SourcePosBytes is the number of bytes preceding the token.
	SourcePosBytes int
	// LinePosBytes is the number of bytes preceding the token on its line.
	LinePosBytes int
	// LineNo is the line on which the token is found. (First line is 0.)
	LineNo int
}
| |
| // ShortLocationString is used to generate user-facing strings in compilation |
| // error messages. This will be used for example in an error message that looks |
| // like the following: |
| // Unexpected token at line 5, column 6: '###'. Expecting '{'. |
| func (t Token) ShortLocationString() string { |
| return fmt.Sprintf("%d:%d", t.LineNo+1, t.LinePos+1) |
| } |
| |
| func (t Token) LongLocationString() string { |
| return fmt.Sprintf("line %d, column %d", t.LineNo+1, t.LinePos+1) |
| } |
| |
| // EOF returns true if the token on which it is called represents the end of the |
| // token string. |
| func (t Token) EOF() bool { |
| return t.Kind == EOF |
| } |
| |
| // String is used to generate user-facing strings in compilation error |
| // messages. For many token kinds the TokenKind.String() method will produce |
| // good results for representing the token. But for other TokenKinds we will |
| // want to include some information besides a representation of the kind. |
| // For example for an ErrorIllegalChar kind we wnat to show the text. |
| // This will be used for example in an error message that looks |
| // like the following: |
| // Unexpected token at line 5, column 6: '###'. Expecting '{'. |
| func (token Token) String() string { |
| switch token.Kind { |
| case StringLiteral: |
| return token.Text |
| case Name, IntConstDec, IntConstHex, FloatConst, Ordinal, ErrorIllegalChar: |
| return fmt.Sprintf("%q", token.Text) |
| |
| default: |
| return token.Kind.String() |
| } |
| } |
| |
| // Snippet is used to generate a user-facing string in compilation error |
| // messages. It displays the token's text as well as the surrounding line for |
| // context. It also includes a line with some carets to highlight the token. |
| // source is the source code where token was found. |
| // If color is true, the carets on the second line will be colored. |
| func (token Token) Snippet(source string, color bool) (snippet string) { |
| begin := token.SourcePosBytes - token.LinePosBytes |
| |
| // First, we make sure the prelude to the token is not too long. Since we |
| // limit the width of the snippet to 79 runes, we first make sure the prelude |
| // takes up no more than 58 of those runes. |
| runeCount := token.LinePos |
| if runeCount > 58 { |
| runeCount = 58 |
| } |
| skipRunes := token.LinePos - runeCount |
| for index, _ := range source[begin:] { |
| skipRunes-- |
| if skipRunes == 0 { |
| begin += (index + 1) |
| break |
| } |
| } |
| |
| // Now we calculate the end of the snippet line. Either the first new line |
| // rune or a total of no more than 79 characters. |
| end := len(source) |
| for index, rune := range source[token.SourcePosBytes:] { |
| if rune == '\n' || runeCount >= 78 { |
| end = index + begin + token.LinePosBytes |
| break |
| } |
| runeCount++ |
| } |
| |
| snippetBuffer := bytes.NewBufferString(source[begin:end]) |
| snippetBuffer.WriteRune('\n') |
| |
| // We calculate how much whitespace to add before the caret marker for the |
| // token. Please note that there is an assumption that all non-tab characters |
| // are of the same width. This is not correct, but close-enough most of the |
| // time. |
| for _, rune := range source[begin:token.SourcePosBytes] { |
| if rune == '\t' { |
| snippetBuffer.WriteRune('\t') |
| } else { |
| snippetBuffer.WriteRune(' ') |
| } |
| } |
| |
| // We don't want too big of a caret line as that may be distracting. So we |
| // limit it to 20 runes at most. |
| tokenSize := utf8.RuneCountInString(token.Text) |
| if token.Kind == ErrorUnterminatedComment { |
| // In the case of an unterminated comment, the token text will contain |
| // the entire rest of the source after the opening comment. We really |
| // only want to highlight the "/*" characters marking the beginning of |
| // the comment. |
| tokenSize = 2 |
| } else if tokenSize > 20 { |
| tokenSize = 20 |
| } |
| if color { |
| // Set the caret characters to green. |
| snippetBuffer.WriteString("\x1b[32;1m") |
| } |
| snippetBuffer.WriteString(strings.Repeat("^", tokenSize)) |
| if color { |
| // Reset all printing attributes. |
| snippetBuffer.WriteString("\x1b[0m") |
| } |
| snippet = snippetBuffer.String() |
| return |
| } |
| |
| func StringLiteralTokenToText(token Token) string { |
| if token.Kind != StringLiteral { |
| panic("stringLiteralTokenToText can only accept StringLiteral tokens.") |
| } |
| |
| text := token.Text |
| length := len(text) |
| if (length < 2) || (text[0] != '"') || (text[length-1] != '"') { |
| panic(fmt.Sprintf("Lexer returned a string literal token whose "+ |
| "text was not delimited by quotation marks: '%s'.", text)) |
| } |
| return text[1 : length-1] |
| } |