// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package parser
import (
"fmt"
"mojom/mojom_tool/lexer"
"mojom/mojom_tool/mojom"
"strings"
)
////////////////////////////////////////////////////////////////////////////
// The Parser type
////////////////////////////////////////////////////////////////////////////
// This file contains the definition of the Parser type but it does not
// contain the actual parsing logic, which may be found in parsing.go.
// A Parser is constructed and used to parse a single mojom file. The
// Parser is given a pointer to a MojomDescriptor that it will populate.
// The same MojomDescriptor may be given to successive runs of the Parser
// so that an entire graph of .mojom files may be parsed.
type Parser struct {
// The stream of input tokens
inputStream lexer.TokenStream
// The current error state. In the current generation of the Parser we
// only handle a single parse error before giving up. If an error
// has been encountered then |err| is not nil.
// TODO(rudominer) Enhancement: Parser should be able to keep going
// after some errors. Change this field to be a list of errors instead of
// a single error.
err error
// Last token seen, whether or not it was consumed.
lastSeen lexer.Token
// Last token consumed. May or may not be equal to lastSeen.
lastConsumed lexer.Token
// The root of the parse tree being constructed. This may be nil
// because the parse tree is only explicitly constructed in
// debug mode.
rootNode *ParseNode
// The current node of the parse tree in our recursive descent. This
// may be nil because the parse tree is only explicitly constructed
// in debug mode.
currentNode *ParseNode
// The MojomDescriptor being filled in by the Parser. This is passed
// in via the constructor.
mojomDescriptor *mojom.MojomDescriptor
// The MojomFile being populated by this Parser. It is created in
// MakeParser() and populated by Parse(), which may only be called once
// per instance.
mojomFile *mojom.MojomFile
// The top of the Scope stack.
currentScope *mojom.Scope
// If true, an explicit parse tree is constructed (see SetDebugMode()).
debugMode bool
// Set to true by Parse(); each Parser instance may only be used once.
used bool
// In meta-data-only mode the parser will parse the module statement,
// the import statements and the file attributes and then return without
// parsing any of the mojom declarations. The result is as if the
// .mojom file did not have any declarations.
metaDataOnlyMode bool
// Set this variable to true to discard all remaining tokens in the
// |inputStream|.
discardRemaining bool
}
// Make a new Parser in preparation for calling Parse().
func MakeParser(canonicalFileName, specifiedName, fileContents string,
descriptorToPopulate *mojom.MojomDescriptor, importedFrom *mojom.MojomFile) Parser {
if descriptorToPopulate == nil {
panic("descriptorToPopulate must not be nil")
}
inputStream := lexer.Tokenize(fileContents)
parser := Parser{inputStream: inputStream,
mojomDescriptor: descriptorToPopulate}
parser.mojomFile = parser.mojomDescriptor.AddMojomFile(canonicalFileName, specifiedName,
importedFrom, fileContents)
return parser
}
func (p *Parser) SetDebugMode(debug bool) {
p.debugMode = debug
}
func (p *Parser) SetMetaDataOnlyMode(metaDataOnly bool) {
p.metaDataOnlyMode = metaDataOnly
}
// Perform the parsing on the |fileContents| passed to MakeParser().
// The |descriptorToPopulate| passed to MakeParser() will be populated.
// After Parse() is done call GetMojomFile() to get the resulting
// |MojomFile|. The |Imports| field of that |MojomFile| gives the
// files imported by the file that was just parsed. For each file |f|
// in |Imports|, call MojomDescriptor.ContainsFile(f) on
// |descriptorToPopulate| to determine whether or not |f| has already
// been parsed. If not then construct another Parser for |f| and its
// contents and call Parse() again.
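//
// A minimal sketch of that loop, assuming a |descriptor| to populate and a
// root file already read into |canonicalName|, |specifiedName| and
// |contents|; it also assumes that |Imports| can be ranged over and that
// ContainsFile accepts its elements directly, as described above:
//
//	parser := MakeParser(canonicalName, specifiedName, contents, descriptor, nil)
//	parser.Parse()
//	if !parser.OK() {
//		return parser.GetError()
//	}
//	for _, imported := range parser.GetMojomFile().Imports {
//		if !descriptor.ContainsFile(imported) {
//			// Locate and read the imported file, then construct another
//			// Parser for it (with the same |descriptor| and with
//			// parser.GetMojomFile() as |importedFrom|) and call Parse()
//			// on it as well.
//		}
//	}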
func (p *Parser) Parse() {
if p.used {
panic("An instance of Parser may only be used once.")
}
p.used = true
// Perform the recursive descent.
p.parseMojomFile()
// Check if there are any extraneous tokens left in the stream.
if p.OK() && !p.checkEOF() {
token := p.peekNextToken("")
message := fmt.Sprintf("Extraneous token: %v.", token)
p.parseErrorT(ParserErrorCodeExtraneousToken, message, token)
}
}
// After Parse() is done call this method to obtain the resulting
// MojomFile.
func (p *Parser) GetMojomFile() *mojom.MojomFile {
return p.mojomFile
}
// After Parse() is done call this method to obtain the comment tokens that were
// filtered out.
func (p *Parser) GetComments() []lexer.Token {
return p.inputStream.(*lexer.FilteredTokenStream).FilteredTokens()
}
// Returns the root of the parse tree if this Parser is in debug mode.
// Otherwise returns nil.
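//
// A minimal usage sketch, assuming debug mode was enabled before parsing
// (GetParseTree() returns nil otherwise):
//
//	parser.SetDebugMode(true)
//	parser.Parse()
//	fmt.Println(parser.GetParseTree())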
func (p *Parser) GetParseTree() *ParseNode {
return p.rootNode
}
////////////////////////////////////////////////////////////////////////////
// Parse Error Handling
////////////////////////////////////////////////////////////////////////////
type ParseError struct {
code ParseErrorCode
file *mojom.MojomFile
token lexer.Token
message string
}
// Make ParseError implement the error interface.
func (e ParseError) Error() string {
return mojom.UserErrorMessage(e.file, e.token, e.message)
}
// parseError sets the parser's current error to a ParseError with the given
// data and the last seen token.
func (p *Parser) parseError(code ParseErrorCode, message string) {
p.parseErrorT(code, message, p.lastSeen)
}
// parseErrorT sets the parser's current error to a ParseError with the given data.
func (p *Parser) parseErrorT(code ParseErrorCode, message string, token lexer.Token) {
p.err = &ParseError{
code: code,
file: p.mojomFile,
token: token,
message: message,
}
}
// Returns whether or not the Parser is in a non-error state.
func (p *Parser) OK() bool {
return p.err == nil
}
// Returns the current ParseError or nil if OK() is true.
func (p *Parser) GetError() error {
return p.err
}
//////////// Error codes //////////
type ParseErrorCode int
const (
// An attributes section appeared in a location where it is not allowed.
ParserErrorCodeBadAttributeLocation ParseErrorCode = iota
// Unexpected end-of-file
ParserErrorCodeEOF
// A simple name was expected but an identifier contained a dot.
ParserErrorCodeExpectedSimpleName
// After what appears to be a complete mojom file there were extra tokens.
ParserErrorCodeExtraneousToken
// An integer literal value was too large
ParserErrorCodeIntegerOutOfRange
// An integer literal value was ill-formed.
// TODO(azani) This is only necessary because the lexer allows some
// illegal tokens such as "0x"
ParserErrorCodeIntegerParseError
// A semicolon was missing.
// TODO(rudominer) Consider eliminating most semicolons from the language.
ParserErrorCodeMissingSemi
// The type of a value is not compatible with the type of the variable
// to which it is being assigned.
ParserErrorCodeNotAssignmentCompatible
// Either an explicitly specified ordinal is out of range, or else the
// combination of explicitly specified ordinals is inconsistent.
ParserErrorCodeBadOrdinal
// An unexpected token was encountered. This is the most common error.
ParserErrorCodeUnexpectedToken
)
////////////////////////////////////////////////////////////////////////////
// Methods for accessing the stream of tokens.
////////////////////////////////////////////////////////////////////////////
// Returns the next available token in the stream without advancing the
// stream cursor. In case the stream cursor is already past the end,
// the returned Token will be the EOF token. In this case the global
// error state will be set to the ParserErrorCodeEOF error code with the
// message "Unexpected end-of-file. " concatenated with |eofMessage|. In
// case of any other type of error the returned token is unspecified and
// the global error state will be set with more details.
func (p *Parser) peekNextToken(eofMessage string) (nextToken lexer.Token) {
if p.discardRemaining {
nextToken = lexer.EofToken()
} else {
nextToken = p.inputStream.PeekNext()
}
if nextToken.EOF() {
errorMessage := "Unexpected end-of-file. " + eofMessage
p.parseError(ParserErrorCodeEOF, errorMessage)
}
p.lastSeen = nextToken
return
}
// This method is similar to peekNextToken except that in the case of EOF
// it does not set the global error state but rather returns |eof| = |true|.
// This method is useful when EOF is an allowed state and you want
// to know what the extraneous token is in case it is not EOF.
func (p *Parser) checkEOF() (eof bool) {
if p.discardRemaining {
p.lastSeen = lexer.EofToken()
} else {
p.lastSeen = p.inputStream.PeekNext()
}
eof = p.lastSeen.EOF()
return
}
// Sets p.lastConsumed to the value of the next available token in the
// stream and then advances the stream cursor. If the cursor is already
// past the end of the stream then it sets p.lastConsumed to the EOF
// token.
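//
// Together with peekNextToken() and checkEOF() above, this is how the token
// stream is meant to be walked: peek, decide, then consume. A minimal sketch
// of that pattern, in which isSemicolon() stands in for whatever test is
// applied to the peeked token and is hypothetical:
//
//	nextToken := p.peekNextToken("I was expecting more of the declaration.")
//	if !p.OK() {
//		return
//	}
//	if isSemicolon(nextToken) { // hypothetical predicate on the peeked token
//		p.consumeNextToken()
//	}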
func (p *Parser) consumeNextToken() {
if p.discardRemaining {
p.lastSeen = lexer.EofToken()
p.lastConsumed = p.lastSeen
return
}
p.lastConsumed = p.inputStream.PeekNext()
p.inputStream.ConsumeNext()
}
////////////////////////////////////////////////////////////////////////////
// Parse Tree Support
////////////////////////////////////////////////////////////////////////////
// In normal operation we do not explicitly construct a parse tree. One is
// only constructed in debug mode.
///// ParseNode type /////
type ParseNode struct {
name string
tokens []*lexer.Token
parent *ParseNode
children []*ParseNode
}
func (node *ParseNode) String() string {
return toString(node, 0)
}
// Recursively generates a string representing a tree of nodes,
// where |indentLevel| indicates the level in the tree.
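//
// For example, a root node named "MOJOM_FILE" with a single child named
// "MODULE" (both names are hypothetical) renders, with the child's token
// list elided, roughly as:
//
//	^MOJOM_FILE
//	...^MODULE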
func toString(node *ParseNode, indentLevel int) string {
prefix := "\n" + strings.Repeat(".", indentLevel) + "^"
firstTokens := ""
if node.tokens != nil {
firstTokens = fmt.Sprintf("%s", node.tokens)
}
s := fmt.Sprintf("%s%s%s", prefix, node.name, firstTokens)
if node.children != nil {
for _, child := range node.children {
s += toString(child, indentLevel+3)
}
}
return s
}
func newParseNode(name string) *ParseNode {
node := new(ParseNode)
node.name = name
return node
}
func (node *ParseNode) appendChild(name string, firstToken *lexer.Token) *ParseNode {
child := newParseNode(name)
child.tokens = append(child.tokens, firstToken)
child.parent = node
node.children = append(node.children, child)
return child
}
func (p *Parser) pushRootNode(name string) {
if !p.debugMode {
return
}
p.rootNode = newParseNode(name)
p.currentNode = p.rootNode
}
func (p *Parser) pushChildNode(name string) {
if !p.debugMode {
return
}
if p.currentNode == nil {
panic("pushRootNode() must be invoked first.")
}
tokenCopy := p.lastSeen
childNode := p.currentNode.appendChild(name, &tokenCopy)
p.currentNode = childNode
}
func (p *Parser) attachToken() {
if !p.debugMode {
return
}
if p.currentNode == nil {
panic("Stack is empty.")
}
tokenCopy := p.lastSeen
p.currentNode.tokens = append(p.currentNode.tokens, &tokenCopy)
}
func (p *Parser) popNode() {
if !p.debugMode {
return
}
if p.currentNode == nil {
panic("stack is empty.")
}
p.currentNode = p.currentNode.parent
}
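// These helpers let the recursive-descent methods in parsing.go mirror the
// descent in the parse tree when debug mode is enabled. A minimal sketch of
// how a production method might bracket its work (parseFoo is hypothetical
// and not taken from parsing.go):
//
//	func (p *Parser) parseFoo() {
//		p.pushChildNode("foo")
//		defer p.popNode()
//		// Consume the production's tokens; attachToken() records the
//		// last-seen token on the current node.
//	}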