blob: 6603c0fa0d192aa9d055cb80a48ea0f44951bebb [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package utils
import (
"bytes"
"fmt"
)
// This file contains an implementation of an algorithm to check the well-foundedness
// of two-sorted directed graphs. See the paper
// "Well-Founded Two-Sorted Directed Graphs" (https://goo.gl/ipxFKu) for background
// and detailed definitions. Here we give only a high-level overview.
//
// A two-sorted graph is a directed graph that contains two sorts of nodes called circle nodes and
// square nodes. A node in a two-sorted graph is *well-founded* iff it satisfies the
// following recursive definition:
// (i) Leaf nodes are well-founded
// (ii) A circle node is well-founded iff all of its children are well-founded
// (iii) A non-leaf square node is well-founded iff at least one of its children is well-founded.
// (See the paper for a more logically correct definition.)
//
// See the comments on the main function |CheckWellFounded| below for a description
// of the algorithm.
///////////////////////////////////////////////////
// Node type
//
// A |Node| is a node in a directed graph.
///////////////////////////////////////////////////
type Node interface {
// Returns the list of children of this node.
OutEdges() []Node
// Returns whether or not this node is a square node.
IsSquare() bool
// SetKnownWellFounded is invoked by the algorithm in order to set the
// fact that the algorithm has determined that this node is well-founded.
// An implementation of |Node| should cache this--even between different
// invocations of the algorithm on different starting nodes--and return
// |true| from the method |KnownWellFounded| just in case this method has
// ever been invoked. In this way any nodes verified to be well-founded during
// one run of the algorithm do not need to be checked during a later run of the
// algorithm on a different starting node.
SetKnownWellFounded()
// Returns whether or not the function |SetKnownWellFounded| has ever
// been invoked on this node, during the lifetime of a program using
// this library.
KnownWellFounded() bool
// Returns the name of this node, appropriate for debugging.
DebugName() string
}
///////////////////////////////////////////////////
// CheckWellFounded function
//
// This is the main public function.
///////////////////////////////////////////////////
// CheckWellFounded checks whether the sub-graph rooted at |root| contains any ill-founded nodes.
// If any ill-founded nodes are detected then a non-nil |CycleDescription| is returned containing
// a cycle of ill-founded nodes. If every node is well-founded then nil is returned.
//
// If the graph contains only circle nodes then well-foundedness is equivalent to
// acyclicality and so the returned cycle is a proof that the nodes contained in it
// are ill-founded. But in general (if the graph contains square nodes) the returned
// |CycleDescription| is only meant to serve as an example of some of the
// ill-foundedness contained in the subgraph. The cycle is guaranteed to contain only
// ill-founded nodes and the cycle may be considered part of a proof that thes nodes
// are in fact ill-founded. But the cycle does not necessarily contain every
// ill-founded node and it does not
// necessarily constitute a complete proof of the ill-foundedness
// because a graph that contains square nodes is allowed to contain some cycles and still be
// well-founded. The intended application of the returned CycleDescription is to be used to
// describe to a user the location of the ill-foundedness in order to allow the user to modify
// the graph in order to make it well-founded.
//
// This function may be invoked multiple times on different starting nodes during the life
// of a program. If the function is invoked once on node |x| and no ill-foundedness is
// found and then the function is invoked later on node |y| and if node |z| is reachable
// from both |x| and |y| then node |z| will be marked |KnownWellFounded| during the
// first run of the function and so the graph below |z| will not have to be inspected
// during the second run of the function.
//
// Our algorithm proceeds in three phases:
// (1) Phase 1 consists of a depth-first traversal of the graph whose purpose is two-fold:
// (a) To prove directly that as many nodes as possible are well-founded and mark them
// so by invoking SetKnownWellFounded().
// (b) To prepare for phase 2 by constructing two sets of nodes called the |foundationSet| and
// the |pendingSet|
// See the comments at |checkWellFoundedPhase1| for more details.
//
// In many cases phase 1 will be able to prove that every node is well-founded and so the algorithm
// will terminate without entering phase 2. This is the case for example if the graph has no
// cycles at all.
//
// (2) The purpose of phase 2 is to propogate the |KnownWellFounded| property to the remaining well-founded
// nodes (the ones that could not be verified as well-founded during phase 1.) Phase 2 proceeds in
// multiple rounds. During each round the |KnownWellFounded| property is propogated from the
// |foundationSet| to the |pendingSet|. See the comments at |checkWellFoundedPhase2| for more details.
//
// If there are no ill-founded nodes then the algorithm terminates after phase 2.
//
// (3) Phase 3 of the algorithm consists of building a |CycleDescription| in the case that there are ill-founded
// nodes. See the method |findKnownIllFoundedCycle|.
func CheckWellFounded(root Node) *CycleDescription {
return checkWellFounded(root, nil)
}
// checkWellFounded is a package-private version of CheckWellFounded intendend to be invoked by tests.
// It offers a second parameter |debugDataRequest|. If this is non-nil then its fields will be filled
// in with debugging data about the output of phase 1.
func checkWellFounded(root Node, debugDataRequest *debugData) *CycleDescription {
if root == nil {
return nil
}
finder := makeCycleFinder()
finder.debugDataRequest = debugDataRequest
holder := finder.holderForNode(root)
return finder.checkWellFounded(holder)
}
///////////////////////////////////////////////////
/// cycleFinder type
///
/// A cycleFinder is an object that holds state during the execution of the algorithm.
///////////////////////////////////////////////////
type cycleFinder struct {
// Maps each node seen by the computation to its holder.
nodeToHolder map[Node]*nodeHolder
foundationSet, pendingSet NodeSet
visitationIndex int
currentPath nodeStack
debugDataRequest *debugData
}
type debugData struct {
initialPendingSet []Node
initialFoundationSet []Node
}
func makeCycleFinder() cycleFinder {
finder := cycleFinder{}
finder.nodeToHolder = make(map[Node]*nodeHolder)
finder.foundationSet = MakeNodeSet()
finder.pendingSet = MakeNodeSet()
finder.currentPath = nodeStack{}
return finder
}
// checkWellFounded contains the top-level implementation of the algorithm.
func (finder *cycleFinder) checkWellFounded(nodeHolder *nodeHolder) *CycleDescription {
if nodeHolder.node.KnownWellFounded() {
// This node is already known to be well-founded because of an earlier
// execution of the algorithm.
return nil
}
// Perform phase 1.
finder.checkWellFoundedPhase1(nodeHolder)
// In tests we pass back some debugging information here.
if finder.debugDataRequest != nil {
finder.debugDataRequest.initialPendingSet = finder.pendingSet.ToNodeSlice()
finder.debugDataRequest.initialFoundationSet = finder.foundationSet.ToNodeSlice()
}
// All nodes have been verified as well-founded.
if finder.pendingSet.Empty() {
return nil
}
// Perform phase 2.
finder.checkWellFoundedPhase2()
// All nodes have been verified as well-founded.
if finder.pendingSet.Empty() {
return nil
}
// If we are here then there is at least one ill-founded node.
// In order to build a canonical cycle description, find the illfounded
// node with the least visitation order.
var minVisitationOrder int
var minIllfoundedNode Node = nil
for n, _ := range finder.pendingSet.elements {
if minIllfoundedNode == nil || n.visitationOrder < minVisitationOrder {
minVisitationOrder = n.visitationOrder
minIllfoundedNode = n.node
}
}
// Starting from the ill-founded node with the least visitation order,
// build a canonical cycle.
freshCycleFinder := makeCycleFinder()
holder := freshCycleFinder.holderForNode(minIllfoundedNode)
return freshCycleFinder.findKnownIllFoundedCycle(holder)
}
// checkWellFoundedPhase1 is a recursive helper function that does a depth-first traversal
// of the graph rooted at |nodeHolder|. The goal of phase 1 is to mark as many
// of the nodes as possible as |KnownWellFounded| and to set up the |pendingSet|,
// the |foundationSet|, and the |parentsToBeNotified| sets so that the remaining
// well-founded nodes will be marked as |KnownWellFounded| in phase 2.
//
// In more detail the following steps are performed for each node x:
// (a1) If it can be verified during the traversal that x is well-founded then
// x will be marked as |KnownWellFounded|. This occurs if x is a leaf, or x
// is a circle and each child node of x is known well-founded before being
// visited as a child of x, or x is a square and at-least one child node of x
// is known well-founded before being visited as a child of x.
// (a2) Otherwise if it cannot be determined during traveral that x is well-founded then
// (i) x is added to the |pendingSet|.
// (ii) x is added to the |parentsToBeNotified| set of all of its children.
// (b) In step (a1) if at the time x is found to be well-founded x already has
// some parent node x' in its |parentsToBeNotified| set (meaning that step a2 occurred
// earlier for x' and so x' is in the |pendingSet|) then x is added to the |foundationSet|.
// In phase 2, the fact that x is in the foundation set and x' is in the pending set will be
// used to propogate known-wellfoundedness to x'.
func (finder *cycleFinder) checkWellFoundedPhase1(nodeHolder *nodeHolder) {
if nodeHolder.node.KnownWellFounded() {
// This node is known to be well-founded before being visited.
// This occurs when the node was marked |KnownWellFounded| during a
// previous run of the algorithm. It follows that all nodes reachable
// from this node have also been so marked. We therefore don't need
// to traverse the part of the graph below this node during this run
// of the algorithm and so we treat this node as a leaf node.
nodeHolder.state = vsVisitEnded
return
}
// Mark the visit as started.
nodeHolder.state = vsVisitStarted
// Next we examine each of the children and recurse into the unvisited ones.
sawUnverifiedChild := false
for _, child := range nodeHolder.node.OutEdges() {
childHolder := finder.holderForNode(child)
if childHolder.state == vsUnvisited {
// Recursively visit this child.
finder.checkWellFoundedPhase1(childHolder)
}
// After having visited a child we use the results to update the status of this node.
// We could express the logic here more concisely, but the logic is easier
// to understand if we treat circles and squares seperately,
if nodeHolder.node.IsSquare() {
if nodeHolder.node.KnownWellFounded() {
// This square node has already been marked |KnownWellFounded| becuase
// of an earlier iteration through this loop. There is nothing else to do.
continue
}
if childHolder.node.KnownWellFounded() {
// We mark a square node as |KnownWellFounded| as soon as we can so
// that if any of its descendants are also parents, the well-foundedness
// has a chance to propogate to the descendant in a recursive call.
nodeHolder.node.SetKnownWellFounded()
} else {
// This square node is not yet known to be well-founded and the child node
// is not yet known to be well-founded. Set up a back link from the child.
childHolder.parentsToBeNotified.Add(nodeHolder)
sawUnverifiedChild = true
}
continue // Done handling the square case.
}
// Else the node is a circle. If the child is not yet known to be well-founded
// set up a back link from the child to this node.
if !childHolder.node.KnownWellFounded() {
childHolder.parentsToBeNotified.Add(nodeHolder)
sawUnverifiedChild = true
}
}
// If a circle node has only well-founded children, or a square node has no children at all,
// then the node is well-founded.
if !sawUnverifiedChild && !nodeHolder.node.KnownWellFounded() {
nodeHolder.node.SetKnownWellFounded()
}
// Possibly add this node to the |foundationSet| or the |pendingSet|.
if nodeHolder.node.KnownWellFounded() {
if !nodeHolder.parentsToBeNotified.Empty() {
finder.foundationSet.Add(nodeHolder)
}
} else {
finder.pendingSet.Add(nodeHolder)
}
// Mark the visit as ended.
nodeHolder.state = vsVisitEnded
return
}
// checkWellFoundedPhase2 performs phase 2 of the algorithm. The goal is to
// propogate known well-foundedness along the back-links that were established
// during phase 1. We have two sets of nodes: the |foundationSet| and the
// |pendingSet|. The |pendingSet| consists of all nodes that are not currently
// known to be well-founded. If the |pendingSet| is not empty when this method
// returns, then the nodes in the |pendingSet| are ill-founded. The |foundationSet|
// consists of the current frontier of the propogation. That is, the |foundationSet|
// consists of the nodes discovered to be well-founded in recent iterations and not yet
// used to propogate well-foundedness. (The |foundationSet| starts with the nodes discovered
// to be well-founded during phase 1, pruned to nodes that have parents that
// are in the |pendingSet|.) We iteratively remove a node n from the foundation set and
// for each of its parents p for which p is in the pending set and for which we can now verify
// well-foundedness, we remove p from the pending set and add it to the foundation set.
func (finder *cycleFinder) checkWellFoundedPhase2() {
for n := finder.foundationSet.removeRandomElement(); n != nil; n = finder.foundationSet.removeRandomElement() {
for p, _ := range n.parentsToBeNotified.elements {
if finder.pendingSet.Contains(p) {
knownWellFounded := true
if !p.node.IsSquare() {
for _, child := range p.node.OutEdges() {
if child != p.node && !child.KnownWellFounded() {
knownWellFounded = false
break
}
}
}
if knownWellFounded {
p.node.SetKnownWellFounded()
finder.foundationSet.Add(p)
finder.pendingSet.Remove(p)
}
}
}
}
}
// findKnownIllFoundedCycle finds and returns a |CycleDescription| starting from a node that is known
// to be ill-founded. This proceeds by following edges from an ill-founded node to
// an ill-founded child node until a cycle is formed. We return a *canonical* cycle,
// meaning we start from the node with the least possible visit index and follow edges to
// the child node with the least possible visit index. This is done in order to make testing of the algorithm easier.
// We are not concerned with optimizing the performance of phase 3 because in the intended application
// phase 3 can occur at most once in the lifetime of a program: Once an ill-founded node is detected the
// program exits with a cycle description allowing the user to fix the ill-foundedness.
func (finder *cycleFinder) findKnownIllFoundedCycle(nodeHolder *nodeHolder) *CycleDescription {
// Mark the current node as started
nodeHolder.state = vsVisitStarted
finder.currentPath.Push(nodeHolder)
for _, child := range nodeHolder.node.OutEdges() {
childHolder := finder.holderForNode(child)
if childHolder.state == vsVisitStarted {
// If the child has been started but not finished then we have found a cycle
// from the child to the current node back to the child.
return newCycleDescription(finder.currentPath.elements, childHolder, nodeHolder)
} else if !childHolder.node.KnownWellFounded() {
return finder.findKnownIllFoundedCycle(childHolder)
}
}
panic("Program logic error: Could not find a known ill-founded cycle.")
}
// Returns the nodeHolder for the given node.
func (finder *cycleFinder) holderForNode(node Node) *nodeHolder {
if holder, found := finder.nodeToHolder[node]; found {
return holder
}
// This is the first time we have seen this node. Assign it a new
// visitor order.
holder := newNodeHolder(node, finder.visitationIndex)
finder.visitationIndex++
finder.nodeToHolder[node] = holder
return holder
}
////////////////////////////////////////////////////
// nodeHolder type
////////////////////////////////////////////////////
type visitationState int
const (
vsUnvisited visitationState = iota
vsVisitStarted
vsVisitEnded
)
// A nodeHolder is an internal data structure used by the algorithm.
// It holds one node plus data about that node used by the algorithm.
type nodeHolder struct {
// The node
node Node
parentsToBeNotified NodeSet
visitationOrder int
state visitationState
}
func newNodeHolder(node Node, visitationOrder int) *nodeHolder {
nodeHolder := new(nodeHolder)
nodeHolder.node = node
nodeHolder.parentsToBeNotified = MakeNodeSet()
nodeHolder.state = vsUnvisited
nodeHolder.visitationOrder = visitationOrder
return nodeHolder
}
//////////////////////////////////////////////
/// nodeStack type
//////////////////////////////////////////////
// A nodeStack is a stack of *nodeHolders
type nodeStack struct {
elements []*nodeHolder
}
func (stack *nodeStack) Push(n *nodeHolder) {
stack.elements = append(stack.elements, n)
}
func (stack *nodeStack) Size() int {
return len(stack.elements)
}
func (stack *nodeStack) Pop() (n *nodeHolder) {
lastIndex := stack.Size() - 1
n = stack.elements[lastIndex]
stack.elements = stack.elements[:lastIndex]
return
}
func (stack *nodeStack) Peek() (n *nodeHolder) {
return stack.elements[stack.Size()-1]
}
func (stack *nodeStack) String() string {
var buffer bytes.Buffer
fmt.Fprintf(&buffer, "[")
first := true
for _, e := range stack.elements {
if !first {
fmt.Fprintf(&buffer, ", ")
}
fmt.Fprintf(&buffer, "%s", e.node.DebugName())
first = false
}
fmt.Fprintln(&buffer, "]")
return buffer.String()
}
///////////////////////////////////////////////////
/// NodeSet type
///////////////////////////////////////////////////
// A NodeSet is a set of nodeHolders.
type NodeSet struct {
elements map[*nodeHolder]bool
}
// MakeNodeSet makes a new empty NodeSet.
func MakeNodeSet() NodeSet {
nodeSet := NodeSet{}
nodeSet.elements = make(map[*nodeHolder]bool)
return nodeSet
}
// Add adds a Node to a NodeSet.
func (set *NodeSet) Add(node *nodeHolder) {
set.elements[node] = true
}
// AddAll adds all the nodes from |otherSet| to |set|.
func (set *NodeSet) AddAll(otherSet NodeSet) {
for e, _ := range otherSet.elements {
set.elements[e] = true
}
}
// Contains returns whether or not |node| is an element of |set|.
func (set *NodeSet) Contains(node *nodeHolder) bool {
_, ok := set.elements[node]
return ok
}
func (set *NodeSet) Remove(node *nodeHolder) {
delete(set.elements, node)
}
func (set *NodeSet) Empty() bool {
return len(set.elements) == 0
}
// doUntilEmpty repeatedly iterates through the elements of |set| removing
// them and invoking |f|. More precisely, for each element x of |set|,
// x is removed from |set| and then f(x) is invoked.
//
// The function |f| is allowed to mutate |set|. If |f| adds new elements to
// |set| those will also eventually be removed and operated on. Whether or
// not this process converges to an empty set depends entirely on the behavior
// of |f| and it is the caller's responsibility to ensure that it does.
func (set *NodeSet) doUntilEmpty(f func(node *nodeHolder)) {
for !set.Empty() {
for n, _ := range set.elements {
set.Remove(n)
f(n)
}
}
}
// removeRandomElement removes and returns an arbitrary element of |set|
// or nil of |set| is empty.
func (set *NodeSet) removeRandomElement() *nodeHolder {
for n, _ := range set.elements {
delete(set.elements, n)
return n
}
return nil
}
func (set *NodeSet) ToNodeSlice() []Node {
slice := make([]Node, 0, len(set.elements))
for n, _ := range set.elements {
slice = append(slice, n.node)
}
return slice
}
func (set *NodeSet) Size() int {
return len(set.elements)
}
// compareNodeSets is a package-private method used in our tests. It returns
// a non-nil error in case expected is not equal to actual.
func compareNodeSets(expected, actual *NodeSet) error {
for n, _ := range expected.elements {
if !actual.Contains(n) {
return fmt.Errorf("%s is in expected but not actual", n.node.DebugName())
}
}
for n, _ := range actual.elements {
if !expected.Contains(n) {
return fmt.Errorf("%s is in actual but not expected", n.node.DebugName())
}
}
return nil
}
// String returns a human readable string representation of |set|.
func (set *NodeSet) String() string {
var buffer bytes.Buffer
fmt.Fprintf(&buffer, "{")
first := true
for e, _ := range set.elements {
if !first {
fmt.Fprintf(&buffer, ", ")
}
fmt.Fprintf(&buffer, "%s", e.node.DebugName())
first = false
}
fmt.Fprintln(&buffer, "}")
return buffer.String()
}
///////////////////////////////////////////////////
// CycleDescription type
//
// A |CycleDescription| describes a cycle in a directed graph.
///////////////////////////////////////////////////
type CycleDescription struct {
first, last Node
path []Node
}
func (c *CycleDescription) String() string {
var buffer bytes.Buffer
fmt.Fprintf(&buffer, "first:%s", c.first.DebugName())
fmt.Fprintf(&buffer, ", last:%s", c.last.DebugName())
fmt.Fprintf(&buffer, ", path:{")
first := true
for _, n := range c.path {
if !first {
fmt.Fprintf(&buffer, ", ")
}
fmt.Fprintf(&buffer, "%s", n.DebugName())
first = false
}
fmt.Fprintln(&buffer, "}")
return buffer.String()
}
func newCycleDescription(path []*nodeHolder, first, last *nodeHolder) *CycleDescription {
description := CycleDescription{}
description.first = first.node
description.last = last.node
description.path = make([]Node, 0, len(path))
for _, n := range path {
if len(description.path) > 0 || n.node == first.node {
description.path = append(description.path, n.node)
}
}
if description.path[len(description.path)-1] != last.node {
panic(fmt.Sprintf("%s != %s", description.path[len(description.path)-1], last.node))
}
return &description
}