DataFlowAnalysis.java
/*
* Copyright 2008 The Closure Compiler Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.javascript.jscomp;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import com.google.common.collect.ImmutableList;
import com.google.javascript.jscomp.ControlFlowGraph.Branch;
import com.google.javascript.jscomp.NodeTraversal.AbstractPostOrderCallback;
import com.google.javascript.jscomp.graph.Annotation;
import com.google.javascript.jscomp.graph.DiGraph.DiGraphNode;
import com.google.javascript.jscomp.graph.LatticeElement;
import com.google.javascript.jscomp.parsing.parser.util.format.SimpleFormat;
import com.google.javascript.rhino.Node;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
/**
* A framework to help writing static program analysis. A subclass of
* this framework should specify how a single node changes the state
* of a program. This class finds a safe estimate (a fixed-point) for
* the whole program. The proven facts about the program will be
* annotated with
* {@link com.google.javascript.jscomp.graph.GraphNode#setAnnotation} to the
* given control flow graph's nodes in form of {@link LatticeElement}
* after calling {@link #analyze()}.
*
* <p>As a guideline, the following is a list of behaviors that any analysis
* can take:
* <ol>
* <li>Flow Direction: Is the analysis a forward or backward analysis?
* <li>Lattice Elements: How does the analysis represent the state of the
* program at any given point?
* <li>JOIN Operation: Given two incoming paths and a lattice state value, what
* can the compiler conclude at the join point?
* <li>Flow Equations: How does an instruction modify the state of program in
* terms of lattice values?
* <li>Initial Entry Value: What can the compiler assume at the beginning of the
* program?
* <li>Initial Estimate: What can the compiler assume at each point of the
* program? (What is the BOTTOM value of the lattice) By definition this lattice
* JOIN {@code x} for any {@code x} must also be {@code x}.
* </ol>
* To make these behaviors known to the framework, the following steps must be
* taken.
* <ol>
* <li>Flow Direction: Implement {@link #isForward()}.
* <li>Lattice Elements: Implement {@link LatticeElement}.
* <li>JOIN Operation: Implement
* {@link JoinOp#apply}.
* <li>Flow Equations: Implement
* {@link #flowThrough(Object, LatticeElement)}.
* <li>Initial Entry Value: Implement {@link #createEntryLattice()}.
* <li>Initial Estimate: Implement {@link #createInitialEstimateLattice()}.
* </ol>
*
* <p>Upon execution of the {@link #analyze()} method, nodes of the input
* control flow graph will be annotated with a {@link FlowState} object that
* represents maximum fixed point solution. Any previous annotations at the
* nodes of the control flow graph will be lost.
*
*
* @param <N> The control flow graph's node value type.
* @param <L> Lattice element type.
*/
abstract class DataFlowAnalysis<N, L extends LatticeElement> {
private final ControlFlowGraph<N> cfg;
final JoinOp<L> joinOp;
protected final Set<DiGraphNode<N, Branch>> orderedWorkSet;
/*
* Feel free to increase this to a reasonable number if you are finding that
* more and more passes need more steps before finding a fixed-point.
* If you just have a special case, consider calling
* {@link #analyze(int)} instead.
*/
public static final int MAX_STEPS = 800000;
/**
* Constructs a data flow analysis.
*
* <p>Typical usage
* <pre>
* DataFlowAnalysis dfa = ...
* dfa.analyze();
* </pre>
*
* {@link #analyze()} annotates the result to the control flow graph by
* means of {@link DiGraphNode#setAnnotation} without any
* modification of the graph itself. Additional calls to {@link #analyze()}
* recomputes the analysis which can be useful if the control flow graph
* has been modified.
*
* @param targetCfg The control flow graph object that this object performs
* on. Modification of the graph requires a separate call to
* {@link #analyze()}.
*
* @see #analyze()
*/
DataFlowAnalysis(ControlFlowGraph<N> targetCfg, JoinOp<L> joinOp) {
this.cfg = targetCfg;
this.joinOp = joinOp;
Comparator<DiGraphNode<N, Branch>> nodeComparator = cfg.getOptionalNodeComparator(isForward());
if (nodeComparator != null) {
this.orderedWorkSet = new TreeSet<>(nodeComparator);
} else {
this.orderedWorkSet = new LinkedHashSet<>();
}
}
/**
* Returns the control flow graph that this analysis was performed on.
* Modifications can be done on this graph, however, the only time that the
* annotations are correct is after {@link #analyze()} is called and before
* the graph has been modified.
*/
final ControlFlowGraph<N> getCfg() {
return cfg;
}
protected L join(L latticeA, L latticeB) {
return joinOp.apply(ImmutableList.of(latticeA, latticeB));
}
/**
* Checks whether the analysis is a forward flow analysis or backward flow
* analysis.
*
* @return {@code true} if it is a forward analysis.
*/
abstract boolean isForward();
/**
* Computes the output state for a given node and input state.
*
* @param node The node.
* @param input Input lattice that should be read-only.
* @return Output lattice.
*/
abstract L flowThrough(N node, L input);
/**
* Finds a fixed-point solution using at most {@link #MAX_STEPS}
* iterations.
*
* @see #analyze(int)
*/
final void analyze() {
analyze(MAX_STEPS);
}
/**
* Finds a fixed-point solution. The function has the side effect of replacing
* the existing node annotations with the computed solutions using {@link
* com.google.javascript.jscomp.graph.GraphNode#setAnnotation(Annotation)}.
*
* <p>Initially, each node's input and output flow state contains the value
* given by {@link #createInitialEstimateLattice()} (with the exception of the
* entry node of the graph which takes on the {@link #createEntryLattice()}
* value. Each node will use the output state of its predecessor and compute a
* output state according to the instruction. At that time, any nodes that
* depends on the node's newly modified output value will need to recompute
* their output state again. Each step will perform a computation at one node
* until no extra computation will modify any existing output state anymore.
*
* @param maxSteps Max number of iterations before the method stops and throw
* a {@link MaxIterationsExceededException}. This will prevent the
* analysis from going into a infinite loop.
*/
final void analyze(int maxSteps) {
initialize();
int step = 0;
while (!orderedWorkSet.isEmpty()) {
if (step > maxSteps) {
throw new MaxIterationsExceededException(
"Analysis did not terminate after " + maxSteps + " iterations");
}
DiGraphNode<N, Branch> curNode = orderedWorkSet.iterator().next();
orderedWorkSet.remove(curNode);
joinInputs(curNode);
if (flow(curNode)) {
// If there is a change in the current node, we want to grab the list
// of nodes that this node affects.
List<DiGraphNode<N, Branch>> nextNodes =
isForward() ? cfg.getDirectedSuccNodes(curNode) : cfg.getDirectedPredNodes(curNode);
for (DiGraphNode<N, Branch> nextNode : nextNodes) {
if (nextNode != cfg.getImplicitReturn()) {
orderedWorkSet.add(nextNode);
}
}
}
step++;
}
if (isForward()) {
joinInputs(getCfg().getImplicitReturn());
}
}
/**
* Gets the state of the initial estimation at each node.
*
* @return Initial state.
*/
abstract L createInitialEstimateLattice();
/**
* Gets the incoming state of the entry node.
*
* @return Entry state.
*/
abstract L createEntryLattice();
/**
* Initializes the work list and the control flow graph.
*/
protected void initialize() {
// TODO(user): Calling clear doesn't deallocate the memory in a
// LinkedHashSet. Consider creating a new work set if we plan to repeatedly
// call analyze.
orderedWorkSet.clear();
for (DiGraphNode<N, Branch> node : cfg.getDirectedGraphNodes()) {
node.setAnnotation(new FlowState<>(createInitialEstimateLattice(),
createInitialEstimateLattice()));
if (node != cfg.getImplicitReturn()) {
orderedWorkSet.add(node);
}
}
}
/**
* Performs a single flow through a node.
*
* @return {@code true} if the flow state differs from the previous state.
*/
protected boolean flow(DiGraphNode<N, Branch> node) {
FlowState<L> state = node.getAnnotation();
if (isForward()) {
L outBefore = state.out;
state.out = flowThrough(node.getValue(), state.in);
return !outBefore.equals(state.out);
} else {
L inBefore = state.in;
state.in = flowThrough(node.getValue(), state.out);
return !inBefore.equals(state.in);
}
}
/**
* Computes the new flow state at a given node's entry by merging the
* output (input) lattice of the node's predecessor (successor).
*
* @param node Node to compute new join.
*/
protected void joinInputs(DiGraphNode<N, Branch> node) {
FlowState<L> state = node.getAnnotation();
if (isForward()) {
if (cfg.getEntry() == node) {
state.setIn(createEntryLattice());
} else {
List<DiGraphNode<N, Branch>> inNodes = cfg.getDirectedPredNodes(node);
if (inNodes.size() == 1) {
FlowState<L> inNodeState = inNodes.get(0).getAnnotation();
state.setIn(inNodeState.getOut());
} else if (inNodes.size() > 1) {
List<L> values = new ArrayList<>(inNodes.size());
for (DiGraphNode<N, Branch> currentNode : inNodes) {
FlowState<L> currentNodeState = currentNode.getAnnotation();
values.add(currentNodeState.getOut());
}
state.setIn(joinOp.apply(values));
}
}
} else {
List<DiGraphNode<N, Branch>> inNodes = cfg.getDirectedSuccNodes(node);
if (inNodes.size() == 1) {
DiGraphNode<N, Branch> inNode = inNodes.get(0);
if (inNode == cfg.getImplicitReturn()) {
state.setOut(createEntryLattice());
} else {
FlowState<L> inNodeState = inNode.getAnnotation();
state.setOut(inNodeState.getIn());
}
} else if (inNodes.size() > 1) {
List<L> values = new ArrayList<>(inNodes.size());
for (DiGraphNode<N, Branch> currentNode : inNodes) {
FlowState<L> currentNodeState = currentNode.getAnnotation();
values.add(currentNodeState.getIn());
}
state.setOut(joinOp.apply(values));
}
}
}
/**
* The in and out states of a node.
*
* @param <L> Input and output lattice element type.
*/
static class FlowState<L extends LatticeElement> implements Annotation {
private L in;
private L out;
/**
* Private constructor. No other classes should create new states.
*
* @param inState Input.
* @param outState Output.
*/
private FlowState(L inState, L outState) {
checkNotNull(inState);
checkNotNull(outState);
this.in = inState;
this.out = outState;
}
L getIn() {
return in;
}
void setIn(L in) {
checkNotNull(in);
this.in = in;
}
L getOut() {
return out;
}
void setOut(L out) {
checkNotNull(out);
this.out = out;
}
@Override
public String toString() {
return SimpleFormat.format("IN: %s OUT: %s", in, out);
}
@Override
public boolean equals(Object o) {
if (o instanceof FlowState) {
FlowState<?> that = (FlowState<?>) o;
return that.in.equals(this.in)
&& that.out.equals(this.out);
}
return false;
}
@Override
public int hashCode() {
return Objects.hash(in, out);
}
}
/**
* The exception to be thrown if the analysis has been running for a long
* number of iterations. Chances are the analysis is not monotonic, a
* fixed-point cannot be found and it is currently stuck in an infinite loop.
*/
static class MaxIterationsExceededException extends RuntimeException {
private static final long serialVersionUID = 1L;
MaxIterationsExceededException(String msg) {
super(msg);
}
}
abstract static class BranchedForwardDataFlowAnalysis
<N, L extends LatticeElement> extends DataFlowAnalysis<N, L> {
@Override
protected void initialize() {
orderedWorkSet.clear();
for (DiGraphNode<N, Branch> node : getCfg().getDirectedGraphNodes()) {
int outEdgeCount = getCfg().getOutEdges(node.getValue()).size();
List<L> outLattices = new ArrayList<>();
for (int i = 0; i < outEdgeCount; i++) {
outLattices.add(createInitialEstimateLattice());
}
node.setAnnotation(new BranchedFlowState<>(
createInitialEstimateLattice(), outLattices));
if (node != getCfg().getImplicitReturn()) {
orderedWorkSet.add(node);
}
}
}
BranchedForwardDataFlowAnalysis(ControlFlowGraph<N> targetCfg, JoinOp<L> joinOp) {
super(targetCfg, joinOp);
}
@Override
final boolean isForward() {
return true;
}
/**
* The branched flow function maps a single lattice to a list of output
* lattices.
*
* <p>Each outgoing edge of a node will have a corresponding output lattice
* in the ordered returned by
* {@link com.google.javascript.jscomp.graph.DiGraph#getOutEdges(Object)}
* in the returned list.
*
* @return A list of output values depending on the edge's branch type.
*/
abstract List<L> branchedFlowThrough(N node, L input);
@Override
protected final boolean flow(DiGraphNode<N, Branch> node) {
BranchedFlowState<L> state = node.getAnnotation();
List<L> outBefore = state.out;
state.out = branchedFlowThrough(node.getValue(), state.in);
checkState(outBefore.size() == state.out.size());
for (int i = 0; i < outBefore.size(); i++) {
if (!outBefore.get(i).equals(state.out.get(i))) {
return true;
}
}
return false;
}
@Override
protected void joinInputs(DiGraphNode<N, Branch> node) {
BranchedFlowState<L> state = node.getAnnotation();
List<DiGraphNode<N, Branch>> predNodes =
getCfg().getDirectedPredNodes(node);
List<L> values = new ArrayList<>(predNodes.size());
for (DiGraphNode<N, Branch> predNode : predNodes) {
BranchedFlowState<L> predNodeState = predNode.getAnnotation();
L in = predNodeState.out.get(
getCfg().getDirectedSuccNodes(predNode).indexOf(node));
values.add(in);
}
if (getCfg().getEntry() == node) {
state.setIn(createEntryLattice());
} else if (!values.isEmpty()) {
state.setIn(joinOp.apply(values));
}
}
}
/**
* The in and out states of a node.
*
* @param <L> Input and output lattice element type.
*/
static class BranchedFlowState<L extends LatticeElement>
implements Annotation {
private L in;
private List<L> out;
/**
* Private constructor. No other classes should create new states.
*
* @param inState Input.
* @param outState Output.
*/
private BranchedFlowState(L inState, List<L> outState) {
checkNotNull(inState);
checkNotNull(outState);
this.in = inState;
this.out = outState;
}
L getIn() {
return in;
}
void setIn(L in) {
checkNotNull(in);
this.in = in;
}
@Override
public String toString() {
return SimpleFormat.format("IN: %s OUT: %s", in, out);
}
@Override
public boolean equals(Object o) {
if (o instanceof BranchedFlowState) {
BranchedFlowState<?> that = (BranchedFlowState<?>) o;
return that.in.equals(this.in)
&& that.out.equals(this.out);
}
return false;
}
@Override
public int hashCode() {
return Objects.hash(in, out);
}
}
/**
* Compute set of escaped variables. When a variable is escaped in a dataflow analysis, it can be
* referenced outside of the code that we are analyzing. A variable is escaped if any of the
* following is true:
*
* 1. Exported variables as they can be needed after the script terminates.
* 2. Names of named functions because in JavaScript, function foo(){} does not kill
* foo in the dataflow.
*
* @param jsScope Must be a function scope
*/
static void computeEscaped(
final Scope jsScope,
final Set<Var> escaped,
AbstractCompiler compiler,
Es6SyntacticScopeCreator scopeCreator) {
checkArgument(jsScope.isFunctionScope());
AbstractPostOrderCallback finder =
new AbstractPostOrderCallback() {
@Override
public void visit(NodeTraversal t, Node n, Node parent) {
Node enclosingBlock = NodeUtil.getEnclosingFunction(n);
if (jsScope.getRootNode() == enclosingBlock || !n.isName() || parent.isFunction()) {
return;
}
String name = n.getString();
Var var = t.getScope().getVar(name);
if (var != null) {
Node enclosingScopeNode = NodeUtil.getEnclosingFunction(var.getNode());
if (enclosingScopeNode == jsScope.getRootNode()) {
escaped.add(var);
}
}
}
};
Map<String, Var> allVarsInFn = new HashMap<>();
List<Var> orderedVars = new ArrayList<>();
NodeUtil.getAllVarsDeclaredInFunction(
allVarsInFn, orderedVars, compiler, scopeCreator, jsScope);
NodeTraversal t = new NodeTraversal(compiler, finder, scopeCreator);
t.traverseAtScope(jsScope);
// TODO (simranarora) catch variables should not be considered escaped in ES6. Getting rid of
// the catch check is causing breakages however
for (Var var : allVarsInFn.values()) {
if (var.getParentNode().isCatch()
|| compiler.getCodingConvention().isExported(var.getName())) {
escaped.add(var);
}
}
}
}