blob: c8d5a05525084cc29ef6771606e33a544a80ab3a [file] [log] [blame]
Damien Martin-Guillerezf88f4d82015-09-25 13:56:55 +00001// Copyright 2014 The Bazel Authors. All rights reserved.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01002//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package com.google.devtools.build.lib.graph;
16
laurentlb3d2a68c2017-06-30 00:32:04 +020017import static java.util.Comparator.comparing;
18import static java.util.Comparator.comparingLong;
19
dbabkin9f58c502018-04-10 07:23:11 -070020import com.google.common.base.Preconditions;
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +020021import com.google.common.collect.ImmutableList;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010022import com.google.common.collect.ImmutableSet;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010023import java.util.ArrayList;
24import java.util.Collection;
25import java.util.Collections;
26import java.util.Comparator;
27import java.util.HashMap;
28import java.util.HashSet;
29import java.util.LinkedList;
30import java.util.List;
31import java.util.Map;
32import java.util.PriorityQueue;
33import java.util.Set;
dbabkin9f58c502018-04-10 07:23:11 -070034import java.util.concurrent.ConcurrentHashMap;
Janak Ramakrishnan706bc722015-08-25 22:02:38 +000035import javax.annotation.Nullable;
36
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010037/**
dbabkin9f58c502018-04-10 07:23:11 -070038 * {@code Digraph} a generic directed graph or "digraph", suitable for modeling asymmetric binary
39 * relations.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010040 *
dbabkin9f58c502018-04-10 07:23:11 -070041 * <p>An instance <code>G = &lt;V,E&gt;</code> consists of a set of nodes or vertices <code>V</code>
42 * , and a set of directed edges <code>E</code>, which is a subset of <code>V &times; V</code>. This
43 * permits self-edges but does not represent multiple edges between the same pair of nodes.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010044 *
dbabkin9f58c502018-04-10 07:23:11 -070045 * <p>Nodes may be labeled with values of any type (type parameter T). All nodes within a graph have
46 * distinct labels. The null pointer is not a valid label.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010047 *
dbabkin9f58c502018-04-10 07:23:11 -070048 * <p>The package supports various operations for modeling partial order relations, and supports
49 * input/output in AT&amp;T's 'dot' format. See http://www.research.att.com/sw/tools/graphviz/.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010050 *
dbabkin9f58c502018-04-10 07:23:11 -070051 * <p>Some invariants:
52 *
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010053 * <ul>
dbabkin9f58c502018-04-10 07:23:11 -070054 * <li>Each graph instances "owns" the nodes is creates. The behaviour of operations on nodes a
55 * graph does not own is undefined.
56 * <li>{@code Digraph} assumes immutability of node labels, much like {@link HashMap} assumes it
57 * for keys.
58 * <li>Mutating the underlying graph invalidates any sets and iterators backed by it.
59 * <li>Nodes can be added and removed concurrently. Edges can be added and removed concurrently
60 * too. While it is thread safe to add or remove edge, these operations are not atomic. Graph
61 * can be observable in inconsistent state during this operations, for instance: edge linked
62 * to only one node.
63 * <li>
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010064 * </ul>
65 *
dbabkin9f58c502018-04-10 07:23:11 -070066 * <p>Each node stores successor and predecessor adjacency sets using a representation that
67 * dynamically changes with size: small sets are stored as arrays, large sets using hash tables.
68 * This representation provides significant space and time performance improvements upon two prior
69 * versions: the earliest used only HashSets; a later version used linked lists, as described in
70 * Cormen, Leiserson &amp; Rivest.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010071 */
72public final class Digraph<T> implements Cloneable {
73
dbabkin9f58c502018-04-10 07:23:11 -070074 /** Maps labels to nodes, which are in strict 1:1 correspondence. */
75 private final Map<T, Node<T>> nodes = new ConcurrentHashMap<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010076
77 /**
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010078 * Construct an empty Digraph.
79 */
80 public Digraph() {}
81
82 /**
83 * Sanity-check: assert that a node is indeed a member of this graph and not
84 * another one. Perform this check whenever a function is supplied a node by
85 * the user.
86 */
87 private final void checkNode(Node<T> node) {
88 if (getNode(node.getLabel()) != node) {
89 throw new IllegalArgumentException("node " + node
90 + " is not a member of this graph");
91 }
92 }
93
94 /**
95 * Adds a directed edge between the nodes labelled 'from' and 'to', creating
96 * them if necessary.
97 *
98 * @return true iff the edge was not already present.
99 */
100 public boolean addEdge(T from, T to) {
101 Node<T> fromNode = createNode(from);
102 Node<T> toNode = createNode(to);
103 return addEdge(fromNode, toNode);
104 }
105
106 /**
107 * Adds a directed edge between the specified nodes, which must exist and
108 * belong to this graph.
109 *
110 * @return true iff the edge was not already present.
111 *
112 * Note: multi-edges are ignored. Self-edges are permitted.
113 */
114 public boolean addEdge(Node<T> fromNode, Node<T> toNode) {
115 checkNode(fromNode);
116 checkNode(toNode);
dbabkin9f58c502018-04-10 07:23:11 -0700117 return fromNode.addEdge(toNode);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100118 }
119
120 /**
121 * Returns true iff the graph contains an edge between the
122 * specified nodes, which must exist and belong to this graph.
123 */
124 public boolean containsEdge(Node<T> fromNode, Node<T> toNode) {
125 checkNode(fromNode);
126 checkNode(toNode);
127 // TODO(bazel-team): (2009) iterate only over the shorter of from.succs, to.preds.
128 return fromNode.getSuccessors().contains(toNode);
129 }
130
131 /**
132 * Removes the edge between the specified nodes. Idempotent: attempts to
133 * remove non-existent edges have no effect.
134 *
135 * @return true iff graph changed.
136 */
137 public boolean removeEdge(Node<T> fromNode, Node<T> toNode) {
138 checkNode(fromNode);
139 checkNode(toNode);
dbabkin9f58c502018-04-10 07:23:11 -0700140 return fromNode.removeEdge(toNode);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100141 }
142
143 /**
144 * Remove all nodes and edges.
145 */
146 public void clear() {
147 nodes.clear();
148 }
149
150 @Override
151 public String toString() {
152 return "Digraph[" + getNodeCount() + " nodes]";
153 }
154
155 @Override
156 public int hashCode() {
157 throw new UnsupportedOperationException(); // avoid nondeterminism
158 }
159
160 /**
161 * Returns true iff the two graphs are equivalent, i.e. have the same set
162 * of node labels, with the same connectivity relation.
163 *
164 * O(n^2) in the worst case, i.e. equivalence. The algorithm could be speed up by
165 * close to a factor 2 in the worst case by a more direct implementation instead
166 * of using isSubgraph twice.
167 */
168 @Override
169 public boolean equals(Object thatObject) {
170 /* If this graph is a subgraph of thatObject, then we know that thatObject is of
171 * type Digraph<?> and thatObject can be cast to this type.
172 */
173 return isSubgraph(thatObject) && ((Digraph<?>) thatObject).isSubgraph(this);
174 }
175
176 /**
177 * Returns true iff this graph is a subgraph of the argument. This means that this graph's nodes
178 * are a subset of those of the argument; moreover, for each node of this graph the set of
179 * successors is a subset of those of the corresponding node in the argument graph.
180 *
181 * This algorithm is O(n^2), but linear in the total sizes of the graphs.
182 */
183 public boolean isSubgraph(Object thatObject) {
184 if (this == thatObject) {
185 return true;
186 }
187 if (!(thatObject instanceof Digraph)) {
188 return false;
189 }
190
191 @SuppressWarnings("unchecked")
192 Digraph<T> that = (Digraph<T>) thatObject;
193 if (this.getNodeCount() > that.getNodeCount()) {
194 return false;
195 }
196 for (Node<T> n1: nodes.values()) {
197 Node<T> n2 = that.getNodeMaybe(n1.getLabel());
198 if (n2 == null) {
199 return false; // 'that' is missing a node
200 }
201
202 // Now compare the successor relations.
203 // Careful:
204 // - We can't do simple equality on the succs-sets because the
205 // nodes belong to two different graphs!
206 // - There's no need to check both predecessor and successor
207 // relations, either one is sufficient.
208 Collection<Node<T>> n1succs = n1.getSuccessors();
209 Collection<Node<T>> n2succs = n2.getSuccessors();
210 if (n1succs.size() > n2succs.size()) {
211 return false;
212 }
213 // foreach successor of n1, ensure n2 has a similarly-labeled succ.
214 for (Node<T> succ1: n1succs) {
215 Node<T> succ2 = that.getNodeMaybe(succ1.getLabel());
216 if (succ2 == null) {
217 return false;
218 }
219 if (!n2succs.contains(succ2)) {
220 return false;
221 }
222 }
223 }
224 return true;
225 }
226
227 /**
228 * Returns a duplicate graph with the same set of node labels and the same
229 * connectivity relation. The labels themselves are not cloned.
230 */
231 @Override
232 public Digraph<T> clone() {
233 final Digraph<T> that = new Digraph<T>();
Janak Ramakrishnan706bc722015-08-25 22:02:38 +0000234 visitNodesBeforeEdges(
235 new AbstractGraphVisitor<T>() {
236 @Override
237 public void visitEdge(Node<T> lhs, Node<T> rhs) {
238 that.addEdge(lhs.getLabel(), rhs.getLabel());
239 }
240
241 @Override
242 public void visitNode(Node<T> node) {
243 that.createNode(node.getLabel());
244 }
245 },
246 nodes.values(),
247 null);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100248 return that;
249 }
250
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +0200251 /** Returns a deterministic immutable copy of the nodes of this graph. */
Janak Ramakrishnan706bc722015-08-25 22:02:38 +0000252 public Collection<Node<T>> getNodes(final Comparator<? super T> comparator) {
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +0200253 return ImmutableList.sortedCopyOf(comparing(Node::getLabel, comparator), nodes.values());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100254 }
255
256 /**
257 * Returns an immutable view of the nodes of this graph.
258 *
259 * Note: we have to return Collection and not Set because values() returns
260 * one: the 'nodes' HashMap doesn't know that it is injective. :-(
261 */
262 public Collection<Node<T>> getNodes() {
263 return Collections.unmodifiableCollection(nodes.values());
264 }
265
266 /**
267 * @return the set of root nodes: those with no predecessors.
268 *
269 * NOTE: in a cyclic graph, there may be nodes that are not reachable from
270 * any "root".
271 */
272 public Set<Node<T>> getRoots() {
Ulf Adams07dba942015-03-05 14:47:37 +0000273 Set<Node<T>> roots = new HashSet<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100274 for (Node<T> node: nodes.values()) {
275 if (!node.hasPredecessors()) {
276 roots.add(node);
277 }
278 }
279 return roots;
280 }
281
282 /**
283 * @return the set of leaf nodes: those with no successors.
284 */
285 public Set<Node<T>> getLeaves() {
Ulf Adams07dba942015-03-05 14:47:37 +0000286 Set<Node<T>> leaves = new HashSet<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100287 for (Node<T> node: nodes.values()) {
288 if (!node.hasSuccessors()) {
289 leaves.add(node);
290 }
291 }
292 return leaves;
293 }
294
295 /**
296 * @return an immutable view of the set of labels of this graph's nodes.
297 */
298 public Set<T> getLabels() {
299 return Collections.unmodifiableSet(nodes.keySet());
300 }
301
302 /**
303 * Finds and returns the node with the specified label. If there is no such
304 * node, an exception is thrown. The null pointer is not a valid label.
305 *
306 * @return the node whose label is "label".
307 * @throws IllegalArgumentException if no node was found with the specified
308 * label.
309 */
310 public Node<T> getNode(T label) {
311 if (label == null) {
312 throw new NullPointerException();
313 }
314 Node<T> node = nodes.get(label);
315 if (node == null) {
316 throw new IllegalArgumentException("No such node label: " + label);
317 }
318 return node;
319 }
320
321 /**
322 * Find the node with the specified label. Returns null if it doesn't exist.
323 * The null pointer is not a valid label.
324 *
325 * @return the node whose label is "label", or null if it was not found.
326 */
327 public Node<T> getNodeMaybe(T label) {
328 if (label == null) {
329 throw new NullPointerException();
330 }
331 return nodes.get(label);
332 }
333
334 /**
335 * @return the number of nodes in the graph.
336 */
337 public int getNodeCount() {
338 return nodes.size();
339 }
340
341 /**
342 * @return the number of edges in the graph.
343 *
344 * Note: expensive! Useful when asserting against mutations though.
345 */
346 public int getEdgeCount() {
347 int edges = 0;
348 for (Node<T> node: nodes.values()) {
349 edges += node.getSuccessors().size();
350 }
351 return edges;
352 }
353
354 /**
dbabkin9f58c502018-04-10 07:23:11 -0700355 * Find or create a node with the specified label. This is the <i>only</i> factory of Nodes. The
356 * null pointer is not a valid label.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100357 */
358 public Node<T> createNode(T label) {
dbabkinc90d4b52018-04-20 00:14:15 -0700359 return nodes.computeIfAbsent(label, Digraph::createNodeNative);
dbabkin9f58c502018-04-10 07:23:11 -0700360 }
361
dbabkinc90d4b52018-04-20 00:14:15 -0700362 private static <T> Node<T> createNodeNative(T label) {
dbabkin9f58c502018-04-10 07:23:11 -0700363 Preconditions.checkNotNull(label);
364 return new Node<>(label);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100365 }
366
367 /******************************************************************
368 * *
369 * Graph Algorithms *
370 * *
371 ******************************************************************/
372
373 /**
374 * These only manipulate the graph through methods defined above.
375 */
376
377 /**
378 * Returns true iff the graph is cyclic. Time: O(n).
379 */
380 public boolean isCyclic() {
381
382 // To detect cycles, we use a colored depth-first search. All nodes are
383 // initially marked white. When a node is encountered, it is marked grey,
384 // and when its descendants are completely visited, it is marked black.
385 // If a grey node is ever encountered, then there is a cycle.
386 final Object WHITE = null; // i.e. not present in nodeToColor, the default.
387 final Object GREY = new Object();
388 final Object BLACK = new Object();
Ulf Adams07dba942015-03-05 14:47:37 +0000389 final Map<Node<T>, Object> nodeToColor = new HashMap<>(); // empty => all white
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100390
391 class CycleDetector { /* defining a class gives us lexical scope */
392 boolean visit(Node<T> node) {
393 nodeToColor.put(node, GREY);
394 for (Node<T> succ: node.getSuccessors()) {
395 if (nodeToColor.get(succ) == GREY) {
396 return true;
397 } else if (nodeToColor.get(succ) == WHITE) {
398 if (visit(succ)) {
399 return true;
400 }
401 }
402 }
403 nodeToColor.put(node, BLACK);
404 return false;
405 }
406 }
407
408 CycleDetector detector = new CycleDetector();
409 for (Node<T> node: nodes.values()) {
410 if (nodeToColor.get(node) == WHITE) {
411 if (detector.visit(node)) {
412 return true;
413 }
414 }
415 }
416 return false;
417 }
418
419 /**
420 * Returns the strong component graph of "this". That is, returns a new
421 * acyclic graph in which all strongly-connected components in the original
422 * graph have been "fused" into a single node.
423 *
424 * @return a new graph, whose node labels are sets of nodes of the
425 * original graph. (Do not get confused as to which graph each
426 * set of Nodes belongs!)
427 */
428 public Digraph<Set<Node<T>>> getStrongComponentGraph() {
429 Collection<Set<Node<T>>> sccs = getStronglyConnectedComponents();
430 Digraph<Set<Node<T>>> scGraph = createImageUnderPartition(sccs);
431 scGraph.removeSelfEdges(); // scGraph should be acyclic: no self-edges
432 return scGraph;
433 }
434
435 /**
436 * Returns a partition of the nodes of this graph into sets, each set being
437 * one strongly-connected component of the graph.
438 */
439 public Collection<Set<Node<T>>> getStronglyConnectedComponents() {
Ulf Adams07dba942015-03-05 14:47:37 +0000440 final List<Set<Node<T>>> sccs = new ArrayList<>();
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +0200441 NodeSetReceiver<T> r = sccs::add;
442 SccVisitor<T> v = new SccVisitor<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100443 for (Node<T> node : nodes.values()) {
444 v.visit(r, node);
445 }
446 return sccs;
447 }
448
449 /**
450 * <p> Given a partition of the graph into sets of nodes, returns the image
451 * of this graph under the function which maps each node to the
452 * partition-set in which it appears. The labels of the new graph are the
453 * (immutable) sets of the partition, and the edges of the new graph are the
454 * edges of the original graph, mapped via the same function. </p>
455 *
456 * <p> Note: the resulting graph may contain self-edges. If these are not
457 * wanted, call <code>removeSelfEdges()</code>> on the result. </p>
458 *
459 * <p> Interesting special case: if the partition is the set of
460 * strongly-connected components, the result of this function is the
461 * strong-component graph. </p>
462 */
463 public Digraph<Set<Node<T>>>
464 createImageUnderPartition(Collection<Set<Node<T>>> partition) {
465
466 // Build mapping function: each node label is mapped to its equiv class:
Ulf Adams07dba942015-03-05 14:47:37 +0000467 Map<T, Set<Node<T>>> labelToImage = new HashMap<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100468 for (Set<Node<T>> set: partition) {
469 // It's important to use immutable sets of node labels when sets are keys
470 // in a map; see ImmutableSet class for explanation.
471 Set<Node<T>> imageSet = ImmutableSet.copyOf(set);
472 for (Node<T> node: imageSet) {
473 labelToImage.put(node.getLabel(), imageSet);
474 }
475 }
476
477 if (labelToImage.size() != getNodeCount()) {
478 throw new IllegalArgumentException(
479 "createImageUnderPartition(): argument is not a partition");
480 }
481
482 return createImageUnderMapping(labelToImage);
483 }
484
485 /**
486 * Returns the image of this graph in a given function, expressed as a
487 * mapping from labels to some other domain.
488 */
489 public <IMAGE> Digraph<IMAGE>
490 createImageUnderMapping(Map<T, IMAGE> map) {
Ulf Adams07dba942015-03-05 14:47:37 +0000491 Digraph<IMAGE> imageGraph = new Digraph<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100492
493 for (Node<T> fromNode: nodes.values()) {
494 T fromLabel = fromNode.getLabel();
495
496 IMAGE fromImage = map.get(fromLabel);
497 if (fromImage == null) {
498 throw new IllegalArgumentException(
499 "Incomplete function: undefined for " + fromLabel);
500 }
501 imageGraph.createNode(fromImage);
502
503 for (Node<T> toNode: fromNode.getSuccessors()) {
504 T toLabel = toNode.getLabel();
505
506 IMAGE toImage = map.get(toLabel);
507 if (toImage == null) {
508 throw new IllegalArgumentException(
509 "Incomplete function: undefined for " + toLabel);
510 }
511 imageGraph.addEdge(fromImage, toImage);
512 }
513 }
514
515 return imageGraph;
516 }
517
518 /**
519 * Removes any self-edges (x,x) in this graph.
520 */
521 public void removeSelfEdges() {
522 for (Node<T> node: nodes.values()) {
523 removeEdge(node, node);
524 }
525 }
526
527 /**
528 * Finds the shortest directed path from "fromNode" to "toNode". The path is
529 * returned as an ordered list of nodes, including both endpoints. Returns
530 * null if there is no path. Uses breadth-first search. Running time is
531 * O(n).
532 */
533 public List<Node<T>> getShortestPath(Node<T> fromNode,
534 Node<T> toNode) {
535 checkNode(fromNode);
536 checkNode(toNode);
537
538 if (fromNode == toNode) {
539 return Collections.singletonList(fromNode);
540 }
541
Ulf Adams07dba942015-03-05 14:47:37 +0000542 Map<Node<T>, Node<T>> pathPredecessor = new HashMap<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100543
Ulf Adams07dba942015-03-05 14:47:37 +0000544 Set<Node<T>> marked = new HashSet<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100545
Ulf Adams07dba942015-03-05 14:47:37 +0000546 LinkedList<Node<T>> queue = new LinkedList<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100547 queue.addLast(fromNode);
548 marked.add(fromNode);
549
Ulf Adams07dba942015-03-05 14:47:37 +0000550 while (!queue.isEmpty()) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100551 Node<T> u = queue.removeFirst();
552 for (Node<T> v: u.getSuccessors()) {
553 if (marked.add(v)) {
554 pathPredecessor.put(v, u);
555 if (v == toNode) {
556 return getPathToTreeNode(pathPredecessor, v); // found a path
557 }
558 queue.addLast(v);
559 }
560 }
561 }
562 return null; // no path
563 }
564
565 /**
566 * Given a tree (expressed as a map from each node to its parent), and a
567 * starting node, returns the path from the root of the tree to 'node' as a
568 * list.
569 */
Janak Ramakrishnane72d5222015-02-26 17:09:18 +0000570 public static <X> List<X> getPathToTreeNode(Map<X, X> tree, X node) {
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +0200571 List<X> path = new ArrayList<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100572 while (node != null) {
573 path.add(node);
574 node = tree.get(node); // get parent
575 }
576 Collections.reverse(path);
577 return path;
578 }
579
580 /**
581 * Returns the nodes of an acyclic graph in topological order
582 * [a.k.a "reverse post-order" of depth-first search.]
583 *
584 * A topological order is one such that, if (u, v) is a path in
585 * acyclic graph G, then u is before v in the topological order.
586 * In other words "tails before heads" or "roots before leaves".
587 *
588 * @return The nodes of the graph, in a topological order
589 */
590 public List<Node<T>> getTopologicalOrder() {
591 List<Node<T>> order = getPostorder();
592 Collections.reverse(order);
593 return order;
594 }
595
596 /**
597 * Returns the nodes of an acyclic graph in topological order
598 * [a.k.a "reverse post-order" of depth-first search.]
599 *
600 * A topological order is one such that, if (u, v) is a path in
601 * acyclic graph G, then u is before v in the topological order.
602 * In other words "tails before heads" or "roots before leaves".
603 *
604 * If an ordering is given, returns a specific topological order from the set
605 * of all topological orders; if no ordering given, returns an arbitrary
606 * (nondeterministic) one, but is a bit faster because no sorting needs to be
607 * done for each node.
608 *
609 * @param edgeOrder the ordering in which edges originating from the same node
610 * are visited.
611 * @return The nodes of the graph, in a topological order
612 */
Janak Ramakrishnan706bc722015-08-25 22:02:38 +0000613 public List<Node<T>> getTopologicalOrder(Comparator<? super T> edgeOrder) {
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +0200614 CollectingVisitor<T> visitor = new CollectingVisitor<>();
615 DFS<T> visitation = new DFS<>(DFS.Order.POSTORDER, edgeOrder, false);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100616 visitor.beginVisit();
617 for (Node<T> node : getNodes(edgeOrder)) {
618 visitation.visit(node, visitor);
619 }
620 visitor.endVisit();
621
622 List<Node<T>> order = visitor.getVisitedNodes();
623 Collections.reverse(order);
624 return order;
625 }
626
627 /**
628 * Returns the nodes of an acyclic graph in post-order.
629 */
630 public List<Node<T>> getPostorder() {
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +0200631 CollectingVisitor<T> collectingVisitor = new CollectingVisitor<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100632 visitPostorder(collectingVisitor);
633 return collectingVisitor.getVisitedNodes();
634 }
635
636 /**
637 * Returns the (immutable) set of nodes reachable from node 'n' (reflexive
638 * transitive closure).
639 */
640 public Set<Node<T>> getFwdReachable(Node<T> n) {
641 return getFwdReachable(Collections.singleton(n));
642 }
643
644 /**
645 * Returns the (immutable) set of nodes reachable from any node in {@code
646 * startNodes} (reflexive transitive closure).
647 */
648 public Set<Node<T>> getFwdReachable(Collection<Node<T>> startNodes) {
649 // This method is intentionally not static, to permit future expansion.
650 DFS<T> dfs = new DFS<T>(DFS.Order.PREORDER, false);
651 for (Node<T> n : startNodes) {
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +0200652 dfs.visit(n, new AbstractGraphVisitor<>());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100653 }
654 return dfs.getMarked();
655 }
656
657 /**
658 * Returns the (immutable) set of nodes that reach node 'n' (reflexive
659 * transitive closure).
660 */
661 public Set<Node<T>> getBackReachable(Node<T> n) {
662 return getBackReachable(Collections.singleton(n));
663 }
664
665 /**
666 * Returns the (immutable) set of nodes that reach some node in {@code
667 * startNodes} (reflexive transitive closure).
668 */
669 public Set<Node<T>> getBackReachable(Collection<Node<T>> startNodes) {
670 // This method is intentionally not static, to permit future expansion.
671 DFS<T> dfs = new DFS<T>(DFS.Order.PREORDER, true);
672 for (Node<T> n : startNodes) {
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +0200673 dfs.visit(n, new AbstractGraphVisitor<>());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100674 }
675 return dfs.getMarked();
676 }
677
678 /**
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100679 * Removes the specified node in the graph.
680 *
dbabkin9f58c502018-04-10 07:23:11 -0700681 * <p>If preserveOrder flag is set than after removing node this method connects all predecessors
682 * and successors.
683 *
684 * <p>Let's consider graph
685 *
686 * <pre>
687 * a -> n -> c
688 * b -> n -> d
689 * </pre>
690 *
691 * After n removed the following edges will be added
692 *
693 * <pre>
694 * a -> c
695 * a -> d
696 * b -> c
697 * b -> d
698 * </pre>
699 *
700 * @param node the node to remove (must be in the graph).
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100701 * @param preserveOrder see removeNode(T, boolean).
702 */
dbabkin9f58c502018-04-10 07:23:11 -0700703 public Collection<Node<T>> removeNode(Node<T> node, boolean preserveOrder) {
704 checkNode(node);
705
706 Collection<Node<T>> predecessors = node.removeAllPredecessors();
707 Collection<Node<T>> successors = node.removeAllSuccessors();
708
709 List<Node<T>> neighbours = Collections.emptyList();
710
711 if (preserveOrder) {
712 neighbours = new ArrayList<>(successors.size() + predecessors.size());
713 neighbours.addAll(successors);
714 neighbours.addAll(predecessors);
715
716 for (Node<T> p : predecessors) {
717 for (Node<T> s : successors) {
718 p.addEdge(s);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100719 }
720 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100721 }
722
dbabkin9f58c502018-04-10 07:23:11 -0700723 Object del = nodes.remove(node.getLabel());
724 if (del != node) {
725 throw new IllegalStateException(del + " " + node);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100726 }
dbabkin9f58c502018-04-10 07:23:11 -0700727
728 return neighbours;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100729 }
730
731 /**
732 * Extracts the subgraph G' of this graph G, containing exactly the nodes
733 * specified by the labels in V', and preserving the original
734 * <i>transitive</i> graph relation among those nodes. </p>
735 *
736 * @param subset a subset of the labels of this graph; the resulting graph
737 * will have only the nodes with these labels.
738 */
739 public Digraph<T> extractSubgraph(final Set<T> subset) {
740 Digraph<T> subgraph = this.clone();
741 subgraph.subgraph(subset);
742 return subgraph;
743 }
744
745 /**
746 * Removes all nodes from this graph except those whose label is an element of {@code keepLabels}.
747 * Edges are added so as to preserve the <i>transitive</i> closure relation.
748 *
dbabkin9f58c502018-04-10 07:23:11 -0700749 * @param keepLabels a subset of the labels of this graph; the resulting graph will have only the
750 * nodes with these labels.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100751 */
dbabkin9f58c502018-04-10 07:23:11 -0700752 private void subgraph(final Set<T> keepLabels) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100753 // This algorithm does the following:
754 // Let keep = nodes that have labels in keepLabels.
755 // Let toRemove = nodes \ keep. reachables = successors and predecessors of keep in nodes.
756 // reachables is the subset of nodes of remove that are an immediate neighbor of some node in
757 // keep.
758 //
759 // Removes all nodes of reachables from keepLabels.
760 // Until reachables is empty:
761 // Takes n from reachables
762 // for all s in succ(n)
763 // for all p in pred(n)
764 // add the edge (p, s)
765 // add s to reachables
766 // for all p in pred(n)
767 // add p to reachables
768 // Remove n and its edges
769 //
770 // A few adjustments are needed to do the whole computation.
771
772 final Set<Node<T>> toRemove = new HashSet<>();
773 final Set<Node<T>> keepNeighbors = new HashSet<>();
774
775 // Look for all nodes if they are to be kept or removed
776 for (Node<T> node : nodes.values()) {
777 if (keepLabels.contains(node.getLabel())) {
778 // Node is to be kept
779 keepNeighbors.addAll(node.getPredecessors());
780 keepNeighbors.addAll(node.getSuccessors());
781 } else {
782 // node is to be removed.
783 toRemove.add(node);
784 }
785 }
786
787 if (toRemove.isEmpty()) {
788 // This premature return is needed to avoid 0-size priority queue creation.
789 return;
790 }
791
792 // We use a priority queue to look for low-order nodes first so we don't propagate the high
793 // number of paths of high-order nodes making the time consumption explode.
794 // For perfect results we should reorder the set each time we add a new edge but this would
795 // be too expensive, so this is a good enough approximation.
laurentlb3d2a68c2017-06-30 00:32:04 +0200796 final PriorityQueue<Node<T>> reachables =
797 new PriorityQueue<>(
798 toRemove.size(),
799 comparingLong(arg -> (long) arg.numPredecessors() * (long) arg.numSuccessors()));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100800
801 // Construct the reachables queue with the list of successors and predecessors of keep in
802 // toRemove.
803 keepNeighbors.retainAll(toRemove);
804 reachables.addAll(keepNeighbors);
805 toRemove.removeAll(reachables);
806
807 // Remove nodes, least connected first, preserving reachability.
808 while (!reachables.isEmpty()) {
dbabkin9f58c502018-04-10 07:23:11 -0700809
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100810 Node<T> node = reachables.poll();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100811
dbabkin9f58c502018-04-10 07:23:11 -0700812 Collection<Node<T>> neighbours = removeNode(node, /*preserveOrder*/ true);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100813
dbabkin9f58c502018-04-10 07:23:11 -0700814 for (Node<T> neighbour : neighbours) {
815 if (toRemove.remove(neighbour)) {
816 reachables.add(neighbour);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100817 }
818 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100819 }
820
821 // Final cleanup for non-reachable nodes.
822 for (Node<T> node : toRemove) {
823 removeNode(node, false);
824 }
825 }
826
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +0200827 @FunctionalInterface
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100828 private interface NodeSetReceiver<T> {
829 void accept(Set<Node<T>> nodes);
830 }
831
832 /**
833 * Find strongly connected components using path-based strong component
834 * algorithm. This has the advantage over the default method of returning
835 * the components in postorder.
836 *
837 * We visit nodes depth-first, keeping track of the order that
838 * we visit them in (preorder). Our goal is to find the smallest node (in
839 * this preorder of visitation) reachable from a given node. We keep track of the
840 * smallest node pointed to so far at the top of a stack. If we ever find an
841 * already-visited node, then if it is not already part of a component, we
842 * pop nodes from that stack until we reach this already-visited node's number
843 * or an even smaller one.
844 *
845 * Once the depth-first visitation of a node is complete, if this node's
846 * number is at the top of the stack, then it is the "first" element visited
847 * in its strongly connected component. Hence we pop all elements that were
848 * pushed onto the visitation stack and put them in a strongly connected
849 * component with this one, then send a passed-in {@link Digraph.NodeSetReceiver} this component.
850 */
851 private class SccVisitor<T> {
852 // Nodes already assigned to a strongly connected component.
Ulf Adams07dba942015-03-05 14:47:37 +0000853 private final Set<Node<T>> assigned = new HashSet<>();
854
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100855 // The order each node was visited in.
Ulf Adams07dba942015-03-05 14:47:37 +0000856 private final Map<Node<T>, Integer> preorder = new HashMap<>();
857
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100858 // Stack of all nodes visited whose SCC has not yet been determined. When an SCC is found,
859 // that SCC is an initial segment of this stack, and is popped off. Every time a new node is
860 // visited, it is put on this stack.
Ulf Adams07dba942015-03-05 14:47:37 +0000861 private final List<Node<T>> stack = new ArrayList<>();
862
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100863 // Stack of visited indices for the first-visited nodes in each of their known-so-far
864 // strongly connected components. A node pushes its index on when it is visited. If any of
865 // its successors have already been visited and are not in an already-found strongly connected
866 // component, then, since the successor was already visited, it and this node must be part of a
867 // cycle. So every node visited since the successor is actually in the same strongly connected
868 // component. In this case, preorderStack is popped until the top is at most the successor's
869 // index.
870 //
871 // After all descendants of a node have been visited, if the top element of preorderStack is
872 // still the current node's index, then it was the first element visited of the current strongly
873 // connected component. So all nodes on {@code stack} down to the current node are in its
874 // strongly connected component. And the node's index is popped from preorderStack.
Ulf Adams07dba942015-03-05 14:47:37 +0000875 private final List<Integer> preorderStack = new ArrayList<>();
876
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100877 // Index of node being visited.
878 private int counter = 0;
879
880 private void visit(NodeSetReceiver<T> visitor, Node<T> node) {
881 if (preorder.containsKey(node)) {
882 // This can only happen if this was a non-recursive call, and a previous
883 // visit call had already visited node.
884 return;
885 }
886 preorder.put(node, counter);
887 stack.add(node);
888 preorderStack.add(counter++);
889 int preorderLength = preorderStack.size();
890 for (Node<T> succ : node.getSuccessors()) {
891 Integer succPreorder = preorder.get(succ);
892 if (succPreorder == null) {
893 visit(visitor, succ);
894 } else {
895 // Does succ not already belong to an SCC? If it doesn't, then it
896 // must be in the same SCC as node. The "starting node" of this SCC
897 // must have been visited before succ (or is succ itself).
898 if (!assigned.contains(succ)) {
899 while (preorderStack.get(preorderStack.size() - 1) > succPreorder) {
900 preorderStack.remove(preorderStack.size() - 1);
901 }
902 }
903 }
904 }
905 if (preorderLength == preorderStack.size()) {
906 // If the length of the preorderStack is unchanged, we did not find any earlier-visited
907 // nodes that were part of a cycle with this node. So this node is the first-visited
908 // element in its strongly connected component, and we collect the component.
909 preorderStack.remove(preorderStack.size() - 1);
Ulf Adams07dba942015-03-05 14:47:37 +0000910 Set<Node<T>> scc = new HashSet<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100911 Node<T> compNode;
912 do {
913 compNode = stack.remove(stack.size() - 1);
914 assigned.add(compNode);
915 scc.add(compNode);
916 } while (!node.equals(compNode));
917 visitor.accept(scc);
918 }
919 }
920 }
921
922 /********************************************************************
923 * *
924 * Orders, traversals and visitors *
925 * *
926 ********************************************************************/
927
928 /**
929 * A visitation over all the nodes in the graph that invokes
930 * <code>visitor.visitNode()</code> for each node in a depth-first
931 * post-order: each node is visited <i>after</i> each of its successors; the
932 * order in which edges are traversed is the order in which they were added
933 * to the graph. <code>visitor.visitEdge()</code> is not called.
934 *
935 * @param startNodes the set of nodes from which to begin the visitation.
936 */
937 public void visitPostorder(GraphVisitor<T> visitor,
938 Iterable<Node<T>> startNodes) {
939 visitDepthFirst(visitor, DFS.Order.POSTORDER, false, startNodes);
940 }
941
942 /**
943 * Equivalent to {@code visitPostorder(visitor, getNodes())}.
944 */
945 public void visitPostorder(GraphVisitor<T> visitor) {
946 visitPostorder(visitor, nodes.values());
947 }
948
949 /**
950 * A visitation over all the nodes in the graph that invokes
951 * <code>visitor.visitNode()</code> for each node in a depth-first
952 * pre-order: each node is visited <i>before</i> each of its successors; the
953 * order in which edges are traversed is the order in which they were added
954 * to the graph. <code>visitor.visitEdge()</code> is not called.
955 *
956 * @param startNodes the set of nodes from which to begin the visitation.
957 */
958 public void visitPreorder(GraphVisitor<T> visitor,
959 Iterable<Node<T>> startNodes) {
960 visitDepthFirst(visitor, DFS.Order.PREORDER, false, startNodes);
961 }
962
963 /**
964 * Equivalent to {@code visitPreorder(visitor, getNodes())}.
965 */
966 public void visitPreorder(GraphVisitor<T> visitor) {
967 visitPreorder(visitor, nodes.values());
968 }
969
970 /**
971 * A visitation over all the nodes in the graph in depth-first order. See
972 * DFS constructor for meaning of 'order' and 'transpose' parameters.
973 *
974 * @param startNodes the set of nodes from which to begin the visitation.
975 */
976 public void visitDepthFirst(GraphVisitor<T> visitor,
977 DFS.Order order,
978 boolean transpose,
979 Iterable<Node<T>> startNodes) {
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +0200980 DFS<T> visitation = new DFS<>(order, transpose);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100981 visitor.beginVisit();
982 for (Node<T> node: startNodes) {
983 visitation.visit(node, visitor);
984 }
985 visitor.endVisit();
986 }
987
Janak Ramakrishnan706bc722015-08-25 22:02:38 +0000988 private static <T> Comparator<Node<T>> makeNodeComparator(
989 final Comparator<? super T> comparator) {
laurentlb3d2a68c2017-06-30 00:32:04 +0200990 return comparing(Node::getLabel, comparator::compare);
Janak Ramakrishnan706bc722015-08-25 22:02:38 +0000991 }
992
993 /**
Ulf Adams3ab82f72015-09-04 12:10:53 +0000994 * Given {@code unordered}, a collection of nodes and a (possibly null) {@code comparator} for
995 * their labels, returns a sorted collection if {@code comparator} is non-null, otherwise returns
996 * {@code unordered}.
Janak Ramakrishnan706bc722015-08-25 22:02:38 +0000997 */
998 private static <T> Collection<Node<T>> maybeOrderCollection(
999 Collection<Node<T>> unordered, @Nullable final Comparator<? super T> comparator) {
Jonathan Bluett-Duncan0df3ddbd2017-08-09 11:13:54 +02001000 return comparator == null
1001 ? unordered
1002 : ImmutableList.sortedCopyOf(makeNodeComparator(comparator), unordered);
Janak Ramakrishnan706bc722015-08-25 22:02:38 +00001003 }
1004
1005 private void visitNodesBeforeEdges(
1006 GraphVisitor<T> visitor,
1007 Iterable<Node<T>> startNodes,
1008 @Nullable Comparator<? super T> comparator) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001009 visitor.beginVisit();
1010 for (Node<T> fromNode: startNodes) {
1011 visitor.visitNode(fromNode);
Janak Ramakrishnan706bc722015-08-25 22:02:38 +00001012 for (Node<T> toNode : maybeOrderCollection(fromNode.getSuccessors(), comparator)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001013 visitor.visitEdge(fromNode, toNode);
1014 }
1015 }
1016 visitor.endVisit();
1017 }
1018
1019 /**
Janak Ramakrishnan706bc722015-08-25 22:02:38 +00001020 * A visitation over the graph that visits all nodes and edges in topological order
1021 * such that each node is visited before any edge coming out of that node; ties among nodes are
Ulf Adams3ab82f72015-09-04 12:10:53 +00001022 * broken using the provided {@code comparator} if not null; edges are visited in order specified
Janak Ramakrishnan706bc722015-08-25 22:02:38 +00001023 * by the comparator, <b>not</b> topological order of the target nodes.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001024 */
Janak Ramakrishnan706bc722015-08-25 22:02:38 +00001025 public void visitNodesBeforeEdges(
1026 GraphVisitor<T> visitor, @Nullable Comparator<? super T> comparator) {
1027 visitNodesBeforeEdges(
1028 visitor,
1029 comparator == null ? getTopologicalOrder() : getTopologicalOrder(comparator),
1030 comparator);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001031 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001032}