Miasm2
 All Classes Namespaces Files Functions Variables Typedefs Properties Macros
graph.py
Go to the documentation of this file.
1 from collections import defaultdict, namedtuple
2 
3 class DiGraph(object):
4  """Implementation of directed graph"""
5 
6  def __init__(self):
7  self._nodes = set()
8  self._edges = []
9  # N -> Nodes N2 with a edge (N -> N2)
10  self._nodes_succ = {}
11  # N -> Nodes N2 with a edge (N2 -> N)
12  self._nodes_pred = {}
13 
14  def __repr__(self):
15  out = []
16  for node in self._nodes:
17  out.append(str(node))
18  for src, dst in self._edges:
19  out.append("%s -> %s" % (src, dst))
20  return '\n'.join(out)
21 
22  def nodes(self):
23  return self._nodes
24 
25  def edges(self):
26  return self._edges
27 
28  def add_node(self, node):
29  if node in self._nodes:
30  return
31  self._nodes.add(node)
32  self._nodes_succ[node] = []
33  self._nodes_pred[node] = []
34 
35  def del_node(self, node):
36  """Delete the @node of the graph; Also delete every edge to/from this
37  @node"""
38 
39  if node in self._nodes:
40  self._nodes.remove(node)
41  for pred in self.predecessors(node):
42  self.del_edge(pred, node)
43  for succ in self.successors(node):
44  self.del_edge(node, succ)
45 
46  def add_edge(self, src, dst):
47  if not src in self._nodes:
48  self.add_node(src)
49  if not dst in self._nodes:
50  self.add_node(dst)
51  self._edges.append((src, dst))
52  self._nodes_succ[src].append(dst)
53  self._nodes_pred[dst].append(src)
54 
55  def add_uniq_edge(self, src, dst):
56  """Add an edge from @src to @dst if it doesn't already exist"""
57  if (src not in self._nodes_succ or
58  dst not in self._nodes_succ[src]):
59  self.add_edge(src, dst)
60 
61  def del_edge(self, src, dst):
62  self._edges.remove((src, dst))
63  self._nodes_succ[src].remove(dst)
64  self._nodes_pred[dst].remove(src)
65 
66  def predecessors_iter(self, node):
67  if not node in self._nodes_pred:
68  raise StopIteration
69  for n_pred in self._nodes_pred[node]:
70  yield n_pred
71 
72  def predecessors(self, node):
73  return [x for x in self.predecessors_iter(node)]
74 
75  def successors_iter(self, node):
76  if not node in self._nodes_succ:
77  raise StopIteration
78  for n_suc in self._nodes_succ[node]:
79  yield n_suc
80 
81  def successors(self, node):
82  return [x for x in self.successors_iter(node)]
83 
84  def leaves_iter(self):
85  for node in self._nodes:
86  if not self._nodes_succ[node]:
87  yield node
88 
89  def leaves(self):
90  return [x for x in self.leaves_iter()]
91 
92  def heads_iter(self):
93  for node in self._nodes:
94  if not self._nodes_pred[node]:
95  yield node
96 
97  def heads(self):
98  return [x for x in self.heads_iter()]
99 
100  def find_path(self, src, dst, cycles_count=0, done=None):
101  if done is None:
102  done = {}
103  if dst in done and done[dst] > cycles_count:
104  return [[]]
105  if src == dst:
106  return [[src]]
107  out = []
108  for node in self.predecessors(dst):
109  done_n = dict(done)
110  done_n[dst] = done_n.get(dst, 0) + 1
111  for path in self.find_path(src, node, cycles_count, done_n):
112  if path and path[0] == src:
113  out.append(path + [dst])
114  return out
115 
116  @staticmethod
117  def node2str(node):
118  return str(node)
119 
120  @staticmethod
121  def edge2str(src, dst):
122  return ""
123 
124  def dot(self):
125  out = """
126 digraph asm_graph {
127 graph [
128 splines=polyline,
129 ];
130 node [
131 fontsize = "16",
132 shape = "box"
133 ];
134 """
135  for node in self.nodes():
136  out += '%s [label="%s"];\n' % (
137  hash(node) & 0xFFFFFFFFFFFFFFFF, self.node2str(node))
138 
139  for src, dst in self.edges():
140  out += '%s -> %s [label="%s"]\n' % (hash(src) & 0xFFFFFFFFFFFFFFFF,
141  hash(dst) & 0xFFFFFFFFFFFFFFFF,
142  self.edge2str(src, dst))
143  out += "}"
144  return out
145 
146  @staticmethod
147  def _reachable_nodes(head, next_cb):
148  """Generic algorithm to compute all nodes reachable from/to node
149  @head"""
150 
151  todo = set([head])
152  reachable = set()
153  while todo:
154  node = todo.pop()
155  if node in reachable:
156  continue
157  reachable.add(node)
158  yield node
159  for next_node in next_cb(node):
160  todo.add(next_node)
161 
162  def reachable_sons(self, head):
163  """Compute all nodes reachable from node @head. Each son is an
164  immediate successor of an arbitrary, already yielded son of @head"""
165  return self._reachable_nodes(head, self.successors_iter)
166 
167  def reachable_parents(self, leaf):
168  """Compute all parents of node @leaf. Each parent is an immediate
169  predecessor of an arbitrary, already yielded parent of @leaf"""
170  return self._reachable_nodes(leaf, self.predecessors_iter)
171 
172  @staticmethod
173  def _compute_generic_dominators(head, reachable_cb, prev_cb, next_cb):
174  """Generic algorithm to compute either the dominators or postdominators
175  of the graph.
176  @head: the head/leaf of the graph
177  @reachable_cb: sons/parents of the head/leaf
178  @prev_cb: return predecessors/succesors of a node
179  @next_cb: return succesors/predecessors of a node
180  """
181 
182  nodes = set(reachable_cb(head))
183  dominators = {}
184  for node in nodes:
185  dominators[node] = set(nodes)
186 
187  dominators[head] = set([head])
188  modified = True
189  todo = set(nodes)
190 
191  while todo:
192  node = todo.pop()
193 
194  # Heads state must not be changed
195  if node == head:
196  continue
197 
198  # Compute intersection of all predecessors'dominators
199  new_dom = None
200  for pred in prev_cb(node):
201  if not pred in nodes:
202  continue
203  if new_dom is None:
204  new_dom = set(dominators[pred])
205  new_dom.intersection_update(dominators[pred])
206 
207  # We are not a head to we have at least one dominator
208  assert(new_dom is not None)
209 
210  new_dom.update(set([node]))
211 
212  # If intersection has changed, add sons to the todo list
213  if new_dom == dominators[node]:
214  continue
215 
216  dominators[node] = new_dom
217  for succ in next_cb(node):
218  todo.add(succ)
219  return dominators
220 
221  def compute_dominators(self, head):
222  """Compute the dominators of the graph"""
223  return self._compute_generic_dominators(head,
224  self.reachable_sons,
225  self.predecessors_iter,
226  self.successors_iter)
227 
228  def compute_postdominators(self, leaf):
229  """Compute the postdominators of the graph"""
230  return self._compute_generic_dominators(leaf,
231  self.reachable_parents,
232  self.successors_iter,
233  self.predecessors_iter)
234 
235  @staticmethod
236  def _walk_generic_dominator(node, gen_dominators, succ_cb):
237  """Generic algorithm to return an iterator of the ordered list of
238  @node's dominators/post_dominator.
239 
240  The function doesn't return the self reference in dominators.
241  @node: The start node
242  @gen_dominators: The dictionnary containing at least node's
243  dominators/post_dominators
244  @succ_cb: return predecessors/succesors of a node
245 
246  """
247  # Init
248  done = set()
249  if node not in gen_dominators:
250  # We are in a branch which doesn't reach head
251  return
252  node_gen_dominators = set(gen_dominators[node])
253  todo = set([node])
254 
255  # Avoid working on itself
256  node_gen_dominators.remove(node)
257 
258  # For each level
259  while node_gen_dominators:
260  new_node = None
261 
262  # Worklist pattern
263  while todo:
264  node = todo.pop()
265  if node in done:
266  continue
267  if node in node_gen_dominators:
268  new_node = node
269  break
270 
271  # Avoid loops
272  done.add(node)
273 
274  # Look for the next level
275  for pred in succ_cb(node):
276  todo.add(pred)
277 
278  # Return the node; it's the next starting point
279  assert(new_node is not None)
280  yield new_node
281  node_gen_dominators.remove(new_node)
282  todo = set([new_node])
283 
284  def walk_dominators(self, node, dominators):
285  """Return an iterator of the ordered list of @node's dominators
286  The function doesn't return the self reference in dominators.
287  @node: The start node
288  @dominators: The dictionnary containing at least node's dominators
289  """
290  return self._walk_generic_dominator(node,
291  dominators,
292  self.predecessors_iter)
293 
294  def walk_postdominators(self, node, postdominators):
295  """Return an iterator of the ordered list of @node's postdominators
296  The function doesn't return the self reference in postdominators.
297  @node: The start node
298  @postdominators: The dictionnary containing at least node's
299  postdominators
300 
301  """
302  return self._walk_generic_dominator(node,
303  postdominators,
304  self.successors_iter)
305 
307  """Compute the immediate dominators of the graph"""
308  dominators = self.compute_dominators(head)
309  idoms = {}
310 
311  for node in dominators:
312  for predecessor in self.walk_dominators(node, dominators):
313  if predecessor in dominators[node] and node != predecessor:
314  idoms[node] = predecessor
315  break
316  return idoms
317 
318  def compute_dominance_frontier(self, head):
319  """
320  Compute the dominance frontier of the graph
321 
322  Source: Cooper, Keith D., Timothy J. Harvey, and Ken Kennedy.
323  "A simple, fast dominance algorithm."
324  Software Practice & Experience 4 (2001), p. 9
325  """
326  idoms = self.compute_immediate_dominators(head)
327  frontier = {}
328 
329  for node in idoms:
330  if self._nodes_pred[node] >= 2:
331  for predecessor in self.predecessors_iter(node):
332  runner = predecessor
333  if runner not in idoms:
334  continue
335  while runner != idoms[node]:
336  if runner not in frontier:
337  frontier[runner] = set()
338 
339  frontier[runner].add(node)
340  runner = idoms[runner]
341  return frontier
342 
343  def _walk_generic_first(self, head, flag, succ_cb):
344  """
345  Generic algorithm to compute breadth or depth first search
346  for a node.
347  @head: the head of the graph
348  @flag: denotes if @todo is used as queue or stack
349  @succ_cb: returns a node's predecessors/successors
350  :return: next node
351  """
352  todo = [head]
353  done = set()
354 
355  while todo:
356  node = todo.pop(flag)
357  if node in done:
358  continue
359  done.add(node)
360 
361  for succ in succ_cb(node):
362  todo.append(succ)
363 
364  yield node
365 
366  def walk_breadth_first_forward(self, head):
367  """Performs a breadth first search on the graph from @head"""
368  return self._walk_generic_first(head, 0, self.successors_iter)
369 
370  def walk_depth_first_forward(self, head):
371  """Performs a depth first search on the graph from @head"""
372  return self._walk_generic_first(head, -1, self.successors_iter)
373 
375  """Performs a breadth first search on the reversed graph from @head"""
376  return self._walk_generic_first(head, 0, self.predecessors_iter)
377 
378  def walk_depth_first_backward(self, head):
379  """Performs a depth first search on the reversed graph from @head"""
380  return self._walk_generic_first(head, -1, self.predecessors_iter)
381 
382  def compute_natural_loops(self, head):
383  """
384  Computes all natural loops in the graph.
385 
386  Source: Aho, Alfred V., Lam, Monica S., Sethi, R. and Jeffrey Ullman.
387  "Compilers: Principles, Techniques, & Tools, Second Edition"
388  Pearson/Addison Wesley (2007), Chapter 9.6.6
389  :param head: head of the graph
390  :return: yield a tuple of the form (back edge, loop body)
391  """
392  for a, b in self.compute_back_edges(head):
393  body = self._compute_natural_loop_body(b, a)
394  yield ((b, a), body)
395 
396  def compute_back_edges(self, head):
397  """
398  Computes all back edges from a node to a
399  dominator in the graph.
400  :param head: head of graph
401  :return: yield a back edge
402  """
403  dominators = self.compute_dominators(head)
404 
405  # traverse graph
406  for node in self.walk_depth_first_forward(head):
407  for successor in self.successors_iter(node):
408  # check for a back edge to a dominator
409  if successor in dominators[node]:
410  edge = (node, successor)
411  yield edge
412 
413  def _compute_natural_loop_body(self, head, leaf):
414  """
415  Computes the body of a natural loop by a depth-first
416  search on the reversed control flow graph.
417  :param head: leaf of the loop
418  :param leaf: header of the loop
419  :return: set containing loop body
420  """
421  todo = [leaf]
422  done = {head}
423 
424  while todo:
425  node = todo.pop()
426  if node in done:
427  continue
428  done.add(node)
429 
430  for predecessor in self.predecessors_iter(node):
431  todo.append(predecessor)
432  return done
433 
435  """
436  Partitions the graph into strongly connected components.
437 
438  Iterative implementation of Gabow's path-based SCC algorithm.
439  Source: Gabow, Harold N.
440  "Path-based depth-first search for strong and biconnected components."
441  Information Processing Letters 74.3 (2000), pp. 109--110
442 
443  The iterative implementation is inspired by Mark Dickinson's
444  code:
445  http://code.activestate.com/recipes/
446  578507-strongly-connected-components-of-a-directed-graph/
447  :return: yield a strongly connected component
448  """
449  stack = []
450  boundaries = []
451  counter = len(self.nodes())
452 
453  # init index with 0
454  index = {v: 0 for v in self.nodes()}
455 
456  # state machine for worklist algorithm
457  VISIT, HANDLE_RECURSION, MERGE = 0, 1, 2
458  NodeState = namedtuple('NodeState', ['state', 'node'])
459 
460  for node in self.nodes():
461  # next node if node was already visited
462  if index[node]:
463  continue
464 
465  todo = [NodeState(VISIT, node)]
466  done = set()
467 
468  while todo:
469  current = todo.pop()
470 
471  if current.node in done:
472  continue
473 
474  # node is unvisited
475  if current.state == VISIT:
476  stack.append(current.node)
477  index[current.node] = len(stack)
478  boundaries.append(index[current.node])
479 
480  todo.append(NodeState(MERGE, current.node))
481  # follow successors
482  for successor in self.successors_iter(current.node):
483  todo.append(NodeState(HANDLE_RECURSION, successor))
484 
485  # iterative handling of recursion algorithm
486  elif current.state == HANDLE_RECURSION:
487  # visit unvisited successor
488  if index[current.node] == 0:
489  todo.append(NodeState(VISIT, current.node))
490  else:
491  # contract cycle if necessary
492  while index[current.node] < boundaries[-1]:
493  boundaries.pop()
494 
495  # merge strongly connected component
496  else:
497  if index[current.node] == boundaries[-1]:
498  boundaries.pop()
499  counter += 1
500  scc = set()
501 
502  while index[current.node] <= len(stack):
503  popped = stack.pop()
504  index[popped] = counter
505  scc.add(popped)
506 
507  done.add(current.node)
508 
509  yield scc
def compute_immediate_dominators
Definition: graph.py:306
def compute_strongly_connected_components
Definition: graph.py:434