Miasm2
 All Classes Namespaces Files Functions Variables Typedefs Properties Macros
data_analysis.py
Go to the documentation of this file.
2  import get_expr_mem, get_list_rw, ExprId, ExprInt
3 from miasm2.ir.symbexec import symbexec
4 
5 
6 def get_node_name(label, i, n):
7  # n_name = "%s_%d_%s"%(label.name, i, n)
8  n_name = (label, i, n)
9  return n_name
10 
11 
12 def intra_bloc_flow_raw(ir_arch, flow_graph, irb):
13  """
14  Create data flow for an irbloc using raw IR expressions
15  """
16  in_nodes = {}
17  out_nodes = {}
18  current_nodes = {}
19  for i, exprs in enumerate(irb.irs):
20  list_rw = get_list_rw(exprs)
21  current_nodes.update(out_nodes)
22 
23  # gen mem arg to mem node links
24  all_mems = set()
25  for nodes_r, nodes_w in list_rw:
26  for n in nodes_r.union(nodes_w):
27  all_mems.update(get_expr_mem(n))
28  if not all_mems:
29  continue
30 
31  # print [str(x) for x in all_mems]
32  for n in all_mems:
33  node_n_w = get_node_name(irb.label, i, n)
34  if not n in nodes_r:
35  continue
36  o_r = n.arg.get_r(mem_read=False, cst_read=True)
37  for n_r in o_r:
38  if n_r in current_nodes:
39  node_n_r = current_nodes[n_r]
40  else:
41  node_n_r = get_node_name(irb.label, i, n_r)
42  current_nodes[n_r] = node_n_r
43  in_nodes[n_r] = node_n_r
44  flow_graph.add_uniq_edge(node_n_r, node_n_w)
45 
46  # gen data flow links
47  for nodes_r, nodes_w in list_rw:
48  for n_r in nodes_r:
49  if n_r in current_nodes:
50  node_n_r = current_nodes[n_r]
51  else:
52  node_n_r = get_node_name(irb.label, i, n_r)
53  current_nodes[n_r] = node_n_r
54  in_nodes[n_r] = node_n_r
55 
56  flow_graph.add_node(node_n_r)
57  for n_w in nodes_w:
58  node_n_w = get_node_name(irb.label, i + 1, n_w)
59  out_nodes[n_w] = node_n_w
60  # current_nodes[n_w] = node_n_w
61 
62  flow_graph.add_node(node_n_w)
63  flow_graph.add_uniq_edge(node_n_r, node_n_w)
64  irb.in_nodes = in_nodes
65  irb.out_nodes = out_nodes
66 
67 
68 def intra_bloc_flow_symbexec(ir_arch, flow_graph, irb):
69  """
70  Create data flow for an irbloc using symbolic execution
71  """
72  in_nodes = {}
73  out_nodes = {}
74  current_nodes = {}
75 
76  symbols_init = {}
77  for r in ir_arch.arch.regs.all_regs_ids:
78  # symbols_init[r] = ir_arch.arch.regs.all_regs_ids_init[i]
79  x = ExprId(r.name, r.size)
80  x.is_term = True
81  symbols_init[r] = x
82 
83  sb = symbexec(ir_arch, dict(symbols_init))
84  sb.emulbloc(irb)
85  # print "*"*40
86  # print irb
87  # print sb.dump_id()
88  # print sb.dump_mem()
89 
90  for n_w in sb.symbols:
91  # print n_w
92  v = sb.symbols[n_w]
93  if n_w in symbols_init and symbols_init[n_w] == v:
94  continue
95  read_values = v.get_r(cst_read=True)
96  # print n_w, v, [str(x) for x in read_values]
97  node_n_w = get_node_name(irb.label, len(irb.lines), n_w)
98 
99  for n_r in read_values:
100  if n_r in current_nodes:
101  node_n_r = current_nodes[n_r]
102  else:
103  node_n_r = get_node_name(irb.label, 0, n_r)
104  current_nodes[n_r] = node_n_r
105  in_nodes[n_r] = node_n_r
106 
107  out_nodes[n_w] = node_n_w
108  flow_graph.add_uniq_edge(node_n_r, node_n_w)
109 
110  irb.in_nodes = in_nodes
111  irb.out_nodes = out_nodes
112 
113 
114 def inter_bloc_flow_link(ir_arch, flow_graph, todo, link_exec_to_data):
115  lbl, current_nodes, exec_nodes = todo
116  # print 'TODO'
117  # print lbl
118  # print [(str(x[0]), str(x[1])) for x in current_nodes]
119  current_nodes = dict(current_nodes)
120 
121  # link current nodes to bloc in_nodes
122  if not lbl in ir_arch.blocs:
123  print "cannot find bloc!!", lbl
124  return set()
125  irb = ir_arch.blocs[lbl]
126  # pp(('IN', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()]))
127  to_del = set()
128  for n_r, node_n_r in irb.in_nodes.items():
129  if not n_r in current_nodes:
130  continue
131  # print 'add link', current_nodes[n_r], node_n_r
132  flow_graph.add_uniq_edge(current_nodes[n_r], node_n_r)
133  to_del.add(n_r)
134 
135  # if link exec to data, all nodes depends on exec nodes
136  if link_exec_to_data:
137  for n_x_r in exec_nodes:
138  for n_r, node_n_r in irb.in_nodes.items():
139  if not n_x_r in current_nodes:
140  continue
141  if isinstance(n_r, ExprInt):
142  continue
143  flow_graph.add_uniq_edge(current_nodes[n_x_r], node_n_r)
144 
145  # update current nodes using bloc out_nodes
146  for n_w, node_n_w in irb.out_nodes.items():
147  current_nodes[n_w] = node_n_w
148 
149  # get nodes involved in exec flow
150  x_nodes = tuple(sorted(list(irb.dst.get_r())))
151 
152  todo = set()
153  for lbl_dst in ir_arch.g.successors(irb.label):
154  todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes))
155 
156  # pp(('OUT', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()]))
157 
158  return todo
159 
160 
161 def create_implicit_flow(ir_arch, flow_graph):
162 
163  # first fix IN/OUT
164  # If a son read a node which in not in OUT, add it
165  todo = set(ir_arch.blocs.keys())
166  while todo:
167  lbl = todo.pop()
168  irb = ir_arch.blocs[lbl]
169  for lbl_son in ir_arch.g.successors(irb.label):
170  if not lbl_son in ir_arch.blocs:
171  print "cannot find bloc!!", lbl
172  continue
173  irb_son = ir_arch.blocs[lbl_son]
174  for n_r in irb_son.in_nodes:
175  if n_r in irb.out_nodes:
176  continue
177  if not isinstance(n_r, ExprId):
178  continue
179 
180  # print "###", n_r
181  # print "###", irb
182  # print "###", 'OUT', [str(x) for x in irb.out_nodes]
183  # print "###", irb_son
184  # print "###", 'IN', [str(x) for x in irb_son.in_nodes]
185 
186  node_n_w = irb.label, len(irb.lines), n_r
187  irb.out_nodes[n_r] = node_n_w
188  if not n_r in irb.in_nodes:
189  irb.in_nodes[n_r] = irb.label, 0, n_r
190  node_n_r = irb.in_nodes[n_r]
191  # print "###", node_n_r
192  for lbl_p in ir_arch.g.predecessors(irb.label):
193  todo.add(lbl_p)
194 
195  flow_graph.add_uniq_edge(node_n_r, node_n_w)
196 
197 
198 def inter_bloc_flow(ir_arch, flow_graph, irb_0, link_exec_to_data=True):
199 
200  todo = set()
201  done = set()
202  todo.add((irb_0, (), ()))
203 
204  while todo:
205  state = todo.pop()
206  if state in done:
207  continue
208  done.add(state)
209  out = inter_bloc_flow_link(ir_arch, flow_graph, state, link_exec_to_data)
210  todo.update(out)
211 
212 
214 
215  """
216  This algorithm will do symbolic execution on a function, trying to propagate
217  states between basic blocs in order to extract inter-blocs dataflow. The
218  algorithm tries to merge states from blocs with multiple parents.
219 
220  There is no real magic here, loops and complex merging will certainly fail.
221  """
222 
223  def __init__(self, ir_arch):
224  self.todo = set()
225  self.stateby_ad = {}
226  self.cpt = {}
227  self.states_var_done = set()
228  self.states_done = set()
229  self.total_done = 0
230  self.ir_arch = ir_arch
231 
232  def add_state(self, parent, ad, state):
233  variables = dict(state.symbols.items())
234 
235  # get bloc dead, and remove from state
236  b = self.ir_arch.get_bloc(ad)
237  if b is None:
238  raise ValueError("unknown bloc! %s" % ad)
239  """
240  dead = b.dead[0]
241  for d in dead:
242  if d in variables:
243  del(variables[d])
244  """
245  variables = variables.items()
246 
247  s = parent, ad, tuple(sorted(variables))
248  """
249  state_var = s[1]
250  if s in self.states_var_done:
251  print 'skip state'
252  return
253  if not ad in self.stateby_ad:
254  self.stateby_ad[ad] = set()
255  self.stateby_ad[ad].add(state_var)
256 
257  """
258  self.todo.add(s)
259 
260  """
261  if not ad in self.cpt:
262  self.cpt[ad] = 0
263  """
264  """
265  def get_next_min(self):
266  state_by_ad = {}
267  for state in self.todo:
268  ad = state[1]
269  if not ad in state_by_ad:
270  state_by_ad[ad] = []
271  state_by_ad[ad].append(state)
272  print "XX", [len(x) for x in state_by_ad.values()]
273  state_by_ad = state_by_ad.items()
274  state_by_ad.sort(key=lambda x:len(x[1]))
275  state_by_ad.reverse()
276  return state_by_ad.pop()[1][0]
277  """
278 
279  def get_next_state(self):
280  state = self.todo.pop()
281  return state
282 
283  def do_step(self):
284  if len(self.todo) == 0:
285  return None
286  if self.total_done > 600:
287  print "symbexec watchdog!"
288  return None
289  self.total_done += 1
290  print 'CPT', self.total_done
291  while self.todo:
292  # if self.total_done>20:
293  # self.get_next_min()
294  # state = self.todo.pop()
295  state = self.get_next_state()
296  parent, ad, s = state
297  self.states_done.add(state)
298  self.states_var_done.add(state)
299  # if s in self.states_var_done:
300  # print "state done"
301  # continue
302 
303  sb = symbexec(self.ir_arch, dict(s))
304 
305  return parent, ad, sb
306  return None