Miasm2
 All Classes Namespaces Files Functions Variables Typedefs Properties Macros
sembuilder.py
Go to the documentation of this file.
1 "Helper to quickly build instruction's semantic side effects"
2 
import ast
import inspect
import re
import textwrap

import miasm2.expression.expression as m2_expr
from miasm2.ir.ir import irbloc
9 
10 
class MiasmTransformer(ast.NodeTransformer):
    """AST visitor translating DSL to Miasm expression

    memX[Y]       -> ExprMem(Y, X)
    iX(Y)         -> ExprIntX(Y)
    X if Y else Z -> ExprCond(Y, X, Z)
    'X'(Y)        -> ExprOp('X', Y)
    ('X' % Y)(Z)  -> ExprOp('X' % Y, Z)

    The nodes created here carry no source location; run
    ast.fix_missing_locations on the resulting tree before compiling it.
    """

    # Parsers
    parse_integer = re.compile("^i([0-9]+)$")
    parse_mem = re.compile("^mem([0-9]+)$")

    # Node class the parser emits for a string literal on the running
    # interpreter (ast.Str on older versions, ast.Constant on newer ones).
    # Probing the parser avoids naming either class explicitly.
    _str_node = type(ast.parse("''", mode="eval").body)

    # Helpers
    @classmethod
    def _is_str(cls, node):
        """Return True if @node is a string literal"""
        if not isinstance(node, cls._str_node):
            return False
        # ast.Constant also stands for numbers, None, ...: check the payload
        return cls._str_node.__name__ == "Str" or isinstance(node.value, str)

    @staticmethod
    def _call(name, args):
        """Return the AST of the call @name(*@args)"""
        call = ast.Call(func=ast.Name(id=name, ctx=ast.Load()),
                        args=args, keywords=[])
        if "starargs" in ast.Call._fields:
            # Older grammars have two extra (optional) fields on Call
            call.starargs = None
            call.kwargs = None
        return call

    # Visitors
    def visit_Call(self, node):
        """iX(Y) -> ExprIntX(Y),
        'X'(Y) -> ExprOp('X', Y), ('X' % Y)(Z) -> ExprOp('X' % Y, Z)"""

        # Recursive visit
        node = self.generic_visit(node)
        func = node.func

        if isinstance(func, ast.Name):
            # iX(Y) -> ExprIntX(Y)
            integer = self.parse_integer.search(func.id)
            if integer is not None:
                func.id = "ExprInt%s" % integer.group(1)

        elif (self._is_str(func) or
              (isinstance(func, ast.BinOp) and
               isinstance(func.op, ast.Mod) and
               self._is_str(func.left))):
            # 'op'(args...) -> ExprOp('op', args...)
            # ('op' % (fmt))(args...) -> ExprOp('op' % (fmt), args...)
            node.func = ast.Name(id="ExprOp", ctx=ast.Load())
            # The operator name becomes the first argument
            node.args[0:0] = [func]

        return node

    def visit_Subscript(self, node):
        """memX[Y] -> ExprMem(Y, X)"""

        # Recursive visit (the index expression is translated here)
        node = self.generic_visit(node)

        # Detect the syntax
        if not isinstance(node.value, ast.Name):
            return node
        mem = self.parse_mem.search(node.value.id)
        if mem is None:
            return node

        # Older grammars wrap a plain index in an Index node
        addr = node.slice
        if type(addr).__name__ == "Index":
            addr = addr.value

        # Let the parser build the integer literal, so that the node class
        # fits the running interpreter; int() drops any leading zero
        size = ast.parse(str(int(mem.group(1))), mode="eval").body

        # Do replacement
        return self._call('ExprMem', [addr, size])

    def visit_IfExp(self, node):
        """X if Y else Z -> ExprCond(Y, X, Z)"""

        # Recursive visit (test, body and orelse are translated here)
        node = self.generic_visit(node)

        # Build the new ExprCond
        return self._call('ExprCond', [node.test, node.body, node.orelse])
95 
96 
class SemBuilder(object):
    """Helper for building instruction's semantic side effects method

    This class provides a decorator @parse to use on them.
    The context in which the function will be parsed must be supplied on
    instantiation
    """

    # Node class the parser emits for a string literal on the running
    # interpreter (ast.Str on older versions, ast.Constant on newer ones)
    _str_node = type(ast.parse("''", mode="eval").body)

    def __init__(self, ctx):
        """Create a SemBuilder
        @ctx: context dictionary used during parsing
        """
        # Init
        self.transformer = MiasmTransformer()
        self._ctx = dict(m2_expr.__dict__)
        self._ctx["irbloc"] = irbloc
        self._functions = {}

        # Update context (names supplied by the caller take precedence)
        self._ctx.update(ctx)

    @property
    def functions(self):
        """Return a dictionary name -> func of parsed functions"""
        return self._functions.copy()

    # Helpers
    @classmethod
    def _is_str(cls, node):
        """Return True if @node is a string literal"""
        if not isinstance(node, cls._str_node):
            return False
        # ast.Constant also stands for numbers, None, ...: check the payload
        return cls._str_node.__name__ == "Str" or isinstance(node.value, str)

    @staticmethod
    def _call(name, args):
        """Return the AST of the call @name(*@args)"""
        call = ast.Call(func=ast.Name(id=name, ctx=ast.Load()),
                        args=args, keywords=[])
        if "starargs" in ast.Call._fields:
            # Older grammars have two extra (optional) fields on Call
            call.starargs = None
            call.kwargs = None
        return call

    @staticmethod
    def _arg_name(arg):
        """Return the name of the function parameter node @arg"""
        # Parameters are ast.arg nodes (name in .arg) on Python 3 and
        # ast.Name nodes (name in .id) on Python 2
        return arg.arg if hasattr(arg, "arg") else arg.id

    @staticmethod
    def _param(name):
        """Return a function parameter node named @name"""
        if hasattr(ast, "arg"):
            return ast.arg(arg=name, annotation=None)
        return ast.Name(id=name, ctx=ast.Param())

    @staticmethod
    def _create_labels():
        """Return the AST standing for label creations"""
        out = ast.parse("lbl_end = ExprId(ir.get_next_instr(instr))").body
        out += ast.parse("lbl_if = ExprId(ir.gen_label())").body
        return out

    def _parse_body(self, body, argument_names):
        """Recursive function transforming a @body to a block expression
        @body: list of statements (AST) to translate
        @argument_names: names of the arguments of the function being parsed
        Return a couple (blocks, real_body):
        - blocks: a list of blocks, ie list of affblock, ie list of
          ExprAff (AST); a block opened by an 'if' is replaced by the AST of
          a call to 'irbloc'
        - real_body: AST to append to body (real python statements)
        Raise RuntimeError on unsupported statements"""

        # Init
        ## Real instructions
        real_body = []
        ## Final blocks
        blocks = [[[]]]

        for statement in body:

            if isinstance(statement, ast.Assign):
                # Only the first target is considered
                src = self.transformer.visit(statement.value)
                dst = self.transformer.visit(statement.targets[0])

                if (isinstance(dst, ast.Name) and
                        dst.id not in argument_names and
                        dst.id not in self._ctx):

                    # Real variable declaration
                    statement.value = src
                    real_body.append(statement)
                    continue

                # The destination is now read, as an argument of ExprAff
                dst.ctx = ast.Load()

                blocks[-1][-1].append(self._call('ExprAff', [dst, src]))

            elif (isinstance(statement, ast.Expr) and
                  self._is_str(statement.value)):
                # String (docstring, comment, ...) -> keep it
                real_body.append(statement)

            elif (isinstance(statement, ast.If) and
                  not statement.orelse):
                # Create jumps : ir.IRDst = lbl_if if cond else lbl_end
                # NOTE(review): the test is used as is, without going through
                # self.transformer -- confirm this is intended
                cond = statement.test
                # NOTE(review): every 'if' rebinds the same lbl_if / lbl_end
                # names -- confirm several 'if' in one function are supported
                real_body += self._create_labels()

                lbl_end = ast.Name(id='lbl_end', ctx=ast.Load())
                lbl_if = ast.Name(id='lbl_if', ctx=ast.Load())
                dst = self._call('ExprCond', [cond, lbl_if, lbl_end])

                if (isinstance(cond, ast.UnaryOp) and
                        isinstance(cond.op, ast.Not)):
                    ## if not cond -> switch exprCond
                    dst.args[1:] = dst.args[1:][::-1]
                    dst.args[0] = cond.operand

                IRDst = ast.Attribute(value=ast.Name(id='ir',
                                                     ctx=ast.Load()),
                                      attr='IRDst', ctx=ast.Load())
                blocks[-1][-1].append(self._call('ExprAff', [IRDst, dst]))

                # Create the new blocks
                sub_blocks, sub_body = self._parse_body(statement.body,
                                                        argument_names)
                if len(sub_blocks) > 1:
                    raise RuntimeError("Imbricated if unimplemented")

                ## Close the last block
                jmp_end = self._call('ExprAff', [IRDst, lbl_end])
                sub_blocks[-1][-1].append(jmp_end)
                sub_blocks[-1][-1] = ast.List(elts=sub_blocks[-1][-1],
                                              ctx=ast.Load())
                sub_blocks[-1] = ast.List(elts=sub_blocks[-1],
                                          ctx=ast.Load())

                ## Replace the block with a call to 'irbloc'
                lbl_if_name = ast.Attribute(value=ast.Name(id='lbl_if',
                                                           ctx=ast.Load()),
                                            attr='name', ctx=ast.Load())

                sub_blocks[-1] = self._call('irbloc', [lbl_if_name,
                                                       sub_blocks[-1]])
                blocks += sub_blocks
                real_body += sub_body

                # Prepare a new block for following statement
                blocks.append([[]])

            else:
                # TODO: real var, +=, /=, -=, <<=, >>=, if/else, ...
                raise RuntimeError("Unimplemented %s" % statement)

        return blocks, real_body

    def parse(self, func):
        """Function decorator, returning a correct method from a pseudo-Python
        one
        @func: function written with the DSL; its source must be reachable
        through inspect.getsource
        Return the generated function, which takes two leading arguments
        'ir' and 'instr' followed by the arguments of @func. It is also
        registered in self.functions under the name of @func"""

        # Get the function AST
        ## The source is dedented first: a function defined in a nested scope
        ## (class, function, ...) would not parse otherwise
        parsed = ast.parse(textwrap.dedent(inspect.getsource(func)))
        fc_ast = parsed.body[0]
        argument_names = [self._arg_name(arg) for arg in fc_ast.args.args]

        # Translate (blocks[0][0] is the current instr)
        blocks, body = self._parse_body(fc_ast.body, argument_names)

        # Build the new function
        fc_ast.args.args[0:0] = [self._param('ir'), self._param('instr')]
        cur_instr = blocks[0][0]
        if not blocks[-1][0]:
            ## Last block can be empty
            blocks.pop()
        # NOTE(review): statements following an 'if' leave a raw list here,
        # not an AST node -- confirm whether that case is meant to work
        other_blocks = blocks[1:]
        body.append(ast.Return(value=ast.Tuple(elts=[ast.List(elts=cur_instr,
                                                              ctx=ast.Load()),
                                                     ast.List(elts=other_blocks,
                                                              ctx=ast.Load())],
                                               ctx=ast.Load())))

        ## The parsed nodes are reused rather than rebuilt, so every field
        ## the running interpreter expects on them is already set
        fc_ast.body = body
        fc_ast.decorator_list = []
        parsed.body = [fc_ast]

        # To display the generated function, use codegen.to_source
        # codegen: https://github.com/andreif/codegen

        # Compile according to the context
        fixed = ast.fix_missing_locations(parsed)
        codeobj = compile(fixed, '<string>', 'exec')
        ctx = self._ctx.copy()
        ## Running the code object defines the function inside ctx
        eval(codeobj, ctx)

        # Get the function back
        self._functions[fc_ast.name] = ctx[fc_ast.name]
        return ctx[fc_ast.name]
291  return ctx[fc_ast.name]