Miasm2
 All Classes Namespaces Files Functions Variables Typedefs Properties Macros
parse_asm.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #-*- coding:utf-8 -*-
3 import re
4 
5 import miasm2.expression.expression as m2_expr
6 import miasm2.core.asmbloc as asmbloc
7 from miasm2.core.cpu import gen_base_expr, parse_ast
8 from miasm2.core.cpu import instruction
9 
# Data-declaration directive name -> size of one element, in bits.
declarator = {'byte': 8,
              'word': 16,
              'dword': 32,
              'qword': 64,
              'long': 32,
              }

# Element size in bits -> one-letter code (presumably a `struct` format
# character; not used in this part of the file -- confirm against callers).
size2pck = {8: 'B',
            16: 'H',
            32: 'I',
            64: 'Q',
            }

# Line classifiers. All of them are applied with .match(), i.e. anchored at
# the beginning of the line.
EMPTY_RE = re.compile(r'\s*$')
COMMENT_RE = re.compile(r'\s*;\S*')
LOCAL_LABEL_RE = re.compile(r'\s*(\.L\S+)\s*:')
DIRECTIVE_START_RE = re.compile(r'\s*\.')
DIRECTIVE_RE = re.compile(r'\s*\.(\S+)')
LABEL_RE = re.compile(r'\s*(\S+)\s*:')
# .LFB<n> / .LFE<n> labels are dropped by the parser. The counter may have
# several digits (.LFB12), hence \d+ rather than a single \d.
FORGET_LABEL_RE = re.compile(r'\s*\.LF[BE]\d+\s*:')
30 
31 
class Directive(object):

    """Base class of the directive markers stored among the parsed lines"""

    pass
37 
# NOTE(review): the `class` line was missing from this listing; the name comes
# from its use in parse_txt, the Directive base is assumed from
# DirectiveDontSplit -- confirm against the original file.
class DirectiveAlign(Directive):

    """Stand for alignment representation

    @alignment: value that parse_txt copies onto the `alignment` attribute
    of the block being built"""

    def __init__(self, alignment=1):
        self.alignment = alignment

    def __str__(self):
        return "Alignment %s" % self.alignment
47 
48 
# NOTE(review): the `class` line was missing from this listing; the name comes
# from its use in parse_txt, the Directive base is assumed from
# DirectiveDontSplit -- confirm against the original file.
class DirectiveSplit(Directive):

    """Marker for the custom `.split` directive: parse_txt closes the current
    block and does not link it to the block that follows"""

    pass
54 
55 
class DirectiveDontSplit(Directive):
    """Marker for the custom `.dontsplit` directive: parse_txt closes the
    current block but keeps it linked (c_next) to the block that follows"""
61 
62 
def guess_next_new_label(symbol_pool):
    """Create and return a label with the first unused "loc_XXXXXXXX" name

    Candidate names are tried in increasing order, starting from index 0,
    until @symbol_pool.getby_name() answers None for one of them.
    @symbol_pool: the asm_symbol_pool instance"""
    index = 0
    while symbol_pool.getby_name("loc_%.8X" % index) is not None:
        index += 1
    return symbol_pool.add_label("loc_%.8X" % index)
74 
75 
def replace_expr_labels(expr, symbol_pool, replace_id):
    """Visitor callback: rebind a label identifier to @symbol_pool

    When @expr is an ExprId whose name is an asm_label, a label of the same
    name is fetched from (or created in) @symbol_pool, a new ExprId of the
    same size is built around it, recorded in @replace_id under @expr and
    returned. Any other expression is returned untouched."""

    is_label_id = (isinstance(expr, m2_expr.ExprId) and
                   isinstance(expr.name, asmbloc.asm_label))
    if is_label_id:
        pooled_label = symbol_pool.getby_name_create(expr.name.name)
        substitute = m2_expr.ExprId(pooled_label, expr.size)
        replace_id[expr] = substitute
        return substitute
    return expr
88 
89 
def replace_orphan_labels(instr, symbol_pool):
    """Rewrite, in place, every argument of @instr so that the labels it
    references are the ones held by @symbol_pool"""

    for position, operand in enumerate(instr.args):
        # Substitutions are collected per argument, then applied in one go
        substitutions = {}

        def collect(sub_expr):
            return replace_expr_labels(sub_expr, symbol_pool, substitutions)

        operand.visit(collect)
        instr.args[position] = instr.args[position].replace_expr(substitutions)
99 
100 
# States of the block builder in parse_txt: no block is open / lines are
# being appended to the current block.
STATE_NO_BLOC, STATE_IN_BLOC = 0, 1
103 
104 
def parse_txt(mnemo, attrib, txt, symbol_pool=None):
    """Parse an assembly listing. Returns a couple (blocks, symbol_pool), where
    blocks is a list of asm_bloc and symbol_pool the associated asm_symbol_pool

    @mnemo: architecture used
    @attrib: architecture attribute
    @txt: assembly listing
    @symbol_pool: (optional) the asm_symbol_pool instance used to handle labels
    of the listing

    Raises ValueError on an unknown directive, and RuntimeError when a
    flow-breaking instruction is found in a delay slot or when an unexpected
    object is met while building the blocks.
    """

    if symbol_pool is None:
        symbol_pool = asmbloc.asm_symbol_pool()

    C_NEXT = asmbloc.asm_constraint.c_next
    C_TO = asmbloc.asm_constraint.c_to

    # First pass: turn each text line into a label, a raw data chunk, a
    # directive marker or an instruction.
    lines = []
    for line in txt.split('\n'):
        # empty
        if EMPTY_RE.match(line):
            continue
        # comment
        if COMMENT_RE.match(line):
            continue
        # labels to forget
        if FORGET_LABEL_RE.match(line):
            continue
        # label (any name, local ".L" labels included); this has to be tested
        # before the directive check because local labels start with a dot
        match_re = LABEL_RE.match(line)
        if match_re:
            label_name = match_re.group(1)
            label = symbol_pool.getby_name_create(label_name)
            lines.append(label)
            continue
        # directive
        if DIRECTIVE_START_RE.match(line):
            match_re = DIRECTIVE_RE.match(line)
            directive = match_re.group(1)
            if directive in ['text', 'data', 'bss']:
                continue
            if directive in ['string', 'ascii']:
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
                raw = raw.decode('string_escape')
                if directive == 'string':
                    raw += "\x00"
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive == 'ustring':
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
                raw = raw.decode('string_escape')
                # each character is followed by a NUL byte
                raw = "".join([string + '\x00' for string in raw])
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive in declarator:
                # Everything after the directive name is a comma separated
                # list of expressions. strip() accepts any whitespace (tabs
                # included) between the name and the first operand.
                data_raw = line[match_re.end():].strip()
                data_raw = data_raw.split(',')
                size = declarator[directive]
                expr_list = []

                # parser
                base_expr = gen_base_expr()[2]
                my_var_parser = parse_ast(lambda x: m2_expr.ExprId(x, size),
                                          lambda x:
                                          m2_expr.ExprInt(x, size))
                base_expr.setParseAction(my_var_parser)

                for element in data_raw:
                    element = element.strip()
                    element_expr = base_expr.parseString(element)[0]
                    expr_list.append(element_expr.canonize())

                raw_data = asmbloc.asm_raw(expr_list)
                raw_data.element_size = size
                lines.append(raw_data)
                continue
            if directive == 'comm':
                # TODO
                continue
            if directive == 'split':  # custom command
                lines.append(DirectiveSplit())
                continue
            if directive == 'dontsplit':  # custom command
                lines.append(DirectiveDontSplit())
                continue
            if directive == "align":
                align_value = int(line[match_re.end():], 0)
                lines.append(DirectiveAlign(align_value))
                continue
            # directives silently ignored
            if directive in ['file', 'intel_syntax', 'globl', 'local',
                             'type', 'size', 'ident', 'section']:
                continue
            if directive[0:4] == 'cfi_':
                continue

            raise ValueError("unknown directive %s" % str(directive))

        # code: drop a trailing comment, then let the architecture parse it
        if ';' in line:
            line = line[:line.find(';')]
        line = line.strip(' ').strip('\t')
        instr = mnemo.fromstring(line, attrib)

        # replace orphan asm_label with labels from symbol_pool
        replace_orphan_labels(instr, symbol_pool)

        if instr.dstflow():
            instr.dstflow2label(symbol_pool)
        lines.append(instr)

    asmbloc.log_asmbloc.info("___pre asm oki___")

    # Second pass: make blocks
    cur_block = None
    state = STATE_NO_BLOC
    i = 0
    blocks = []
    # block waiting for a c_next link toward the next block to be created
    block_to_nlink = None
    block_may_link = False
    # Countdown armed by a flow-breaking instruction with its number of delay
    # slots + 1; the current block is closed when it reaches 0.
    delayslot = 0
    while i < len(lines):
        if delayslot:
            delayslot -= 1
            if delayslot == 0:
                state = STATE_NO_BLOC
        line = lines[i]
        # no current block
        if state == STATE_NO_BLOC:
            if isinstance(line, DirectiveDontSplit):
                block_to_nlink = cur_block
                i += 1
                continue
            elif isinstance(line, DirectiveSplit):
                block_to_nlink = None
                i += 1
                continue
            elif not isinstance(line, asmbloc.asm_label):
                # First line must be a label. If it's not the case, generate
                # it. The line itself is not consumed: it is handled on the
                # next iteration, in the STATE_IN_BLOC branch.
                label = guess_next_new_label(symbol_pool)
                cur_block = asmbloc.asm_bloc(label, alignment=mnemo.alignment)
            else:
                cur_block = asmbloc.asm_bloc(line, alignment=mnemo.alignment)
                i += 1
            # Generate the current bloc
            blocks.append(cur_block)
            state = STATE_IN_BLOC
            if block_to_nlink:
                block_to_nlink.addto(
                    asmbloc.asm_constraint(cur_block.label,
                                           C_NEXT))
            block_to_nlink = None
            continue

        # in block
        elif state == STATE_IN_BLOC:
            if isinstance(line, DirectiveSplit):
                state = STATE_NO_BLOC
                block_to_nlink = None
            elif isinstance(line, DirectiveDontSplit):
                state = STATE_NO_BLOC
                block_to_nlink = cur_block
            elif isinstance(line, DirectiveAlign):
                cur_block.alignment = line.alignment
            elif isinstance(line, asmbloc.asm_raw):
                cur_block.addline(line)
                block_to_nlink = cur_block
            elif isinstance(line, asmbloc.asm_label):
                # A label closes the current block; the label is not
                # consumed here, it opens the next block.
                if block_to_nlink:
                    cur_block.addto(
                        asmbloc.asm_constraint(line, C_NEXT))
                    block_to_nlink = None
                state = STATE_NO_BLOC
                continue
            # instruction
            elif isinstance(line, instruction):
                cur_block.addline(line)
                # NOTE(review): this also re-arms the c_next link for a
                # delay-slot instruction that follows a non-splitting
                # branch -- confirm this is intended.
                block_to_nlink = cur_block
                if not line.breakflow():
                    i += 1
                    continue
                if delayslot:
                    raise RuntimeError("Cannot have breakflow in delayslot")
                if line.dstflow():
                    for dst in line.getdstflow(symbol_pool):
                        if not isinstance(dst, m2_expr.ExprId):
                            continue
                        if dst in mnemo.regs.all_regs_ids:
                            continue
                        cur_block.addto(asmbloc.asm_constraint(dst, C_TO))

                if not line.splitflow():
                    block_to_nlink = None

                # +1: the countdown is decremented at the top of the loop
                # before the next line is examined, so a value of 1 closes
                # the block right after this instruction.
                delayslot = line.delayslot + 1
            else:
                raise RuntimeError("unknown class %s" % line.__class__)
        i += 1

    for block in blocks:
        asmbloc.log_asmbloc.info(block)
    return blocks, symbol_pool
def gen_base_expr
Definition: cpu.py:309