metasm/cpu/ia32/decompile.rb in metasm-1.0.3 vs metasm/cpu/ia32/decompile.rb in metasm-1.0.4
- old
+ new
@@ -57,11 +57,11 @@
dasm.address_binding[funcstart] = oldfuncbd if oldfuncbd
end
# Add the instruction-specific registers written/accessed by +di+ to the dependency lists.
# For a 'ret' opcode, the first entry of register_symbols (:eax in the old revision)
# is appended to +a+ when the return-type condition below holds.
# +dcmp+ and +w+ are not used in this implementation.
# presumably +a+ is the accessed-register list and +w+ the written-register list -- verify against caller
# NOTE(review): the condition tests func.type.kind_of?(C::BaseType), whereas the 'ret'
# handling in decompile_blocks tests func.type.type.kind_of?(C::BaseType) -- confirm which is intended
def decompile_func_finddeps_di(dcmp, func, di, a, w)
- a << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI
+ a << register_symbols[0] if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI
end
# list variable dependency for each block, remove useless writes
# returns { blockaddr => [list of vars that are needed by a following block] }
def decompile_func_finddeps(dcmp, blocks, func)
@@ -109,11 +109,11 @@
end
}
}
if stackoff # last block instr == subfunction call
deps_r[b] |= deps_subfunc[b] - deps_w[b]
- deps_w[b] |= [:eax, :ecx, :edx] # standard ABI
+ deps_w[b] |= register_symbols[0, 3] # standard ABI
end
}
bt = blocks.transpose
@@ -138,11 +138,11 @@
next true if (a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - bw).include? r
bw |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
false
}
- if r == :eax and (rdi || blk.list.last).opcode.name == 'ret'
+ if r == register_symbols[0] and (rdi || blk.list.last).opcode.name == 'ret'
func.type.type = C::BaseType.new(:void)
false
elsif rdi and rdi.backtrace_binding[r]
false # mov al, 42 ; ret -> don't regarg eax
else
@@ -192,27 +192,34 @@
dw
end
def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
+ eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols
+ ebx, esp, ebp = ebx, esp, ebp # fix ruby unused var warning
scope = func.initializer
func.type.args.each { |a| scope.symbol[a.name] = a }
stmts = scope.statements
blocks_toclean = myblocks.dup
func_entry = myblocks.first[0]
+ di_addr = nil
until myblocks.empty?
b, to = myblocks.shift
if l = dcmp.dasm.get_label_at(b)
- stmts << C::Label.new(l)
+ stmts << C::Label.new(l).with_misc(:di_addr => b)
end
# list of assignments [[dest reg, expr assigned]]
ops = []
# reg binding (reg => value, values.externals = regs at block start)
binding = {}
# Expr => CExpr
- ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
+ ce = lambda { |*e|
+ ret = dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope)
+ dcmp.walk_ce(ret) { |ee| ee.with_misc(:di_addr => di_addr) } if di_addr
+ ret
+ }
# Expr => Expr.bind(binding) => CExpr
ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }
# dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil])
commit = lambda {
@@ -233,10 +240,11 @@
}
# returns an array to use as funcall arguments
get_func_args = lambda { |di, f|
# XXX see remarks in #finddeps
+ # TODO x64
bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true)
stackoff = Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce rescue nil
args_todo = f.type.args.to_a.dup
args = []
if f.has_attribute('fastcall') # XXX DRY
@@ -281,23 +289,24 @@
args.map { |e| ceb[e] }
}
# go !
dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
+ di_addr = di.address
a = di.instruction.args
if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
# conditional jump
commit[]
n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
if di.opcode.name =~ /^loop(.+)?/
- cx = C::CExpression[:'--', ceb[:ecx]]
+ cx = C::CExpression[:'--', ceb[ecx]]
cc = $1 ? C::CExpression[cx, :'&&', ceb[decode_cc_to_expr($1)]] : cx
else
cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])]
end
# XXX switch/indirect/multiple jmp
- stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
+ stmts << C::If.new(C::CExpression[cc], C::Goto.new(n).with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr)
to.delete dcmp.dasm.normalize(n)
next
end
if di.opcode.name == 'mov'
@@ -310,11 +319,11 @@
dcmp.c_parser.parse("void intrinsic_set_#{a1}(__int#{sz});")
end
f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"]
a2 = a2.symbolic(di)
a2 = [a2, :&, 0xffff] if sz == 16
- stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type)
+ stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type).with_misc(:di_addr => di_addr)
next
end
case a2
when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg
if not dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
@@ -322,21 +331,21 @@
dcmp.c_parser.parse("__int#{sz} intrinsic_get_#{a2}(void);")
end
f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"]
t = f.type.type
binding.delete a1.symbolic(di)
- stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t), t)
+ stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t).with_misc(:di_addr => di_addr), t).with_misc(:di_addr => di_addr)
next
end
end
case di.opcode.name
when 'ret'
commit[]
ret = nil
- ret = C::CExpression[ceb[:eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
- stmts << C::Return.new(ret)
+ ret = C::CExpression[ceb[eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
+ stmts << C::Return.new(ret).with_misc(:di_addr => di_addr)
when 'call' # :saveip
n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
args = []
if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args
args = get_func_args[di, f]
@@ -365,13 +374,13 @@
dcmp.c_parser.toplevel.symbol[n] = f
dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
end
end
commit[]
- binding.delete :eax
- e = C::CExpression[f, :funcall, args]
- e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void)
+ binding.delete eax
+ e = C::CExpression[f, :funcall, args].with_misc(:di_addr => di_addr)
+ e = C::CExpression[ce[eax], :'=', e, f.type.type].with_misc(:di_addr => di_addr) if deps[b].include? eax and f.type.type != C::BaseType.new(:void)
stmts << e
when 'jmp'
#if di.comment.to_a.include? 'switch'
# n = di.instruction.args.first.symbolic(di)
# fptr = ceb[n]
@@ -386,27 +395,25 @@
# }
# stmts << sw
a = di.instruction.args.first
if a.kind_of? Expression
elsif not a.respond_to? :symbolic
- stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
+ stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil).with_misc(:di_addr => di_addr)
else
n = di.instruction.args.first.symbolic(di)
fptr = ceb[n]
binding.delete n
commit[]
if fptr.kind_of? C::CExpression and fptr.type.pointer? and fptr.type.untypedef.type.kind_of? C::Function
proto = fptr.type.untypedef.type
args = get_func_args[di, fptr.type]
else
proto = C::Function.new(C::BaseType.new(:void))
- fptr = C::CExpression[[fptr], C::Pointer.new(proto)]
+ fptr = C::CExpression[[fptr], C::Pointer.new(proto)].with_misc(:di_addr => di_addr)
args = []
end
- ret = C::Return.new(C::CExpression[fptr, :funcall, args])
- class << ret ; attr_accessor :from_instr end
- ret.from_instr = di
+ ret = C::Return.new(C::CExpression[fptr, :funcall, args].with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr)
stmts << ret
to = []
end
when 'lgdt'
if not dcmp.c_parser.toplevel.struct['segment_descriptor']
@@ -416,43 +423,43 @@
if not dcmp.c_parser.toplevel.symbol['intrinsic_lgdt']
dcmp.c_parser.parse('void intrinsic_lgdt(struct segment_table *);')
end
# need a way to transform arg => :frameptr+12
arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
when 'lidt'
if not dcmp.c_parser.toplevel.struct['interrupt_descriptor']
dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };')
dcmp.c_parser.parse('struct interrupt_table { __int16 size; struct interrupt_descriptor *table; } __attribute__((pack(2)));')
end
if not dcmp.c_parser.toplevel.symbol['intrinsic_lidt']
dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);')
end
arg = di.backtrace_binding.keys.grep(Indirection).first.pointer
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void))
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
when 'ltr', 'lldt'
if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"]
dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);")
end
arg = di.backtrace_binding.keys.first
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void))
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
when 'out'
sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
if not dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"]
dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);")
end
- port = di.instruction.args.grep(Expression).first || :edx
- stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[:eax]], C::BaseType.new(:void))
+ port = di.instruction.args.grep(Expression).first || edx
+ stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[eax]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr)
when 'in'
sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz
if not dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);")
end
- port = di.instruction.args.grep(Expression).first || :edx
+ port = di.instruction.args.grep(Expression).first || edx
f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"]
- binding.delete :eax
- stmts << C::CExpression.new(ce[:eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type)
+ binding.delete eax
+ stmts << C::CExpression.new(ce[eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type).with_misc(:di_addr => di_addr)
when 'sti', 'cli'
stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil)
when /^(mov|sto|lod)s([bwdq])/
op, sz = $1, $2
commit[]
@@ -460,38 +467,38 @@
pt = C::Pointer.new(C::BaseType.new("__int#{sz*8}".to_sym))
blk = C::Block.new(scope)
case op
when 'mov'
- blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', [:*, [[ceb[:esi]], pt]]]
- blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
- blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
+ blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr)
+ blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
+ blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
when 'sto'
- blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', ceb[:eax]]
- blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]]
+ blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', ceb[eax]].with_misc(:di_addr => di_addr)
+ blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr)
when 'lod'
- blk.statements << C::CExpression[ceb[:eax], :'=', [:*, [[ceb[:esi]], pt]]]
- blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]]
+ blk.statements << C::CExpression[ceb[eax], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr)
+ blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr)
#when 'sca'
#when 'cmp'
end
case (di.instruction.prefix || {})[:rep]
when nil
stmts.concat blk.statements
when 'rep'
- blk.statements << C::CExpression[ceb[:ecx], :'=', [ceb[:ecx], :-, [1]]]
- stmts << C::While.new(C::CExpression[ceb[:ecx]], blk)
+ blk.statements << C::CExpression[ceb[ecx], :'=', [ceb[ecx], :-, [1]]].with_misc(:di_addr => di_addr)
+ stmts << C::While.new(C::CExpression[ceb[ecx]], blk).with_misc(:di_addr => di_addr)
#when 'repz' # sca/cmp only
#when 'repnz'
end
next
else
bd = get_fwdemu_binding(di)
if di.backtrace_binding[:incomplete_binding]
commit[]
- stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
+ stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil).with_misc(:di_addr => di_addr)
else
update = {}
bd.each { |k, v|
if k.kind_of? ::Symbol and not deps[b].include? k
ops << [k, v]
@@ -502,9 +509,10 @@
end
}
binding.update update
end
end
+ di_addr = nil
}
commit[]
case to.length
when 0