metasm/cpu/ia32/decompile.rb in metasm-1.0.3 vs metasm/cpu/ia32/decompile.rb in metasm-1.0.4

- old
+ new

@@ -57,11 +57,11 @@ dasm.address_binding[funcstart] = oldfuncbd if oldfuncbd end # add di-specific registry written/accessed def decompile_func_finddeps_di(dcmp, func, di, a, w) - a << :eax if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI + a << register_symbols[0] if di.opcode.name == 'ret' and (not func.type.kind_of? C::BaseType or func.type.type.name != :void) # standard ABI end # list variable dependency for each block, remove useless writes # returns { blockaddr => [list of vars that are needed by a following block] } def decompile_func_finddeps(dcmp, blocks, func) @@ -109,11 +109,11 @@ end } } if stackoff # last block instr == subfunction call deps_r[b] |= deps_subfunc[b] - deps_w[b] - deps_w[b] |= [:eax, :ecx, :edx] # standard ABI + deps_w[b] |= register_symbols[0, 3] # standard ABI end } bt = blocks.transpose @@ -138,11 +138,11 @@ next true if (a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - bw).include? r bw |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] false } - if r == :eax and (rdi || blk.list.last).opcode.name == 'ret' + if r == register_symbols[0] and (rdi || blk.list.last).opcode.name == 'ret' func.type.type = C::BaseType.new(:void) false elsif rdi and rdi.backtrace_binding[r] false # mov al, 42 ; ret -> don't regarg eax else @@ -192,27 +192,34 @@ dw end def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil) + eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols + ebx, esp, ebp = ebx, esp, ebp # fix ruby unused var warning scope = func.initializer func.type.args.each { |a| scope.symbol[a.name] = a } stmts = scope.statements blocks_toclean = myblocks.dup func_entry = myblocks.first[0] + di_addr = nil until myblocks.empty? b, to = myblocks.shift if l = dcmp.dasm.get_label_at(b) - stmts << C::Label.new(l) + stmts << C::Label.new(l).with_misc(:di_addr => b) end # list of assignments [[dest reg, expr assigned]] ops = [] # reg binding (reg => value, values.externals = regs at block start) binding = {} # Expr => CExpr - ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) } + ce = lambda { |*e| + ret = dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) + dcmp.walk_ce(ret) { |ee| ee.with_misc(:di_addr => di_addr) } if di_addr + ret + } # Expr => Expr.bind(binding) => CExpr ceb = lambda { |*e| ce[Expression[*e].bind(binding)] } # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil]) commit = lambda { @@ -233,10 +240,11 @@ } # returns an array to use as funcall arguments get_func_args = lambda { |di, f| # XXX see remarks in #finddeps + # TODO x64 bt = dcmp.dasm.backtrace(:esp, di.address, :snapshot_addr => func_entry, :include_start => true) stackoff = Expression[[bt, :+, @size/8], :-, :esp].bind(:esp => :frameptr).reduce rescue nil args_todo = f.type.args.to_a.dup args = [] if f.has_attribute('fastcall') # XXX DRY @@ -281,23 +289,24 @@ args.map { |e| ceb[e] } } # go ! dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx| + di_addr = di.address a = di.instruction.args if di.opcode.props[:setip] and not di.opcode.props[:stopexec] # conditional jump commit[] n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address) if di.opcode.name =~ /^loop(.+)?/ - cx = C::CExpression[:'--', ceb[:ecx]] + cx = C::CExpression[:'--', ceb[ecx]] cc = $1 ? C::CExpression[cx, :'&&', ceb[decode_cc_to_expr($1)]] : cx else cc = ceb[decode_cc_to_expr(di.opcode.name[1..-1])] end # XXX switch/indirect/multiple jmp - stmts << C::If.new(C::CExpression[cc], C::Goto.new(n)) + stmts << C::If.new(C::CExpression[cc], C::Goto.new(n).with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr) to.delete dcmp.dasm.normalize(n) next end if di.opcode.name == 'mov' @@ -310,11 +319,11 @@ dcmp.c_parser.parse("void intrinsic_set_#{a1}(__int#{sz});") end f = dcmp.c_parser.toplevel.symbol["intrinsic_set_#{a1}"] a2 = a2.symbolic(di) a2 = [a2, :&, 0xffff] if sz == 16 - stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type) + stmts << C::CExpression.new(f, :funcall, [ceb[a2]], f.type.type).with_misc(:di_addr => di_addr) next end case a2 when Ia32::CtrlReg, Ia32::DbgReg, Ia32::TstReg, Ia32::SegReg if not dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"] @@ -322,21 +331,21 @@ dcmp.c_parser.parse("__int#{sz} intrinsic_get_#{a2}(void);") end f = dcmp.c_parser.toplevel.symbol["intrinsic_get_#{a2}"] t = f.type.type binding.delete a1.symbolic(di) - stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t), t) + stmts << C::CExpression.new(ce[a1.symbolic(di)], :'=', C::CExpression.new(f, :funcall, [], t).with_misc(:di_addr => di_addr), t).with_misc(:di_addr => di_addr) next end end case di.opcode.name when 'ret' commit[] ret = nil - ret = C::CExpression[ceb[:eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void - stmts << C::Return.new(ret) + ret = C::CExpression[ceb[eax]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void + stmts << C::Return.new(ret).with_misc(:di_addr => di_addr) when 'call' # :saveip n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address) args = [] if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args args = get_func_args[di, f] @@ -365,13 +374,13 @@ dcmp.c_parser.toplevel.symbol[n] = f dcmp.c_parser.toplevel.statements << C::Declaration.new(f) end end commit[] - binding.delete :eax - e = C::CExpression[f, :funcall, args] - e = C::CExpression[ce[:eax], :'=', e, f.type.type] if deps[b].include? :eax and f.type.type != C::BaseType.new(:void) + binding.delete eax + e = C::CExpression[f, :funcall, args].with_misc(:di_addr => di_addr) + e = C::CExpression[ce[eax], :'=', e, f.type.type].with_misc(:di_addr => di_addr) if deps[b].include? eax and f.type.type != C::BaseType.new(:void) stmts << e when 'jmp' #if di.comment.to_a.include? 'switch' # n = di.instruction.args.first.symbolic(di) # fptr = ceb[n] @@ -386,27 +395,25 @@ # } # stmts << sw a = di.instruction.args.first if a.kind_of? Expression elsif not a.respond_to? :symbolic - stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil) + stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil).with_misc(:di_addr => di_addr) else n = di.instruction.args.first.symbolic(di) fptr = ceb[n] binding.delete n commit[] if fptr.kind_of? C::CExpression and fptr.type.pointer? and fptr.type.untypedef.type.kind_of? C::Function proto = fptr.type.untypedef.type args = get_func_args[di, fptr.type] else proto = C::Function.new(C::BaseType.new(:void)) - fptr = C::CExpression[[fptr], C::Pointer.new(proto)] + fptr = C::CExpression[[fptr], C::Pointer.new(proto)].with_misc(:di_addr => di_addr) args = [] end - ret = C::Return.new(C::CExpression[fptr, :funcall, args]) - class << ret ; attr_accessor :from_instr end - ret.from_instr = di + ret = C::Return.new(C::CExpression[fptr, :funcall, args].with_misc(:di_addr => di_addr)).with_misc(:di_addr => di_addr) stmts << ret to = [] end when 'lgdt' if not dcmp.c_parser.toplevel.struct['segment_descriptor'] @@ -416,43 +423,43 @@ if not dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'] dcmp.c_parser.parse('void intrinsic_lgdt(struct segment_table *);') end # need a way to transform arg => :frameptr+12 arg = di.backtrace_binding.keys.grep(Indirection).first.pointer - stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void)) + stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lgdt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr) when 'lidt' if not dcmp.c_parser.toplevel.struct['interrupt_descriptor'] dcmp.c_parser.parse('struct interrupt_descriptor { __int16 offset0_16; __int16 segment; __int16 flags; __int16 offset16_32; };') dcmp.c_parser.parse('struct interrupt_table { __int16 size; struct interrupt_descriptor *table; } __attribute__((pack(2)));') end if not dcmp.c_parser.toplevel.symbol['intrinsic_lidt'] dcmp.c_parser.parse('void intrinsic_lidt(struct interrupt_table *);') end arg = di.backtrace_binding.keys.grep(Indirection).first.pointer - stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void)) + stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol['intrinsic_lidt'], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr) when 'ltr', 'lldt' if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"] dcmp.c_parser.parse("void intrinsic_#{di.opcode.name}(int);") end arg = di.backtrace_binding.keys.first - stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void)) + stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_#{di.opcode.name}"], :funcall, [ceb[arg]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr) when 'out' sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz if not dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"] dcmp.c_parser.parse("void intrinsic_out#{sz}(unsigned short port, __int#{sz} value);") end - port = di.instruction.args.grep(Expression).first || :edx - stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[:eax]], C::BaseType.new(:void)) + port = di.instruction.args.grep(Expression).first || edx + stmts << C::CExpression.new(dcmp.c_parser.toplevel.symbol["intrinsic_out#{sz}"], :funcall, [ceb[port], ceb[eax]], C::BaseType.new(:void)).with_misc(:di_addr => di_addr) when 'in' sz = di.instruction.args.find { |a_| a_.kind_of? Ia32::Reg and a_.val == 0 }.sz if not dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"] dcmp.c_parser.parse("__int#{sz} intrinsic_in#{sz}(unsigned short port);") end - port = di.instruction.args.grep(Expression).first || :edx + port = di.instruction.args.grep(Expression).first || edx f = dcmp.c_parser.toplevel.symbol["intrinsic_in#{sz}"] - binding.delete :eax - stmts << C::CExpression.new(ce[:eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type) + binding.delete eax + stmts << C::CExpression.new(ce[eax], :'=', C::CExpression.new(f, :funcall, [ceb[port]], f.type.type), f.type.type).with_misc(:di_addr => di_addr) when 'sti', 'cli' stmts << C::Asm.new(di.instruction.to_s, nil, [], [], nil, nil) when /^(mov|sto|lod)s([bwdq])/ op, sz = $1, $2 commit[] @@ -460,38 +467,38 @@ pt = C::Pointer.new(C::BaseType.new("__int#{sz*8}".to_sym)) blk = C::Block.new(scope) case op when 'mov' - blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', [:*, [[ceb[:esi]], pt]]] - blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]] - blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]] + blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr) + blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr) + blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr) when 'sto' - blk.statements << C::CExpression[[:*, [[ceb[:edi]], pt]], :'=', ceb[:eax]] - blk.statements << C::CExpression[ceb[:edi], :'=', [ceb[:edi], :+, [sz]]] + blk.statements << C::CExpression[[:*, [[ceb[edi]], pt]], :'=', ceb[eax]].with_misc(:di_addr => di_addr) + blk.statements << C::CExpression[ceb[edi], :'=', [ceb[edi], :+, [sz]]].with_misc(:di_addr => di_addr) when 'lod' - blk.statements << C::CExpression[ceb[:eax], :'=', [:*, [[ceb[:esi]], pt]]] - blk.statements << C::CExpression[ceb[:esi], :'=', [ceb[:esi], :+, [sz]]] + blk.statements << C::CExpression[ceb[eax], :'=', [:*, [[ceb[esi]], pt]]].with_misc(:di_addr => di_addr) + blk.statements << C::CExpression[ceb[esi], :'=', [ceb[esi], :+, [sz]]].with_misc(:di_addr => di_addr) #when 'sca' #when 'cmp' end case (di.instruction.prefix || {})[:rep] when nil stmts.concat blk.statements when 'rep' - blk.statements << C::CExpression[ceb[:ecx], :'=', [ceb[:ecx], :-, [1]]] - stmts << C::While.new(C::CExpression[ceb[:ecx]], blk) + blk.statements << C::CExpression[ceb[ecx], :'=', [ceb[ecx], :-, [1]]].with_misc(:di_addr => di_addr) + stmts << C::While.new(C::CExpression[ceb[ecx]], blk).with_misc(:di_addr => di_addr) #when 'repz' # sca/cmp only #when 'repnz' end next else bd = get_fwdemu_binding(di) if di.backtrace_binding[:incomplete_binding] commit[] - stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil) + stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil).with_misc(:di_addr => di_addr) else update = {} bd.each { |k, v| if k.kind_of? ::Symbol and not deps[b].include? k ops << [k, v] @@ -502,9 +509,10 @@ end } binding.update update end end + di_addr = nil } commit[] case to.length when 0