metasm/disassemble.rb in metasm-1.0.3 vs metasm/disassemble.rb in metasm-1.0.4

- old
+ new

@@ -62,11 +62,17 @@ include Renderable def render ret = [] ret << Expression[address] << ' ' if address ret << @instruction - ret << ' ; ' << @comment if comment + if comment + ret << ' ; ' + @comment.each { |c| + ret << c << ' ' + } + ret.pop + end ret end def add_comment(c) @comment ||= [] @@ -100,15 +106,15 @@ attr_accessor :type # bool: true if this maps to a :x that should not have a from when resolved attr_accessor :detached # maxdepth at the point of the object creation attr_accessor :maxdepth + # disassembler cpu_context + attr_accessor :cpu_context - def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil) - @expr, @origin, @orig_expr, @type = expr, origin, orig_expr, type - @len = len if len - @maxdepth = maxdepth if maxdepth + def initialize(expr, origin, orig_expr, type, len=nil, maxdepth=nil, cpu_context=nil) + @expr, @origin, @orig_expr, @type, @len, @maxdepth, @cpu_context = expr, origin, orig_expr, type, len, maxdepth, cpu_context end def hash ; [origin, expr].hash ; end def eql?(o) o.class == self.class and @@ -244,28 +250,28 @@ attr_accessor :localvars_xrefs # if btbind_callback is defined, calls it with args [dasm, binding, funcaddr, calladdr, expr, origin, maxdepth] # else update lazily the binding from expr.externals, and return backtrace_binding def get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) - if btbind_callback - @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth] - elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] + if backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] target.get_backtrace_binding(dasm, funcaddr, calladdr, expr, origin, maxdepth) + elsif btbind_callback + @btbind_callback[dasm, @backtrace_binding, funcaddr, calladdr, expr, origin, maxdepth] else unk_regs = expr.externals.grep(Symbol).uniq - @backtrace_binding.keys - [:unknown] dasm.cpu.backtrace_update_function_binding(dasm, funcaddr, self, return_address, *unk_regs) if not unk_regs.empty? @backtrace_binding end end # if btfor_callback is defined, calls it with args [dasm, bt_for, funcaddr, calladdr] # else return backtracked_for def get_backtracked_for(dasm, funcaddr, calladdr) - if btfor_callback - @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr] - elsif backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] + if backtrace_binding and dest = @backtrace_binding[:thunk] and target = dasm.function[dest] target.get_backtracked_for(dasm, funcaddr, calladdr) + elsif btfor_callback + @btfor_callback[dasm, @backtracked_for, funcaddr, calladdr] else @backtracked_for end end @@ -284,20 +290,33 @@ @localvars[off] ||= (str || (off > 0 ? 'arg_%X' % off : 'var_%X' % -off)) end end class CPU + # decode an instruction with a dasm context + # context is a hash, should be modified inplace by the CPU + # will be passed to the next instruction(s) in the code flow + def decode_instruction_context(dasm, edata, di_addr, context) + decode_instruction(edata, di_addr) + end + + # return the initial context for the disassembler, starts disassembling from addr + def disassemble_init_context(dasm, addr) + end + # return the thing to backtrace to find +value+ before the execution of this instruction # eg backtrace_emu('inc eax', Expression[:eax]) => Expression[:eax + 1] # (the value of :eax after 'inc eax' is the value of :eax before plus 1) # may return Expression::Unknown def backtrace_emu(di, value) Expression[Expression[value].bind(di.backtrace_binding ||= get_backtrace_binding(di)).reduce] end - # returns a list of Expressions/Integer to backtrace to find an execution target + # return the list of jump targets for insturctions modifying the control flow def get_xrefs_x(dasm, di) + return [] if not di.opcode.props[:setip] + [symbolic(di.instruction.args.last, di)] end # returns a list of [type, address, len] def get_xrefs_rw(dasm, di) get_xrefs_r(dasm, di).map { |addr, len| [:r, addr, len] } + get_xrefs_w(dasm, di).map { |addr, len| [:w, addr, len] } @@ -336,11 +355,11 @@ # updates the instruction arguments: replace an expression with another (eg when a label is renamed) def replace_instr_arg_immediate(i, old, new) i.args.map! { |a| case a - when Expression; Expression[a.bind(old => new).reduce] + when Expression; a == old ? new : Expression[a.bind(old => new).reduce] else a end } end @@ -394,10 +413,12 @@ attr_accessor :backtrace_maxcomplexity, :backtrace_maxcomplexity_data # maximum number of instructions inside a basic block, split past this limit attr_accessor :disassemble_maxblocklength # a cparser that parsed some C header files, prototypes are converted to DecodedFunction when jumped to attr_accessor :c_parser + # if false, disassembler skips internal functions with a prototype defined in a C header (eg static libraries) + attr_accessor :disassemble_known_functions # hash address => array of strings # default dasm dump will only show comments at beginning of code blocks attr_accessor :comment # bool, set to true (default) if functions with undetermined binding should be assumed to return with ABI-conforming binding (conserve frame ptr) attr_accessor :funcs_stdabi @@ -416,10 +437,12 @@ attr_accessor :callback_prebacktrace # callback called once all addresses have been disassembled attr_accessor :callback_finished # pointer to the gui widget we're displayed in attr_accessor :gui + # arbitrary data stored by other objects + attr_accessor :misc @@backtrace_maxblocks = 50 # creates a new disassembler def initialize(program, cpu=program.cpu) @@ -450,11 +473,11 @@ end # adds a section, updates prog_binding # base addr is an Integer or a String (label name for offset 0) def add_section(encoded, base) - encoded, base = base, encoded if base.kind_of? EncodedData + encoded, base = base, encoded if base.kind_of?(EncodedData) case base when ::Integer when ::String raise "invalid section base #{base.inspect} - not at section start" if encoded.export[base] and encoded.export[base] != 0 if ed = get_edata_at(base) @@ -489,11 +512,11 @@ when ::Array; @xrefs[addr] |= [x] else @xrefs[addr] = [@xrefs[addr], x] end end - # yields each xref to a given address, optionnaly restricted to a type + # yields each xref to a given address, optionaly restricted to a type def each_xref(addr, type=nil) addr = normalize addr x = @xrefs[addr] x = case x @@ -543,31 +566,31 @@ end # returns the canonical form of addr (absolute address integer or label of start of section + section offset) def normalize(addr) return addr if not addr or addr == :default - addr = Expression[addr].bind(@old_prog_binding).reduce if not addr.kind_of? Integer + addr = Expression[addr].bind(@old_prog_binding).reduce if not addr.kind_of?(Integer) addr end # returns [edata, edata_base] or nil # edata.ptr points to addr def get_section_at(addr, memcheck=true) case addr = normalize(addr) when ::Integer - if s = @sections.find { |b, e| b.kind_of? ::Integer and addr >= b and addr < b + e.length } || - @sections.find { |b, e| b.kind_of? ::Integer and addr == b + e.length } # end label + if s = @sections.find { |b, e| b.kind_of?(::Integer) and addr >= b and addr < b + e.length } || + @sections.find { |b, e| b.kind_of?(::Integer) and addr == b + e.length } # end label s[1].ptr = addr - s[0] return if memcheck and s[1].data.respond_to?(:page_invalid?) and s[1].data.page_invalid?(s[1].ptr) [s[1], s[0]] end when Expression - if addr.op == :+ and addr.rexpr.kind_of? ::Integer and addr.rexpr >= 0 and addr.lexpr.kind_of? ::String and e = @sections[addr.lexpr] + if addr.op == :+ and addr.rexpr.kind_of?(::Integer) and addr.rexpr >= 0 and addr.lexpr.kind_of?(::String) and e = @sections[addr.lexpr] e.ptr = addr.rexpr return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr) [e, Expression[addr.lexpr]] - elsif addr.op == :+ and addr.rexpr.kind_of? ::String and not addr.lexpr and e = @sections[addr.rexpr] + elsif addr.op == :+ and addr.rexpr.kind_of?(::String) and not addr.lexpr and e = @sections[addr.rexpr] e.ptr = 0 return if memcheck and e.data.respond_to?(:page_invalid?) and e.data.page_invalid?(e.ptr) [e, addr.rexpr] end end @@ -580,16 +603,18 @@ addr = Expression[addr].reduce addrstr = "#{base}_#{Expression[addr]}" return if addrstr !~ /^\w+$/ e, b = get_section_at(addr) if not e - l = Expression[addr].reduce_rec if Expression[addr].reduce_rec.kind_of? ::String - l ||= addrstr if addr.kind_of? Expression and addr.externals.grep(::Symbol).empty? + l = Expression[addr].reduce_rec if Expression[addr].reduce_rec.kind_of?(::String) + l ||= addrstr if addr.kind_of?(Expression) and addr.externals.grep(::Symbol).empty? elsif not l = e.inv_export[e.ptr] l = @program.new_label(addrstr) e.add_export l, e.ptr - @label_alias_cache = nil + if @label_alias_cache ||= nil + (@label_alias_cache[b + e.ptr] ||= []) << l + end @old_prog_binding[l] = @prog_binding[l] = b + e.ptr elsif rewritepfx.find { |p| base != p and addrstr.sub(base, p) == l } newl = addrstr newl = @program.new_label(newl) unless @old_prog_binding[newl] and @old_prog_binding[newl] == @prog_binding[l] # avoid _uuid when a -> b -> a rename_label l, newl @@ -627,23 +652,28 @@ puts 'disassembly finished' if $VERBOSE @callback_finished[] if callback_finished return false elsif @addrs_todo.empty? ep = entrypoints.shift - l = auto_label_at(normalize(ep), 'entrypoint') + cpu_context = get_initial_cpu_context(ep) + l = auto_label_at(normalize(ep), 'entrypoint') || normalize(ep) puts "start disassemble from #{l} (#{entrypoints.length})" if $VERBOSE and not entrypoints.empty? @entrypoints << l - @addrs_todo << [ep] + @addrs_todo << { :addr => ep, :cpu_context => cpu_context } else disassemble_step end true end + def get_initial_cpu_context(addr) + @cpu.disassemble_init_context(self, addr) + end + def post_disassemble @decoded.each_value { |di| - next if not di.kind_of? DecodedInstruction + next if not di.kind_of?(DecodedInstruction) next if not di.opcode or not di.opcode.props[:saveip] if not di.block.to_subfuncret di.add_comment 'noreturn' # there is no need to re-loop on all :saveip as check_noret is transitive di.block.each_to_normal { |fa| check_noreturn_function(fa) } @@ -669,23 +699,24 @@ # disassembles one block from addrs_todo # adds next addresses to handle to addrs_todo # if @function[:default] exists, jumps to unknows locations are interpreted as to @function[:default] def disassemble_step - return if not todo = @addrs_todo.pop or @addrs_done.include? todo - @addrs_done << todo if todo[1] + return if not x = @addrs_todo.pop or @addrs_done.include?(x) + @addrs_done << x if x[:from] - # from_sfret is true if from is the address of a function call that returns to addr - addr, from, from_subfuncret = todo + addr = x[:addr] + from = x[:from] + # from_subfuncret is true if from is the address of a function call that returns to addr return if from == Expression::Unknown - puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{from_subfuncret} (/#{@addrs_todo.length})" if $DEBUG + puts "disassemble_step #{Expression[addr]} #{Expression[from] if from} #{x[:from_subfuncret]} (/#{@addrs_todo.length})" if $DEBUG addr = normalize(addr) - if from and from_subfuncret and di_at(from) + if from and x[:from_subfuncret] and di_at(from) @decoded[from].block.each_to_normal { |subfunc| subfunc = normalize(subfunc) next if not f = @function[subfunc] or f.finalized f.finalized = true puts " finalize subfunc #{Expression[subfunc]}" if debug_backtrace @@ -695,31 +726,40 @@ end } end if di = @decoded[addr] - if di.kind_of? DecodedInstruction + if di.kind_of?(DecodedInstruction) split_block(di.block, di.address, true) if not di.block_head? # this updates di.block - di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default + di.block.add_from(from, x[:from_subfuncret] ? :subfuncret : :normal) if from and from != :default bf = di.block elsif di == true bf = @function[addr] end - elsif bf = @function[addr] + elsif from and bf = @function[addr] detect_function_thunk_noreturn(from) if bf.noreturn elsif s = get_section_at(addr) - block = InstructionBlock.new(normalize(addr), s[0]) - block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default - disassemble_block(block) - elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of? ::String and - s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function - bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s) + if from and c_parser and not disassemble_known_functions and name = get_all_labels_at(addr).find { |n| + cs = c_parser.toplevel.symbol[n] and cs.type.untypedef.kind_of?(C::Function) } + # do not disassemble internal function for which we have a prototype (eg static library) + puts "found known function #{name} at #{Expression[addr]}" if $VERBOSE + bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, c_parser.toplevel.symbol[name]) + detect_function_thunk_noreturn(from) if bf.noreturn + else + block = InstructionBlock.new(normalize(addr), s[0]) + block.add_from(from, x[:from_subfuncret] ? :subfuncret : :normal) if from and from != :default + disassemble_block(block, x[:cpu_context]) + end + elsif from and c_parser and name = Expression[addr].reduce_rec and name.kind_of?(::String) and + cs = c_parser.toplevel.symbol[name] and cs.type.untypedef.kind_of?(C::Function) + # use C header prototype for external functions if available + bf = @function[addr] = @cpu.decode_c_function_prototype(@c_parser, cs) detect_function_thunk_noreturn(from) if bf.noreturn - elsif from + elsif from and not @function[addr] if bf = @function[:default] puts "using default function for #{Expression[addr]} from #{Expression[from]}" if $DEBUG - if name = Expression[addr].reduce_rec and name.kind_of? ::String + if name = Expression[addr].reduce_rec and name.kind_of?(::String) @function[addr] = @function[:default].dup else addr = :default end if @decoded[from] @@ -735,51 +775,51 @@ else puts "not disassembling unknown address #{Expression[addr]}" if $VERBOSE end if bf and from and from != :default - if bf.kind_of? DecodedFunction + if bf.kind_of?(DecodedFunction) bff = bf.get_backtracked_for(self, addr, from) else bff = bf.backtracked_for end end bff.each { |btt| next if btt.address - if @decoded[from].kind_of? DecodedInstruction and @decoded[from].opcode.props[:saveip] and not from_subfuncret and not @function[addr] - backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached) + if @decoded[from].kind_of?(DecodedInstruction) and @decoded[from].opcode.props[:saveip] and not x[:from_subfuncret] and not @function[addr] + backtrace_check_found(btt.expr, @decoded[addr], btt.origin, btt.type, btt.len, btt.maxdepth, btt.detached, btt.cpu_context) end next if backtrace_check_funcret(btt, addr, from) backtrace(btt.expr, from, - :include_start => true, :from_subfuncret => from_subfuncret, + :include_start => true, :from_subfuncret => x[:from_subfuncret], :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, - :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth) + :len => btt.len, :detached => btt.detached, :maxdepth => btt.maxdepth, :cpu_context => btt.cpu_context) } if bff end # splits an InstructionBlock, updates the blocks backtracked_for def split_block(block, address=nil, rebacktrace=false) if not address # invoked as split_block(0x401012) - return if not @decoded[block].kind_of? DecodedInstruction + return if not @decoded[block].kind_of?(DecodedInstruction) block, address = @decoded[block].block, block end return block if address == block.address new_b = block.split address if rebacktrace new_b.backtracked_for.dup.each { |btt| backtrace(btt.expr, btt.address, :only_upto => block.list.last.address, :include_start => !btt.exclude_instr, :from_subfuncret => btt.from_subfuncret, :origin => btt.origin, :orig_expr => btt.orig_expr, :type => btt.type, :len => btt.len, - :detached => btt.detached, :maxdepth => btt.maxdepth) + :detached => btt.detached, :maxdepth => btt.maxdepth, :cpu_context => btt.cpu_context) } end new_b end # disassembles a new instruction block at block.address (must be normalized) - def disassemble_block(block) + def disassemble_block(block, cpu_context) raise if not block.list.empty? di_addr = block.address delay_slot = nil di = nil @@ -803,11 +843,12 @@ #} end # decode instruction block.edata.ptr = di_addr - block.address + block.edata_ptr - if not di = @cpu.decode_instruction(block.edata, di_addr) + cpu_context = cpu_context.dup if cpu_context + if not di = @cpu.decode_instruction_context(self, block.edata, di_addr, cpu_context) ed = block.edata break if ed.ptr >= ed.length and get_section_at(di_addr) and di = block.list.last puts "#{ed.ptr >= ed.length ? "end of section reached" : "unknown instruction #{ed.data[di_addr-block.address+block.edata_ptr, 4].to_s.unpack('H*').first}"} at #{Expression[di_addr]}" if $VERBOSE return end @@ -843,22 +884,27 @@ end if delay_slot di, delay = delay_slot if delay == 0 or not di_addr - backtrace_xrefs_di_x(di) + backtrace_xrefs_di_x(di, cpu_context) if di.opcode.props[:stopexec] or not di_addr; return else break end end delay_slot[1] = delay - 1 end + + if block.edata.inv_export[di_addr - block.address + block.edata_ptr] + # ensure there is a block split if we have a label defined + break + end } ar = [di_addr] ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr - ar.each { |di_addr_| backtrace(di_addr_, di.address, :origin => di.address, :type => :x) } + ar.each { |di_addr_| backtrace(di_addr_, di.address, :origin => di.address, :type => :x, :cpu_context => cpu_context) } block end # retrieve the list of execution crossrefs due to the decodedinstruction @@ -876,68 +922,75 @@ # disassembles_fast from a list of entrypoints, also dasm subfunctions def disassemble_fast_deep(*entrypoints) @entrypoints ||= [] @entrypoints |= entrypoints - entrypoints.each { |ep| do_disassemble_fast_deep(normalize(ep)) } + entrypoints.each { |ep| do_disassemble_fast_deep(:addr => normalize(ep)) } @callback_finished[] if callback_finished end def do_disassemble_fast_deep(ep) disassemble_fast(ep) { |fa, di| - fa = normalize(fa) - do_disassemble_fast_deep(fa) - if di and ndi = di_at(fa) - ndi.block.add_from_normal(di.address) - end + do_disassemble_fast_deep(:addr => normalize(fa), :from => di.address) } end # disassembles fast from a list of entrypoints # see disassemble_fast_step def disassemble_fast(entrypoint, maxdepth=-1, &b) - ep = [entrypoint] - until ep.empty? - disassemble_fast_step(ep, &b) + td = entrypoint + td = { :addr => entrypoint } unless td.kind_of?(::Hash) + td[:cpu_context] ||= get_initial_cpu_context(td[:addr]) + todo = [td] + until todo.empty? + disassemble_fast_step(todo, &b) maxdepth -= 1 - ep.delete_if { |a| not @decoded[normalize(a[0])] } if maxdepth == 0 + todo.delete_if { |a| not @decoded[normalize(a[:addr])] } if maxdepth == 0 end - check_noreturn_function(entrypoint) + check_noreturn_function(td[:addr]) end # disassembles one block from the ary, see disassemble_fast_block def disassemble_fast_step(todo, &b) return if not x = todo.pop - addr, from, from_subfuncret = x - addr = normalize(addr) + addr = normalize(x[:addr]) if di = @decoded[addr] - if di.kind_of? DecodedInstruction + if di.kind_of?(DecodedInstruction) split_block(di.block, di.address) if not di.block_head? - di.block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default + di.block.add_from(x[:from], x[:from_subfuncret] ? :subfuncret : :normal) if x[:from] and x[:from] != :default end + elsif @function[addr] and x[:from] elsif s = get_section_at(addr) - block = InstructionBlock.new(normalize(addr), s[0]) - block.add_from(from, from_subfuncret ? :subfuncret : :normal) if from and from != :default - todo.concat disassemble_fast_block(block, &b) - elsif name = Expression[addr].reduce_rec and name.kind_of? ::String and not @function[addr] - if c_parser and s = c_parser.toplevel.symbol[name] and s.type.untypedef.kind_of? C::Function - @function[addr] = @cpu.decode_c_function_prototype(@c_parser, s) - detect_function_thunk_noreturn(from) if @function[addr].noreturn + if x[:from] and c_parser and not disassemble_known_functions and name = get_all_labels_at(addr).find { |n| + cs = c_parser.toplevel.symbol[n] and cs.type.untypedef.kind_of?(C::Function) } + # do not disassemble internal function for which we have a prototype (eg static library) + puts "found known function #{name} at #{Expression[addr]}" if $VERBOSE + @function[addr] = @cpu.decode_c_function_prototype(@c_parser, c_parser.toplevel.symbol[name]) + detect_function_thunk_noreturn(x[:from]) if @function[addr].noreturn + else + block = InstructionBlock.new(addr, s[0]) + block.add_from(x[:from], x[:from_subfuncret] ? :subfuncret : :normal) if x[:from] and x[:from] != :default + todo.concat disassemble_fast_block(block, x[:cpu_context], &b) + end + elsif name = Expression[addr].reduce_rec and name.kind_of?(::String) and not @function[addr] + if c_parser and cs = c_parser.toplevel.symbol[name] and cs.type.untypedef.kind_of?(C::Function) + @function[addr] = @cpu.decode_c_function_prototype(@c_parser, cs) + detect_function_thunk_noreturn(x[:from]) if @function[addr].noreturn elsif @function[:default] @function[addr] = @function[:default].dup end end disassemble_fast_checkfunc(addr) end # check if an addr has an xref :x from a :saveip, if so mark as Function def disassemble_fast_checkfunc(addr) - if @decoded[addr].kind_of? DecodedInstruction and not @function[addr] + if @decoded[addr].kind_of?(DecodedInstruction) and not @function[addr] func = false each_xref(addr, :x) { |x_| func = true if odi = di_at(x_.origin) and odi.opcode.props[:saveip] } if func @@ -955,12 +1008,12 @@ # assumes all :saveip returns, except those pointing to a subfunc with noreturn # yields subfunction addresses (targets of :saveip) # no backtrace for :x (change with backtrace_maxblocks_fast) # returns a todo-style ary # assumes @addrs_todo is empty - def disassemble_fast_block(block, &b) - block = InstructionBlock.new(normalize(block), get_section_at(block)[0]) if not block.kind_of? InstructionBlock + def disassemble_fast_block(block, cpu_context, &b) + block = InstructionBlock.new(normalize(block), get_section_at(block)[0]) if not block.kind_of?(InstructionBlock) di_addr = block.address delay_slot = nil di = nil ret = [] @@ -969,11 +1022,12 @@ @disassemble_maxblocklength.times { break if @decoded[di_addr] # decode instruction block.edata.ptr = di_addr - block.address + block.edata_ptr - if not di = @cpu.decode_instruction(block.edata, di_addr) + cpu_context = cpu_context.dup if cpu_context + if not di = @cpu.decode_instruction_context(self, block.edata, di_addr, cpu_context) break if block.edata.ptr >= block.edata.length and get_section_at(di_addr) and di = block.list.last return ret end @decoded[di_addr] = di @@ -1000,16 +1054,16 @@ if di.opcode.props[:setip] @addrs_todo = [] ar = @program.get_xrefs_x(self, di) ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr ar.each { |expr| - backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast) + backtrace(expr, di.address, :origin => di.address, :type => :x, :maxdepth => @backtrace_maxblocks_fast, :cpu_context => cpu_context) } end if di.opcode.props[:saveip] @addrs_todo = [] - ret.concat disassemble_fast_block_subfunc(di, &b) + ret.concat disassemble_fast_block_subfunc(di, cpu_context, &b) else ret.concat @addrs_todo @addrs_todo = [] end delay_slot ||= [di, @cpu.delay_slot(di)] @@ -1026,17 +1080,17 @@ ar = [di_addr] ar = @callback_newaddr[block.list.last.address, ar] || ar if callback_newaddr ar.each { |a| di.block.add_to_normal(a) - ret << [a, di.address] + ret << { :addr => a, :from => di.address, :cpu_context => cpu_context } } ret end # handles when disassemble_fast encounters a call to a subfunction - def disassemble_fast_block_subfunc(di) + def disassemble_fast_block_subfunc(di, cpu_context) funcs = di.block.to_normal.to_a do_ret = funcs.empty? ret = [] na = di.next_addr + di.bin_length * @cpu.delay_slot(di) funcs.each { |fa| @@ -1045,26 +1099,27 @@ yield fa, di if block_given? if f = @function[fa] and bf = f.get_backtracked_for(self, fa, di.address) and not bf.empty? # this includes retaddr unless f is noreturn bf.each { |btt| next if btt.type != :x - bt = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max) + bt = backtrace(btt.expr, di.address, :include_start => true, :origin => btt.origin, :maxdepth => [@backtrace_maxblocks_fast, 1].max, :cpu_context => cpu_context) if btt.detached - ret.concat bt # callback argument - elsif bt.find { |a| normalize(a) == na } + ret.concat bt.map { |a| { :addr => a } } # callback argument + elsif not f.noreturn and bt.find { |a| normalize(a) == na } do_ret = true end } elsif not f or not f.noreturn do_ret = true end } if do_ret di.block.add_to_subfuncret(na) - ret << [na, di.address, true] + ret << { :addr => na, :from => di.address, :from_subfuncret => true, :cpu_context => cpu_context } di.block.add_to_normal :default if not di.block.to_normal and @function[:default] end + di.add_comment 'noreturn' if ret.empty? ret end # trace whose xrefs this di is responsible of def backtrace_xrefs_di_rw(di) @@ -1083,22 +1138,22 @@ } } end # trace xrefs for execution - def backtrace_xrefs_di_x(di) + def backtrace_xrefs_di_x(di, cpu_context) ar = @program.get_xrefs_x(self, di) ar = @callback_newaddr[di.address, ar] || ar if callback_newaddr - ar.each { |expr| backtrace(expr, di.address, :origin => di.address, :type => :x) } + ar.each { |expr| backtrace(expr, di.address, :origin => di.address, :type => :x, :cpu_context => cpu_context) } end # checks if the function starting at funcaddr is an external function thunk (eg jmp [SomeExtFunc]) # the argument must be the address of a decodedinstruction that is the first of a function, # which must not have return_addresses # returns the new thunk name if it was changed def detect_function_thunk(funcaddr) - # check thunk linearity (no conditionnal branch etc) + # check thunk linearity (no conditional branch etc) addr = funcaddr count = 0 while b = block_at(addr) count += 1 return if count > 5 or b.list.length > 5 @@ -1127,11 +1182,11 @@ if funcaddr != addr and f = @function[funcaddr] # forward get_backtrace_binding to target f.backtrace_binding = { :thunk => addr } f.noreturn = true if @function[addr] and @function[addr].noreturn end - return if not fname.kind_of? ::String + return if not fname.kind_of?(::String) l = auto_label_at(funcaddr, 'sub', 'loc') return if l[0, 4] != 'sub_' puts "found thunk for #{fname} at #{Expression[funcaddr]}" if $DEBUG rename_label(l, @program.new_label("thunk_#{fname}")) end @@ -1165,18 +1220,18 @@ # it is if all its end blocks are calls to noreturn functions # if it is, create a @function[fa] with noreturn = true # should only be called with fa = target of a call def check_noreturn_function(fa) fb = function_blocks(fa, false, false) + return if fb.empty? lasts = fb.keys.find_all { |k| fb[k] == [] } - return if lasts.empty? if lasts.all? { |la| b = block_at(la) next if not di = b.list.last (di.opcode.props[:saveip] and b.to_normal.to_a.all? { |tfa| tf = function_at(tfa) and tf.noreturn - }) or (di.opcode.props[:stopexec] and not di.opcode.props[:setip]) + }) or (di.opcode.props[:stopexec] and not (di.opcode.props[:setip] or not get_xrefs_x(di).empty?)) } # yay @function[fa] ||= DecodedFunction.new @function[fa].noreturn = true end @@ -1227,11 +1282,11 @@ # stopaddr is an [array of] address of instruction, the backtrace will stop just after executing it # maxdepth is the maximum depth (in blocks) for each backtrace branch. # (defaults to dasm.backtrace_maxblocks, which defaults do Dasm.backtrace_maxblocks) def backtrace_walk(obj, addr, include_start, from_subfuncret, stopaddr, maxdepth) start_addr = normalize(addr) - stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of? ::Array + stopaddr = [stopaddr] if stopaddr and not stopaddr.kind_of?(::Array) # array of [obj, addr, from_subfuncret, loopdetect] # loopdetect is an array of [obj, addr, from_type] of each end of block encountered todo = [] @@ -1254,11 +1309,11 @@ hadsomething = false w_di.block.each_from { |f_addr, f_type| next if f_type == :indirect hadsomething = true o_f_addr = f_addr - f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot + f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of?(DecodedInstruction) # delay slot if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == f_addr and l_type == f_type } f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect) if f_obj and f_obj != w_obj # should avoid infinite loops f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)] end @@ -1268,11 +1323,11 @@ next if f_obj == false f_obj ||= w_obj f_loopdetect ||= w_loopdetect # only count non-trivial paths in loopdetect (ignore linear links) add_detect = [[f_obj, f_addr, f_type]] - add_detect = [] if @decoded[f_addr].kind_of? DecodedInstruction and tmp = @decoded[f_addr].block and + add_detect = [] if @decoded[f_addr].kind_of?(DecodedInstruction) and tmp = @decoded[f_addr].block and ((w_di.block.from_subfuncret.to_a == [] and w_di.block.from_normal == [f_addr] and tmp.to_normal == [w_di.address] and tmp.to_subfuncret.to_a == []) or (w_di.block.from_subfuncret == [f_addr] and tmp.to_subfuncret == [w_di.address])) todo << [f_obj, f_addr, f_type, f_loopdetect + add_detect ] } @@ -1281,11 +1336,11 @@ next if done.include? [w_obj, w_addr] oldlen = todo.length each_xref(w_addr, :x) { |x| f_addr = x.origin o_f_addr = f_addr - f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of? DecodedInstruction # delay slot + f_addr = @decoded[f_addr].block.list.last.address if @decoded[f_addr].kind_of?(DecodedInstruction) # delay slot if l = w_loopdetect.find { |l_obj, l_addr, l_type| l_addr == w_addr } f_obj = yield(:loop, w_obj, :looptrace => w_loopdetect[w_loopdetect.index(l)..-1], :loopdetect => w_loopdetect) if f_obj and f_obj != w_obj f_loopdetect = w_loopdetect[0...w_loopdetect.index(l)] end @@ -1464,10 +1519,11 @@ # :max_complexity{_data} => maximum complexity of the expression before aborting its backtrace # :log => Array, will be updated with the backtrace evolution # :only_upto => backtrace only to update bt_for for current block & previous ending at only_upto # :no_check => don't use backtrace_check_found (will not backtrace indirection static values) # :terminals => array of symbols with constant value (stop backtracking if all symbols in the expr are terminals) (only supported with no_check) + # :cpu_context => disassembler cpu_context def backtrace(expr, start_addr, nargs={}) include_start = nargs.delete :include_start from_subfuncret = nargs.delete :from_subfuncret origin = nargs.delete :origin origexpr = nargs.delete :orig_expr @@ -1480,10 +1536,11 @@ max_complexity_data = nargs.delete(:max_complexity) || @backtrace_maxcomplexity_data bt_log = nargs.delete :log # array to receive the ongoing backtrace info only_upto = nargs.delete :only_upto no_check = nargs.delete :no_check terminals = nargs.delete(:terminals) || [] + cpu_context = nargs.delete :cpu_context raise ArgumentError, "invalid argument to backtrace #{nargs.keys.inspect}" if not nargs.empty? expr = Expression[expr] origexpr = expr if origin == start_addr @@ -1500,20 +1557,20 @@ max_complexity = max_complexity_data maxdepth = @backtrace_maxblocks_data if backtrace_maxblocks_data and maxdepth > @backtrace_maxblocks_data end if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, - di, origin, type, len, maxdepth, detached, snapshot_addr)) + di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr)) # no need to update backtracked_for return vals elsif maxdepth <= 0 return [Expression::Unknown] end # create initial backtracked_for if type and origin == start_addr and di - btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1) + btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-1, cpu_context) btt.address = di.address btt.exclude_instr = true if not include_start btt.from_subfuncret = true if from_subfuncret and include_start btt.detached = true if detached di.block.backtracked_for |= [btt] @@ -1530,63 +1587,63 @@ expr = expr_ case ev when :unknown_addr, :maxdepth puts " backtrace end #{ev} #{expr}" if debug_backtrace result |= [expr] if not snapshot_addr - @addrs_todo << [expr, (detached ? nil : origin)] if not snapshot_addr and type == :x and origin + @addrs_todo << { :addr => expr, :from => (detached ? nil : origin), :cpu_context => cpu_context } if not snapshot_addr and type == :x and origin when :end - if not expr.kind_of? StoppedExpr + if not expr.kind_of?(StoppedExpr) oldexpr = expr expr = backtrace_emu_blockup(h[:addr], expr) puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr if expr != oldexpr and not snapshot_addr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, nil, origin, type, len, - maxdepth-h[:loopdetect].length, detached, snapshot_addr)) + maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr)) result |= vals next end end puts " backtrace end #{ev} #{expr}" if debug_backtrace if not snapshot_addr result |= [expr] - btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1) + btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1, cpu_context) btt.detached = true if detached @decoded[h[:addr]].block.backtracked_for |= [btt] if @decoded[h[:addr]] @function[h[:addr]].backtracked_for |= [btt] if @function[h[:addr]] and h[:addr] != :default - @addrs_todo << [expr, (detached ? nil : origin)] if type == :x and origin + @addrs_todo << { :addr => expr, :from => (detached ? nil : origin), :cpu_context => cpu_context } if type == :x and origin end when :stopaddr - if not expr.kind_of? StoppedExpr + if not expr.kind_of?(StoppedExpr) oldexpr = expr expr = backtrace_emu_blockup(h[:addr], expr) puts " backtrace up #{Expression[h[:addr]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace bt_log << [:up, expr, oldexpr, h[:addr], :end] if bt_log and expr != oldexpr end puts " backtrace end #{ev} #{expr}" if debug_backtrace result |= ((expr.kind_of?(StoppedExpr)) ? expr.exprs : [expr]) when :loop - next false if expr.kind_of? StoppedExpr + next false if expr.kind_of?(StoppedExpr) t = h[:looptrace] oldexpr = t[0][0] next false if expr == oldexpr # unmodifying loop puts " bt loop at #{Expression[t[0][1]]}: #{oldexpr} => #{expr} (#{t.map { |z| Expression[z[1]] }.join(' <- ')})" if debug_backtrace bt_log << [:loop, expr, oldexpr, t.map { |z| z[1] }] if bt_log false when :up next false if only_upto and h[:to] != only_upto - next expr if expr.kind_of? StoppedExpr + next expr if expr.kind_of?(StoppedExpr) oldexpr = expr expr = backtrace_emu_blockup(h[:from], expr) puts " backtrace up #{Expression[h[:from]]}->#{Expression[h[:to]]} #{oldexpr}#{" => #{expr}" if expr != oldexpr}" if debug_backtrace bt_log << [:up, expr, oldexpr, h[:from], h[:to]] if bt_log if expr != oldexpr and vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, @decoded[h[:from]], origin, type, len, - maxdepth-h[:loopdetect].length, detached, snapshot_addr)) + maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr)) if snapshot_addr expr = StoppedExpr.new vals next expr else result |= vals @@ -1604,11 +1661,11 @@ else btf << new_btt end } - btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1) + btt = BacktraceTrace.new(expr, origin, origexpr, type, len, maxdepth-h[:loopdetect].length-1, cpu_context) btt.detached = true if detached if x = di_at(h[:from]) update_btf[x.block.backtracked_for, btt] end if x = @function[h[:from]] and h[:from] != :default @@ -1628,11 +1685,11 @@ end end end expr when :di, :func - next if expr.kind_of? StoppedExpr + next if expr.kind_of?(StoppedExpr) if not snapshot_addr and @cpu.backtrace_is_stack_address(expr) puts " not backtracking stack address #{expr}" if debug_backtrace next false end @@ -1651,11 +1708,11 @@ end bt_log << [ev, expr, oldexpr, h[:funcaddr], h[:addr]] if bt_log and expr != oldexpr end puts " backtrace #{h[:di] || Expression[h[:funcaddr]]} #{oldexpr} => #{expr}" if debug_backtrace and expr != oldexpr if vals = (no_check ? (!need_backtrace(expr, terminals) and [expr]) : backtrace_check_found(expr, - h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, snapshot_addr)) + h[:di], origin, type, len, maxdepth-h[:loopdetect].length, detached, cpu_context, snapshot_addr)) if snapshot_addr expr = StoppedExpr.new vals else result |= vals bt_log << [:found, vals, h[:addr]] if bt_log @@ -1683,11 +1740,11 @@ @cpu.backtrace_is_function_return(btt.expr, @decoded[btt.origin]) and retaddr = backtrace_emu_instr(di, btt.expr) and not need_backtrace(retaddr) puts " backtrace addrs_todo << #{Expression[retaddr]} from #{di} (funcret)" if debug_backtrace di.block.add_to_subfuncret normalize(retaddr) - if @decoded[funcaddr].kind_of? DecodedInstruction + if @decoded[funcaddr].kind_of?(DecodedInstruction) # check that all callers :saveip returns (eg recursive call that was resolved # before we found funcaddr was a function) @decoded[funcaddr].block.each_from_normal { |fm| if fdi = di_at(fm) and fdi.opcode.props[:saveip] and not fdi.block.to_subfuncret backtrace_check_funcret(btt, funcaddr, fm) @@ -1701,21 +1758,21 @@ # just before the 1st function block address in @addrs_todo (which is pop()ed by dasm_step) faddrlist = [] todo = [] di.block.each_to_normal { |t| todo << normalize(t) } while a = todo.pop - next if faddrlist.include? a or not get_section_at(a) + next if faddrlist.include?(a) or not get_section_at(a) faddrlist << a - if @decoded[a].kind_of? DecodedInstruction + if @decoded[a].kind_of?(DecodedInstruction) @decoded[a].block.each_to_samefunc(self) { |t| todo << normalize(t) } end end - idx = @addrs_todo.index(@addrs_todo.find { |r, i, sfr| faddrlist.include? normalize(r) }) || -1 - @addrs_todo.insert(idx, [retaddr, instraddr, true]) + idx = @addrs_todo.index(@addrs_todo.find { |aa| faddrlist.include? normalize(aa[:addr]) }) || -1 + @addrs_todo.insert(idx, { :addr => retaddr, :from => instraddr, :from_subfuncret => true, :cpu_context => btt.cpu_context }) else - @addrs_todo << [retaddr, instraddr, true] + @addrs_todo << { :addr => retaddr, :from => instraddr, :from_subfuncret => true, :cpu_context => btt.cpu_context } end true end end @@ -1750,11 +1807,11 @@ end # returns true if the expression needs more backtrace # it checks for the presence of a symbol (not :unknown), which means it depends on some register value def need_backtrace(expr, terminals=[]) - return if expr.kind_of? ::Integer + return if expr.kind_of?(::Integer) !(expr.externals.grep(::Symbol) - [:unknown] - terminals).empty? end # returns an array of expressions, or nil if expr needs more backtrace # it needs more backtrace if expr.externals include a Symbol != :unknown (symbol == register value) @@ -1768,11 +1825,11 @@ # XXX global variable (modified by another function), exported data, multithreaded app.. # TODO handle memory aliasing (mov ebx, eax ; write [ebx] ; read [eax]) # TODO trace expr evolution through backtrace, to modify immediates to an expr involving label names # TODO mov [ptr], imm ; <...> ; jmp [ptr] => rename imm as loc_XX # eg. mov eax, 42 ; add eax, 4 ; jmp eax => mov eax, some_label-4 - def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, snapshot_addr=nil) + def backtrace_check_found(expr, di, origin, type, len, maxdepth, detached, cpu_context, snapshot_addr=nil) # only entrypoints or block starts called by a :saveip are checked for being a function # want to execute [esp] from a block start if type == :x and di and di == di.block.list.first and @cpu.backtrace_is_function_return(expr, @decoded[origin]) and ( # which is an entrypoint.. (not di.block.from_normal and not di.block.from_subfuncret) or @@ -1810,11 +1867,11 @@ #result << expr if not type # XXX returning multiple values for nothing is too confusing, TODO fix decompiler result.uniq! # create xrefs/labels result.each { |e| - backtrace_found_result(e, di, type, origin, len, detached) + backtrace_found_result(e, di, type, origin, len, detached, cpu_context) } if type and origin result end @@ -1939,11 +1996,11 @@ ret end # creates xrefs, updates addrs_todo, updates instr args - def backtrace_found_result(expr, di, type, origin, len, detached) + def backtrace_found_result(expr, di, type, origin, len, detached, cpu_context) n = normalize(expr) fallthrough = true if type == :x and o = di_at(origin) and not o.opcode.props[:stopexec] and n == o.block.list.last.next_addr # delay_slot add_xref(n, Xref.new(type, origin, len)) if origin != :default and origin != Expression::Unknown and not fallthrough unk = true if n == Expression::Unknown @@ -1998,11 +2055,11 @@ origin = nil @decoded[o].block.add_to_indirect(normalize(n)) if @decoded[o] and not unk else @decoded[origin].block.add_to_normal(normalize(n)) if @decoded[origin] and not unk end - @addrs_todo << [n, origin] + @addrs_todo << { :addr => n, :from => origin, :cpu_context => cpu_context } end end def inspect "<Metasm::Disassembler @%x>" % object_id @@ -2012,16 +2069,16 @@ a = '' dump { |l| a << l << "\n" } a end - # dumps the source, optionnally including data + # dumps the source, optionally including data # yields (defaults puts) each line def dump(dump_data=true, &b) b ||= lambda { |l| puts l } @sections.sort_by { |addr, edata| addr.kind_of?(::Integer) ? addr : 0 }.each { |addr, edata| - addr = Expression[addr] if addr.kind_of? ::String + addr = Expression[addr] if addr.kind_of?(::String) blockoffs = @decoded.values.grep(DecodedInstruction).map { |di| Expression[di.block.address, :-, addr].reduce if di.block_head? }.grep(::Integer).sort.reject { |o| o < 0 or o >= edata.length } b[@program.dump_section_header(addr, edata)] if not dump_data and edata.length > 16*1024 and blockoffs.empty? b["// [#{edata.length} data bytes]"] next @@ -2032,11 +2089,11 @@ if unk_off == blockoffs.first blockoffs.shift di = @decoded[addr+unk_off] if unk_off != di.block.edata_ptr b["\n// ------ overlap (#{unk_off-di.block.edata_ptr}) ------"] - elsif di.block.from_normal.kind_of? ::Array + elsif di.block.from_normal.kind_of?(::Array) b["\n"] end dump_block(di.block, &b) unk_off += [di.block.bin_length, 1].max unk_off = blockoffs.first if blockoffs.first and unk_off > blockoffs.first @@ -2077,11 +2134,11 @@ if block.edata.inv_export[block.edata_ptr] and label_alias[block.address] b["\n"] if xr.empty? label_alias[block.address].each { |name| b["#{name}:"] } end if c = @comment[block.address] - c = c.join("\n") if c.kind_of? ::Array + c = c.join("\n") if c.kind_of?(::Array) c.each_line { |l| b["// #{l}"] } end end # dumps data/labels, honours @xrefs.len if exists @@ -2122,15 +2179,15 @@ # dup(?) if off >= edata.data.length dups = edata.virtsize - off @prog_binding.each_value { |a| tmp = Expression[a, :-, addr].reduce - dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups + dups = tmp if tmp.kind_of?(::Integer) and tmp > 0 and tmp < dups } @xrefs.each_key { |a| tmp = Expression[a, :-, addr].reduce - dups = tmp if tmp.kind_of? ::Integer and tmp > 0 and tmp < dups + dups = tmp if tmp.kind_of?(::Integer) and tmp > 0 and tmp < dups } dups /= elemlen dups = 1 if dups < 1 b[(l + "#{dups} dup(?)").ljust(48) << cmt] return off + dups*elemlen @@ -2172,20 +2229,20 @@ # recognize strings vals = vals.inject([]) { |vals_, value| if (elemlen == 1 or elemlen == 2) case value when 0x20..0x7e, 0x0a, 0x0d - if vals_.last.kind_of? ::String; vals_.last << value ; vals_ + if vals_.last.kind_of?(::String); vals_.last << value ; vals_ else vals_ << value.chr end else vals_ << value end else vals_ << value end } vals.map! { |value| - if value.kind_of? ::String + if value.kind_of?(::String) if value.length > 2 # or value == vals.first or value == vals.last # if there is no xref, don't care value.inspect else value.unpack('C*').map { |c| Expression[c] } end