# This file is part of Metasm, the Ruby assembly manipulation suite # Copyright (C) 2006-2009 Yoann GUILLOT # # Licence is LGPL, see LICENCE in the top-level directory # this file compliments disassemble.rb, adding misc user-friendly methods module Metasm class InstructionBlock # adds an address to the from_normal/from_subfuncret list def add_from(addr, type=:normal) send "add_from_#{type}", addr end def add_from_normal(addr) @from_normal ||= [] @from_normal |= [addr] end def add_from_subfuncret(addr) @from_subfuncret ||= [] @from_subfuncret |= [addr] end def add_from_indirect(addr) @from_indirect ||= [] @from_indirect |= [addr] end # iterates over every from address, yields [address, type in [:normal, :subfuncret, :indirect]] def each_from each_from_normal { |a| yield a, :normal } each_from_subfuncret { |a| yield a, :subfuncret } each_from_indirect { |a| yield a, :indirect } end def each_from_normal(&b) @from_normal.each(&b) if from_normal end def each_from_subfuncret(&b) @from_subfuncret.each(&b) if from_subfuncret end def each_from_indirect(&b) @from_indirect.each(&b) if from_indirect end def add_to(addr, type=:normal) send "add_to_#{type}", addr end def add_to_normal(addr) @to_normal ||= [] @to_normal |= [addr] end def add_to_subfuncret(addr) @to_subfuncret ||= [] @to_subfuncret |= [addr] end def add_to_indirect(addr) @to_indirect ||= [] @to_indirect |= [addr] end def each_to each_to_normal { |a| yield a, :normal } each_to_subfuncret { |a| yield a, :subfuncret } each_to_indirect { |a| yield a, :indirect } end def each_to_normal(&b) @to_normal.each(&b) if to_normal end def each_to_subfuncret(&b) @to_subfuncret.each(&b) if to_subfuncret end def each_to_indirect(&b) @to_indirect.each(&b) if to_indirect end # yields all from that are from the same function def each_from_samefunc(dasm, &b) return if dasm.function[address] @from_subfuncret.each(&b) if from_subfuncret @from_normal.each(&b) if from_normal end # yields all from that are not in the same subfunction as this block def each_from_otherfunc(dasm, &b) @from_normal.each(&b) if from_normal and dasm.function[address] @from_subfuncret.each(&b) if from_subfuncret and dasm.function[address] @from_indirect.each(&b) if from_indirect end # yields all to that are in the same subfunction as this block def each_to_samefunc(dasm) each_to { |to, type| next if type != :normal and type != :subfuncret to = dasm.normalize(to) yield to if not dasm.function[to] } end # yields all to that are not in the same subfunction as this block def each_to_otherfunc(dasm) each_to { |to, type| to = dasm.normalize(to) yield to if type == :indirect or dasm.function[to] or not dasm.decoded[to] } end # returns the array used in each_from_samefunc def from_samefunc(dasm) ary = [] each_from_samefunc(dasm) { |a| ary << a } ary end def from_otherfunc(dasm) ary = [] each_from_otherfunc(dasm) { |a| ary << a } ary end def to_samefunc(dasm) ary = [] each_to_samefunc(dasm) { |a| ary << a } ary end def to_otherfunc(dasm) ary = [] each_to_otherfunc(dasm) { |a| ary << a } ary end end class DecodedInstruction # checks if this instruction is the first of its IBlock def block_head? self == @block.list.first end end class CPU # compat alias, for scripts using older version of metasm def get_backtrace_binding(di) backtrace_binding(di) end end class Disassembler # access the default value for @@backtrace_maxblocks for newly created Disassemblers def self.backtrace_maxblocks ; @@backtrace_maxblocks ; end def self.backtrace_maxblocks=(b) ; @@backtrace_maxblocks = b ; end # adds a commentary at the given address # comments are found in the array @comment: {addr => [list of strings]} def add_comment(addr, cmt) @comment[addr] ||= [] @comment[addr] |= [cmt] end # returns the 1st element of #get_section_at (ie the edata at a given address) or nil def get_edata_at(*a) if s = get_section_at(*a) s[0] end end # returns the DecodedInstruction at addr if it exists def di_at(addr) di = @decoded[addr] || @decoded[normalize(addr)] if addr di if di.kind_of? DecodedInstruction end # returns the InstructionBlock containing the address at addr def block_at(addr) di = di_at(addr) di.block if di end # returns the DecodedFunction at addr if it exists def function_at(addr) f = @function[addr] || @function[normalize(addr)] if addr f if f.kind_of? DecodedFunction end # returns the DecodedInstruction covering addr # returns one at starting nearest addr if multiple are available (overlapping instrs) def di_including(addr) return if not addr addr = normalize(addr) if off = (0...16).find { |o| @decoded[addr-o].kind_of? DecodedInstruction and @decoded[addr-o].bin_length > o } @decoded[addr-off] end end # returns the InstructionBlock containing the byte at addr # returns the one of di_including() on multiple matches (overlapping instrs) def block_including(addr) di = di_including(addr) di.block if di end # returns the DecodedFunction including this byte # return the one of find_function_start() if multiple are possible (block shared by multiple funcs) def function_including(addr) return if not di = di_including(addr) function_at(find_function_start(di.address)) end # yields every InstructionBlock # returns the list of IBlocks def each_instructionblock(&b) ret = [] @decoded.each { |addr, di| next if not di.kind_of? DecodedInstruction or not di.block_head? ret << di.block b.call(di.block) if b } ret end alias instructionblocks each_instructionblock # return a backtrace_binding reversed (akin to code emulation) (but not really) def get_fwdemu_binding(di, pc=nil) @cpu.get_fwdemu_binding(di, pc) end # reads len raw bytes from the mmaped address space def read_raw_data(addr, len) if e = get_section_at(addr) e[0].read(len) end end # read an int of arbitrary type (:u8, :i32, ...) def decode_int(addr, type) type = "u#{type*8}".to_sym if type.kind_of? Integer if e = get_section_at(addr) e[0].decode_imm(type, @cpu.endianness) end end # read a byte at address addr def decode_byte(addr) decode_int(addr, :u8) end # read a dword at address addr # the dword is cpu-sized (eg 32 or 64bits) def decode_dword(addr) decode_int(addr, @cpu.size/8) end # read a zero-terminated string from addr # if no terminal 0 is found, return nil def decode_strz(addr, maxsz=4096) if e = get_section_at(addr) str = e[0].read(maxsz).to_s return if not len = str.index(?\0) str[0, len] end end # read a zero-terminated wide string from addr # return nil if no terminal found def decode_wstrz(addr, maxsz=4096) if e = get_section_at(addr) str = e[0].read(maxsz).to_s return if not len = str.unpack('v*').index(0) str[0, 2*len] end end # disassembles one instruction at address # returns nil if no instruction can be decoded there # does not update any internal state of the disassembler, nor reuse the @decoded cache def disassemble_instruction(addr) if e = get_section_at(addr) @cpu.decode_instruction(e[0], normalize(addr)) end end # disassemble addr as if the code flow came from from_addr def disassemble_from(addr, from_addr) from_addr = from_addr.address if from_addr.kind_of? DecodedInstruction from_addr = normalize(from_addr) if b = block_at(from_addr) b.add_to_normal(addr) end @addrs_todo << [addr, from_addr] disassemble end # returns the label associated to an addr, or nil if none exist def get_label_at(addr) e = get_edata_at(addr, false) e.inv_export[e.ptr] if e end # sets the label for the specified address # returns nil if the address is not mapped # memcheck is passed to get_section_at to validate that the address is mapped # keep existing label if 'overwrite' is false def set_label_at(addr, name, memcheck=true, overwrite=true) addr = Expression[addr].reduce e, b = get_section_at(addr, memcheck) if not e elsif not l = e.inv_export[e.ptr] or (!overwrite and l != name) l = @program.new_label(name) e.add_export l, e.ptr @label_alias_cache = nil @old_prog_binding[l] = @prog_binding[l] = b + e.ptr elsif l != name l = rename_label l, @program.new_label(name) end l end # remove a label at address addr def del_label_at(addr, name=get_label_at(addr)) ed = get_edata_at(addr) if ed and ed.inv_export[ed.ptr] ed.del_export name, ed.ptr @label_alias_cache = nil end each_xref(addr) { |xr| next if not xr.origin or not o = @decoded[xr.origin] or not o.kind_of? Renderable o.each_expr { |e| next unless e.kind_of?(Expression) e.lexpr = addr if e.lexpr == name e.rexpr = addr if e.rexpr == name } } @old_prog_binding.delete name @prog_binding.delete name end # changes a label to another, updates referring instructions etc # returns the new label # the new label must be program-uniq (see @program.new_label) def rename_label(old, new) return new if old == new raise "label #{new.inspect} exists" if @prog_binding[new] each_xref(normalize(old)) { |x| next if not di = @decoded[x.origin] @cpu.replace_instr_arg_immediate(di.instruction, old, new) di.comment.to_a.each { |c| c.gsub!(old, new) } } e = get_edata_at(old, false) if e e.add_export new, e.export.delete(old), true end raise "cant rename nonexisting label #{old}" if not @prog_binding[old] @label_alias_cache = nil @old_prog_binding[new] = @prog_binding[new] = @prog_binding.delete(old) @addrs_todo.each { |at| case at[0] when old; at[0] = new when Expression; at[0] = at[0].bind(old => new) end } if @inv_section_reloc[old] @inv_section_reloc[old].each { |b, e_, o, r| (0..16).each { |off| if di = @decoded[Expression[b]+o-off] and di.bin_length > off @cpu.replace_instr_arg_immediate(di.instruction, old, new) end } r.target = r.target.bind(old => new) } @inv_section_reloc[new] = @inv_section_reloc.delete(old) end if c_parser and @c_parser.toplevel.symbol[old] @c_parser.toplevel.symbol[new] = @c_parser.toplevel.symbol.delete(old) @c_parser.toplevel.symbol[new].name = new end new end # finds the start of a function from the address of an instruction def find_function_start(addr) addr = addr.address if addr.kind_of? DecodedInstruction todo = [addr] done = [] while a = todo.pop a = normalize(a) di = @decoded[a] next if done.include? a or not di.kind_of? DecodedInstruction done << a a = di.block.address break a if @function[a] l = [] di.block.each_from_samefunc(self) { |f| l << f } break a if l.empty? todo.concat l end end # iterates over the blocks of a function, yields each func block address # returns the graph of blocks (block address => [list of samefunc blocks]) def each_function_block(addr, incl_subfuncs = false, find_func_start = true) addr = @function.index(addr) if addr.kind_of? DecodedFunction addr = addr.address if addr.kind_of? DecodedInstruction addr = find_function_start(addr) if not @function[addr] and find_func_start todo = [addr] ret = {} while a = todo.pop next if not di = di_at(a) a = di.block.address next if ret[a] ret[a] = [] yield a if block_given? di.block.each_to_samefunc(self) { |f| ret[a] << f ; todo << f } di.block.each_to_otherfunc(self) { |f| ret[a] << f ; todo << f } if incl_subfuncs end ret end alias function_blocks each_function_block # returns a graph of function calls # for each func passed as arg (default: all), update the 'ret' hash # associating func => [list of direct subfuncs called] def function_graph(funcs = @function.keys + @entrypoints.to_a, ret={}) funcs = funcs.map { |f| normalize(f) }.uniq.find_all { |f| @decoded[f] } funcs.each { |f| next if ret[f] ret[f] = [] each_function_block(f) { |b| @decoded[b].block.each_to_otherfunc(self) { |sf| ret[f] |= [sf] } } } ret end # return the graph of function => subfunction list # recurses from an entrypoint def function_graph_from(addr) addr = normalize(addr) addr = find_function_start(addr) || addr ret = {} osz = ret.length-1 while ret.length != osz osz = ret.length function_graph(ret.values.flatten + [addr], ret) end ret end # return the graph of function => subfunction list # for which a (sub-sub)function includes addr def function_graph_to(addr) addr = normalize(addr) addr = find_function_start(addr) || addr full = function_graph ret = {} todo = [addr] done = [] while a = todo.pop next if done.include? a done << a full.each { |f, sf| next if not sf.include? a ret[f] ||= [] ret[f] |= [a] todo << f } end ret end # returns info on sections, from @program if supported # returns an array of [name, addr, length, info] def section_info if @program.respond_to? :section_info @program.section_info else list = [] @sections.each { |k, v| list << [get_label_at(k), normalize(k), v.length, nil] } list end end # transform an address into a file offset def addr_to_fileoff(addr) addr = normalize(addr) @program.addr_to_fileoff(addr) end # transform a file offset into an address def fileoff_to_addr(foff) @program.fileoff_to_addr(foff) end # remove the decodedinstruction from..to, replace them by the new Instructions in 'by' # this updates the block list structure, old di will still be visible in @decoded, except from original block (those are deleted) # if from..to spans multiple blocks # to.block is splitted after to # all path from from are replaced by a single link to after 'to', be careful ! # (eg a->b->... & a->c ; from in a, to in c => a->b is lost) # all instructions are stuffed in the first block # paths are only walked using from/to_normal # 'by' may be empty # returns the block containing the new instrs (nil if empty) def replace_instrs(from, to, by, patch_by=false) raise 'bad from' if not fdi = di_at(from) or not fdi.block.list.index(fdi) raise 'bad to' if not tdi = di_at(to) or not tdi.block.list.index(tdi) # create DecodedInstruction from Instructions in 'by' if needed split_block(fdi.block, fdi.address) split_block(tdi.block, tdi.block.list[tdi.block.list.index(tdi)+1].address) if tdi != tdi.block.list.last fb = fdi.block tb = tdi.block # generate DecodedInstr from Instrs # try to keep the bin_length of original block wantlen = tdi.address + tdi.bin_length - fb.address wantlen -= by.grep(DecodedInstruction).inject(0) { |len, di| len + di.bin_length } ldi = by.last ldi = DecodedInstruction.new(ldi) if ldi.kind_of? Instruction nb_i = by.grep(Instruction).length wantlen = nb_i if wantlen < 0 or (ldi and ldi.opcode.props[:setip]) if patch_by by.map! { |di| if di.kind_of? Instruction di = DecodedInstruction.new(di) wantlen -= di.bin_length = wantlen / by.grep(Instruction).length nb_i -= 1 end di } else by = by.map { |di| if di.kind_of? Instruction di = DecodedInstruction.new(di) wantlen -= (di.bin_length = wantlen / nb_i) nb_i -= 1 end di } end #puts " ** patch next_addr to #{Expression[tb.list.last.next_addr]}" if not by.empty? and by.last.opcode.props[:saveip] by.last.next_addr = tb.list.last.next_addr if not by.empty? and by.last.opcode.props[:saveip] fb.list.each { |di| @decoded.delete di.address } fb.list.clear tb.list.each { |di| @decoded.delete di.address } tb.list.clear by.each { |di| fb.add_di di } by.each_with_index { |di, i| if odi = di_at(di.address) # collision, hopefully with another deobfuscation run ? if by[i..-1].all? { |mydi| mydi.to_s == @decoded[mydi.address].to_s } puts "replace_instrs: merge at #{di}" if $DEBUG by[i..-1] = by[i..-1].map { |xdi| @decoded[xdi.address] } by[i..-1].each { fb.list.pop } split_block(odi.block, odi.address) tb.to_normal = [di.address] (odi.block.from_normal ||= []) << to odi.block.from_normal.uniq! break else #raise "replace_instrs: collision #{di} vs #{odi}" puts "replace_instrs: collision #{di} vs #{odi}" if $VERBOSE while @decoded[di.address].kind_of? DecodedInstruction # find free space.. raise ? di.address += 1 # XXX use floats ? di.bin_length -= 1 end end end @decoded[di.address] = di } @addrs_done.delete_if { |ad| normalize(ad[0]) == tb.address or ad[1] == tb.address } @addrs_done.delete_if { |ad| normalize(ad[0]) == fb.address or ad[1] == fb.address } if by.empty? and tb.address != fb.address # update to_normal/from_normal fb.to_normal = tb.to_normal fb.to_normal.to_a.each { |newto| # other paths may already point to newto, we must only update the relevant entry if ndi = di_at(newto) and idx = ndi.block.from_normal.to_a.index(to) if by.empty? ndi.block.from_normal[idx,1] = fb.from_normal.to_a else ndi.block.from_normal[idx] = fb.list.last.address end end } fb.to_subfuncret = tb.to_subfuncret fb.to_subfuncret.to_a.each { |newto| if ndi = di_at(newto) and idx = ndi.block.from_subfuncret.to_a.index(to) if by.empty? ndi.block.from_subfuncret[idx,1] = fb.from_subfuncret.to_a else ndi.block.from_subfuncret[idx] = fb.list.last.address end end } if by.empty? tb.to_subfuncret = nil if tb.to_subfuncret == [] tolist = tb.to_subfuncret || tb.to_normal.to_a if lfrom = get_label_at(fb.address) and tolist.length == 1 lto = auto_label_at(tolist.first) each_xref(fb.address, :x) { |x| next if not di = @decoded[x.origin] @cpu.replace_instr_arg_immediate(di.instruction, lfrom, lto) di.comment.to_a.each { |c| c.gsub!(lfrom, lto) } } end fb.from_normal.to_a.each { |newfrom| if ndi = di_at(newfrom) and idx = ndi.block.to_normal.to_a.index(from) ndi.block.to_normal[idx..idx] = tolist end } fb.from_subfuncret.to_a.each { |newfrom| if ndi = di_at(newfrom) and idx = ndi.block.to_subfuncret.to_a.index(from) ndi.block.to_subfuncret[idx..idx] = tolist end } else # merge with adjacent blocks merge_blocks(fb, fb.to_normal.first) if fb.to_normal.to_a.length == 1 and di_at(fb.to_normal.first) merge_blocks(fb.from_normal.first, fb) if fb.from_normal.to_a.length == 1 and di_at(fb.from_normal.first) end fb if not by.empty? end # undefine a sequence of decodedinstructions from an address # stops at first non-linear branch # removes @decoded, @comments, @xrefs, @addrs_done # does not update @prog_binding (does not undefine labels) def undefine_from(addr) return if not di_at(addr) @comment.delete addr if @function.delete addr split_block(addr) addrs = [] while di = di_at(addr) di.block.list.each { |ddi| addrs << ddi.address } break if di.block.to_subfuncret.to_a != [] or di.block.to_normal.to_a.length != 1 addr = di.block.to_normal.first break if ndi = di_at(addr) and ndi.block.from_normal.to_a.length != 1 end addrs.each { |a| @decoded.delete a } @xrefs.delete_if { |a, x| if not x.kind_of? Array true if x and addrs.include? x.origin else x.delete_if { |xx| addrs.include? xx.origin } true if x.empty? end } @addrs_done.delete_if { |ad| !(addrs & [normalize(ad[0]), normalize(ad[1])]).empty? } end # merge two instruction blocks if they form a simple chain and are adjacent # returns true if merged def merge_blocks(b1, b2, allow_nonadjacent = false) if b1 and not b1.kind_of? InstructionBlock return if not b1 = block_at(b1) end if b2 and not b2.kind_of? InstructionBlock return if not b2 = block_at(b2) end if b1 and b2 and (allow_nonadjacent or b1.list.last.next_addr == b2.address) and b1.to_normal.to_a == [b2.address] and b2.from_normal.to_a.length == 1 and # that handles delay_slot b1.to_subfuncret.to_a == [] and b2.from_subfuncret.to_a == [] and b1.to_indirect.to_a == [] and b2.from_indirect.to_a == [] b2.list.each { |di| b1.add_di di } b1.to_normal = b2.to_normal b2.list.clear @addrs_done.delete_if { |ad| normalize(ad[0]) == b2.address } true end end # computes the binding of a code sequence # just a forwarder to CPU#code_binding def code_binding(*a) @cpu.code_binding(self, *a) end # returns an array of instructions/label that, once parsed and assembled, should # give something equivalent to the code accessible from the (list of) entrypoints given # from the @decoded dasm graph # assume all jump targets have a matching label in @prog_binding # may add inconditionnal jumps in the listing to preserve the code flow def flatten_graph(entry, include_subfunc=true) ret = [] entry = [entry] if not entry.kind_of? Array todo = entry.map { |a| normalize(a) } done = [] inv_binding = @prog_binding.invert while addr = todo.pop next if done.include? addr or not di_at(addr) done << addr b = @decoded[addr].block ret << Label.new(inv_binding[addr]) if inv_binding[addr] ret.concat b.list.map { |di| di.instruction } b.each_to_otherfunc(self) { |to| to = normalize to todo.unshift to if include_subfunc } b.each_to_samefunc(self) { |to| to = normalize to todo << to } if not di = b.list[-1-@cpu.delay_slot] or not di.opcode.props[:stopexec] or di.opcode.props[:saveip] to = b.list.last.next_addr if todo.include? to if done.include? to or not di_at(to) if not to_l = inv_binding[to] to_l = auto_label_at(to, 'loc') if done.include? to and idx = ret.index(@decoded[to].block.list.first.instruction) ret.insert(idx, Label.new(to_l)) end end ret << @cpu.instr_uncond_jump_to(to_l) else todo << to # ensure it's next in the listing end end end end ret end # returns a demangled C++ name def demangle_cppname(name) case name[0] when ?? # MSVC name = name[1..-1] demangle_msvc(name[1..-1]) if name[0] == ?? when ?_ name = name.sub(/_GLOBAL__[ID]_/, '') demangle_gcc(name[2..-1][/\S*/]) if name[0, 2] == '_Z' end end # from wgcc-2.2.2/undecorate.cpp # TODO def demangle_msvc(name) op = name[0, 1] op = name[0, 2] if op == '_' if op = { '2' => "new", '3' => "delete", '4' => "=", '5' => ">>", '6' => "<<", '7' => "!", '8' => "==", '9' => "!=", 'A' => "[]", 'C' => "->", 'D' => "*", 'E' => "++", 'F' => "--", 'G' => "-", 'H' => "+", 'I' => "&", 'J' => "->*", 'K' => "/", 'L' => "%", 'M' => "<", 'N' => "<=", 'O' => ">", 'P' => ">=", 'Q' => ",", 'R' => "()", 'S' => "~", 'T' => "^", 'U' => "|", 'V' => "&&", 'W' => "||", 'X' => "*=", 'Y' => "+=", 'Z' => "-=", '_0' => "/=", '_1' => "%=", '_2' => ">>=", '_3' => "<<=", '_4' => "&=", '_5' => "|=", '_6' => "^=", '_7' => "`vftable'", '_8' => "`vbtable'", '_9' => "`vcall'", '_A' => "`typeof'", '_B' => "`local static guard'", '_C' => "`string'", '_D' => "`vbase destructor'", '_E' => "`vector deleting destructor'", '_F' => "`default constructor closure'", '_G' => "`scalar deleting destructor'", '_H' => "`vector constructor iterator'", '_I' => "`vector destructor iterator'", '_J' => "`vector vbase constructor iterator'", '_K' => "`virtual displacement map'", '_L' => "`eh vector constructor iterator'", '_M' => "`eh vector destructor iterator'", '_N' => "`eh vector vbase constructor iterator'", '_O' => "`copy constructor closure'", '_S' => "`local vftable'", '_T' => "`local vftable constructor closure'", '_U' => "new[]", '_V' => "delete[]", '_X' => "`placement delete closure'", '_Y' => "`placement delete[] closure'"}[op] op[0] == ?` ? op[1..-2] : "op_#{op}" end end # from http://www.codesourcery.com/public/cxx-abi/abi.html def demangle_gcc(name) subs = [] ret = '' decode_tok = lambda { name ||= '' case name[0] when nil ret = nil when ?N name = name[1..-1] decode_tok[] until name[0] == ?E break if not ret ret << '::' decode_tok[] end name = name[1..-1] when ?I name = name[1..-1] ret = ret[0..-3] if ret[-2, 2] == '::' ret << '<' decode_tok[] until name[0] == ?E break if not ret ret << ', ' decode_tok[] end ret << ' ' if ret and ret[-1] == ?> ret << '>' if ret name = name[1..-1] when ?T case name[1] when ?T; ret << 'vtti(' when ?V; ret << 'vtable(' when ?I; ret << 'typeinfo(' when ?S; ret << 'typename(' else ret = nil end name = name[2..-1].to_s decode_tok[] if ret ret << ')' if ret name = name[1..-1] if name[0] == ?E when ?C name = name[2..-1] base = ret[/([^:]*)(<.*|::)?$/, 1] ret << base when ?D name = name[2..-1] base = ret[/([^:]*)(<.*|::)?$/, 1] ret << '~' << base when ?0..?9 nr = name[/^[0-9]+/] name = name[nr.length..-1].to_s ret << name[0, nr.to_i] name = name[nr.to_i..-1] subs << ret[/[\w:]*$/] when ?S name = name[1..-1] case name[0] when ?_, ?0..?9, ?A..?Z case name[0] when ?_; idx = 0 ; name = name[1..-1] when ?0..?9; idx = name[0, 1].unpack('C')[0] - 0x30 + 1 ; name = name[2..-1] when ?A..?Z; idx = name[0, 1].unpack('C')[0] - 0x41 + 11 ; name = name[2..-1] end if not subs[idx] ret = nil else ret << subs[idx] end when ?t ret << 'std::' name = name[1..-1] decode_tok[] else std = { ?a => 'std::allocator', ?b => 'std::basic_string', ?s => 'std::string', # 'std::basic_string < char, std::char_traits, std::allocator >', ?i => 'std::istream', # 'std::basic_istream >', ?o => 'std::ostream', # 'std::basic_ostream >', ?d => 'std::iostream', # 'std::basic_iostream >' }[name[0]] if not std ret = nil else ret << std end name = name[1..-1] end when ?P, ?R, ?r, ?V, ?K attr = { ?P => '*', ?R => '&', ?r => ' restrict', ?V => ' volatile', ?K => ' const' }[name[0]] name = name[1..-1] rl = ret.length decode_tok[] if ret ret << attr subs << ret[rl..-1] end else if ret =~ /[(<]/ and ty = { ?v => 'void', ?w => 'wchar_t', ?b => 'bool', ?c => 'char', ?a => 'signed char', ?h => 'unsigned char', ?s => 'short', ?t => 'unsigned short', ?i => 'int', ?j => 'unsigned int', ?l => 'long', ?m => 'unsigned long', ?x => '__int64', ?y => 'unsigned __int64', ?n => '__int128', ?o => 'unsigned __int128', ?f => 'float', ?d => 'double', ?e => 'long double', ?g => '__float128', ?z => '...' }[name[0]] name = name[1..-1] ret << ty else fu = name[0, 2] name = name[2..-1] if op = { 'nw' => ' new', 'na' => ' new[]', 'dl' => ' delete', 'da' => ' delete[]', 'ps' => '+', 'ng' => '-', 'ad' => '&', 'de' => '*', 'co' => '~', 'pl' => '+', 'mi' => '-', 'ml' => '*', 'dv' => '/', 'rm' => '%', 'an' => '&', 'or' => '|', 'eo' => '^', 'aS' => '=', 'pL' => '+=', 'mI' => '-=', 'mL' => '*=', 'dV' => '/=', 'rM' => '%=', 'aN' => '&=', 'oR' => '|=', 'eO' => '^=', 'ls' => '<<', 'rs' => '>>', 'lS' => '<<=', 'rS' => '>>=', 'eq' => '==', 'ne' => '!=', 'lt' => '<', 'gt' => '>', 'le' => '<=', 'ge' => '>=', 'nt' => '!', 'aa' => '&&', 'oo' => '||', 'pp' => '++', 'mm' => '--', 'cm' => ',', 'pm' => '->*', 'pt' => '->', 'cl' => '()', 'ix' => '[]', 'qu' => '?', 'st' => ' sizeof', 'sz' => ' sizeof', 'at' => ' alignof', 'az' => ' alignof' }[fu] ret << "operator#{op}" elsif fu == 'cv' ret << "cast<" decode_tok[] ret << ">" if ret else ret = nil end end end name ||= '' } decode_tok[] subs.pop if ret and name != '' ret << '(' decode_tok[] while ret and name != '' ret << ', ' decode_tok[] end ret << ')' if ret end ret end # scans all the sections raw for a given regexp # return/yields all the addresses matching # if yield returns nil/false, do not include the addr in the final result # sections are scanned MB by MB, so this should work (slowly) on 4GB sections (eg debugger VM) # with addr_start/length, symbol-based section are skipped def pattern_scan(pat, addr_start=nil, length=nil, chunksz=nil, margin=nil, &b) chunksz ||= 4*1024*1024 # scan 4MB at a time margin ||= 65536 # add this much bytes at each chunk to find /pat/ over chunk boundaries pat = Regexp.new(Regexp.escape(pat)) if pat.kind_of? ::String found = [] @sections.each { |sec_addr, e| if addr_start length ||= 0x1000_0000 begin if sec_addr < addr_start next if sec_addr+e.length <= addr_start e = e[addr_start-sec_addr, e.length] sec_addr = addr_start end if sec_addr+e.length > addr_start+length next if sec_addr > addr_start+length e = e[0, sec_addr+e.length-(addr_start+length)] end rescue puts $!, $!.message, $!.backtrace if $DEBUG # catch arithmetic error with symbol-based section next end end e.pattern_scan(pat, chunksz, margin) { |eo| match_addr = sec_addr + eo found << match_addr if not b or b.call(match_addr) false } } found end # returns/yields [addr, string] found using pattern_scan /[\x20-\x7e]/ def strings_scan(minlen=6, &b) ret = [] nexto = 0 pattern_scan(/[\x20-\x7e]{#{minlen},}/m, nil, 1024) { |o| if o - nexto > 0 next unless e = get_edata_at(o) str = e.data[e.ptr, 1024][/[\x20-\x7e]{#{minlen},}/m] ret << [o, str] if not b or b.call(o, str) nexto = o + str.length end } ret end # exports the addr => symbol map (see load_map) def save_map @prog_binding.map { |l, o| type = di_at(o) ? 'c' : 'd' # XXX o = o.to_s(16).rjust(8, '0') if o.kind_of? ::Integer "#{o} #{type} #{l}" } end # loads a map file (addr => symbol) # off is an optionnal offset to add to every address found (for eg rebased binaries) # understands: # standard map files (eg linux-kernel.map: , e.g. 'c01001ba t setup_idt') # ida map files (: ) # arg is either the map itself or the filename of the map (if it contains no newline) def load_map(str, off=0) str = File.read(str) rescue nil if not str.index("\n") sks = @sections.keys.sort seen = {} str.each_line { |l| case l.strip when /^([0-9A-F]+)\s+(\w+)\s+(\w+)/i # kernel.map style addr = $1.to_i(16)+off set_label_at(addr, $3, false, !seen[addr]) seen[addr] = true when /^([0-9A-F]+):([0-9A-F]+)\s+([a-z_]\w+)/i # IDA style # we do not have section load order, let's just hope that the addresses are sorted (and sortable..) # could check the 1st part of the file, with section sizes, but it is not very convenient # the regexp is so that we skip the 1st part with section descriptions # in the file, section 1 is the 1st section ; we have an additionnal section (exe header) which fixes the 0-index # XXX this is PE-specific, TODO fix it for ELF (ida references sections, we reference segments...) addr = sks[$1.to_i(16)] + $2.to_i(16) + off set_label_at(addr, $3, false, !seen[addr]) seen[addr] = true end } end # saves the dasm state in a file def save_file(file) tmpfile = file + '.tmp' File.open(tmpfile, 'wb') { |fd| save_io(fd) } File.rename tmpfile, file end # saves the dasm state to an IO def save_io(fd) fd.puts 'Metasm.dasm' if @program.filename and not @program.kind_of?(Shellcode) t = @program.filename.to_s fd.puts "binarypath #{t.length}", t else t = "#{@cpu.class.name.sub(/.*::/, '')} #{@cpu.size} #{@cpu.endianness}" fd.puts "cpu #{t.length}", t # XXX will be reloaded as a Shellcode with this CPU, but it may be a custom EXE # do not output binarypath, we'll be loaded as a Shellcode, 'section' will suffice end @sections.each { |a, e| # forget edata exports/relocs # dump at most 16Mo per section t = "#{Expression[a]} #{e.length}\n" + [e.data[0, 2**24].to_str].pack('m*') fd.puts "section #{t.length}", t } t = save_map.join("\n") fd.puts "map #{t.length}", t t = @decoded.map { |a, d| next if not d.kind_of? DecodedInstruction "#{Expression[a]},#{d.bin_length} #{d.instruction}#{" ; #{d.comment.join(' ')}" if d.comment}" }.compact.sort.join("\n") fd.puts "decoded #{t.length}", t t = @comment.map { |a, c| c.map { |l| l.chomp }.join("\n").split("\n").map { |lc| "#{Expression[a]} #{lc.chomp}" } }.join("\n") fd.puts "comment #{t.length}", t bl = @decoded.values.map { |d| d.block if d.kind_of? DecodedInstruction and d.block_head? }.compact t = bl.map { |b| [Expression[b.address], b.list.map { |d| Expression[d.address] }.join(','), b.to_normal.to_a.map { |t_| Expression[t_] }.join(','), b.to_subfuncret.to_a.map { |t_| Expression[t_] }.join(','), b.to_indirect.to_a.map { |t_| Expression[t_] }.join(','), b.from_normal.to_a.map { |t_| Expression[t_] }.join(','), b.from_subfuncret.to_a.map { |t_| Expression[t_] }.join(','), b.from_indirect.to_a.map { |t_| Expression[t_] }.join(','), ].join(';') }.sort.join("\n") fd.puts "blocks #{t.length}", t t = @function.map { |a, f| next if not @decoded[a] [a, *f.return_address.to_a].map { |e| Expression[e] }.join(',') }.compact.sort.join("\n") # TODO binding ? fd.puts "funcs #{t.length}", t t = @xrefs.map { |a, x| a = ':default' if a == :default a = ':unknown' if a == Expression::Unknown # XXX origin case x when nil when Xref [Expression[a], x.type, x.len, (Expression[x.origin] if x.origin)].join(',') when Array x.map { |x_| [Expression[a], x_.type, x_.len, (Expression[x_.origin] if x_.origin)].join(',') } end }.compact.join("\n") fd.puts "xrefs #{t.length}", t t = @c_parser.to_s fd.puts "c #{t.length}", t #t = bl.map { |b| b.backtracked_for } #fd.puts "trace #{t.length}" , t end # loads a disassembler from a saved file def self.load(str, &b) d = new(nil, nil) d.load(str, &b) d end # loads the dasm state from a savefile content # will yield unknown segments / binarypath notfound def load(str) raise 'Not a metasm save file' if str[0, 12].chomp != 'Metasm.dasm' off = 12 pp = Preprocessor.new app = AsmPreprocessor.new while off < str.length i = str.index("\n", off) || str.length type, len = str[off..i].chomp.split off = i+1 data = str[off, len.to_i] off += len.to_i case type when nil, '' when 'binarypath' data = yield(type, data) if not File.exist? data and block_given? reinitialize AutoExe.decode_file(data) @program.disassembler = self @program.init_disassembler when 'cpu' cpuname, size, endianness = data.split cpu = Metasm.const_get(cpuname) raise 'invalid cpu' if not cpu < CPU cpu = cpu.new cpu.size = size.to_i cpu.endianness = endianness.to_sym reinitialize Shellcode.new(cpu) @program.disassembler = self @program.init_disassembler @sections.delete(0) # rm empty section at 0, other real 'section' follow when 'section' info = data[0, data.index("\n") || data.length] data = data[info.length, data.length] pp.feed!(info) addr = Expression.parse(pp).reduce len = Expression.parse(pp).reduce edata = EncodedData.new(data.unpack('m*').first, :virtsize => len) add_section(addr, edata) when 'map' load_map data when 'decoded' data.each_line { |l| begin next if l !~ /^([^,]*),(\d*) ([^;]*)(?:; (.*))?/ a, len, instr, cmt = $1, $2, $3, $4 a = Expression.parse(pp.feed!(a)).reduce instr = @cpu.parse_instruction(app.feed!(instr)) di = DecodedInstruction.new(instr, a) di.bin_length = len.to_i di.add_comment cmt if cmt @decoded[a] = di rescue puts "load: bad di #{l.inspect}" if $VERBOSE end } when 'blocks' data.each_line { |l| bla = l.chomp.split(';').map { |sl| sl.split(',') } begin a = Expression.parse(pp.feed!(bla.shift[0])).reduce b = InstructionBlock.new(a, get_section_at(a).to_a[0]) bla.shift.each { |e| a = Expression.parse(pp.feed!(e)).reduce b.add_di(@decoded[a]) } bla.zip([:to_normal, :to_subfuncret, :to_indirect, :from_normal, :from_subfuncret, :from_indirect]).each { |l_, s| b.send("#{s}=", l_.map { |e| Expression.parse(pp.feed!(e)).reduce }) if not l_.empty? } rescue puts "load: bad block #{l.inspect}" if $VERBOSE end } when 'funcs' data.each_line { |l| begin a, *r = l.split(',').map { |e| Expression.parse(pp.feed!(e)).reduce } @function[a] = DecodedFunction.new @function[a].return_address = r if not r.empty? @function[a].finalized = true # TODO rescue puts "load: bad function #{l.inspect} #$!" if $VERBOSE end } when 'comment' data.each_line { |l| begin a, c = l.split(' ', 2) a = Expression.parse(pp.feed!(a)).reduce @comment[a] ||= [] @comment[a] |= [c] rescue puts "load: bad comment #{l.inspect} #$!" if $VERBOSE end } when 'c' begin # TODO parse_invalid_c, split per function, whatever parse_c('') @c_parser.allow_bad_c = true parse_c(data, 'savefile#c') rescue puts "load: bad C: #$!", $!.backtrace if $VERBOSE end @c_parser.readtok until @c_parser.eos? if @c_parser when 'xrefs' data.each_line { |l| begin a, t, len, o = l.chomp.split(',') case a when ':default'; a = :default when ':unknown'; a = Expression::Unknown else a = Expression.parse(pp.feed!(a)).reduce end t = (t.empty? ? nil : t.to_sym) len = (len != '' ? len.to_i : nil) o = (o.to_s != '' ? Expression.parse(pp.feed!(o)).reduce : nil) # :default/:unknown ? add_xref(a, Xref.new(t, o, len)) rescue puts "load: bad xref #{l.inspect} #$!" if $VERBOSE end } #when 'trace' else if block_given? yield(type, data) else puts "load: unsupported section #{type.inspect}" if $VERBOSE end end end end # change the base address of the loaded binary # better done early (before disassembling anything) # returns the delta def rebase(newaddr) rebase_delta(newaddr - @sections.keys.min) end def rebase_delta(delta) fix = lambda { |a| case a when Array a.map! { |e| fix[e] } when Hash tmp = {} a.each { |k, v| tmp[fix[k]] = v } a.replace tmp when Integer a += delta when BacktraceTrace a.origin = fix[a.origin] a.address = fix[a.address] end a } fix[@sections] fix[@decoded] fix[@xrefs] fix[@function] fix[@addrs_todo] fix[@addrs_done] fix[@comment] @prog_binding.each_key { |k| @prog_binding[k] = fix[@prog_binding[k]] } @old_prog_binding.each_key { |k| @old_prog_binding[k] = fix[@old_prog_binding[k]] } @label_alias_cache = nil @decoded.values.grep(DecodedInstruction).each { |di| if di.block_head? b = di.block b.address += delta fix[b.to_normal] fix[b.to_subfuncret] fix[b.to_indirect] fix[b.from_normal] fix[b.from_subfuncret] fix[b.from_indirect] fix[b.backtracked_for] end di.address = fix[di.address] di.next_addr = fix[di.next_addr] } @function.each_value { |f| f.return_address = fix[f.return_address] fix[f.backtracked_for] } @xrefs.values.flatten.compact.each { |x| x.origin = fix[x.origin] } delta end # dataflow method # walks a function, starting at addr # follows the usage of registers, computing the evolution from the value they had at start_addr # whenever an instruction references the register (or anything derived from it), # yield [di, used_register, reg_value, trace_state] where reg_value is the Expression holding the value of # the register wrt the initial value at start_addr, and trace_state the value of all registers (reg_value # not yet applied) # reg_value may be nil if used_register is not modified by the function (eg call [eax]) # the yield return value is propagated, unless it is nil/false # init_state is a hash { :reg => initial value } def trace_function_register(start_addr, init_state) function_walk(start_addr, init_state) { |args| trace_state = args.last case args.first when :di di = args[2] update = {} get_fwdemu_binding(di).each { |r, v| if v.kind_of?(Expression) and v.externals.find { |e| trace_state[e] } # XXX may mix old (from trace) and current (from v) registers newv = v.bind(trace_state) update[r] = yield(di, r, newv, trace_state) elsif r.kind_of?(ExpressionType) and rr = r.externals.find { |e| trace_state[e] } # reg dereferenced in a write (eg mov [esp], 42) next if update.has_key?(rr) # already yielded if yield(di, rr, trace_state[rr], trace_state) == false update[rr] = false end elsif trace_state[r] # started on mov reg, foo next if di.address == start_addr update[r] = false end } # directly walk the instruction argument list for registers not appearing in the binding @cpu.instr_args_memoryptr(di).each { |ind| b = @cpu.instr_args_memoryptr_getbase(ind) if b and b = b.symbolic and not update.has_key?(b) yield(di, b, nil, trace_state) end } @cpu.instr_args_regs(di).each { |r| r = r.symbolic if not update.has_key?(r) yield(di, r, nil, trace_state) end } update.each { |r, v| trace_state = trace_state.dup if v # cannot follow non-registers, or we would have to emulate every single # instruction (try following [esp+4] across a __stdcall..) trace_state[r] = v if r.kind_of?(::Symbol) else trace_state.delete r end } when :subfunc faddr = args[1] f = @function[faddr] f = @function[f.backtrace_binding[:thunk]] if f and f.backtrace_binding[:thunk] if f binding = f.backtrace_binding if binding.empty? backtrace_update_function_binding(faddr) binding = f.backtrace_binding end # XXX fwdemu_binding ? binding.each { |r, v| if v.externals.find { |e| trace_state[e] } if r.kind_of?(::Symbol) trace_state = trace_state.dup trace_state[r] = Expression[v.bind(trace_state)].reduce end elsif trace_state[r] trace_state = trace_state.dup trace_state.delete r end } end when :merge # when merging paths, keep the smallest common state subset # XXX may have unexplored froms conflicts = args[2] trace_state = trace_state.dup conflicts.each { |addr, st| trace_state.delete_if { |k, v| st[k] != v } } end trace_state = false if trace_state.empty? trace_state } end # define a register as a pointer to a structure # rename all [reg+off] as [reg+struct.member] in current function # also trace assignments of pointer members def trace_update_reg_structptr(addr, reg, structname, structoff=0) sname = soff = ctx = nil expr_to_sname = lambda { |expr| if not expr.kind_of?(Expression) or expr.op != :+ sname = nil next end sname = expr.lexpr || expr.rexpr soff = (expr.lexpr ? expr.rexpr : 0) if soff.kind_of?(Expression) # ignore index in ptr array if soff.op == :* and soff.lexpr == @cpu.size/8 soff = 0 elsif soff.rexpr.kind_of?(Expression) and soff.rexpr.op == :* and soff.rexpr.lexpr == @cpu.size/8 soff = soff.lexpr elsif soff.lexpr.kind_of?(Expression) and soff.lexpr.op == :* and soff.lexpr.lexpr == @cpu.size/8 soff = soff.rexpr end elsif soff.kind_of?(::Symbol) # array with 1 byte elements / pre-scaled idx? if not ctx[soff] soff = 0 end end } lastdi = nil trace_function_register(addr, reg => Expression[structname, :+, structoff]) { |di, r, val, trace| next if r.to_s =~ /flag/ # XXX maybe too ia32-specific? ctx = trace @cpu.instr_args_memoryptr(di).each { |ind| # find the structure dereference in di b = @cpu.instr_args_memoryptr_getbase(ind) b = b.symbolic if b next unless trace[b] imm = @cpu.instr_args_memoryptr_getoffset(ind) || 0 # check expr has the form 'traced_struct_reg + off' expr_to_sname[trace[b] + imm] # Expr#+ calls Expr#reduce next unless sname.kind_of?(::String) and soff.kind_of?(::Integer) next if not st = c_parser.toplevel.struct[sname] or not st.kind_of?(C::Union) # ignore lea esi, [esi+0] next if soff == 0 and not di.backtrace_binding.find { |k, v| v-k != 0 } # TODO if trace[b] offset != 0, we had a lea reg, [struct+substruct_off], tweak str accordingly # resolve struct + off into struct.membername str = st.name.dup mb = st.expand_member_offset(c_parser, soff, str) # patch di imm = imm.rexpr if imm.kind_of?(Expression) and not imm.lexpr and imm.rexpr.kind_of?(ExpressionString) imm = imm.expr if imm.kind_of?(ExpressionString) @cpu.instr_args_memoryptr_setoffset(ind, ExpressionString.new(imm, str, :structoff)) # check if the type is an enum/bitfield, patch instruction immediates trace_update_reg_structptr_arg_enum(di, ind, mb, str) if mb } if lastdi != di.address lastdi = di.address next Expression[structname, :+, structoff] if di.address == addr and r == reg # check if we need to trace 'r' further val = val.reduce_rec if val.kind_of?(Expression) val = Expression[val] if val.kind_of?(::String) case val when Expression # only trace trivial structptr+off expressions expr_to_sname[val] if sname.kind_of?(::String) and soff.kind_of?(::Integer) Expression[sname, :+, soff] end when Indirection # di is mov reg, [ptr+struct.offset] # check if the target member is a pointer to a struct, if so, trace it expr_to_sname[val.pointer.reduce] next unless sname.kind_of?(::String) and soff.kind_of?(::Integer) if st = c_parser.toplevel.struct[sname] and st.kind_of?(C::Union) pt = st.expand_member_offset(c_parser, soff, '') pt = pt.untypedef if pt if pt.kind_of?(C::Pointer) tt = pt.type.untypedef stars = '' while tt.kind_of?(C::Pointer) stars << '*' tt = tt.type.untypedef end if tt.kind_of?(C::Union) and tt.name Expression[tt.name + stars] end end elsif soff == 0 and sname[-1] == ?* # XXX pointer to pointer to struct # full C type support would be better, but harder to fit in an Expr Expression[sname[0...-1]] end # in other cases, stop trace end } end # found a special member of a struct, check if we can apply # bitfield/enum name to other constants in the di def trace_update_reg_structptr_arg_enum(di, ind, mb, str) if ename = mb.has_attribute_var('enum') and enum = c_parser.toplevel.struct[ename] and enum.kind_of?(C::Enum) # handle enums: struct moo { int __attribute__((enum(bla))) fld; }; doit = lambda { |_di| if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer) # handle enum values on tagged structs if enum.members and name = enum.members.index(num_i) num.lexpr = nil num.op = :+ num.rexpr = ExpressionString.new(Expression[num_i], name, :enum) _di.add_comment "enum::#{ename}" if _di.address != di.address end end } doit[di] # mov eax, [ptr+struct.enumfield] => trace eax if reg = @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 } reg = reg.symbolic trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace| next if r != reg and val != Expression[reg] doit[_di] val } end elsif mb.untypedef.kind_of?(C::Struct) # handle bitfields byte_off = 0 if str =~ /\+(\d+)$/ # test byte [bitfield+1], 0x1 => test dword [bitfield], 0x100 # XXX little-endian only byte_off = $1.to_i str[/\+\d+$/] = '' end cmt = str.split('.')[-2, 2].join('.') if str.count('.') > 1 doit = lambda { |_di, add| if num = _di.instruction.args.grep(Expression).first and num_i = num.reduce and num_i.kind_of?(::Integer) # TODO handle ~num_i num_left = num_i << add s_or = [] mb.untypedef.members.each { |mm| if bo = mb.bitoffsetof(c_parser, mm) boff, blen = bo if mm.name && blen == 1 && ((num_left >> boff) & 1) > 0 s_or << mm.name num_left &= ~(1 << boff) end end } if s_or.first if num_left != 0 s_or << ('0x%X' % num_left) end s = s_or.join('|') num.lexpr = nil num.op = :+ num.rexpr = ExpressionString.new(Expression[num_i], s, :bitfield) _di.add_comment cmt if _di.address != di.address end end } doit[di, byte_off*8] if reg = @cpu.instr_args_regs(di).find { |r| v = di.backtrace_binding[r.symbolic] and (v - ind.symbolic) == 0 } reg = reg.symbolic trace_function_register(di.address, reg => Expression[0]) { |_di, r, val, trace| if r.kind_of?(Expression) and r.op == :& if r.lexpr == reg # test al, 42 doit[_di, byte_off*8] elsif r.lexpr.kind_of?(Expression) and r.lexpr.op == :>> and r.lexpr.lexpr == reg # test ah, 42 doit[_di, byte_off*8+r.lexpr.rexpr] end end next if r != reg and val != Expression[reg] doit[_di, byte_off*8] _di.address == di.address && r == reg ? Expression[0] : val } end end end # change Expression display mode for current object o to display integers as char constants def toggle_expr_char(o) return if not o.kind_of?(Renderable) tochars = lambda { |v| if v.kind_of?(::Integer) a = [] vv = v.abs a << (vv & 0xff) vv >>= 8 while vv > 0 a << (vv & 0xff) vv >>= 8 end if a.all? { |b| b < 0x7f } s = a.pack('C*').inspect.gsub("'") { '\\\'' }[1...-1] ExpressionString.new(v, (v > 0 ? "'#{s}'" : "-'#{s}'"), :char) end end } o.each_expr { |e| if e.kind_of?(Expression) if nr = tochars[e.rexpr] e.rexpr = nr elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :char e.rexpr = e.rexpr.expr end if nl = tochars[e.lexpr] e.lexpr = nl elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :char e.lexpr = e.lexpr.expr end end } end def toggle_expr_dec(o) return if not o.kind_of?(Renderable) o.each_expr { |e| if e.kind_of?(Expression) if e.rexpr.kind_of?(::Integer) e.rexpr = ExpressionString.new(Expression[e.rexpr], e.rexpr.to_s, :decimal) elsif e.rexpr.kind_of?(ExpressionString) and e.rexpr.type == :decimal e.rexpr = e.rexpr.reduce end if e.lexpr.kind_of?(::Integer) e.lexpr = ExpressionString.new(Expression[e.lexpr], e.lexpr.to_s, :decimal) elsif e.lexpr.kind_of?(ExpressionString) and e.lexpr.type == :decimal e.lexpr = e.lexpr.reduce end end } end # patch Expressions in current object to include label names when available # XXX should we also create labels ? def toggle_expr_offset(o) return if not o.kind_of? Renderable o.each_expr { |e| next unless e.kind_of?(Expression) if n = @prog_binding[e.lexpr] e.lexpr = n elsif e.lexpr.kind_of? ::Integer and n = get_label_at(e.lexpr) add_xref(normalize(e.lexpr), Xref.new(:addr, o.address)) if o.respond_to? :address e.lexpr = n end if n = @prog_binding[e.rexpr] e.rexpr = n elsif e.rexpr.kind_of? ::Integer and n = get_label_at(e.rexpr) add_xref(normalize(e.rexpr), Xref.new(:addr, o.address)) if o.respond_to? :address e.rexpr = n end } end # toggle all ExpressionStrings def toggle_expr_str(o) return if not o.kind_of?(Renderable) o.each_expr { |e| next unless e.kind_of?(ExpressionString) e.hide_str = !e.hide_str } end # call this function on a function entrypoint if the function is in fact a __noreturn # will cut the to_subfuncret of callers def fix_noreturn(o) each_xref(o, :x) { |a| a = normalize(a.origin) next if not di = di_at(a) or not di.opcode.props[:saveip] # XXX should check if caller also becomes __noreturn di.block.each_to_subfuncret { |to| next if not tdi = di_at(to) or not tdi.block.from_subfuncret tdi.block.from_subfuncret.delete_if { |aa| normalize(aa) == di.address } tdi.block.from_subfuncret = nil if tdi.block.from_subfuncret.empty? } di.block.to_subfuncret = nil } end # find the addresses of calls calling the address, handles thunks def call_sites(funcaddr) find_call_site = proc { |a| until not di = di_at(a) if di.opcode.props[:saveip] cs = di.address break end if di.block.from_subfuncret.to_a.first while di.block.from_subfuncret.to_a.length == 1 a = di.block.from_subfuncret[0] break if not di_at(a) a = @decoded[a].block.list.first.address di = @decoded[a] end end break if di.block.from_subfuncret.to_a.first break if di.block.from_normal.to_a.length != 1 a = di.block.from_normal.first end cs } ret = [] each_xref(normalize(funcaddr), :x) { |a| ret << find_call_site[a.origin] } ret.compact.uniq end # loads a disassembler plugin script # this is simply a ruby script instance_eval() in the disassembler # the filename argument is autocompleted with '.rb' suffix, and also # searched for in the Metasmdir/samples/dasm-plugins subdirectory if not found in cwd def load_plugin(plugin_filename) if not File.exist?(plugin_filename) if File.exist?(plugin_filename+'.rb') plugin_filename += '.rb' elsif defined? Metasmdir # try autocomplete pf = File.join(Metasmdir, 'samples', 'dasm-plugins', plugin_filename) if File.exist? pf plugin_filename = pf elsif File.exist? pf + '.rb' plugin_filename = pf + '.rb' end end end instance_eval File.read(plugin_filename) end # same as load_plugin, but hides the @gui attribute while loading, preventing the plugin do popup stuff # this is useful when you want to load a plugin from another plugin to enhance the plugin's functionnality # XXX this also prevents setting up kbd_callbacks etc.. def load_plugin_nogui(plugin_filename) oldgui = gui @gui = nil load_plugin(plugin_filename) ensure @gui = oldgui end # compose two code/instruction's backtrace_binding # assumes bd1 is followed by bd2 in the code flow # eg inc edi + push edi => # { Ind[:esp, 4] => Expr[:edi + 1], :esp => Expr[:esp - 4], :edi => Expr[:edi + 1] } # XXX if bd1 writes to memory with a pointer that is reused in bd2, this function has to # revert the change made by bd2, which only works with simple ptr addition now # XXX unhandled situations may be resolved using :unknown, or by returning incorrect values def compose_bt_binding(bd1, bd2) if bd1.kind_of? DecodedInstruction bd1 = bd1.backtrace_binding ||= cpu.get_backtrace_binding(bd1) end if bd2.kind_of? DecodedInstruction bd2 = bd2.backtrace_binding ||= cpu.get_backtrace_binding(bd2) end reduce = lambda { |e| Expression[Expression[e].reduce] } bd = {} bd2.each { |k, v| bd[k] = reduce[v.bind(bd1)] } # for each pointer appearing in keys of bd1, we must infer from bd2 what final # pointers should appear in bd # eg 'mov [eax], 0 mov ebx, eax' => { [eax] <- 0, [ebx] <- 0, ebx <- eax } bd1.each { |k, v| if k.kind_of? Indirection done = false k.pointer.externals.each { |e| # XXX this will break on nontrivial pointers or bd2 bd2.each { |k2, v2| # we dont want to invert computation of flag_zero/carry etc (booh) next if k2.to_s =~ /flag/ # discard indirection etc, result would be too complex / not useful next if not Expression[v2].expr_externals.include? e done = true # try to reverse the computation made upon 'e' # only simple addition handled here ptr = reduce[k.pointer.bind(e => Expression[[k2, :-, v2], :+, e])] # if bd2 does not rewrite e, duplicate the original pointer if not bd2[e] bd[k] ||= reduce[v] # here we should not see 'e' in ptr anymore ptr = Expression::Unknown if ptr.externals.include? e else # cant check if add reversion was successful.. end bd[Indirection[reduce[ptr], k.len]] ||= reduce[v] } } bd[k] ||= reduce[v] if not done else bd[k] ||= reduce[v] end } bd end def gui_hilight_word_regexp(word) @cpu.gui_hilight_word_regexp(word) end # return a C::AllocCStruct from c_parser # TODO handle program.class::Header.to_c_struct def decode_c_struct(structname, addr) if c_parser and edata = get_edata_at(addr) c_parser.decode_c_struct(structname, edata.data, edata.ptr) end end def decode_c_ary(structname, addr, len) if c_parser and edata = get_edata_at(addr) c_parser.decode_c_ary(structname, len, edata.data, edata.ptr) end end # find the function containing addr, and find & rename stack vars in it def name_local_vars(addr) if @cpu.respond_to?(:name_local_vars) and faddr = find_function_start(addr) @function[faddr] ||= DecodedFunction.new # XXX @cpu.name_local_vars(self, faddr) end end end end