metasm/decompile.rb in metasm-1.0.3 vs metasm/decompile.rb in metasm-1.0.4

- old
+ new

@@ -11,28 +11,23 @@ module Metasm class C::Variable; attr_accessor :stackoff; end class C::Block; attr_accessor :decompdata; end class DecodedFunction; attr_accessor :decompdata; end -class CPU - def decompile_check_abi(dcmp, entry, func) - end -end - class Decompiler - # TODO add methods to C::CExpr - AssignOp = [:'=', :'+=', :'-=', :'*=', :'/=', :'%=', :'^=', :'&=', :'|=', :'>>=', :'<<=', :'++', :'--'] - attr_accessor :dasm, :c_parser - attr_accessor :forbid_optimize_dataflow, :forbid_optimize_code, :forbid_decompile_ifwhile, :forbid_decompile_types, :forbid_optimize_labels + attr_accessor :forbid_optimize_dataflow, :forbid_optimize_code, :forbid_decompile_ifwhile, :forbid_decompile_types, :forbid_optimize_labels, :forbid_all_optimizations # recursive flag: for each subfunction, recurse is decremented, when 0 only the prototype is decompiled, when <0 nothing is done attr_accessor :recurse + def disassembler ; dasm ; end + def initialize(dasm, cp = dasm.c_parser) @dasm = dasm @recurse = 1/0.0 # Infinity @c_parser = cp || @dasm.cpu.new_cparser + @dasm.cpu.decompile_init(self) if @dasm.cpu.respond_to?(:decompile_init) end # decompile recursively function from an entrypoint, then perform global optimisation (static vars, ...) # should be called once after everything is decompiled (global optimizations may bring bad results otherwise) # use decompile_func for incremental decompilation @@ -101,35 +96,43 @@ def do_decompile_func(entry, func) # find decodedinstruction graph of the function, decompile subfuncs myblocks = listblocks_func(entry) # [esp+8] => [:frameptr-12] + # TODO slow makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block } # find registry dependencies between blocks deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func) scope = func.initializer = C::Block.new(@c_parser.toplevel) if df = @dasm.function[entry] - scope.decompdata = df.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}} + scope.decompdata = df.decompdata ||= {:unalias_type => {}, :unalias_name => {}} else - scope.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}} + scope.decompdata ||= {:unalias_type => {}, :unalias_name => {}} end # di blocks => raw c statements, declare variables @dasm.cpu.decompile_blocks(self, myblocks, deps, func) + puts "dcmp debug #{func.name} {", scope, '}' if $DEBUG + return if forbid_all_optimizations + simplify_goto(scope) namestackvars(scope) unalias_vars(scope, func) decompile_c_types(scope) - optimize(scope) + optimize_code(scope) + optimize_vars(scope) + optimize_vars(scope) # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i) remove_unreferenced_vars(scope) + decompile_c_types_again(scope) cleanup_var_decl(scope, func) if @recurse > 0 decompile_controlseq(scope) optimize_vars(scope) + optimize_code(scope) optimize_ctrl(scope) optimize_vars(scope) remove_unreferenced_vars(scope) simplify_varname_noalias(scope) rename_variables(scope) @@ -141,11 +144,11 @@ when C::Return if not ret.value scope.statements.pop else v = ret.value - v = v.rexpr if v.kind_of? C::CExpression and not v.op and v.rexpr.kind_of? C::Typed + v = v.rexpr if v.kind_of?(C::CExpression) and not v.op and v.rexpr.kind_of?(C::Typed) func.type.type = v.type end end if @recurse == 0 @@ -154,67 +157,68 @@ end end # redecompile a function, redecompiles functions calling it if its prototype changed def redecompile(name) - @c_parser.toplevel.statements.delete_if { |st| st.kind_of? C::Declaration and st.var.name == name } + @c_parser.toplevel.statements.delete_if { |st| st.kind_of?(C::Declaration) and st.var.name == name } oldvar = @c_parser.toplevel.symbol.delete name decompile_func(name) - if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of? C::Function and newvar.type.kind_of? C::Function + if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of?(C::Function) and newvar.type.kind_of?(C::Function) o, n = oldvar.type, newvar.type if o.type != n.type or o.args.to_a.length != n.args.to_a.length or o.args.to_a.zip(n.args.to_a).find { |oa, na| oa.type != na.type } # XXX a may depend on b and c, and b may depend on c -> redecompile c twice # XXX if the dcmp is unstable, may also infinite loop on mutually recursive funcs.. @c_parser.toplevel.statements.dup.each { |st| - next if not st.kind_of? C::Declaration + next if not st.kind_of?(C::Declaration) next if not st.var.initializer next if st.var.name == name - next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of? C::Variable and ce.lexpr.name == name } + next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of?(C::Variable) and ce.lexpr.name == name } redecompile(st.var.name) } end end end def new_global_var(addr, type, scope=nil) addr = @dasm.normalize(addr) # (almost) NULL ptr - return if addr.kind_of? Integer and addr >= 0 and addr < 32 + return if addr.kind_of?(Integer) and addr >= 0 and addr < 32 # check preceding structure we're hitting # TODO check what we step over when defining a new static struct 0x100.times { |i_| next if not n = @dasm.get_label_at(addr-i_) next if not v = @c_parser.toplevel.symbol[n] - next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of? C::Union + next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of?(C::Union) break if i_ == 0 # XXX it crashes later if we dont break here next if sizeof(v.type.pointed) <= i_ return structoffset(v.type.pointed.untypedef, C::CExpression[v], i_, nil) } ptype = type.pointed.untypedef if type.pointer? - if ptype.kind_of? C::Function + if ptype.kind_of?(C::Function) name = @dasm.auto_label_at(addr, 'sub', 'xref', 'byte', 'word', 'dword', 'unk') if @dasm.get_section_at(addr) and @recurse > 0 puts "found function pointer to #{name}" if $VERBOSE @dasm.disassemble(addr) if not @dasm.decoded[addr] # TODO disassemble_fast ? f = @dasm.function[addr] ||= DecodedFunction.new # TODO detect thunks (__noreturn) - f.decompdata ||= { :stackoff_type => {}, :stackoff_name => {} } - if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of? C::Function + f.decompdata ||= { :unalias_type => {}, :unalias_name => {} } + if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of?(C::Function) os = @c_parser.toplevel.symbol.delete name - @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name } - aoff = 1 - ptype.args.to_a.each { |a| - aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr] - f.decompdata[:stackoff_type][aoff] ||= a.type - f.decompdata[:stackoff_name][aoff] ||= a.name if a.name - aoff += sizeof(a) # ary ? - } + @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of?(C::Declaration) and ts.var.name == name } + #aoff = 1 + #ptype.args.to_a.each { |a| + # TODO + #aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr] + #f.decompdata[:unalias_type][aoff] ||= a.type + #f.decompdata[:unalias_name][aoff] ||= a.name if a.name + #aoff += sizeof(a) # ary ? + #} decompile_func_rec(addr) s = @c_parser.toplevel.symbol[name] walk_ce([@c_parser.toplevel, scope]) { |ce| ce.lexpr = s if ce.lexpr == os ce.rexpr = s if ce.rexpr == os @@ -228,42 +232,42 @@ when 1; 'byte' when 2; 'word' when 4; 'dword' else 'unk' end - name = 'stru' if ptype.kind_of? C::Union + name = 'stru' if ptype.kind_of?(C::Union) name = @dasm.auto_label_at(addr, name, 'xref', 'byte', 'word', 'dword', 'unk', 'stru') if not var = @c_parser.toplevel.symbol[name] var = C::Variable.new var.name = name var.type = type.pointer? ? C::Array.new(ptype) : type @c_parser.toplevel.symbol[var.name] = var @c_parser.toplevel.statements << C::Declaration.new(var) end - if ptype.kind_of? C::Union and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length + if ptype.kind_of?(C::Union) and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length # TODO struct init, array, fptrs.. - elsif type.pointer? and not type.pointed.untypedef.kind_of? C::Function and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and - [1, 2, 4].include? tsz and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer) + elsif type.pointer? and not type.pointed.untypedef.kind_of?(C::Function) and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and + [1, 2, 4].include?(tsz) and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer) # TODO do not overlap other statics (but labels may refer to elements of the array...) data = (0..256).map { v = s[0].decode_imm("u#{tsz*8}".to_sym, @dasm.cpu.endianness) - v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of? Expression # relocation + v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of?(Expression) # relocation v } var.initializer = data.map { |v| C::CExpression[v, C::BaseType.new(:int)] } unless (data - [0]).empty? - if (tsz == 1 or tsz == 2) and eos = data.index(0) and (0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } # printable str + if (tsz == 1 or tsz == 2) and eos = data.index(0) and ((0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } or ptype.to_s == '(char)') # printable str # XXX 0x80 with ruby1.9... var.initializer = C::CExpression[data[0, eos].pack('C*'), C::Pointer.new(ptype)] rescue nil end - if var.initializer.kind_of? ::Array and i = var.initializer.first and i.kind_of? C::CExpression and not i.op and i.rexpr.kind_of? C::Variable and - i.rexpr.type.kind_of? C::Function and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name)) # iat_ExternalFunc + if var.initializer.kind_of?(::Array) and i = var.initializer.first and i.kind_of?(C::CExpression) and not i.op and i.rexpr.kind_of?(C::Variable) and + i.rexpr.type.kind_of?(C::Function) and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name)) # iat_ExternalFunc i.type = i.rexpr.type type = var.type = C::Array.new(C::Pointer.new(i.type)) var.initializer = [i] end - var.initializer = nil if var.initializer.kind_of? ::Array and not type.untypedef.kind_of? C::Array + var.initializer = nil if var.initializer.kind_of?(::Array) and not type.untypedef.kind_of?(C::Array) end # TODO patch existing references to addr ? (or would they have already triggered new_global_var?) # return the object to use to replace the raw addr @@ -283,11 +287,11 @@ blocks << [a, []] di.block.each_to { |ta, type| next if type == :indirect ta = dasm.normalize ta if type != :subfuncret and not @dasm.function[ta] and - (not @dasm.function[entry] or @autofuncs.include? entry) and + (not @dasm.function[entry] or @autofuncs.include?(entry)) and di.block.list.last.opcode.props[:saveip] # possible noreturn function # XXX call $+5; pop eax @autofuncs << ta @dasm.function[ta] = DecodedFunction.new @@ -315,11 +319,11 @@ def backtrace_target(expr, addr) if n = @dasm.backtrace(expr, addr).first return expr if n == Expression::Unknown n = Expression[n].reduce_rec n = @dasm.get_label_at(n) || n - n = $1 if n.kind_of? ::String and n =~ /^thunk_(.*)/ + n = $1 if n.kind_of?(::String) and n =~ /^thunk_(.*)/ n else expr end end @@ -327,31 +331,33 @@ # patches instruction's backtrace_binding to replace things referring to a static stack offset from func start by :frameptr+off def makestackvars(funcstart, blocks) blockstart = nil cache_di = nil cache = {} # [i_s, e, type] => backtrace - tovar = lambda { |di, e, i_s| + @decomp_mkstackvars_terminals ||= [:frameptr] + tovar = lambda { |di, e, i_s_c| + i_s = (i_s_c > 0) case e - when Expression; Expression[tovar[di, e.lexpr, i_s], e.op, tovar[di, e.rexpr, i_s]].reduce - when Indirection; Indirection[tovar[di, e.target, i_s], e.len, e.origin] + when Expression; Expression[tovar[di, e.lexpr, i_s_c], e.op, tovar[di, e.rexpr, i_s_c]].reduce + when Indirection; Indirection[tovar[di, e.target, i_s_c-1], e.len, e.origin] when :frameptr; e when ::Symbol cache.clear if cache_di != di ; cache_di = di vals = cache[[e, i_s, 0]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => blockstart, - :include_start => i_s, :no_check => true, :terminals => [:frameptr]) + :include_start => i_s, :no_check => true, :terminals => @decomp_mkstackvars_terminals) # backtrace only to blockstart first - if vals.length == 1 and ee = vals.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or - (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer) or - (not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of? Indirection and eep = ee.rexpr.pointer and - (eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of? ::Integer)))) + if vals.length == 1 and ee = vals.first and ee.kind_of?(Expression) and (ee == Expression[:frameptr] or + (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?(::Integer)) or + (not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of?(Indirection) and eep = ee.rexpr.pointer and + (eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of?(::Integer))))) ee else # fallback on full run (could restart from blockstart with ee, but may reevaluate addr_binding.. vals = cache[[e, i_s, 1]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => funcstart, - :include_start => i_s, :no_check => true, :terminals => [:frameptr]) - if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or - (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer))) + :include_start => i_s, :no_check => true, :terminals => @decomp_mkstackvars_terminals) + if vals.length == 1 and ee = vals.first and (ee.kind_of?(Expression) and (ee == Expression[:frameptr] or + (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?(::Integer)))) ee else e end end else e @@ -364,13 +370,13 @@ @dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block| block.list.each { |di| bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di) newbd = repl_bind[di] = {} bd.each { |k, v| - k = tovar[di, k, true] if k.kind_of? Indirection - next if k == Expression[:frameptr] or (k.kind_of? Expression and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of? ::Integer) - newbd[k] = tovar[di, v, false] + k = tovar[di, k, 2] if k.kind_of?(Indirection) + next if k == Expression[:frameptr] or (k.kind_of?(Expression) and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of?( ::Integer)) + newbd[k] = tovar[di, v, 0] } } } repl_bind.each { |di, bd| di.backtrace_binding = bd } @@ -389,13 +395,13 @@ # turns an Expression to a CExpression, create+declares needed variables in scope def decompile_cexpr(e, scope, itype=nil) case e when Expression - if e.op == :'=' and e.lexpr.kind_of? ::String and e.lexpr =~ /^dummy_metasm_/ + if e.op == :'=' and e.lexpr.kind_of?(::String) and e.lexpr =~ /^dummy_metasm_/ decompile_cexpr(e.rexpr, scope, itype) - elsif e.op == :+ and e.rexpr.kind_of? ::Integer and e.rexpr < 0 + elsif e.op == :+ and e.rexpr.kind_of?(::Integer) and e.rexpr < 0 decompile_cexpr(Expression[e.lexpr, :-, -e.rexpr], scope, itype) elsif e.lexpr a = decompile_cexpr(e.lexpr, scope, itype) C::CExpression[a, e.op, decompile_cexpr(e.rexpr, scope, itype)] elsif e.op == :+ @@ -412,11 +418,11 @@ bt = C::Struct.new bt.members = [C::Variable.new('data', C::Array.new(C::BaseType.new(:__int8), e.len))] end itype = C::Pointer.new(bt) p = decompile_cexpr(e.target, scope, itype) - p = C::CExpression[[p], itype] if not p.type.kind_of? C::Pointer + p = C::CExpression[[p], itype] C::CExpression[:*, p] when ::Integer C::CExpression[e] when C::CExpression e @@ -440,21 +446,21 @@ end end # simplify goto -> goto / goto -> return def simplify_goto(scope, keepret = false) - if not keepret and scope.statements[-1].kind_of? C::Return and not scope.statements[-2].kind_of? C::Label + if not keepret and scope.statements[-1].kind_of?(C::Return) and not scope.statements[-2].kind_of?(C::Label) scope.statements.insert(-2, C::Label.new("ret_label")) end jumpto = {} walk(scope) { |s| - next if not s.kind_of? C::Block + next if not s.kind_of?(C::Block) s.statements.each_with_index { |ss, i| case ss when C::Goto, C::Return - while l = s.statements[i -= 1] and l.kind_of? C::Label + while l = s.statements[i -= 1] and l.kind_of?(C::Label) jumpto[l.name] = ss end end } } @@ -462,15 +468,15 @@ simpler = lambda { |s| case s when C::Goto if jumpto[s.target] r = jumpto[s.target].dup - r.value = r.value.deep_dup if r.kind_of? C::Return and r.value.kind_of? C::CExpression + r.value = r.value.deep_dup if r.kind_of?(C::Return) and r.value.kind_of?(C::CExpression) r end when C::Return - if not keepret and scope.statements[-1].kind_of? C::Return and s.value == scope.statements[-1].value and s != scope.statements[-1] + if not keepret and scope.statements[-1].kind_of?(C::Return) and s.value == scope.statements[-1].value and s != scope.statements[-1] C::Goto.new(scope.statements[-2].name) end end } @@ -491,11 +497,11 @@ # remove unreferenced labels remove_labels(scope) walk(scope) { |s| - next if not s.kind_of? C::Block + next if not s.kind_of?(C::Block) del = false # remove dead code goto a; goto b; if (0) { z: bla; } => rm goto b s.statements.delete_if { |st| case st when C::Goto, C::Return @@ -506,14 +512,14 @@ del = false end } # if () { goto x; } x: s.statements.each_with_index { |ss, i| - if ss.kind_of? C::If + if ss.kind_of?(C::If) t = ss.bthen - t = t.statements.first if t.kind_of? C::Block - if t.kind_of? C::Goto and s.statements[i+1].kind_of? C::Label and s.statements[i+1].name == t.target + t = t.statements.first if t.kind_of?(C::Block) + if t.kind_of?(C::Goto) and s.statements[i+1].kind_of?(C::Label) and s.statements[i+1].name == t.target ss.bthen = C::Block.new(scope) end end } } @@ -546,12 +552,12 @@ e when ::Array e.each_with_index { |st, i| case st when C::While, C::DoWhile - l1 = (e[i+1].name if e[i+1].kind_of? C::Label) - l2 = (e[i-1].name if e[i-1].kind_of? C::Label) + l1 = (e[i+1].name if e[i+1].kind_of?(C::Label)) + l2 = (e[i-1].name if e[i-1].kind_of?(C::Label)) e[i].body = walk[st.body, l1, l2] else e[i] = walk[st, brk, cnt] end } @@ -576,72 +582,91 @@ walk[scope, nil, nil] remove_labels(scope) # while (1) { a; if(b) { c; return; }; d; } => while (1) { a; if (b) break; d; } c; - while st = scope.statements.last and st.kind_of? C::While and st.test.kind_of? C::CExpression and - not st.test.op and st.test.rexpr == 1 and st.body.kind_of? C::Block + while st = scope.statements.last and st.kind_of?(C::While) and st.test.kind_of?(C::CExpression) and + not st.test.op and st.test.rexpr == 1 and st.body.kind_of?(C::Block) break if not i = st.body.statements.find { |ist| - ist.kind_of? C::If and not ist.belse and ist.bthen.kind_of? C::Block and ist.bthen.statements.last.kind_of? C::Return + ist.kind_of?(C::If) and not ist.belse and ist.bthen.kind_of?(C::Block) and ist.bthen.statements.last.kind_of?(C::Return) } - walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of? C::Block and sst.outer == i.bthen } + walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of?(C::Block) and sst.outer == i.bthen } scope.statements.concat i.bthen.statements i.bthen = C::Break.new end patch_test = lambda { |ce| - ce = ce.rexpr if ce.kind_of? C::CExpression and ce.op == :'!' + ce = ce.rexpr if ce.kind_of?(C::CExpression) and ce.op == :'!' # if (a+1) => if (a != -1) - if ce.kind_of? C::CExpression and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and ce.lexpr + if ce.kind_of?(C::CExpression) and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and ce.lexpr ce.rexpr.rexpr = -ce.rexpr.rexpr if ce.op == :+ ce.op = :'!=' end } walk(scope) { |ce| case ce when C::If patch_test[ce.test] - if ce.bthen.kind_of? C::Block + if ce.bthen.kind_of?(C::Block) case ce.bthen.statements.length when 1 - walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of? C::Block and sst.outer == ce.bthen } + walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of?(C::Block) and sst.outer == ce.bthen } ce.bthen = ce.bthen.statements.first when 0 if not ce.belse and i = ce.bthen.outer.statements.index(ce) ce.bthen.outer.statements[i] = ce.test # TODO remove sideeffectless parts end end end - if ce.belse.kind_of? C::Block and ce.belse.statements.length == 1 - walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of? C::Block and sst.outer == ce.belse } + if ce.belse.kind_of?(C::Block) and ce.belse.statements.length == 1 + walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of?(C::Block) and sst.outer == ce.belse } ce.belse = ce.belse.statements.first end when C::While, C::DoWhile patch_test[ce.test] - if ce.body.kind_of? C::Block + if ce.body.kind_of?(C::Block) case ce.body.statements.length when 1 - walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of? C::Block and sst.outer == ce.body } + walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of?(C::Block) and sst.outer == ce.body } ce.body = ce.body.statements.first when 0 - if ce.kind_of? C::DoWhile and i = ce.body.outer.statements.index(ce) + if ce.kind_of?(C::DoWhile) and i = ce.body.outer.statements.index(ce) ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body) end ce.body = nil end end end } + + walk(scope, false, true) { |ce| + # while (1) { a; if (b) break; } => do { a } while (!b); + if ce.kind_of?(C::While) and ce.test.kind_of?(C::CExpression) and not ce.test.op and ce.test.rexpr == 1 and ce.body.kind_of?(C::Block) + i = ce.body.statements.last + if i.kind_of?(C::If) and not i.belse and i.bthen.kind_of?(C::Break) + ce.body.statements.pop + next C::DoWhile.new(i.test.negate, ce.body) + end + end + + # if (a) b = 1; else b = 2; => b = a ? 1 : 2 + if ce.kind_of?(C::If) and ce.belse.kind_of?(C::CExpression) and ce.belse.op == :'=' and ce.belse.lexpr.kind_of?(C::Variable) and ce.bthen.kind_of?(C::CExpression) and ce.bthen.op == :'=' and ce.bthen.lexpr == ce.belse.lexpr + next C::CExpression[ce.bthen.lexpr, :'=', [ce.test, :'?:', [ce.bthen.rexpr, ce.belse.rexpr]]] + end + } + + # TODO for (;;) {} + walk(scope) { |ce| - next if not ce.kind_of? C::Block + next if not ce.kind_of?(C::Block) st = ce.statements st.length.times { |n| - while st[n].kind_of? C::If and st[n+1].kind_of? C::If and not st[n].belse and not st[n+1].belse and ( - (st[n].bthen.kind_of? C::Return and st[n+1].bthen.kind_of? C::Return and st[n].bthen.value == st[n+1].bthen.value) or - (st[n].bthen.kind_of? C::Break and st[n+1].bthen.kind_of? C::Break) or - (st[n].bthen.kind_of? C::Continue and st[n+1].bthen.kind_of? C::Continue)) + while st[n].kind_of?(C::If) and st[n+1].kind_of?(C::If) and not st[n].belse and not st[n+1].belse and ( + (st[n].bthen.kind_of?(C::Return) and st[n+1].bthen.kind_of?(C::Return) and st[n].bthen.value == st[n+1].bthen.value) or + (st[n].bthen.kind_of?(C::Break) and st[n+1].bthen.kind_of?(C::Break)) or + (st[n].bthen.kind_of?(C::Continue) and st[n+1].bthen.kind_of?(C::Continue))) # if (a) return x; if (b) return x; => if (a || b) return x; st[n].test = C::CExpression[st[n].test, :'||', st[n+1].test] st.delete_at(n+1) end } @@ -657,83 +682,83 @@ ret = [] # list of labels appearing in ary inner_labels = ary.grep(C::Label).map { |l| l.name } while s = ary.shift # recurse if it's not the first run - if s.kind_of? C::If - s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block - s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of? C::Block + if s.kind_of?(C::If) + s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of?(C::Block) + s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of?(C::Block) end # if (a) goto x; if (b) goto x; => if (a || b) goto x; - while s.kind_of? C::If and s.bthen.kind_of? C::Goto and not s.belse and ary.first.kind_of? C::If and ary.first.bthen.kind_of? C::Goto and + while s.kind_of?(C::If) and s.bthen.kind_of?(C::Goto) and not s.belse and ary.first.kind_of?(C::If) and ary.first.bthen.kind_of?(C::Goto) and not ary.first.belse and s.bthen.target == ary.first.bthen.target s.test = C::CExpression[s.test, :'||', ary.shift.test] end # if (a) goto x; b; x: => if (!a) { b; } - if s.kind_of? C::If and s.bthen.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target } + if s.kind_of?(C::If) and s.bthen.kind_of?(C::Goto) and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target } # if {goto l;} a; l: => if (!) {a;} s.test = C::CExpression.negate s.test s.bthen = C::Block.new(scope) s.bthen.statements = decompile_cseq_if(ary[0..ary.index(l)], s.bthen) s.bthen.statements.pop # remove l: from bthen, it is in ary (was needed in bthen for inner ifs) ary[0...ary.index(l)] = [] end - if s.kind_of? C::If and (s.bthen.kind_of? C::Block or s.bthen.kind_of? C::Goto) - s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of? C::Goto + if s.kind_of?(C::If) and (s.bthen.kind_of?(C::Block) or s.bthen.kind_of?(C::Goto)) + s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of?(C::Goto) bts = s.bthen.statements # if (a) if (b) { c; } => if (a && b) { c; } - if bts.length == 1 and bts.first.kind_of? C::If and not bts.first.belse + if bts.length == 1 and bts.first.kind_of?(C::If) and not bts.first.belse s.test = C::CExpression[s.test, :'&&', bts.first.test] bts = bts.first.bthen bts = s.bthen.statements = bts.kind_of?(C::Block) ? bts.statements : [bts] end # if (a) { if (b) goto c; d; } c: => if (a && !b) { d; } - if bts.first.kind_of? C::If and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of? C::Goto and ary[0].kind_of? C::Label and l.target == ary[0].name + if bts.first.kind_of?(C::If) and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of?(C::Goto) and ary[0].kind_of?(C::Label) and l.target == ary[0].name s.test = C::CExpression[s.test, :'&&', C::CExpression.negate(bts.first.test)] if e = bts.shift.belse bts.unshift e end end # if () { goto a; } a: - if bts.last.kind_of? C::Goto and ary[0].kind_of? C::Label and bts.last.target == ary[0].name + if bts.last.kind_of?(C::Goto) and ary[0].kind_of?(C::Label) and bts.last.target == ary[0].name bts.pop end # if { a; goto outer; } b; return; => if (!) { b; return; } a; goto outer; - if bts.last.kind_of? C::Goto and not inner_labels.include? bts.last.target and g = ary.find { |ss| ss.kind_of? C::Goto or ss.kind_of? C::Return } and g.kind_of? C::Return + if bts.last.kind_of?(C::Goto) and not inner_labels.include?(bts.last.target) and g = ary.find { |ss| ss.kind_of?(C::Goto) or ss.kind_of?(C::Return) } and g.kind_of?(C::Return) s.test = C::CExpression.negate s.test ary[0..ary.index(g)], bts[0..-1] = bts, ary[0..ary.index(g)] end # if { a; goto l; } b; l: => if {a;} else {b;} - if bts.last.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target } + if bts.last.kind_of?(C::Goto) and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target } s.belse = C::Block.new(scope) s.belse.statements = decompile_cseq_if(ary[0...ary.index(l)], s.belse) ary[0...ary.index(l)] = [] bts.pop end # if { a; l: b; goto any;} c; goto l; => if { a; } else { c; } b; goto any; - if not s.belse and (bts.last.kind_of? C::Goto or bts.last.kind_of? C::Return) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target } + if not s.belse and (bts.last.kind_of?(C::Goto) or bts.last.kind_of?(C::Return)) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target } s.belse = C::Block.new(scope) s.belse.statements = decompile_cseq_if(ary[0...ary.index(g)], s.belse) ary[0..ary.index(g)], bts[bts.index(l)..-1] = bts[bts.index(l)..-1], [] end # if { a; b; c; } else { d; b; c; } => if {a;} else {d;} b; c; if s.belse bes = s.belse.statements while not bts.empty? - if bts.last.kind_of? C::Label; ary.unshift bts.pop - elsif bes.last.kind_of? C::Label; ary.unshift bes.pop + if bts.last.kind_of?(C::Label); ary.unshift bts.pop + elsif bes.last.kind_of?(C::Label); ary.unshift bes.pop elsif bts.last.to_s == bes.last.to_s; ary.unshift bes.pop ; bts.pop else break end end @@ -752,28 +777,28 @@ #when 1; s.bthen = bts.first # later (allows simpler handling in _while) end end # l1: l2: if () goto l1; goto l2; => if(!) goto l2; goto l1; - if s.kind_of? C::If + if s.kind_of?(C::If) ls = s.bthen - ls = ls.statements.last if ls.kind_of? C::Block - if ls.kind_of? C::Goto + ls = ls.statements.last if ls.kind_of?(C::Block) + if ls.kind_of?(C::Goto) if li = inner_labels.index(ls.target) table = inner_labels else - table = ary.map { |st| st.name if st.kind_of? C::Label }.compact.reverse + table = ary.map { |st| st.name if st.kind_of?(C::Label) }.compact.reverse li = table.index(ls.target) || table.length end g = ary.find { |ss| - break if ss.kind_of? C::Return - next if not ss.kind_of? C::Goto + break if ss.kind_of?(C::Return) + next if not ss.kind_of?(C::Goto) table.index(ss.target).to_i > li } if g s.test = C::CExpression.negate s.test - if not s.bthen.kind_of? C::Block + if not s.bthen.kind_of?(C::Block) ls = C::Block.new(scope) ls.statements << s.bthen s.bthen = ls end ary[0..ary.index(g)], s.bthen.statements = s.bthen.statements, decompile_cseq_if(ary[0..ary.index(g)], scope) @@ -788,85 +813,100 @@ def decompile_cseq_while(ary, scope) return if forbid_decompile_ifwhile # find the next instruction that is not a label - ni = lambda { |l| ary[ary.index(l)..-1].find { |s| not s.kind_of? C::Label } } + ni = lambda { |li| (li..ary.length).find { |ni_| not ary[ni_].kind_of?(C::Label) } } - # TODO XXX get rid of #index - finished = false ; while not finished ; finished = true # 1.9 does not support 'retry' - ary.each { |s| + finished = false; while not finished; finished = true # ruby1.9 does not support 'retry' + i = 0 + while i < ary.length + si = i + s = ary[si] + i += 1 case s when C::Label - if ss = ni[s] and ss.kind_of? C::If and not ss.belse and ss.bthen.kind_of? C::Block - if ss.bthen.statements.last.kind_of? C::Goto and ss.bthen.statements.last.target == s.name + if ssi = ni[si] and ss = ary[ssi] and ss.kind_of?(C::If) and not ss.belse and ss.bthen.kind_of?(C::Block) + if ss.bthen.statements.last.kind_of?(C::Goto) and ss.bthen.statements.last.target == s.name + # l: if (a) { b; goto l; } => while(a) { b; } ss.bthen.statements.pop - if l = ary[ary.index(ss)+1] and l.kind_of? C::Label - ss.bthen.statements.grep(C::If).each { |i| - i.bthen = C::Break.new if i.bthen.kind_of? C::Goto and i.bthen.target == l.name + if l = ary[ssi+1] and l.kind_of?(C::Label) + ss.bthen.statements.grep(C::If).each { |it| + it.bthen = C::Break.new if it.bthen.kind_of?(C::Goto) and it.bthen.target == l.name } end - ary[ary.index(ss)] = C::While.new(ss.test, ss.bthen) - elsif ss.bthen.statements.last.kind_of? C::Return and g = ary[ary.index(s)+1..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name } + ary[ssi] = C::While.new(ss.test, ss.bthen) + elsif ss.bthen.statements.last.kind_of?(C::Return) and gi = ((si+1)..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::Goto) and ary[_si].target == s.name } + # l: if (a) { b; return; } c; goto l; => while (!a) { c; } b; return; wb = C::Block.new(scope) - wb.statements = decompile_cseq_while(ary[ary.index(ss)+1...ary.index(g)], wb) + wb.statements = decompile_cseq_while(ary[ssi+1...gi], wb) w = C::While.new(C::CExpression.negate(ss.test), wb) - ary[ary.index(ss)..ary.index(g)] = [w, *ss.bthen.statements] + ary[ssi..gi] = [w, *ss.bthen.statements] finished = false ; break #retry end end - if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name } + if gi = (si..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::Goto) and ary[_si].target == s.name } + # l: a; goto l; => while(1) { a; } wb = C::Block.new(scope) - wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb) + wb.statements = decompile_cseq_while(ary[si...gi], wb) w = C::While.new(C::CExpression[1], wb) - ary[ary.index(s)..ary.index(g)] = [w] + ary[si..gi] = [w] finished = false ; break #retry end - if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::If and not _s.belse and gt = _s.bthen and - (gt = gt.kind_of?(C::Block) && gt.statements.length == 1 ? gt.statements.first : gt) and gt.kind_of? C::Goto and gt.target == s.name } + if gi = (si..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::If) and not ary[_si].belse and gt = ary[_si].bthen and + (gt = gt.kind_of?(C::Block) ? gt.statements.last : gt) and gt.kind_of?(C::Goto) and gt.target == s.name } + # l: a; if (b) goto l; => do { a; } while (b); + # l: a; if (b) { c; goto l; } => do { a; if (!b) break; c; } while(1); wb = C::Block.new(scope) - wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb) - w = C::DoWhile.new(g.test, wb) - ary[ary.index(s)..ary.index(g)] = [w] + g = ary[gi] + if g.bthen.kind_of?(C::Block) and g.bthen.statements.length > 1 + nary = ary[si...gi] + [C::If.new(C::CExpression.negate(g.test), C::Break.new)] + g.bthen.statements[0...-1] + wb.statements = decompile_cseq_while(nary, wb) + w = C::DoWhile.new(C::CExpression[1], wb) + else + wb.statements = decompile_cseq_while(ary[si...gi], wb) + w = C::DoWhile.new(g.test, wb) + end + ary[si..gi] = [w] finished = false ; break #retry end when C::If - decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block - decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of? C::Block + decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of?(C::Block) + decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of?(C::Block) when C::While, C::DoWhile - decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of? C::Block + decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of?(C::Block) end - } end + end # while finished ary end # TODO def decompile_cseq_switch(scope) - uncast = lambda { |e| e = e.rexpr while e.kind_of? C::CExpression and not e.op ; e } + uncast = lambda { |e| e = e.rexpr while e.kind_of?(C::CExpression) and not e.op ; e } walk(scope) { |s| # XXX pfff... - next if not s.kind_of? C::If + next if not s.kind_of?(C::If) # if (v < 12) return ((void(*)())(tableaddr+4*v))(); t = s.bthen - t = t.statements.first if t.kind_of? C::Block and t.statements.length == 1 - next if not t.kind_of? C::Return or not t.respond_to? :from_instr - next if t.from_instr.comment.to_a.include? 'switch' - next if not t.value.kind_of? C::CExpression or t.value.op != :funcall or t.value.rexpr != [] or not t.value.lexpr.kind_of? C::CExpression or t.value.lexpr.op + t = t.statements.first if t.kind_of?(C::Block) and t.statements.length == 1 + next if not t.kind_of?(C::Return) or not t.respond_to?(:from_instr) + next if t.from_instr.comment.to_a.include?('switch') + next if not t.value.kind_of?(C::CExpression) or t.value.op != :funcall or t.value.rexpr != [] or not t.value.lexpr.kind_of?(C::CExpression) or t.value.lexpr.op p = uncast[t.value.lexpr.rexpr] - next if not p.kind_of? C::CExpression or p.op != :* or p.lexpr + next if not p.kind_of?(C::CExpression) or p.op != :* or p.lexpr p = uncast[p.rexpr] - next if not p.kind_of? C::CExpression or p.op != :+ + next if not p.kind_of?(C::CExpression) or p.op != :+ r, l = uncast[p.rexpr], uncast[p.lexpr] - r, l = l, r if r.kind_of? C::CExpression - next if not r.kind_of? ::Integer or not l.kind_of? C::CExpression or l.op != :* or not l.lexpr + r, l = l, r if r.kind_of?(C::CExpression) + next if not r.kind_of?(::Integer) or not l.kind_of?(C::CExpression) or l.op != :* or not l.lexpr lr, ll = uncast[l.rexpr], uncast[l.lexpr] - lr, ll = ll, lr if not ll.kind_of? ::Integer + lr, ll = ll, lr if not ll.kind_of?(::Integer) next if ll != sizeof(nil, C::Pointer.new(C::BaseType.new(:void))) base, index = r, lr - if s.test.kind_of? C::CExpression and (s.test.op == :<= or s.test.op == :<) and s.test.lexpr == index and - s.test.rexpr.kind_of? C::CExpression and not s.test.rexpr.op and s.test.rexpr.rexpr.kind_of? ::Integer + if s.test.kind_of?(C::CExpression) and (s.test.op == :<= or s.test.op == :<) and s.test.lexpr == index and + s.test.rexpr.kind_of?(C::CExpression) and not s.test.rexpr.op and s.test.rexpr.rexpr.kind_of?(::Integer) t.from_instr.add_comment 'switch' sup = s.test.rexpr.rexpr rng = ((s.test.op == :<) ? (0...sup) : (0..sup)) from = t.from_instr.address rng.map { |i| @dasm.backtrace(Indirection[base+ll*i, ll, from], from, :type => :x, :origin => from, :maxdepth => 0) } @@ -881,35 +921,35 @@ def remove_labels(scope) return if forbid_optimize_labels used = [] walk(scope) { |ss| - used |= [ss.target] if ss.kind_of? C::Goto + used |= [ss.target] if ss.kind_of?(C::Goto) } walk(scope) { |s| - next if not s.kind_of? C::Block + next if not s.kind_of?(C::Block) s.statements.delete_if { |l| - l.kind_of? C::Label and not used.include? l.name + l.kind_of?(C::Label) and not used.include?(l.name) } } # remove implicit continue; at end of loop walk(scope) { |s| - next if not s.kind_of? C::While - if s.body.kind_of? C::Block and s.body.statements.last.kind_of? C::Continue + next if not s.kind_of?(C::While) + if s.body.kind_of?(C::Block) and s.body.statements.last.kind_of?(C::Continue) s.body.statements.pop end } end # checks if expr is a var (var or *&var) def isvar(ce, var) - if var.stackoff and ce.kind_of? C::CExpression + if var.stackoff and ce.kind_of?(C::CExpression) return unless ce.op == :* and not ce.lexpr ce = ce.rexpr - ce = ce.rexpr while ce.kind_of? C::CExpression and not ce.op - return unless ce.kind_of? C::CExpression and ce.op == :& and not ce.lexpr + ce = ce.rexpr while ce.kind_of?(C::CExpression) and not ce.op + return unless ce.kind_of?(C::CExpression) and ce.op == :& and not ce.lexpr ce = ce.rexpr end ce == var end @@ -927,11 +967,11 @@ end # checks if expr writes var def ce_write(ce_, var) walk_ce(ce_) { |ce| - break true if AssignOp.include?(ce.op) and (isvar(ce.lexpr, var) or + break true if C::CExpression::AssignOp.include?(ce.op) and (isvar(ce.lexpr, var) or (((ce.op == :'++' or ce.op == :'--') and isvar(ce.rexpr, var)))) } end # patches a set of exprs, replacing oldce by newce @@ -969,18 +1009,20 @@ # [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1)) read = {} write = {} ro = {} wo = {} + g_exprs = {} + g.exprs_var[var.name].to_h.each { |k, v| g_exprs[k] = v.map { |i| g.exprs[k][i] } } # list of [l, i] for which domain is not known unchecked = [] # mark all exprs of the graph # TODO handle var_14 __attribute__((out)) = &curvar <=> curvar write r = var.has_attribute_var('register') - g.exprs.each { |label, exprs| + g_exprs.each { |label, exprs| exprs.each_with_index { |ce, i| if ce_read(ce, var) if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or (ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym)) (ro[label] ||= []) << i @@ -1004,61 +1046,62 @@ # marks stuff do walk down walk_up = lambda { |l, i| todo_w = [[l, i-1]] done_w = [] while o = todo_w.pop - next if done_w.include? o + next if done_w.include?(o) done_w << o l, i = o loop do - if read[l].to_a.include? i + if read[l].to_a.include?(i) # XXX not optimal (should mark only the uppest read) - todo_down |= [[l, i]] if not dom.include? [l, i] + todo_down |= [[l, i]] if not dom.include?([l, i]) dom |= [[l, i]] - elsif write[l].to_a.include? i - todo_down |= [[l, i]] if not dom.include? [l, i] + elsif write[l].to_a.include?(i) + todo_down |= [[l, i]] if not dom.include?([l, i]) dom |= [[l, i]] break - elsif wo[l].to_a.include? i - todo_down |= [[l, i]] if not dom_wo.include? [l, i, :down] + elsif wo[l].to_a.include?(i) + todo_down |= [[l, i]] if not dom_wo.include?([l, i, :down]) dom_wo |= [[l, i, :down]] break end i -= 1 if i < 0 g.from_optim[l].to_a.each { |ll| - todo_w << [ll, g.exprs[ll].to_a.length-1] + todo_w << [ll, g_exprs[ll].to_a.length-1] } + # read unitialized func_top = true if g.from_optim[l].to_a.empty? break end end end } # flood by walking the graph down from [l, i] (excluded) - # malks stuff to walk up + # marks stuff to walk up walk_down = lambda { |l, i| todo_w = [[l, i+1]] done_w = [] while o = todo_w.pop - next if done_w.include? o + next if done_w.include?(o) done_w << o l, i = o loop do - if read[l].to_a.include? i - todo_up |= [[l, i]] if not dom.include? [l, i] + if read[l].to_a.include?(i) + todo_up |= [[l, i]] if not dom.include?([l, i]) dom |= [[l, i]] - elsif write[l].to_a.include? i + elsif write[l].to_a.include?(i) break - elsif ro[l].to_a.include? i - todo_up |= [[l, i]] if not dom_ro.include? [l, i, :up] + elsif ro[l].to_a.include?(i) + todo_up |= [[l, i]] if not dom_ro.include?([l, i, :up]) dom_ro |= [[l, i, :up]] break end i += 1 - if i >= g.exprs[l].to_a.length + if i >= g_exprs[l].to_a.length g.to_optim[l].to_a.each { |ll| todo_w << [ll, 0] } break end @@ -1075,15 +1118,15 @@ todo_up = [] todo_down = [] # init - if read[o[0]].to_a.include? o[1] + if read[o[0]].to_a.include?(o[1]) todo_up << o todo_down << o dom << o - elsif write[o[0]].to_a.include? o[1] + elsif write[o[0]].to_a.include?(o[1]) todo_down << o dom << o elsif o[2] == :up todo_up << o dom_ro << o @@ -1109,29 +1152,33 @@ # patch n_i = 0 n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"] nv = var.dup + nv.misc = var.misc ? var.misc.dup : {} nv.storage = :register if nv.has_attribute_var('register') nv.attributes = nv.attributes.dup if nv.attributes nv.name = newvarname + nv.misc[:unalias_name] = newvarname scope.statements << C::Declaration.new(nv) scope.symbol[nv.name] = nv - dom.each { |oo| ce_patch(g.exprs[oo[0]][oo[1]], var, nv) } + dom.each { |oo| ce_patch(g_exprs[oo[0]][oo[1]], var, nv) } dom_ro.each { |oo| - ce = g.exprs[oo[0]][oo[1]] - if ce.op == :funcall or ce.rexpr.kind_of? C::CExpression + ce = g_exprs[oo[0]][oo[1]] + if ce.op == :funcall + ce_patch(ce, var, nv) + elsif ce.rexpr.kind_of?(C::CExpression) ce_patch(ce.rexpr, var, nv) else ce.rexpr = nv end } dom_wo.each { |oo| - ce = g.exprs[oo[0]][oo[1]] + ce = g_exprs[oo[0]][oo[1]] if ce.op == :funcall - elsif ce.lexpr.kind_of? C::CExpression + elsif ce.lexpr.kind_of?(C::CExpression) ce_patch(ce.lexpr, var, nv) else ce.lexpr = nv end } @@ -1172,17 +1219,14 @@ scope.statements << C::Declaration.new(v) end v } - scope.decompdata[:stackoff_name].each { |o, n| newvar[o, n] } - scope.decompdata[:stackoff_type].each { |o, t| newvar[o, stackoff_to_varname(o)] } - walk_ce(scope) { |e| next if e.op != :+ and e.op != :- - next if not e.lexpr.kind_of? C::Variable or e.lexpr.name != 'frameptr' - next if not e.rexpr.kind_of? C::CExpression or e.rexpr.op or not e.rexpr.rexpr.kind_of? ::Integer + next if not e.lexpr.kind_of?(C::Variable) or e.lexpr.name != 'frameptr' + next if not e.rexpr.kind_of?(C::CExpression) or e.rexpr.op or not e.rexpr.rexpr.kind_of?(::Integer) off = e.rexpr.rexpr off = -off if e.op == :- v = newvar[off, stackoff_to_varname(off)] e.replace C::CExpression[:&, v] } @@ -1198,49 +1242,49 @@ # name => type types = {} pscopevar = lambda { |e| - e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.rexpr.kind_of? C::CExpression - if e.kind_of? C::CExpression and e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable + e = e.rexpr while e.kind_of?(C::CExpression) and not e.op and e.rexpr.kind_of?(C::CExpression) + if e.kind_of?(C::CExpression) and e.op == :& and not e.lexpr and e.rexpr.kind_of?(C::Variable) e.rexpr.name if scope.symbol[e.rexpr.name] end } scopevar = lambda { |e| - e = e.rexpr if e.kind_of? C::CExpression and not e.op - if e.kind_of? C::Variable and scope.symbol[e.name] + e = e.rexpr if e.kind_of?(C::CExpression) and not e.op + if e.kind_of?(C::Variable) and scope.symbol[e.name] e.name - elsif e.kind_of? C::CExpression and e.op == :* and not e.lexpr + elsif e.kind_of?(C::CExpression) and e.op == :* and not e.lexpr pscopevar[e.rexpr] end } globalvar = lambda { |e| - e = e.rexpr if e.kind_of? C::CExpression and not e.op - if e.kind_of? ::Integer and @dasm.get_section_at(e) + e = e.rexpr if e.kind_of?(C::CExpression) and not e.op + if e.kind_of?(::Integer) and e > 0x10000 and @dasm.get_section_at(e) e - elsif e.kind_of? C::Variable and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name) + elsif e.kind_of?(C::Variable) and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name) e.name end } # check if a newly found type for o is better than current type # order: foo* > void* > foo better_type = lambda { |t0, t1| - t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of? C::BaseType) or t0.untypedef.kind_of? C::Union or - (t0.kind_of? C::BaseType and t1.kind_of? C::BaseType and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or + t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of?(C::BaseType)) or t0.untypedef.kind_of?(C::Union) or + (t0.kind_of?(C::BaseType) and t1.kind_of?(C::BaseType) and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or (t0.pointer? and t1.pointer? and better_type[t0.pointed, t1.pointed]) } update_global_type = lambda { |e, t| if ne = new_global_var(e, t, scope) ne.type = t if better_type[t, ne.type] # TODO patch existing scopes using ne # TODO rename (dword_xx -> byte_xx etc) - e = scope.symbol_ancestors[e] || e if e.kind_of? String # exe reloc + e = scope.symbol_ancestors[e] || e if e.kind_of?(String) # exe reloc walk_ce(scope) { |ce| ce.lexpr = ne if ce.lexpr == e ce.rexpr = ne if ce.rexpr == e - if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of? C::Union + if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of?(C::Union) # *struct -> struct->bla ce.rexpr = structoffset(ne.type.pointed.untypedef, ce.rexpr, 0, sizeof(ce.type)) elsif ce.lexpr == ne or ce.rexpr == ne # set ce type according to l/r # TODO set ce.parent type etc @@ -1253,24 +1297,24 @@ propagate_type = nil # fwd declaration propagating = [] # recursion guard (x = &x) # check if need to change the type of a var # propagate_type if type is updated update_type = lambda { |n, t| - next if propagating.include? n + next if propagating.include?(n) o = scope.symbol[n].stackoff - next if not o and t.untypedef.kind_of? C::Union - next if o and scope.decompdata[:stackoff_type][o] and t != scope.decompdata[:stackoff_type][o] + next if not o and t.untypedef.kind_of?(C::Union) + next if scope.decompdata[:unalias_type][n] and t != scope.decompdata[:unalias_type][n] next if t0 = types[n] and not better_type[t, t0] next if o and (t.integral? or t.pointer?) and o % sizeof(t) != 0 # keep vars aligned types[n] = t next if t == t0 propagating << n propagate_type[n, t] propagating.delete n next if not o t = t.untypedef - if t.kind_of? C::Struct + if t.kind_of?(C::Struct) t.members.to_a.each { |m| mo = t.offsetof(@c_parser, m.name) next if mo == 0 scope.symbol.each { |vn, vv| update_type[vn, m.type] if vv.stackoff == o+mo @@ -1280,42 +1324,42 @@ } # try to update the type of a var from knowing the type of an expr (through dereferences etc) known_type = lambda { |e, t| loop do - e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.type == t + e = e.rexpr while e.kind_of?(C::CExpression) and not e.op and e.type == t if o = scopevar[e] update_type[o, t] elsif o = globalvar[e] update_global_type[o, t] - elsif not e.kind_of? C::CExpression + elsif not e.kind_of?(C::CExpression) elsif o = pscopevar[e] and t.pointer? update_type[o, t.pointed] elsif e.op == :* and not e.lexpr e = e.rexpr t = C::Pointer.new(t) next - elsif t.pointer? and e.op == :+ and e.lexpr.kind_of? C::CExpression and e.lexpr.type.integral? and e.rexpr.kind_of? C::Variable + elsif t.pointer? and e.op == :+ and e.lexpr.kind_of?(C::CExpression) and e.lexpr.type.integral? and e.rexpr.kind_of?(C::Variable) e.lexpr, e.rexpr = e.rexpr, e.lexpr next - elsif e.op == :+ and e.lexpr and e.rexpr.kind_of? C::CExpression - if not e.rexpr.op and e.rexpr.rexpr.kind_of? ::Integer + elsif e.op == :+ and e.lexpr and e.rexpr.kind_of?(C::CExpression) + if not e.rexpr.op and e.rexpr.rexpr.kind_of?(::Integer) if t.pointer? and e.rexpr.rexpr < 0x1000 and (e.rexpr.rexpr % sizeof(t.pointed)) == 0 # XXX relocatable + base=0.. e = e.lexpr # (int)*(x+2) === (int) *x next elsif globalvar[e.rexpr.rexpr] known_type[e.lexpr, C::BaseType.new(:int)] e = e.rexpr next end - elsif t.pointer? and (e.lexpr.kind_of? C::CExpression and e.lexpr.lexpr and [:<<, :>>, :*, :&].include? e.lexpr.op) or + elsif t.pointer? and (e.lexpr.kind_of?(C::CExpression) and e.lexpr.lexpr and [:<<, :>>, :*, :&].include?(e.lexpr.op)) or (o = scopevar[e.lexpr] and types[o] and types[o].integral? and !(o = scopevar[e.rexpr] and types[o] and types[o].integral?)) e.lexpr, e.rexpr = e.rexpr, e.lexpr # swap e = e.lexpr next - elsif t.pointer? and ((e.rexpr.kind_of? C::CExpression and e.rexpr.lexpr and [:<<, :>>, :*, :&].include? e.rexpr.op) or + elsif t.pointer? and ((e.rexpr.kind_of?(C::CExpression) and e.rexpr.lexpr and [:<<, :>>, :*, :&].include?(e.rexpr.op)) or (o = scopevar[e.rexpr] and types[o] and types[o].integral? and !(o = scopevar[e.lexpr] and types[o] and types[o].integral?))) e = e.lexpr next end @@ -1327,23 +1371,23 @@ # we found a type for a var, propagate it through affectations propagate_type = lambda { |var, type| walk_ce(scope) { |ce| next if ce.op != :'=' - if ce.lexpr.kind_of? C::Variable and ce.lexpr.name == var + if ce.lexpr.kind_of?(C::Variable) and ce.lexpr.name == var known_type[ce.rexpr, type] next end - if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == var + if ce.rexpr.kind_of?(C::Variable) and ce.rexpr.name == var known_type[ce.lexpr, type] next end # int **x; y = **x => int y t = type l = ce.lexpr - while l.kind_of? C::CExpression and l.op == :* and not l.lexpr + while l.kind_of?(C::CExpression) and l.op == :* and not l.lexpr if var == pscopevar[l.rexpr] known_type[ce.rexpr, t] break elsif t.pointer? l = l.rexpr @@ -1353,11 +1397,11 @@ end # int **x; **x = y => int y t = type r = ce.rexpr - while r.kind_of? C::CExpression and r.op == :* and not r.lexpr + while r.kind_of?(C::CExpression) and r.op == :* and not r.lexpr if var == pscopevar[r.rexpr] known_type[ce.lexpr, t] break elsif t.pointer? r = r.rexpr @@ -1371,36 +1415,36 @@ } # put all those macros in use # use user-defined types first scope.symbol.each_value { |v| - next if not v.kind_of? C::Variable or not v.stackoff or not t = scope.decompdata[:stackoff_type][v.stackoff] + next if not v.kind_of?(C::Variable) or not t = scope.decompdata[:unalias_type][v.name] known_type[v, t] } # try to infer types from C semantics later = [] walk_ce(scope) { |ce| - if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of? ::Integer and - ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of? C::CExpression or ce.lexpr.op != :'*' or ce.lexpr.lexpr))) + if ce.op == :'=' and ce.rexpr.kind_of?(C::CExpression) and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of?(::Integer) and + ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of?(C::CExpression) or ce.lexpr.op != :'*' or ce.lexpr.lexpr))) # var = int known_type[ce.lexpr, ce.rexpr.type] elsif ce.op == :funcall f = ce.lexpr.type f = f.pointed if f.pointer? - next if not f.kind_of? C::Function + next if not f.kind_of?(C::Function) # cast func args to arg prototypes f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] } elsif ce.op == :* and not ce.lexpr - if e = ce.rexpr and e.kind_of? C::CExpression and not e.op and e = e.rexpr and e.kind_of? C::CExpression and - e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable and e.rexpr.stackoff + if e = ce.rexpr and e.kind_of?(C::CExpression) and not e.op and e = e.rexpr and e.kind_of?(C::CExpression) and + e.op == :& and not e.lexpr and e.rexpr.kind_of?(C::Variable) and e.rexpr.stackoff # skip *(__int32*)&var_12 for now, avoid saying var12 is an int if it may be a ptr or anything later << [ce.rexpr, C::Pointer.new(ce.type)] next end known_type[ce.rexpr, C::Pointer.new(ce.type)] - elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of? C::Function + elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of?(C::Function) # cast to fptr: must be a fptr known_type[ce.rexpr, ce.type] end } @@ -1421,11 +1465,11 @@ memb = {} types.dup.each { |n, t| v = scope.symbol[n] next if not o = v.stackoff t = t.untypedef - if t.kind_of? C::Struct + if t.kind_of?(C::Struct) t.members.to_a.each { |tm| moff = t.offsetof(@c_parser, tm.name) next if moff == 0 types.delete_if { |vv, tt| scope.symbol[vv].stackoff == o+moff } memb[o+moff] = [v, tm.name, tm.type] @@ -1467,11 +1511,11 @@ else re end } when o = scopevar[ce.lexpr]; ce.lexpr = maycast[varat[o], ce.lexpr] when o = scopevar[ce.rexpr]; ce.rexpr = maycast[varat[o], ce.rexpr] - ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of? C::Variable + ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of?(C::Variable) when o = pscopevar[ce.lexpr]; ce.lexpr = maycast_p[varat[o], ce.lexpr] when o = pscopevar[ce.rexpr]; ce.rexpr = maycast_p[varat[o], ce.rexpr] when o = scopevar[ce]; ce.replace C::CExpression[maycast[varat[o], ce]] when o = pscopevar[ce]; ce.replace C::CExpression[maycast_p[varat[o], ce]] end @@ -1483,18 +1527,18 @@ # if int32 var_4 is always var_4 & 255, change type to int8 varuse = Hash.new(0) varandff = Hash.new(0) varandffff = Hash.new(0) walk_ce(scope) { |ce| - if ce.op == :& and ce.lexpr.kind_of? C::Variable and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer + if ce.op == :& and ce.lexpr.kind_of?(C::Variable) and ce.lexpr.type.integral? and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) case ce.rexpr.rexpr when 0xff; varandff[ce.lexpr.name] += 1 when 0xffff; varandffff[ce.lexpr.name] += 1 end end - varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable - varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable + varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable) + varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable) } varandff.each { |k, v| scope.symbol[k].type = C::BaseType.new(:__int8, :unsigned) if varuse[k] == v } varandffff.each { |k, v| @@ -1503,32 +1547,100 @@ # propagate types to cexprs walk_ce(scope, true) { |ce| if ce.op ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type rescue next - if ce.op == :'=' and ce.rexpr.kind_of? C::Typed and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?) - known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of? C::Function # localvar = &struct with fptr + if ce.op == :'=' and ce.rexpr.kind_of?(C::Typed) and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?) + known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of?(C::Function) # localvar = &struct with fptr ce.rexpr = C::CExpression[[ce.rexpr], ce.type] end - elsif ce.type.pointer? and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed) + elsif ce.type.pointer? and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed) ce.type = ce.rexpr.type end } end + # use casts to determine variable types after code optimization + # if all uses of var_42 are through *(int*)(&var_42), set type to int + def decompile_c_types_again(scope) + return if forbid_decompile_types + + update_type = lambda { |n, t| + o = scope.symbol[n].stackoff + next if not o and t.untypedef.kind_of?(C::Union) + next if scope.decompdata[:unalias_type][n] and t != scope.decompdata[:unalias_type][n] + scope.symbol[n].type = t + } + + # true if e is a cast of a var address + is_cast = lambda { |ce| + true if not ce.op and not ce.lexpr and ce.rexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr + } + + # scan code for casts + uses = {} + count_refs = Hash.new(0) + walk_ce(scope) { |ce| + count_refs[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable) + count_refs[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable) + if is_cast[ce] and ce.rexpr.rexpr.kind_of?(C::Variable) + (uses[ce.rexpr.rexpr.name] ||= []) << ce.type.pointed + end + } + + # given a list of types, return a type compatible with all + summary_type = lambda { |type_list| + t = type_list.first + type_list.each { |tt| + if sizeof(t) != sizeof(tt) or t.integral? != tt.integral? or t.pointer? != tt.pointer? + t = nil + break + elsif t != tt + t = tt if t.to_s =~ /__/ + end + } + t + } + + updated = {} + uses.each { |n, tl| + if tl.length == count_refs[n] and t = summary_type[tl] and update_type[n, t] + updated[n] = true + end + } + return if updated.empty? + + walk_ce(scope, true) { |ce| + if is_cast[ce] and updated[ce.rexpr.rexpr.name] + ce.op = :& + ce.rexpr = ce.rexpr.rexpr + elsif ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of?(C::Variable) and updated[ce.rexpr.rexpr.name] + ce.op = nil + ce.rexpr = ce.rexpr.rexpr + elsif ce.op + if ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(C::Variable) and updated[ce.rexpr.rexpr.name] + ce.rexpr = ce.rexpr.rexpr + end + if ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(C::Variable) and updated[ce.lexpr.rexpr.name] + ce.lexpr = ce.lexpr.rexpr + end + end + } + end + # struct foo { int i; int j; struct { int k; int l; } m; }; bla+12 => &bla->m.l # st is a struct, ptr is an expr pointing to a struct, off is a numeric offset from ptr, msz is the size of the pointed member (nil ignored) def structoffset(st, ptr, off, msz) tabidx = off / sizeof(st) off -= tabidx * sizeof(st) - ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of? C::Array + ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of?(C::Array) return ptr if off == 0 and (not msz or # avoid infinite recursion with eg chained list - (ptr.kind_of? C::CExpression and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and - not s.type.untypedef.kind_of? C::Union)) + (ptr.kind_of?(C::CExpression) and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and + not s.type.untypedef.kind_of?(C::Union))) m_ptr = lambda { |m| - if ptr.kind_of? C::CExpression and ptr.op == :& and not ptr.lexpr + if ptr.kind_of?(C::CExpression) and ptr.op == :& and not ptr.lexpr C::CExpression[ptr.rexpr, :'.', m.name] else C::CExpression[ptr, :'->', m.name] end } @@ -1542,11 +1654,11 @@ mbs.find { |m| mo[m] <= off and mo[m]+sizeof(m) > off } off -= mo[sm] sst = sm.type.untypedef #return ptr if mo[sm] == 0 and sst.pointer? and sst.type.untypedef == st # TODO fix infinite recursion on mutually recursive ptrs ptr = C::CExpression[:&, m_ptr[sm]] - if sst.kind_of? C::Union + if sst.kind_of?(C::Union) return structoffset(sst, ptr, off, msz) end end if off != 0 @@ -1559,19 +1671,19 @@ # fix pointer arithmetic (eg int foo += 4 => int* foo += 1) # use struct member access (eg *(structptr+8) => structptr->bla) # must be run only once, right after type setting def fix_pointer_arithmetic(scope) walk_ce(scope, true) { |ce| - if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include? ce.op + if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include?(ce.op) ce.lexpr = C::CExpression[[ce.lexpr], C::BaseType.new(:int)] end - if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Union)) + if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of?(C::Union))) ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr end - if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Struct + if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of?(C::Struct) s = ce.rexpr.type.pointed.untypedef m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 } if sizeof(m) != sizeof(ce) ce.rexpr = C::CExpression[[ce.rexpr, C::Pointer.new(s)], C::Pointer.new(ce.type)] next @@ -1580,11 +1692,11 @@ ce.lexpr = ce.rexpr ce.op = :'->' ce.rexpr = m.name ce.type = m.type next - elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of? C::Struct + elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of?(C::Struct) s = ce.lexpr.type.untypedef m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 } ce.lexpr = C::CExpression.new(ce.lexpr, :'.', m.name, m.type) ce.type = m.type next @@ -1592,22 +1704,22 @@ if ce.op == :+ and ce.lexpr and ce.lexpr.type.pointer? and not ce.type.pointer? ce.type = ce.lexpr.type end - if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr + if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :* and not ce.rexpr.lexpr ce.replace C::CExpression[ce.rexpr.rexpr] end next if not ce.lexpr or not ce.lexpr.type.pointer? - if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of? C::Union and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and - ce.rexpr.rexpr.kind_of? ::Integer and o = ce.rexpr.rexpr + if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of?(C::Union) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and + ce.rexpr.rexpr.kind_of?(::Integer) and o = ce.rexpr.rexpr # structptr + 4 => &structptr->member ce.replace structoffset(s, ce.lexpr, o, nil) - elsif [:+, :-, :'+=', :'-='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ((not ce.rexpr.op and i = ce.rexpr.rexpr) or - (ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of? C::CExpression and not i.op and i = i.rexpr) or true))) and - i.kind_of? ::Integer and psz = sizeof(nil, ce.lexpr.type.pointed) and i % psz == 0 + elsif [:+, :-, :'+=', :'-='].include?(ce.op) and ce.rexpr.kind_of?(C::CExpression) and ((not ce.rexpr.op and i = ce.rexpr.rexpr) or + (ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of?(C::CExpression) and not i.op and i = i.rexpr) or true))) and + i.kind_of?(::Integer) and psz = sizeof(nil, ce.lexpr.type.pointed) and i % psz == 0 # ptr += 4 => ptr += 1 if not ce.rexpr.op ce.rexpr.rexpr /= psz else ce.rexpr.lexpr.rexpr /= psz @@ -1644,134 +1756,161 @@ next if not v1.type.integral? varinfo.each { |v2, (o2, l2)| # XXX o1 may overlap o2 AND another (int32 v_10; int32 v_E; int32 v_C;) # TODO should check stuff with aliasing domains next if v1.name == v2.name or o1 >= o2+l2 or o1+l1 <= o2 or l1 > l2 or (l2 == l1 and o2 >= o1) + next if o1 == o2 and l1 != l2 # v1 => *(&v2+delta) p = C::CExpression[:&, v2] - p = C::CExpression[p, :+, [o1-o2]] + p = C::CExpression[p, :+, [o1-o2]] if o1 != o2 p = C::CExpression[p, C::Pointer.new(v1.type)] if v1.type != p.type.type p = C::CExpression[:*, p] walk_ce(scope) { |ce| ce.lexpr = p if ce.lexpr == v1 ce.rexpr = p if ce.rexpr == v1 } } } end - # to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types - # will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' => 'if (++x)..') - # remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;') - # remove useless casts ('(int)i' with 'int i;' => 'i') - def optimize(scope) - optimize_code(scope) - optimize_vars(scope) - optimize_vars(scope) # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i) - end - # simplify cexpressions (char & 255, redundant casts, etc) def optimize_code(scope) return if forbid_optimize_code sametype = lambda { |t1, t2| t1 = t1.untypedef t2 = t2.untypedef - t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of? C::Function - t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function + t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of?(C::Function) + t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of?(C::Function) t1 == t2 or - (t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and + (t1.kind_of?(C::Function) and t2.kind_of?(C::Function) and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or - (t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or + (t1.kind_of?(C::BaseType) and t1.integral? and t2.kind_of?(C::BaseType) and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type]) } # most of this is a CExpr#reduce future_array = [] walk_ce(scope, true) { |ce| # (whatever)0 => 0 - if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 + if not ce.op and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 ce.replace ce.rexpr end # *&bla => bla if types ok - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type] + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type] ce.replace C::CExpression[ce.rexpr.rexpr] end - # int x + 0xffffffff -> x-1 - if ce.lexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and [:+, :-, :'+=', :'-=', :'!=', :==, :>, :<, :>=, :<=].include? ce.op and + # int x + 0xffffffff => x-1 + if ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and [:+, :-, :'+=', :'-=', :'!=', :==, :>, :<, :>=, :<=].include?(ce.op) and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr)))-1 ce.op = {:+ => :-, :- => :+, :'+=' => :'-=', :'-=' => :'+='}[ce.op] ce.rexpr.rexpr = 1 end + # i + ptr => ptr + i + if ce.op == :+ and ce.lexpr and ce.rexpr.type.pointer? and ce.lexpr.type.integral? + ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr + end + + # i + v => v + i + if ce.op == :+ and ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer) + # avoid infinite swapping + if not ce.rexpr.kind_of?(C::CExpression) or ce.rexpr.op or not ce.rexpr.rexpr.kind_of?(::Integer) + ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr + end + end + + # (a + b) + c => a + (b + c) + if ce.op == :+ and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :+ and ce.lexpr.lexpr + ce.lexpr, ce.rexpr = ce.lexpr.lexpr, C::CExpression[ce.lexpr.rexpr, :+, ce.rexpr] + optimize_code(ce) + end + + # 1 + 2 => 3 + if (ce.op == :+ or ce.op == :- or ce.op == :*) and ce.lexpr.kind_of?(C::CExpression) and ce.type.integral? and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) + ce.lexpr, ce.op, ce.rexpr = nil, nil, ce.lexpr.rexpr.send(ce.op, ce.rexpr.rexpr) + end + + # 4 * (a + 1) => 4*a + 4 + if ce.op == :* and ce.lexpr.kind_of?(C::CExpression) and ce.type.integral? and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer) and ce.rexpr.kind_of?(C::CExpression) and (ce.rexpr.op == :+ or ce.rexpr.op == :-) and ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of?(C::CExpression) and not ce.rexpr.rexpr.op and ce.rexpr.rexpr.rexpr.kind_of?(::Integer) + ce.replace C::CExpression[[ce.lexpr, ce.op, ce.rexpr.lexpr], ce.rexpr.op, [ce.lexpr.rexpr * ce.rexpr.rexpr.rexpr]] + optimize_code(ce) + end + # int *ptr; *(ptr + 4) => ptr[4] - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer? + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of?(C::Variable) and var.type.pointer? ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr future_array << var.name end + # ptr + (i << 3) => ptr + 8*i + if (ce.op == :+ or ce.op == :[]) and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :<< and ce.rexpr.rexpr.kind_of?(C::CExpression) and not ce.rexpr.rexpr.op and ce.rexpr.rexpr.rexpr.kind_of?(::Integer) + ce.rexpr.rexpr.rexpr = 1 << ce.rexpr.rexpr.rexpr + ce.rexpr.lexpr, ce.rexpr.op, ce.rexpr.rexpr = ce.rexpr.rexpr, :*, ce.rexpr.lexpr + end + # char x; x & 255 => x - if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of? C::CExpression and - not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and m = (1 << (8*sizeof(ce.lexpr))) - 1 and + if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of?(C::CExpression) and + not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and m = (1 << (8*sizeof(ce.lexpr))) - 1 and ce.rexpr.rexpr & m == m ce.replace C::CExpression[ce.lexpr] end # a + -b => a - b - if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :- and not ce.rexpr.lexpr + if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :- and not ce.rexpr.lexpr ce.op, ce.rexpr = :-, ce.rexpr.rexpr end # (((int) i >> 31) & 1) => i < 0 - if ce.op == :& and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 and - ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of? C::CExpression and + if ce.op == :& and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 1 and + ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of?(C::CExpression) and not ce.lexpr.rexpr.op and ce.lexpr.rexpr.rexpr == sizeof(ce.lexpr.lexpr) * 8 - 1 ce.replace C::CExpression[ce.lexpr.lexpr, :<, [0]] end # a-b == 0 => a == b - if ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include? ce.op and - ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :- and ce.lexpr.lexpr + if ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include?(ce.op) and + ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :- and ce.lexpr.lexpr ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr end # (a > 0) != 0 - if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and - [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op + if ce.op == :'!=' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of?(C::CExpression) and + [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include?(ce.lexpr.op) ce.replace ce.lexpr end # (a < b) != ( [(a < 0) == !(b < 0)] && [(a < 0) != (a < b)] ) => jl # a<b => true if !r => a<0 == b<0 or a>=0 => a>=0 or b>=0 # a>=b => true if r => a<0 == b>=0 and a<0 => a<0 and b>=0 # x != (a && (b != x)) => [x && (!a || b)] || [!x && !(!a || b)] - if ce.op == :'!=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :< and ce.rexpr.kind_of? C::CExpression and - ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.op == :'!=' and + if ce.op == :'!=' and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :< and ce.rexpr.kind_of?(C::CExpression) and + ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of?(C::CExpression) and ce.rexpr.rexpr.op == :'!=' and ce.rexpr.rexpr.rexpr == ce.lexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } x, a, b = ce.lexpr, ce.rexpr.lexpr, ce.rexpr.rexpr.lexpr ce.replace C::CExpression[ [x, :'&&', [[:'!',a],:'||',b]] , :'||', [[:'!', x], :'&&', [:'!', [[:'!',a],:'||',b]]] ] optimize_code(ce) end # (a != b) || a => a || b - if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } + if ce.op == :'||' and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall } ce.lexpr, ce.rexpr = ce.rexpr, ce.lexpr.rexpr optimize_code(ce) end # (a<b) && !(a>=0 && b<0) || (a>=b) && (a>=0 && b<0) => (signed)a < (signed)b - if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.rexpr.kind_of? C::CExpression and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and - ce.lexpr.lexpr.kind_of? C::CExpression and ce.lexpr.lexpr.op == :< + if ce.op == :'||' and ce.lexpr.kind_of?(C::CExpression) and ce.rexpr.kind_of?(C::CExpression) and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and + ce.lexpr.lexpr.kind_of?(C::CExpression) and ce.lexpr.lexpr.op == :< a, b = ce.lexpr.lexpr.lexpr, ce.lexpr.lexpr.rexpr if ce.lexpr.rexpr === C::CExpression[[a, :'>=', [0]], :'&&', [b, :'<', [0]]].negate and ce.rexpr.lexpr === ce.lexpr.lexpr.negate and ce.rexpr.rexpr === ce.lexpr.rexpr.negate ce.replace C::CExpression[a, :'<', b] end end # a && 1 - if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer + if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) if ((ce.op == :'||' and ce.rexpr.rexpr == 0) or (ce.op == :'&&' and ce.rexpr.rexpr != 0)) ce.replace C::CExpression[ce.lexpr] elsif not walk_ce(ce) { |ce_| break true if ce.op == :funcall } # cannot wipe if sideeffect ce.replace C::CExpression[[ce.op == :'||' ? 1 : 0]] end @@ -1780,197 +1919,247 @@ if (ce.op == :'||' or ce.op == :'&&') and C::CExpression.negate(ce.lexpr) == C::CExpression[ce.rexpr] ce.replace C::CExpression[[(ce.op == :'||') ? 1 : 0]] end # (a < b) | (a == b) => a <= b - if ce.op == :| and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :== and ce.lexpr.kind_of? C::CExpression and + if ce.op == :| and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :== and ce.lexpr.kind_of?(C::CExpression) and (ce.lexpr.op == :< or ce.lexpr.op == :>) and ce.lexpr.lexpr == ce.rexpr.lexpr and ce.lexpr.rexpr == ce.rexpr.rexpr ce.op = {:< => :<=, :> => :>=}[ce.lexpr.op] ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr end # a == 0 => !a - if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 + if ce.op == :== and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 ce.lexpr, ce.op, ce.rexpr = nil, :'!', ce.lexpr end - if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer + if ce.op == :'!' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) ce.replace C::CExpression[[ce.rexpr.rexpr == 0 ? 1 : 0]] end # !(bool) => bool - if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include? ce.rexpr.op + if ce.op == :'!' and ce.rexpr.kind_of?(C::CExpression) and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include?(ce.rexpr.op) ce.replace ce.rexpr.negate end # (foo)(bar)x => (foo)x - if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression + if not ce.op and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(C::CExpression) ce.rexpr = ce.rexpr.rexpr end # &struct.1stmember => &struct - if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and - s.kind_of? C::Union and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0 + if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and + s.kind_of?(C::Union) and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0 ce.rexpr = ce.rexpr.lexpr ce.type = C::Pointer.new(ce.rexpr.type) end # (1stmember*)structptr => &structptr->1stmember - if not ce.op and ce.type.pointer? and not ce.type.pointed.void? and ce.rexpr.kind_of? C::Typed and ce.rexpr.type.pointer? and - s = ce.rexpr.type.pointed.untypedef and s.kind_of? C::Union and ce.type.pointed.untypedef != s + if not ce.op and ce.type.pointer? and not ce.type.pointed.void? and ce.rexpr.kind_of?(C::Typed) and ce.rexpr.type.pointer? and + s = ce.rexpr.type.pointed.untypedef and s.kind_of?(C::Union) and ce.type.pointed.untypedef != s ce.rexpr = C::CExpression[structoffset(s, ce.rexpr, 0, sizeof(ce.type.pointed))] - #ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of? C::Function or (ce.rexpr.type.pointer? and - #ce.rexpr.type.pointed.untypedef.kind_of? C::Function) # XXX ugly + #ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of?(C::Function) or (ce.rexpr.type.pointer? and + #ce.rexpr.type.pointed.untypedef.kind_of?(C::Function)) # XXX ugly # int32* v1 = (int32*)pstruct; # z = v1+4 if v1 is not cast, the + is invalid (sizeof pointed changes) # TODO when finding type of pstruct, set type of v1 accordingly end # (&foo)->bar => foo.bar - if ce.op == :'->' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :& and not ce.lexpr.lexpr + if ce.op == :'->' and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :& and not ce.lexpr.lexpr ce.lexpr = ce.lexpr.rexpr ce.op = :'.' end # (foo)bla => bla if bla of type foo - if not ce.op and ce.rexpr.kind_of? C::Typed and sametype[ce.type, ce.rexpr.type] + if not ce.op and ce.rexpr.kind_of?(C::Typed) and sametype[ce.type, ce.rexpr.type] ce.replace C::CExpression[ce.rexpr] end - if ce.lexpr.kind_of? C::CExpression and not ce.lexpr.op and ce.lexpr.rexpr.kind_of? C::Variable and ce.lexpr.type == ce.lexpr.rexpr.type + if ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(C::Variable) and ce.lexpr.type == ce.lexpr.rexpr.type ce.lexpr = ce.lexpr.rexpr end - - if ce.op == :'=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :* and not ce.lexpr.lexpr and ce.lexpr.rexpr.kind_of? C::CExpression and - not ce.lexpr.rexpr.op and ce.lexpr.rexpr.type.pointer? and ce.lexpr.rexpr.type.pointed != ce.rexpr.type - ce.lexpr.rexpr.type = C::Pointer.new(ce.rexpr.type) - optimize_code(ce.lexpr) - end } # if there is a ptr[4], change all *ptr to ptr[0] for consistency # do this after the first pass, which may change &*ptr to ptr walk_ce(scope) { |ce| - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::Variable and future_array.include? ce.rexpr.name + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::Variable) and future_array.include?(ce.rexpr.name) ce.lexpr, ce.op, ce.rexpr = ce.rexpr, :'[]', C::CExpression[0] end } if not future_array.empty? # if (x != 0) => if (x) walk(scope) { |st| - if st.kind_of? C::If and st.test.kind_of? C::CExpression and st.test.op == :'!=' and - st.test.rexpr.kind_of? C::CExpression and not st.test.rexpr.op and st.test.rexpr.rexpr == 0 + if st.kind_of?(C::If) and st.test.kind_of?(C::CExpression) and st.test.op == :'!=' and + st.test.rexpr.kind_of?(C::CExpression) and not st.test.rexpr.op and st.test.rexpr.rexpr == 0 st.test = C::CExpression[st.test.lexpr] end } end # checks if an expr has sideeffects (funcall, var assignment, mem dereference, use var out of scope if specified) def sideeffect(exp, scope=nil) case exp when nil, ::Numeric, ::String; false when ::Array; exp.any? { |_e| sideeffect _e, scope } - when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include? :volatile - when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op) or + when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include?(:volatile) + when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or C::CExpression::AssignOp.include?(exp.op) or sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope) else true # failsafe end end # converts C code to a graph of cexprs (nodes = cexprs, edges = codepaths) # returns a CGraph class CGraph - # exprs: label => [exprs], to: label => [labels], block: label => are exprs standalone (vs If#test), start: 1st label + # exprs: label => [exprs], to: label => [labels], block: label => are exprs in a block (vs If#test), start: 1st label attr_accessor :exprs, :to, :block, :start, :to_optim, :from_optim - end - def c_to_graph(st) - g = CGraph.new - g.exprs = {} # label => [exprs] - g.to = {} # label => [labels] - g.block = {} # label => is label in a block? (vs If#test) - anon_label = 0 # when no label is there, use anon_label++ + + def initialize + @exprs = {} # label => [exprs] + @to = {} # label => [labels] + @block = {} # label => is label in a block? (vs If#test) + @anon_label = 0 # when no label is there, use anon_label++ + @exprs_var = nil # similar to @exprs, indexed by var name, lazy initialization + end + + def build(stmt) + @start = @anon_label + to_graph(stmt, @start, nil, nil, nil) + optimize + self + end + # converts C code to a graph of codepath of cexprs - to_graph = lambda { |stmt, l_cur, l_after, l_cont, l_break| + def to_graph(stmt, l_cur, l_after, l_cont, l_break) case stmt - when C::Label; g.to[l_cur] = [stmt.name] ; g.to[stmt.name] = [l_after] - when C::Goto; g.to[l_cur] = [stmt.target] - when C::Continue; g.to[l_cur] = [l_cont] - when C::Break; g.to[l_cur] = [l_break] + when C::Label; @to[l_cur] = [stmt.name] ; @to[stmt.name] = [l_after] + when C::Goto; @to[l_cur] = [stmt.target] + when C::Continue; @to[l_cur] = [l_cont] + when C::Break; @to[l_cur] = [l_break] when C::CExpression - g.exprs[l_cur] = [stmt] - g.to[l_cur] = [l_after] + @exprs[l_cur] = [stmt] + @to[l_cur] = [l_after] when C::Return - g.exprs[l_cur] = [stmt.value] if stmt.value - g.to[l_cur] = [] + @exprs[l_cur] = [stmt.value] if stmt.value + @to[l_cur] = [] when C::Block - to_graph[stmt.statements, l_cur, l_after, l_cont, l_break] + to_graph(stmt.statements, l_cur, l_after, l_cont, l_break) when ::Array - g.exprs[l_cur] = [] - g.block[l_cur] = true + @exprs[l_cur] = [] + @block[l_cur] = true stmt.each_with_index { |s, i| case s when C::Declaration when C::CExpression - g.exprs[l_cur] << s + @exprs[l_cur] << s else - l = anon_label += 1 - ll = anon_label += 1 - g.to[l_cur] = [l] - g.block[l_cur] = true - to_graph[stmt[i], l, ll, l_cont, l_break] + l = @anon_label += 1 + ll = @anon_label += 1 + @to[l_cur] = [l] + @block[l_cur] = true + to_graph(stmt[i], l, ll, l_cont, l_break) l_cur = ll - g.exprs[l_cur] = [] + @exprs[l_cur] = [] end } - g.to[l_cur] = [l_after].compact + @to[l_cur] = [l_after].compact when C::If - g.exprs[l_cur] = [stmt.test] - lt = anon_label += 1 - to_graph[stmt.bthen, lt, l_after, l_cont, l_break] - le = anon_label += 1 - to_graph[stmt.belse, le, l_after, l_cont, l_break] - g.to[l_cur] = [lt, le] + @exprs[l_cur] = [stmt.test] + lt = @anon_label += 1 + to_graph(stmt.bthen, lt, l_after, l_cont, l_break) + le = @anon_label += 1 + to_graph(stmt.belse, le, l_after, l_cont, l_break) + @to[l_cur] = [lt, le] when C::While, C::DoWhile - la = anon_label += 1 - if stmt.kind_of? C::DoWhile + la = @anon_label += 1 + if stmt.kind_of?(C::DoWhile) lt, lb = la, l_cur else lt, lb = l_cur, la end - g.exprs[lt] = [stmt.test] - g.to[lt] = [lb, l_after] - to_graph[stmt.body, lb, lt, lt, l_after] - when C::Asm, nil; g.to[l_cur] = [l_after] + @exprs[lt] = [stmt.test] + @to[lt] = [lb, l_after] + to_graph(stmt.body, lb, lt, lt, l_after) + when C::Asm, nil; @to[l_cur] = [l_after] else puts "to_graph unhandled #{stmt.class}: #{stmt}" if $VERBOSE end - } + end - g.start = anon_label - to_graph[st, g.start, nil, nil, nil] - # optimize graph - g.to_optim = {} - g.to.each { |k, v| g.to_optim[k] = v.uniq } - g.exprs.delete_if { |k, v| v == [] } - g.to_optim.delete_if { |k, v| - if v.length == 1 and not g.exprs[k] and v != [k] - g.to_optim.each_value { |t| if i = t.index(k) ; t[i] = v.first ; end } - true - elsif v.length == 0 and not g.exprs[k] - g.to_optim.each_value { |t| t.delete k } - true + def optimize + @to_optim = {} + @to.each { |k, v| @to_optim[k] = v.uniq } + @exprs.delete_if { |k, v| v == [] } + @to_optim.delete_if { |k, v| + if v.length == 1 and not @exprs[k] and v != [k] + @to_optim.each_value { |t| if i = t.index(k) ; t[i] = v.first ; end } + true + elsif v.length == 0 and not @exprs[k] + @to_optim.each_value { |t| t.delete k } + true + end + } + + @from_optim = {} + @to_optim.each { |k, v| v.each { |t| (@from_optim[t] ||= []) << k } } + end + + # varname => { label => [list of indices of @exprs[label] referencing varname] } + def exprs_var + @exprs_var ||= init_exprs_var + end + + # returns the list of variable names referenced by a CExpr + def get_expr_vars(e) + case e + when C::CExpression; get_expr_vars(e.lexpr) + get_expr_vars(e.rexpr) + when ::Array; e.inject([]) { |a, ee| a.concat get_expr_vars(ee) } + when C::Variable; [e.name] + else; [] end - } + end - g.from_optim = {} - g.to_optim.each { |k, v| v.each { |t| (g.from_optim[t] ||= []) << k } } + # initialize @exprs_var + def init_exprs_var + @exprs_var = {} + @exprs.each_key { |label| update_exprs_var(label) } + @exprs_var + end - g + # populate one label of @exprs_var + def update_exprs_var(label) + @exprs[label].each_with_index { |e, idx| + get_expr_vars(e).uniq.each { |varname| + @exprs_var[varname] ||= {} + @exprs_var[varname][label] ||= [] + @exprs_var[varname][label] << idx + } + } + end + + # invalidates one label (eg exprs were deleted) + # rebuilds @exprs_var if necessary + def invalidate(label=nil) + if @exprs_var + if label + @exprs_var.each { |v, h| h.delete(label) } + update_exprs_var(label) + else + @exprs_var = nil + end + end + end end + def c_to_graph(stmt) + CGraph.new.build(stmt) + end # dataflow optimization # condenses expressions (++x; if (x) => if (++x)) # remove local var assignment (x = 1; f(x); x = 2; g(x); => f(1); g(2); etc) + # XXX omg def optimize_vars(scope) return if forbid_optimize_dataflow g = c_to_graph(scope) @@ -1994,66 +2183,70 @@ } } # badlabels is a list of labels that may be reached without passing through the first invocation block find_next_read_rec = lambda { |label, idx, var, done, badlabels| - next if done.include? label + next if done.include?(label) done << label if idx == 0 + list = g.exprs_var[var.name][label].to_a.find_all { |i| i >= idx } + idx = list.shift - idx += 1 while ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var] + idx = list.shift while idx and not ret = find_next_read_ce[g.exprs[label][idx], var] next ret if ret to = g.to_optim[label].to_a.map { |t| - break [:split] if badlabels.include? t + break [:split] if badlabels.include?(t) find_next_read_rec[t, 0, var, done, badlabels] }.compact tw = to - [:write] - if to.include? :split or tw.length > 1 + if to.include?(:split) or tw.length > 1 :split elsif tw.length == 1 tw.first - elsif to.include? :write + elsif to.include?(:write) :write end } # return the previous subexpr reading var with no fwd path to another reading (otherwise split), see loop comment for reason find_next_read = nil find_prev_read_rec = lambda { |label, idx, var, done| - next if done.include? label + next if done.include?(label) done << label if idx == g.exprs[label].length-1 + list = g.exprs_var[var.name][label].to_a.find_all { |i| i <= idx } + idx = list.pop - idx -= 1 while idx >= 0 and ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var] - if ret.kind_of? C::CExpression + idx = list.pop while idx and not ret = find_next_read_ce[g.exprs[label][idx], var] + if ret.kind_of?(C::CExpression) fwchk = find_next_read[label, idx+1, var] - ret = fwchk if not fwchk.kind_of? C::CExpression + ret = fwchk if not fwchk.kind_of?(C::CExpression) end next ret if ret from = g.from_optim[label].to_a.map { |f| find_prev_read_rec[f, g.exprs[f].to_a.length-1, var, done] }.compact - next :split if from.include? :split + next :split if from.include?(:split) fw = from - [:write] if fw.length == 1 fw.first elsif fw.length > 1 :split - elsif from.include? :write + elsif from.include?(:write) :write end } - # list of labels reachable without using a label + # list of labels reachable without passing through label badlab = {} build_badlabel = lambda { |label| next if badlab[label] badlab[label] = [] todo = [g.start] while l = todo.pop - next if l == label or badlab[label].include? l + next if l == label or badlab[label].include?(l) badlab[label] << l todo.concat g.to_optim[l].to_a end } @@ -2083,14 +2276,14 @@ e = exprs[i] i += 1 # TODO x = x + 1 => x += 1 => ++x here, move all other optimizations after (in optim_code) # needs also int & 0xffffffff -> int, *&var etc (decomp_type? optim_type?) - if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and - scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile - next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and - !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression) + if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of?(C::Variable) and + scope.symbol[v.name] and not v.type.qualifier.to_a.include?(:volatile) + next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of?(C::CExpression)) and + !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of?(C::CExpression)) next if oe.op == :& and not oe.lexpr # no &(++eax) # merge pre/postincrement into next/prev var usage # find_prev_read must fwd check when it finds something, to avoid # while(x) x++; return x; to be converted to while(x++); return x; (return wrong value) @@ -2119,11 +2312,11 @@ oe.rexpr = C::CExpression[1] end when :'+=', :'-=' # TODO i++; i += 4 => i += 5 next - when *AssignOp + when *C::CExpression::AssignOp next # ++i; i |= 4 => ignore else if pos == :post and v == oe.lexpr; oe.lexpr = C::CExpression[e.op, v] elsif pos == :post and v == oe.rexpr; oe.rexpr = C::CExpression[e.op, v] elsif pos == :prev and v == oe.rexpr; oe.rexpr = C::CExpression[v, e.op] @@ -2132,20 +2325,31 @@ end end i -= 1 exprs.delete_at(i) + g.invalidate(label) e.lexpr = e.op = e.rexpr = nil - elsif e.op == :'=' and v = e.lexpr and v.kind_of? C::Variable and scope.symbol[v.name] and - not v.type.qualifier.to_a.include? :volatile and not find_next_read_ce[e.rexpr, v] + elsif e.op == :'=' and v = e.lexpr and v.kind_of?(C::Variable) and scope.symbol[v.name] and + not v.type.qualifier.to_a.include?(:volatile) and not find_next_read_ce[e.rexpr, v] # reduce trivial static assignments - if (e.rexpr.kind_of? C::CExpression and iv = e.rexpr.reduce(@c_parser) and iv.kind_of? ::Integer) or - (e.rexpr.kind_of? C::CExpression and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of? C::Variable) or - (e.rexpr.kind_of? C::Variable and e.rexpr.type.kind_of? C::Array) + # b = a + 1 ; a = b => a = a + 1 ; b = a + if ne = g.exprs[label][i] and ne.op == :'=' and ne.rexpr == e.lexpr and ne.lexpr.kind_of?(C::Variable) and find_next_read_ce[e.rexpr, ne.lexpr] + e.lexpr, ne.lexpr, ne.rexpr = ne.lexpr, ne.rexpr, ne.lexpr + optimize_code(e) + i -= 1 + g.invalidate(label) + next + end + + # i = 4 ; f(i) => f(4) + if (e.rexpr.kind_of?(C::CExpression) and iv = e.rexpr.reduce(@c_parser) and iv.kind_of?(::Integer)) or + (e.rexpr.kind_of?(C::CExpression) and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of?(C::Variable)) or + (e.rexpr.kind_of?(C::Variable) and e.rexpr.type.kind_of?(C::Array)) rewritten = false readers = [] discard = [e] g.exprs.each { |l, el| el.each_with_index { |ce, ci| @@ -2171,26 +2375,26 @@ discard.each { |d| d.lexpr = d.op = d.rexpr = nil } next end end - case nr = find_next_read[label, i, v] + case find_next_read[label, i, v] when C::CExpression # read in one place only, try to patch rexpr in there r = e.rexpr # must check for conflicts (x = y; y += 1; foo(x) =!> foo(y)) # XXX x = a[1]; *(a+1) = 28; foo(x)... isfunc = false depend_vars = [] walk_ce(C::CExpression[r]) { |ce| - isfunc = true if ce.op == :func and (not ce.lexpr.kind_of? C::Variable or + isfunc = true if ce.op == :func and (not ce.lexpr.kind_of?(C::Variable) or not ce.lexpr.has_attribute('pure')) # XXX is there a C attr for func depending only on staticvars+param ? - depend_vars << ce.lexpr if ce.lexpr.kind_of? C::Variable - depend_vars << ce.rexpr if ce.rexpr.kind_of? C::Variable and (ce.lexpr or ce.op != :&) # a = &v; v = 12; func(a) => func(&v) + depend_vars << ce.lexpr if ce.lexpr.kind_of?(C::Variable) + depend_vars << ce.rexpr if ce.rexpr.kind_of?(C::Variable) and (ce.lexpr or ce.op != :&) # a = &v; v = 12; func(a) => func(&v) depend_vars << ce if ce.lvalue? - depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of? ::Array + depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of?(::Array) } depend_vars.uniq! # XXX x = 1; if () { x = 2; } foo(x) =!> foo(1) (find_next_read will return this) # we'll just redo a find_next_read like @@ -2207,15 +2411,15 @@ when :funcall bad = true if isfunc ce.rexpr.each { |a| cnt += 1 if a == v } cnt += 1 if ce.lexpr == v when :'=' - bad = true if depend_vars.include? ce.lexpr + bad = true if depend_vars.include?(ce.lexpr) cnt += 1 if ce.rexpr == v else - bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include? ce.rexpr - bad = true if AssignOp.include? ce.op and depend_vars.include? ce.lexpr + bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include?(ce.rexpr) + bad = true if C::CExpression::AssignOp.include?(ce.op) and depend_vars.include?(ce.lexpr) cnt += 1 if ce.lexpr == v cnt += 1 if ce.rexpr == v end } case cnt @@ -2224,11 +2428,11 @@ next when 1 # good break if e.complexity > 10 and ce_.complexity > 3 # try to keep the C readable # x = 1; y = x; z = x; => cannot suppress x nr = find_next_read[l_l, n_i+1, v] - break if (nr.kind_of? C::CExpression or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v } + break if (nr.kind_of?(C::CExpression) or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v } else break # a = 1; b = a + a => fail end # TODO XXX x = 1; y = x; z = x; res = walk_ce(ce_, true) { |ce| @@ -2244,16 +2448,16 @@ ce.lexpr = r break :done elsif isfunc break :fail end - when *AssignOp - break :fail if not ce.lexpr and depend_vars.include? ce.rexpr # ++depend + when *C::CExpression::AssignOp + break :fail if not ce.lexpr and depend_vars.include?(ce.rexpr) # ++depend if ce.rexpr == v ce.rexpr = r break :done - elsif ce.lexpr == v or depend_vars.include? ce.lexpr + elsif ce.lexpr == v or depend_vars.include?(ce.lexpr) break :fail end else break :fail if ce.op == :& and not ce.lexpr and ce.rexpr == v if ce.lexpr == v @@ -2268,19 +2472,22 @@ } case res when :done i -= 1 exprs.delete_at(i) + g.invalidate(label) + g.invalidate(l_l) if l_l != label e.lexpr = e.op = e.rexpr = nil break when :fail break end } # ignore branches that will never reuse v - may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of? C::CExpression } - if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l] + may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of?(C::CExpression) } + if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l] and + not sideeffect(e.rexpr) # dont do cross-block var optimization, eg 'a = f() ; if() return a' =!> 'if () return f()' l_i = 0 l_l = to else break end end @@ -2290,14 +2497,14 @@ # XXX foo = &bar; bar = 12; baz(*foo) e.replace(C::CExpression[e.rexpr]) # remove sideeffectless subexprs loop do case e.op - when :funcall, *AssignOp + when :funcall, *C::CExpression::AssignOp else - l = (e.lexpr.kind_of? C::CExpression and sideeffect(e.lexpr)) - r = (e.rexpr.kind_of? C::CExpression and sideeffect(e.rexpr)) + l = (e.lexpr.kind_of?(C::CExpression) and sideeffect(e.lexpr)) + r = (e.rexpr.kind_of?(C::CExpression) and sideeffect(e.rexpr)) if l and r # could split... elsif l e.replace(e.lexpr) next elsif r @@ -2309,127 +2516,223 @@ e.lexpr = e.op = e.rexpr = nil end end break end + g.invalidate(label) end end end } + + # var propagation + # find vars who are written only once, and replace all their use by their assignment value + # XXX this may supercede some of the ugly stuff just before + loop do + g.invalidate + writtenonce = {} # var => [label, offset] of assignment + neverwritten = {} # var => true (eg args) + g.exprs_var.each { |varname, h1| + next if not var = scope.symbol[varname] + neverwritten[varname] = true + h1.each { |label, idx_list| + idx_list.each { |expr_idx| + e = g.exprs[label][expr_idx] + if ce_write(e, var) + neverwritten.delete varname + if writtenonce[varname] + # written twice, abort + writtenonce.delete varname + break + elsif e.op == :'=' and e.lexpr == var and not ce_write(e.rexpr, var) and not ce_read(e.rexpr, var) + # good ! + writtenonce[varname] = [label, expr_idx] + else + # unhandled write, abort + break + end + end + } + } + } + # XXX check cycles ? + + is_trivial_assign = lambda { |e, rec_max| + case e + when C::Variable; writtenonce[e.name] or neverwritten[e.name] + when ::Integer, nil; true + when C::CExpression + rec_max > 0 and + not sideeffect(e) and + (e.op == :+ or e.op == :- or e.op == nil) and + is_trivial_assign[e.lexpr, rec_max-1] and + is_trivial_assign[e.rexpr, rec_max-1] + end + } + break if not trivial_var = writtenonce.keys.find { |var| + l, i = writtenonce[var] + e = g.exprs[l][i].rexpr + is_trivial_assign[e, 3] + } + label, idx = writtenonce[trivial_var] + assign_expr = g.exprs[label][idx] + var = assign_expr.lexpr + value = assign_expr.rexpr + g.exprs_var[trivial_var].each { |l, list| + list.each { |i| + e = g.exprs[l][i] + if l == label and i == idx + e.lexpr = e.op = e.rexpr = nil + else + ce_patch(e, var, value) + optimize_code(e) + end + } + } + end + + # wipe cexprs marked in the previous step walk(scope) { |st| - next if not st.kind_of? C::Block - st.statements.delete_if { |e| e.kind_of? C::CExpression and not e.lexpr and not e.op and not e.rexpr } + next if not st.kind_of?(C::Block) + st.statements.delete_if { |e| e.kind_of?(C::CExpression) and not e.lexpr and not e.op and not e.rexpr } } + g.invalidate # reoptimize cexprs walk_ce(scope, true) { |ce| # redo some simplification that may become available after variable propagation # int8 & 255 => int8 - if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1 + if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1 ce.replace C::CExpression[ce.lexpr] end # int *ptr; *(ptr + 4) => ptr[4] - if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer? + if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of?(C::Variable) and var.type.pointer? ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr end # useless casts - if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of? C::CExpression or - (ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of? C::Union)) # keep ((struct*)0)->memb + if not ce.op and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of?(C::CExpression) or + (ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of?(C::Union))) # keep ((struct*)0)->memb ce.rexpr = ce.rexpr.rexpr end - if not ce.op and ce.rexpr.kind_of? C::CExpression and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?)) + if not ce.op and ce.rexpr.kind_of?(C::CExpression) and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?)) ce.replace ce.rexpr end # useless casts (type)*((oeua)Ptype) - if not ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of? C::CExpression and not ce.rexpr.rexpr.op and - p = ce.rexpr.rexpr.rexpr and p.kind_of? C::Typed and p.type.pointer? and ce.type == p.type.pointed + if not ce.op and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of?(C::CExpression) and not ce.rexpr.rexpr.op and + p = ce.rexpr.rexpr.rexpr and p.kind_of?(C::Typed) and p.type.pointer? and ce.type == p.type.pointed ce.op = ce.rexpr.op ce.rexpr = ce.rexpr.rexpr.rexpr end + + # (char *)42 => new global var + if not ce.op and ce.type.pointer? and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) + ce.rexpr = new_global_var(ce.rexpr.rexpr, ce.type, scope) || ce.rexpr + end + # (a > 0) != 0 - if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and - [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op + if ce.op == :'!=' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of?(C::CExpression) and + [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include?(ce.lexpr.op) ce.replace ce.lexpr end # a == 0 => !a - if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 + if ce.op == :== and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 ce.replace C::CExpression[:'!', ce.lexpr] end # !(int)a => !a - if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression + if ce.op == :'!' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(C::CExpression) ce.rexpr = ce.rexpr.rexpr end # (int)a < (int)b => a < b TODO uint <-> int - if [:<, :<=, :>, :>=].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.lexpr.kind_of? C::CExpression and not ce.rexpr.op and not ce.lexpr.op and - ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.type.pointer? + if [:<, :<=, :>, :>=].include?(ce.op) and ce.rexpr.kind_of?(C::CExpression) and ce.lexpr.kind_of?(C::CExpression) and not ce.rexpr.op and not ce.lexpr.op and + ce.rexpr.rexpr.kind_of?(C::CExpression) and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of?(C::CExpression) and ce.lexpr.rexpr.type.pointer? ce.rexpr = ce.rexpr.rexpr ce.lexpr = ce.lexpr.rexpr end # a & 3 & 1 - while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and - ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == ce.op and ce.lexpr.lexpr and - ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.rexpr.kind_of? ::Integer + while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and + ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == ce.op and ce.lexpr.lexpr and + ce.lexpr.rexpr.kind_of?(C::CExpression) and ce.lexpr.rexpr.rexpr.kind_of?(::Integer) ce.lexpr, ce.rexpr.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr.rexpr.send(ce.op, ce.rexpr.rexpr) end # x = x | 4 => x |= 4 - if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and [:+, :-, :*, :/, :|, :&, :^, :>>, :<<].include? ce.rexpr.op and ce.rexpr.lexpr == ce.lexpr + if ce.op == :'=' and ce.rexpr.kind_of?(C::CExpression) and [:+, :-, :*, :/, :|, :&, :^, :>>, :<<].include?(ce.rexpr.op) and ce.rexpr.lexpr == ce.lexpr ce.op = (ce.rexpr.op.to_s + '=').to_sym ce.rexpr = ce.rexpr.rexpr end + # x = 4 | x => x |= 4 + if ce.op == :'=' and ce.rexpr.kind_of?(C::CExpression) and [:+, :*, :|, :&, :^].include?(ce.rexpr.op) and ce.rexpr.rexpr == ce.lexpr + ce.op = (ce.rexpr.op.to_s + '=').to_sym + ce.rexpr = ce.rexpr.lexpr + end + # x += 1 => ++x - if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 - ce.lexpr, ce.op, ce.rexpr = nil, {:'+=' => :'++', :'-=' => :'--'}[ce.op], ce.lexpr + if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and (ce.rexpr.rexpr == 1 or ce.rexpr.rexpr == -1) + if ce.rexpr.rexpr == 1 + ce.op = {:'+=' => :'++', :'-=' => :'--'}[ce.op] + else + ce.op = {:'+=' => :'--', :'-=' => :'++'}[ce.op] + end + ce.rexpr = ce.lexpr + ce.lexpr = nil end # --x+1 => x-- - if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == {:+ => :'--', :- => :'++'}[ce.op] and - ce.lexpr.rexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 + if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == {:+ => :'--', :- => :'++'}[ce.op] and + ce.lexpr.rexpr and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 1 ce.lexpr, ce.op, ce.rexpr = ce.lexpr.rexpr, ce.lexpr.op, nil end + + # 1+2 => 3 + if ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer) and + ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and + [:+, :-, :*, :/, :|, :&, :^, :>, :'==', :'!=', :<, :>=, :<=].include?(ce.op) + ce.rexpr = ce.reduce(@c_parser) + ce.lexpr = ce.op = nil + end } end def remove_unreferenced_vars(scope) used = {} walk_ce(scope) { |ce| # remove unreferenced local vars - used[ce.rexpr.name] = true if ce.rexpr.kind_of? C::Variable - used[ce.lexpr.name] = true if ce.lexpr.kind_of? C::Variable - ce.rexpr.each { |v| used[v.name] = true if v.kind_of? C::Variable } if ce.rexpr.kind_of?(::Array) + used[ce.rexpr.name] = true if ce.rexpr.kind_of?(C::Variable) + used[ce.lexpr.name] = true if ce.lexpr.kind_of?(C::Variable) + ce.rexpr.each { |v| used[v.name] = true if v.kind_of?(C::Variable) } if ce.rexpr.kind_of?(::Array) } unused = scope.symbol.keys.find_all { |n| not used[n] } unused.each { |v| scope.symbol[v].add_attribute 'unused' } # fastcall args need it - scope.statements.delete_if { |sm| sm.kind_of? C::Declaration and unused.include? sm.var.name } - scope.symbol.delete_if { |n, v| unused.include? n } + scope.statements.delete_if { |sm| sm.kind_of?(C::Declaration) and unused.include?(sm.var.name) } + scope.symbol.delete_if { |n, v| unused.include?(n) } end def finalize optimize_global true end def optimize_global # check all global vars (pointers to global data) tl = @c_parser.toplevel - vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of? C::Variable and not tl.symbol[k].type.kind_of? C::Function } + vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of?(C::Variable) and not tl.symbol[k].type.kind_of?(C::Function) and not tl.symbol[k].storage == :extern and not tl.symbol[k].storage == :static } countref = Hash.new(0) walk_ce(tl) { |ce| # XXX int foo; void bar() { int foo; } => false negative - countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable - countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable + countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable) + countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable) } vars.delete_if { |v| countref[v] == 0 } - countref.delete_if { |k, v| not vars.include? k } + countref.delete_if { |k, v| not vars.include?(k) } # by default globals are C::Arrays # if all references are *foo, dereference the var type # TODO allow foo to appear (change to &foo) (but still disallow casts/foo+12 etc) countderef = Hash.new(0) @@ -2439,19 +2742,19 @@ elsif ce.op == :'->' r = C::CExpression[ce.lexpr] else next end # compare type.type cause var is an Array and the cast is a Pointer - countderef[r.rexpr.name] += 1 if r.kind_of? C::CExpression and not r.op and r.rexpr.kind_of? C::Variable and + countderef[r.rexpr.name] += 1 if r.kind_of?(C::CExpression) and not r.op and r.rexpr.kind_of?(C::Variable) and sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil } vars.each { |n| if countref[n] == countderef[n] v = tl.symbol[n] target = C::CExpression[:*, [v]] v.type = v.type.type - v.initializer = v.initializer.first if v.initializer.kind_of? ::Array + v.initializer = v.initializer.first if v.initializer.kind_of?(::Array) walk_ce(tl) { |ce| if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v] ce.op = :'.' elsif ce.lexpr == target ce.lexpr = v @@ -2462,32 +2765,32 @@ end } # if a global var appears only in one function, make it a static variable tl.statements.each { |st| - next if not st.kind_of? C::Declaration or not st.var.type.kind_of? C::Function or not scope = st.var.initializer + next if not st.kind_of?(C::Declaration) or not st.var.type.kind_of?(C::Function) or not scope = st.var.initializer localcountref = Hash.new(0) walk_ce(scope) { |ce| - localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable - localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable + localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable) + localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable) } vars.delete_if { |n| next if scope.symbol[n] next if localcountref[n] != countref[n] v = tl.symbol.delete(n) - tl.statements.delete_if { |d| d.kind_of? C::Declaration and d.var.name == n } + tl.statements.delete_if { |d| d.kind_of?(C::Declaration) and d.var.name == n } - if countref[n] == 1 and v.initializer.kind_of? C::CExpression and v.initializer.rexpr.kind_of? String + if countref[n] == 1 and v.initializer.kind_of?(C::CExpression) and v.initializer.rexpr.kind_of?(String) walk_ce(scope) { |ce| - if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == n + if ce.rexpr.kind_of?(C::Variable) and ce.rexpr.name == n if not ce.op ce.replace v.initializer else ce.rexpr = v.initializer end - elsif ce.lexpr.kind_of? C::Variable and ce.lexpr.name == n + elsif ce.lexpr.kind_of?(C::Variable) and ce.lexpr.name == n ce.lexpr = v.initializer end } else v.storage = :static @@ -2505,11 +2808,11 @@ scope.symbol.each_value { |v| v.type = C::BaseType.new(:int) if v.type.void? } args = func.type.args decl = [] scope.statements.delete_if { |sm| - next if not sm.kind_of? C::Declaration + next if not sm.kind_of?(C::Declaration) if sm.var.stackoff.to_i > 0 and sm.var.name !~ /_a(\d+)$/ # aliased vars: use 1st domain only args << sm.var else decl << sm end @@ -2518,12 +2821,12 @@ # move trivial affectations to initialiser # XXX a = 1 ; b = a ; a = 2 go = true # break from delete_if does not delete.. scope.statements.delete_if { |st| - if go and st.kind_of? C::CExpression and st.op == :'=' and st.rexpr.kind_of? C::CExpression and not st.rexpr.op and - st.rexpr.rexpr.kind_of? ::Integer and st.lexpr.kind_of? C::Variable and scope.symbol[st.lexpr.name] + if go and st.kind_of?(C::CExpression) and st.op == :'=' and st.rexpr.kind_of?(C::CExpression) and not st.rexpr.op and + st.rexpr.rexpr.kind_of?(::Integer) and st.lexpr.kind_of?(C::Variable) and scope.symbol[st.lexpr.name] st.lexpr.initializer = st.rexpr else go = false end } @@ -2540,20 +2843,32 @@ func.type.args << a # __fastcall next end while curoff > argoff wantarg = C::Variable.new - wantarg.name = scope.decompdata[:stackoff_name][argoff] || stackoff_to_varname(argoff) + wantarg.name = stackoff_to_varname(argoff) wantarg.type = C::BaseType.new(:int) wantarg.attributes = ['unused'] func.type.args << wantarg scope.symbol[wantarg.name] = wantarg argoff += @c_parser.typesize[:ptr] end func.type.args << a argoff += @c_parser.typesize[:ptr] } + + # use user-supplied names + scope.symbol.keys.each { |s| + v = scope.symbol[s] + next if not v.kind_of?(C::Variable) + v.misc ||= {} + uan = v.misc[:unalias_name] ||= s + if newname = scope.decompdata[:unalias_name][uan] and newname != s + v.name = newname + scope.symbol[newname] = scope.symbol.delete(s) + end + } end # rename local variables from subfunc arg names def rename_variables(scope) funcs = [] @@ -2561,33 +2876,33 @@ cmpi = [] walk_ce(scope) { |ce| funcs << ce if ce.op == :funcall cntrs << (ce.lexpr || ce.rexpr) if ce.op == :'++' - cmpi << ce.lexpr if [:<, :>, :<=, :>=, :==, :'!='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.kind_of? ::Integer + cmpi << ce.lexpr if [:<, :>, :<=, :>=, :==, :'!='].include?(ce.op) and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.rexpr.kind_of?(::Integer) } rename = lambda { |var, name| - var = var.rexpr if var.kind_of? C::CExpression and not var.op - next if not var.kind_of? C::Variable or not scope.symbol[var.name] or not name + var = var.rexpr if var.kind_of?(C::CExpression) and not var.op + next if not var.kind_of?(C::Variable) or not scope.symbol[var.name] or not name next if (var.name !~ /^(var|arg)_/ and not var.storage == :register) or not scope.symbol[var.name] or name =~ /^(var|arg)_/ s = scope.symbol_ancestors n = name i = 0 n = name + "#{i+=1}" while s[n] scope.symbol[n] = scope.symbol.delete(var.name) var.name = n } funcs.each { |ce| - next if not ce.lexpr.kind_of? C::Variable or not ce.lexpr.type.kind_of? C::Function + next if not ce.lexpr.kind_of?(C::Variable) or not ce.lexpr.type.kind_of?(C::Function) ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| rename[a, fa.name] if fa } } funcs.each { |ce| - next if not ce.lexpr.kind_of? C::Variable or not ce.lexpr.type.kind_of? C::Function + next if not ce.lexpr.kind_of?(C::Variable) or not ce.lexpr.type.kind_of?(C::Function) ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| - next if not a.kind_of? C::CExpression or a.op != :& or a.lexpr + next if not a.kind_of?(C::CExpression) or a.op != :& or a.lexpr next if not fa or not fa.name rename[a.rexpr, fa.name.sub(/^l?p/, '')] } } (cntrs & cmpi).each { |v| rename[v, 'cntr'] } @@ -2622,37 +2937,49 @@ end nil end # yields each statement (recursive) - def walk(scope, post=false, &b) + # replace the element by the block return value if patch is true + def walk(scope, post=false, patch=false, &b) case scope - when ::Array; scope.each { |s| walk(s, post, &b) } + when ::Array + scope.each_with_index { |s, i| + v = walk(s, post, patch, &b) + scope[i] = v if patch and v + } + nil when C::Statement - yield scope if not post + v = yield scope if not post case scope - when C::Block; walk(scope.statements, post, &b) + when C::Block + walk(scope.statements, post, patch, &b) when C::If - yield scope.test - walk(scope.bthen, post, &b) - walk(scope.belse, post, &b) if scope.belse + vv = yield scope.test + scope.test = vv if patch and vv + walk(scope.bthen, post, patch, &b) + walk(scope.belse, post, patch, &b) if scope.belse when C::While, C::DoWhile - yield scope.test - walk(scope.body, post, &b) + vv = yield scope.test + scope.test = vv if patch and vv + walk(scope.body, post, patch, &b) when C::Return - yield scope.value + vv = yield scope.value + scope.value = vv if patch and vv end - yield scope if post + v = yield scope if post + v when C::Declaration - walk(scope.var.initializer, post, &b) if scope.var.initializer + walk(scope.var.initializer, post, patch, &b) if scope.var.initializer + nil end end # forwards to @c_parser, handles cast to Array (these should not happen btw...) def sizeof(var, type=nil) - var, type = nil, var if var.kind_of? C::Type and not type + var, type = nil, var if var.kind_of?(C::Type) and not type type ||= var.type - return @c_parser.typesize[:ptr] if type.kind_of? C::Array and not var.kind_of? C::Variable + return @c_parser.typesize[:ptr] if type.kind_of?(C::Array) and not var.kind_of?(C::Variable) @c_parser.sizeof(var, type) rescue -1 end end end