# This file is part of Metasm, the Ruby assembly manipulation suite # Copyright (C) 2006-2009 Yoann GUILLOT # # Licence is LGPL, see LICENCE in the top-level directory require 'metasm/encode' require 'metasm/exe_format/elf' unless defined? Metasm::ELF module Metasm class ELF class Header def set_default_values elf @magic ||= "\x7fELF" @e_class ||= elf.bitsize.to_s @data ||= (elf.endianness == :big ? 'MSB' : 'LSB') @version ||= 'CURRENT' @i_version ||= @version @entry ||= 0 @phoff ||= elf.segments.empty? ? 0 : elf.new_label('phdr') @shoff ||= elf.sections.length <= 1 ? 0 : elf.new_label('shdr') @flags ||= [] @ehsize ||= Header.sizeof(elf) @phentsize ||= Segment.sizeof(elf) @phnum ||= elf.segments.length @shentsize ||= Section.sizeof(elf) @shnum ||= elf.sections.length super(elf) end end class Section def set_default_values elf make_name_p elf if name and @name != '' @flags ||= [] @addr ||= (encoded and @flags.include?('ALLOC')) ? elf.label_at(@encoded, 0) : 0 @offset ||= encoded ? elf.new_label('section_offset') : 0 @size ||= encoded ? @encoded.length : 0 @addralign ||= entsize || 0 @entsize ||= @addralign @link = elf.sections.index(@link) if link.kind_of? Section @info = elf.sections.index(@info) if info.kind_of? Section super(elf) end # defines the @name_p field from @name and elf.section[elf.header.shstrndx] # creates .shstrtab if needed def make_name_p elf return 0 if not name or @name == '' or elf.header.shnum == 0 if elf.header.shstrndx.to_i == 0 or not elf.sections[elf.header.shstrndx] sn = Section.new sn.name = '.shstrtab' sn.type = 'STRTAB' sn.flags = [] sn.addralign = 1 sn.encoded = EncodedData.new << 0 elf.header.shstrndx = elf.sections.length elf.sections << sn end sne = elf.sections[elf.header.shstrndx].encoded return if name_p and sne.data[@name_p, @name.length+1] == @name+0.chr return if @name_p = sne.data.index(@name+0.chr) @name_p = sne.virtsize sne << @name << 0 end end class Segment def set_default_values elf if encoded @offset ||= elf.new_label('segment_offset') @vaddr ||= elf.label_at(@encoded, 0) @filesz ||= @encoded.rawsize @memsz ||= @encoded.virtsize end @paddr ||= @vaddr if vaddr super(elf) end end class Symbol def set_default_values(elf, strtab) make_name_p elf, strtab if strtab and name and @name != '' super(elf) end # sets the value of @name_p, appends @name to strtab if needed def make_name_p(elf, strtab) s = strtab.kind_of?(EncodedData) ? strtab.data : strtab return if name_p and s[@name_p, @name.length+1] == @name+0.chr return if @name_p = s.index(@name+0.chr) @name_p = strtab.length strtab << @name << 0 end end def encode_byte(w) Expression[w].encode(:u8, @endianness, (caller if $DEBUG)) end def encode_half(w) Expression[w].encode(:u16, @endianness, (caller if $DEBUG)) end def encode_word(w) Expression[w].encode(:u32, @endianness, (caller if $DEBUG)) end def encode_sword(w) Expression[w].encode(:i32, @endianness, (caller if $DEBUG)) end def encode_xword(w) Expression[w].encode((@bitsize == 32 ? :u32 : :u64), @endianness, (caller if $DEBUG)) end def encode_sxword(w) Expression[w].encode((@bitsize == 32 ? :i32 : :i64), @endianness, (caller if $DEBUG)) end alias encode_addr encode_xword alias encode_off encode_xword # checks a section's data has not grown beyond s.size, if so undefs addr/offset def encode_check_section_size(s) if s.size and s.encoded.virtsize < s.size puts "W: Elf: preexisting section #{s} has grown, relocating" if $VERBOSE s.addr = s.offset = nil s.size = s.encoded.virtsize end end # reorders self.symbols according to their gnu_hash def encode_reorder_symbols gnu_hash_bucket_length = 42 # TODO @symbols[1..-1] = @symbols[1..-1].sort_by { |s| if s.bind != 'GLOBAL' -2 elsif s.shndx == 'UNDEF' or not s.name -1 else ELF.gnu_hash_symbol_name(s.name) % gnu_hash_bucket_length end } end # sorted insert of a new section to self.sections according to its permission (for segment merging) def encode_add_section s # order: r rx rw noalloc rank = lambda { |sec| f = sec.flags sec.type == 'NULL' ? -2 : sec.addr ? -1 : f.include?('ALLOC') ? !f.include?('WRITE') ? !f.include?('EXECINSTR') ? 0 : 1 : 2 : 3 } srank = rank[s] nexts = @sections.find { |sec| rank[sec] > srank } # find section with rank superior nexts = nexts ? @sections.index(nexts) : -1 # if none, last if @header.shstrndx.to_i != 0 and nexts != -1 and @header.shstrndx >= nexts @header.shstrndx += 1 end @sections.insert(nexts, s) # insert section end # encodes the GNU_HASH table # TODO def encode_gnu_hash return if true sortedsyms = @symbols.find_all { |s| s.bind == 'GLOBAL' and s.shndx != 'UNDEF' and s.name } bucket = Array.new(42) if not gnu_hash = @sections.find { |s| s.type == 'GNU_HASH' } gnu_hash = Section.new gnu_hash.name = '.gnu.hash' gnu_hash.type = 'GNU_HASH' gnu_hash.flags = ['ALLOC'] gnu_hash.entsize = gnu_hash.addralign = 4 encode_add_section gnu_hash end gnu_hash.encoded = EncodedData.new # "bloomfilter[N] has bit B cleared if there is no M (M > symndx) which satisfies (C = @header.class) # ((gnu_hash(sym[M].name) / C) % maskwords) == N && # ((gnu_hash(sym[M].name) % C) == B || # ((gnu_hash(sym[M].name) >> shift2) % C) == B" # bloomfilter may be [~0] bloomfilter = [] # bucket[N] contains the lowest M for which # gnu_hash(sym[M]) % nbuckets == N # or 0 if none bucket = [] gnu_hash.encoded << encode_word(bucket.length) << encode_word(@symbols.length - sortedsyms.length) << encode_word(bloomfilter.length) << encode_word(shift2) bloomfilter.each { |bf| gnu_hash.encoded << encode_xword(bf) } bucket.each { |bk| gnu_hash.encoded << encode_word(bk) } sortedsyms.each { |s| # (gnu_hash(sym[N].name) & ~1) | (N == dynsymcount-1 || (gnu_hash(sym[N].name) % nbucket) != (gnu_hash(sym[N+1].name) % nbucket)) # that's the hash, with its lower bit replaced by the bool [1 if i am the last sym having my hash as hash] val = 28 gnu_hash.encoded << encode_word(val) } @tag['GNU_HASH'] = label_at(gnu_hash.encoded, 0) encode_check_section_size gnu_hash gnu_hash end # encodes the symbol dynamic hash table in the .hash section, updates the HASH tag def encode_hash return if @symbols.length <= 1 if not hash = @sections.find { |s| s.type == 'HASH' } hash = Section.new hash.name = '.hash' hash.type = 'HASH' hash.flags = ['ALLOC'] hash.entsize = hash.addralign = 4 encode_add_section hash end hash.encoded = EncodedData.new # to find a symbol from its name : # 1: idx = hash(name) # 2: idx = bucket[idx % bucket.size] # 3: if idx == 0: return notfound # 4: if dynsym[idx].name == name: return found # 5: idx = chain[idx] ; goto 3 bucket = Array.new(@symbols.length/4+1, 0) chain = Array.new(@symbols.length, 0) @symbols.each_with_index { |s, i| next if s.bind == 'LOCAL' or not s.name or s.shndx == 'UNDEF' hash_mod = ELF.hash_symbol_name(s.name) % bucket.length chain[i] = bucket[hash_mod] bucket[hash_mod] = i } hash.encoded << encode_word(bucket.length) << encode_word(chain.length) bucket.each { |b| hash.encoded << encode_word(b) } chain.each { |c| hash.encoded << encode_word(c) } @tag['HASH'] = label_at(hash.encoded, 0) encode_check_section_size hash hash end # encodes the symbol table # should have a stable self.sections array (only append allowed after this step) def encode_segments_symbols(strtab) return if @symbols.length <= 1 if not dynsym = @sections.find { |s| s.type == 'DYNSYM' } dynsym = Section.new dynsym.name = '.dynsym' dynsym.type = 'DYNSYM' dynsym.entsize = Symbol.sizeof(self) dynsym.addralign = 4 dynsym.flags = ['ALLOC'] dynsym.info = @symbols[1..-1].find_all { |s| s.bind == 'LOCAL' }.length + 1 dynsym.link = strtab encode_add_section dynsym end dynsym.encoded = EncodedData.new @symbols.each { |s| dynsym.encoded << s.encode(self, strtab.encoded) } # needs all section indexes, as will be in the final section header @tag['SYMTAB'] = label_at(dynsym.encoded, 0) @tag['SYMENT'] = Symbol.sizeof(self) encode_check_section_size dynsym dynsym end # encodes the relocation tables # needs a complete self.symbols array def encode_segments_relocs return if not @relocations or @relocations.empty? arch_preencode_reloc_func = "arch_#{@header.machine.downcase}_preencode_reloc" send arch_preencode_reloc_func if respond_to? arch_preencode_reloc_func list = @relocations.find_all { |r| r.type == 'JMP_SLOT' } if not list.empty? or @relocations.empty? if list.find { |r| r.addend } stype = 'RELA' sname = '.rela.plt' else stype = 'REL' sname = '.rel.plt' end if not relplt = @sections.find { |s| s.type == stype and s.name == sname } relplt = Section.new relplt.name = sname relplt.flags = ['ALLOC'] encode_add_section relplt end relplt.encoded = EncodedData.new('', :export => {'_REL_PLT' => 0}) list.each { |r| relplt.encoded << r.encode(self) } @tag['JMPREL'] = label_at(relplt.encoded, 0) @tag['PLTRELSZ'] = relplt.encoded.virtsize @tag['PLTREL'] = relplt.type = stype @tag[stype + 'ENT'] = relplt.entsize = relplt.addralign = (stype == 'REL' ? Relocation.sizeof(self) : RelocationAddend.sizeof(self)) encode_check_section_size relplt end list = @relocations.find_all { |r| r.type != 'JMP_SLOT' and not r.addend } if not list.empty? if not @tag['TEXTREL'] and @sections.find { |s_| s_.encoded and e = s_.encoded.inv_export[0] and not s_.flags.include? 'WRITE' and list.find { |r| Expression[r.offset, :-, e].reduce.kind_of? ::Integer } # TODO need to check with r.offset.bind(elf_binding) } @tag['TEXTREL'] = 0 end if not rel = @sections.find { |s_| s_.type == 'REL' and s_.name == '.rel.dyn' } rel = Section.new rel.name = '.rel.dyn' rel.type = 'REL' rel.flags = ['ALLOC'] rel.entsize = rel.addralign = Relocation.sizeof(self) encode_add_section rel end rel.encoded = EncodedData.new list.each { |r| rel.encoded << r.encode(self) } @tag['REL'] = label_at(rel.encoded, 0) @tag['RELENT'] = Relocation.sizeof(self) @tag['RELSZ'] = rel.encoded.virtsize encode_check_section_size rel end list = @relocations.find_all { |r| r.type != 'JMP_SLOT' and r.addend } if not list.empty? if not rela = @sections.find { |s_| s_.type == 'RELA' and s_.name == '.rela.dyn' } rela = Section.new rela.name = '.rela.dyn' rela.type = 'RELA' rela.flags = ['ALLOC'] rela.entsize = rela.addralign = RelocationAddend.sizeof(self) encode_add_section rela end rela.encoded = EncodedData.new list.each { |r| rela.encoded << r.encode(self) } @tag['RELA'] = label_at(rela.encoded, 0) @tag['RELAENT'] = RelocationAddend.sizeof(self) @tag['RELASZ'] = rela.encoded.virtsize encode_check_section_size rela end end # creates the .plt/.got from the @relocations def arch_386_preencode_reloc return if @relocations.empty? # if .got.plt does not exist, the dynamic loader segfaults if not gotplt = @sections.find { |s| s.type == 'PROGBITS' and s.name == '.got.plt' } gotplt = Section.new gotplt.name = '.got.plt' gotplt.type = 'PROGBITS' gotplt.flags = %w[ALLOC WRITE] gotplt.addralign = @bitsize/8 # _DYNAMIC is not base-relocated at runtime encode_add_section gotplt end gotplt.encoded ||= (EncodedData.new('', :export => {'_PLT_GOT' => 0}) << encode_xword('_DYNAMIC') << encode_xword(0) << encode_xword(0)) @tag['PLTGOT'] = label_at(gotplt.encoded, 0) plt = nil shellcode = lambda { |c| Shellcode.new(@cpu).share_namespace(self).assemble(c).encoded } @relocations.dup.each { |r| case r.type when 'PC32' next if not r.symbol if r.symbol.type != 'FUNC' # external data xref: generate a GOT entry # XXX reuse .got.plt ? if not got ||= @sections.find { |s| s.type == 'PROGBITS' and s.name == '.got' } got = Section.new got.name = '.got' got.type = 'PROGBITS' got.flags = %w[ALLOC WRITE] got.addralign = @bitsize/8 got.encoded = EncodedData.new encode_add_section got end prevoffset = r.offset gotlabel = r.symbol.name + '_got_entry' if not got.encoded.export[gotlabel] # create the got thunk got.encoded.add_export(gotlabel, got.encoded.length) got.encoded << encode_xword(0) # transform the reloc PC32 => GLOB_DAT r.type = 'GLOB_DAT' r.offset = Expression[gotlabel] r.addend = 0 if @bitsize == 64 else @relocations.delete r end # prevoffset is label_section_start + int_section_offset target_s = @sections.find { |s| s.encoded and s.encoded.export[prevoffset.lexpr] == 0 } rel = target_s.encoded.reloc[prevoffset.rexpr] # [foo] => [foo - reloc_addr + gotlabel] rel.target = Expression[[rel.target, :-, prevoffset], :+, gotlabel] next end # convert to .plt entry # # [.plt header] # plt_start: # caller set ebx = gotplt if generate_PIC # push [gotplt+4] # jmp [gotplt+8] # # [.plt thunk] # some_func_thunk: # jmp [gotplt+func_got_offset] # some_func_got_default: # push some_func_jmpslot_offset_in_.rel.plt # jmp plt_start # # [.got.plt header] # dd _DYNAMIC # dd 0 # rewritten to GOTPLT? by ld-linux # dd 0 # rewritten to dlresolve_inplace by ld-linux # # [.got.plt + func_got_offset] # dd some_func_got_default # lazily rewritten to the real addr of some_func by jmp dlresolve_inplace # # base_relocated ? # in the PIC case, _dlresolve imposes us to use the ebx register (which may not be saved by the calling function..) # also geteip trashes eax, which may interfere with regparm(3) base = @cpu.generate_PIC ? @bitsize == 32 ? 'ebx' : 'rip-$_+_PLT_GOT' : '_PLT_GOT' if not plt ||= @sections.find { |s| s.type == 'PROGBITS' and s.name == '.plt' } plt = Section.new plt.name = '.plt' plt.type = 'PROGBITS' plt.flags = %w[ALLOC EXECINSTR] plt.addralign = @bitsize/8 plt.encoded = EncodedData.new sz = @bitsize/8 ptqual = @bitsize == 32 ? 'dword' : 'qword' plt.encoded << shellcode["metasm_plt_start:\npush #{ptqual} ptr [#{base}+#{sz}]\njmp #{ptqual} ptr [#{base}+#{2*sz}]"] if @cpu.generate_PIC and @bitsize == 32 and not @sections.find { |s| s.encoded and s.encoded.export['metasm_intern_geteip'] } plt.encoded << shellcode["metasm_intern_geteip:\ncall 42f\n42: pop eax\nsub eax, 42b-metasm_intern_geteip\nret"] end encode_add_section plt end prevoffset = r.offset pltlabel = r.symbol.name + '_plt_thunk' if not plt.encoded.export[pltlabel] # create the plt thunk plt.encoded.add_export pltlabel, plt.encoded.length if @cpu.generate_PIC and @bitsize == 32 plt.encoded << shellcode["call metasm_intern_geteip\nlea #{base}, [eax+_PLT_GOT-metasm_intern_geteip]"] end plt.encoded << shellcode["jmp [#{base} + #{gotplt.encoded.length}]"] plt.encoded.add_export r.symbol.name+'_plt_default', plt.encoded.length reloffset = @relocations.find_all { |rr| rr.type == 'JMP_SLOT' }.length reloffset *= Relocation.sizeof(self) if @bitsize == 32 plt.encoded << shellcode["push #{reloffset}\njmp metasm_plt_start"] # transform the reloc PC32 => JMP_SLOT r.type = 'JMP_SLOT' r.offset = Expression['_PLT_GOT', :+, gotplt.encoded.length] r.addend = 0 if @bitsize == 64 gotplt.encoded << encode_xword(r.symbol.name + '_plt_default') else @relocations.delete r end # mutate the original relocation # XXX relies on the exact form of r.target from arch_create_reloc target_s = @sections.find { |s| s.encoded and s.encoded.export[prevoffset.lexpr] == 0 } rel = target_s.encoded.reloc[prevoffset.rexpr] rel.target = Expression[[[rel.target, :-, prevoffset.rexpr], :-, label_at(target_s.encoded, 0)], :+, pltlabel] # when 'GOTOFF', 'GOTPC' end } encode_check_section_size gotplt encode_check_section_size plt if plt #encode_check_section_size got if got end alias arch_x86_64_preencode_reloc arch_386_preencode_reloc # encodes the .dynamic section, creates .hash/.gnu.hash/.rel/.rela/.dynsym/.strtab/.init,*_array as needed def encode_segments_dynamic if not strtab = @sections.find { |s| s.type == 'STRTAB' and s.flags.include? 'ALLOC' } strtab = Section.new strtab.name = '.dynstr' strtab.addralign = 1 strtab.type = 'STRTAB' strtab.flags = ['ALLOC'] encode_add_section strtab end strtab.encoded = EncodedData.new << 0 @tag['STRTAB'] = label_at(strtab.encoded, 0) if not dynamic = @sections.find { |s| s.type == 'DYNAMIC' } dynamic = Section.new dynamic.name = '.dynamic' dynamic.type = 'DYNAMIC' dynamic.flags = %w[WRITE ALLOC] # XXX why write ? dynamic.addralign = dynamic.entsize = @bitsize / 8 * 2 dynamic.link = strtab encode_add_section dynamic end dynamic.encoded = EncodedData.new('', :export => {'_DYNAMIC' => 0}) encode_tag = lambda { |k, v| dynamic.encoded << encode_sxword(int_from_hash(k, DYNAMIC_TAG)) << encode_xword(v) } # find or create string in strtab add_str = lambda { |n| if n and n != '' and not ret = strtab.encoded.data.index(n + 0.chr) ret = strtab.encoded.virtsize strtab.encoded << n << 0 end ret || 0 } @tag.keys.each { |k| case k when 'NEEDED'; @tag[k].each { |n| encode_tag[k, add_str[n]] } when 'SONAME', 'RPATH', 'RUNPATH'; encode_tag[k, add_str[@tag[k]]] when 'INIT_ARRAY', 'FINI_ARRAY', 'PREINIT_ARRAY' # build section containing the array if not ar = @sections.find { |s| s.name == '.' + k.downcase } ar = Section.new ar.name = '.' + k.downcase ar.type = k ar.addralign = ar.entsize = @bitsize/8 ar.flags = %w[WRITE ALLOC] ar.encoded = EncodedData.new encode_add_section ar # insert before encoding syms/relocs (which need section indexes) end # fill these later, but create the base relocs now arch_create_reloc_func = "arch_#{@header.machine.downcase}_create_reloc" next if not respond_to?(arch_create_reloc_func) curaddr = label_at(@encoded, 0, 'elf_start') fkbind = {} @sections.each { |s| next if not s.encoded fkbind.update s.encoded.binding(Expression[curaddr, :+, 1]) } @relocations ||= [] off = ar.encoded.length @tag[k].each { |a| rel = Metasm::Relocation.new(Expression[a], "u#@bitsize".to_sym, @endianness) send(arch_create_reloc_func, ar, off, fkbind, rel) off += @bitsize/8 } end } encode_reorder_symbols encode_gnu_hash encode_hash encode_segments_relocs dynsym = encode_segments_symbols(strtab) @sections.find_all { |s| %w[HASH GNU_HASH REL RELA].include? s.type }.each { |s| s.link = dynsym } encode_check_section_size strtab # rm unused tag (shrink .nointerp binaries by allowing to skip the section entirely) @tag.delete('STRTAB') if strtab.encoded.length == 1 # XXX any order needed ? @tag.keys.each { |k| case k when Integer # unknown tags = array of values @tag[k].each { |n| encode_tag[k, n] } when 'PLTREL'; encode_tag[k, int_from_hash(@tag[k], DYNAMIC_TAG)] when 'FLAGS'; encode_tag[k, bits_from_hash(@tag[k], DYNAMIC_FLAGS)] when 'FLAGS_1'; encode_tag[k, bits_from_hash(@tag[k], DYNAMIC_FLAGS_1)] when 'FEATURES_1'; encode_tag[k, bits_from_hash(@tag[k], DYNAMIC_FEATURES_1)] when 'NULL' # keep last when 'STRTAB' encode_tag[k, @tag[k]] encode_tag['STRSZ', strtab.encoded.size] when 'INIT_ARRAY', 'FINI_ARRAY', 'PREINIT_ARRAY' # build section containing the array ar = @sections.find { |s| s.name == '.' + k.downcase } @tag[k].each { |p| ar.encoded << encode_addr(p) } encode_check_section_size ar encode_tag[k, label_at(ar.encoded, 0)] encode_tag[k + 'SZ', ar.encoded.virtsize] when 'NEEDED', 'SONAME', 'RPATH', 'RUNPATH' # already handled else encode_tag[k, @tag[k]] end } encode_tag['NULL', @tag['NULL'] || 0] unless @tag.empty? encode_check_section_size dynamic end # creates the undef symbol list from the section.encoded.reloc and a list of known exported symbols (e.g. from libc) # also populates @tag['NEEDED'] def automagic_symbols GNUExports rescue return # autorequire autoexports = GNUExports::EXPORT.dup @sections.each { |s| next if not s.encoded s.encoded.export.keys.each { |e| autoexports.delete e } } @sections.each { |s| next if not s.encoded s.encoded.reloc.each_value { |r| et = r.target.externals extern = et.find_all { |name| autoexports[name] } next if extern.length != 1 symname = extern.first if not @symbols.find { |sym| sym.name == symname } @tag['NEEDED'] ||= [] @tag['NEEDED'] |= [autoexports[symname]] sym = Symbol.new sym.shndx = 'UNDEF' sym.type = 'FUNC' sym.name = symname sym.bind = 'GLOBAL' @symbols << sym end } } end # reads the existing segment/sections.encoded and populate @relocations from the encoded.reloc hash def create_relocations @relocations = [] arch_create_reloc_func = "arch_#{@header.machine.downcase}_create_reloc" if not respond_to? arch_create_reloc_func puts "Elf: create_reloc: unhandled architecture #{@header.machine}" if $VERBOSE return end # create a fake binding with all our own symbols # not foolproof, should work in most cases curaddr = label_at(@encoded, 0, 'elf_start') binding = {'_DYNAMIC' => 0, '_GOT' => 0} # XXX @sections.each { |s| next if not s.encoded binding.update s.encoded.binding(curaddr) curaddr = Expression[curaddr, :+, s.encoded.virtsize] } @sections.each { |s| next if not s.encoded s.encoded.reloc.each { |off, rel| t = rel.target.bind(binding).reduce next if not t.kind_of? Expression # XXX segment_encode only send(arch_create_reloc_func, s, off, binding) } } end # references to FUNC symbols are transformed to JMPSLOT relocations (aka call to .plt) # TODO ET_REL support def arch_386_create_reloc(section, off, binding, rel=nil) rel ||= section.encoded.reloc[off] if rel.endianness != @endianness or not [:u32, :i32, :a32].include? rel.type puts "ELF: 386_create_reloc: ignoring reloc #{rel.target} in #{section.name}: bad reloc type" if $VERBOSE return end startaddr = label_at(@encoded, 0) r = Relocation.new r.offset = Expression[label_at(section.encoded, 0, 'sect_start'), :+, off] if Expression[rel.target, :-, startaddr].bind(binding).reduce.kind_of?(::Integer) # this location is relative to the base load address of the ELF r.type = 'RELATIVE' else et = rel.target.externals extern = et.find_all { |name| not binding[name] } if extern.length != 1 puts "ELF: 386_create_reloc: ignoring reloc #{rel.target} in #{section.name}: #{extern.inspect} unknown" if $VERBOSE return end if not sym = @symbols.find { |s| s.name == extern.first } puts "ELF: 386_create_reloc: ignoring reloc #{rel.target} in #{section.name}: undefined symbol #{extern.first}" if $VERBOSE return end r.symbol = sym rel.target = Expression[rel.target, :-, sym.name] if rel.target.bind(binding).reduce.kind_of? ::Integer r.type = '32' elsif Expression[rel.target, :+, label_at(section.encoded, 0)].bind(section.encoded.binding).reduce.kind_of? ::Integer rel.target = Expression[[rel.target, :+, label_at(section.encoded, 0)], :+, off] r.type = 'PC32' # TODO tls ? else puts "ELF: 386_create_reloc: ignoring reloc #{sym.name} + #{rel.target}: cannot find matching standard reloc type" if $VERBOSE return end end @relocations << r end def arch_x86_64_create_reloc(section, off, binding, rel=nil) rel ||= section.encoded.reloc[off] if rel.endianness != @endianness or not rel.type.to_s =~ /^[aiu](32|64)$/ puts "ELF: x86_64_create_reloc: ignoring reloc #{rel.target} in #{section.name}: bad reloc type" if $VERBOSE return end startaddr = label_at(@encoded, 0) r = RelocationAddend.new r.addend = 0 r.offset = Expression[label_at(section.encoded, 0, 'sect_start'), :+, off] if Expression[rel.target, :-, startaddr].bind(binding).reduce.kind_of?(::Integer) # this location is relative to the base load address of the ELF if rel.length != 8 puts "ELF: x86_64_create_reloc: ignoring reloc #{rel.target} in #{section.name}: relative non-x64" if $VERBOSE return end r.type = 'RELATIVE' else et = rel.target.externals extern = et.find_all { |name| not binding[name] } if extern.length != 1 puts "ELF: x86_64_create_reloc: ignoring reloc #{rel.target} in #{section.name}: #{extern.inspect} unknown" if $VERBOSE return end if not sym = @symbols.find { |s| s.name == extern.first } puts "ELF: x86_64_create_reloc: ignoring reloc #{rel.target} in #{section.name}: undefined symbol #{extern.first}" if $VERBOSE return end r.symbol = sym rel.target = Expression[rel.target, :-, sym.name] if rel.target.bind(binding).reduce.kind_of? ::Integer r.type = '64' # XXX check that elsif Expression[rel.target, :+, label_at(section.encoded, 0)].bind(section.encoded.binding).reduce.kind_of? ::Integer rel.target = Expression[[rel.target, :+, label_at(section.encoded, 0)], :+, off] r.type = 'PC32' # XXX # TODO tls ? else puts "ELF: x86_64_create_reloc: ignoring reloc #{sym.name} + #{rel.target}: cannot find matching standard reloc type" if $VERBOSE return end end r.addend = Expression[rel.target] #section.encoded.reloc.delete off @relocations << r end def arch_mips_create_reloc(section, off, binding, rel=nil) rel ||= section.encoded.reloc[off] startaddr = label_at(@encoded, 0) r = Relocation.new r.offset = Expression[label_at(section.encoded, 0, 'sect_start'), :+, off] if Expression[rel.target, :-, startaddr].bind(binding).reduce.kind_of?(::Integer) # this location is relative to the base load address of the ELF r.type = 'REL32' else et = rel.target.externals extern = et.find_all { |name| not binding[name] } if extern.length != 1 puts "ELF: mips_create_reloc: ignoring reloc #{rel.target} in #{section.name}: #{extern.inspect} unknown" if $VERBOSE return end if not sym = @symbols.find { |s| s.name == extern.first } puts "ELF: mips_create_reloc: ignoring reloc #{rel.target} in #{section.name}: undefined symbol #{extern.first}" if $VERBOSE return end r.symbol = sym if Expression[rel.target, :-, sym.name].bind(binding).reduce.kind_of?(::Integer) rel.target = Expression[rel.target, :-, sym.name] r.type = '32' elsif Expression[rel.target, :&, 0xffff0000].reduce.kind_of?(::Integer) lo = Expression[rel.target, :&, 0xffff].reduce lo = lo.lexpr if lo.kind_of?(Expression) and lo.op == :& and lo.rexpr == 0xffff if lo.kind_of?(Expression) and lo.op == :>> and lo.rexpr == 16 r.type = 'HI16' rel.target = Expression[rel.target, :&, 0xffff0000] # XXX offset ? elsif lo.kind_of?(String) or (lo.kind_of(Expression) and lo.op == :+) r.type = 'LO16' rel.target = Expression[rel.target, :&, 0xffff0000] # XXX offset ? else puts "ELF: mips_create_reloc: ignoring reloc #{lo}: cannot find matching 16 reloc type" if $VERBOSE return end #elsif Expression[rel.target, :+, label_at(section.encoded, 0)].bind(section.encoded.binding).reduce.kind_of? ::Integer # rel.target = Expression[[rel.target, :+, label_at(section.encoded, 0)], :+, off] # r.type = 'PC32' else puts "ELF: mips_create_reloc: ignoring reloc #{sym.name} + #{rel.target}: cannot find matching standard reloc type" if $VERBOSE return end end @relocations << r end # resets the fields of the elf headers that should be recalculated, eg phdr offset def invalidate_header @header.shoff = @header.shnum = nil @header.phoff = @header.phnum = nil @header.shstrndx = nil @sections.to_a.each { |s| s.name_p = nil s.offset = nil } @segments.to_a.each { |s| s.offset = nil } self end # put every ALLOC section in a segment, create segments if needed # sections with a good offset within a segment are ignored def encode_make_segments_from_sections # fixed addresses first seclist = @sections.find_all { |sec| sec.addr.kind_of? Integer }.sort_by { |sec| sec.addr } | @sections seclist.each { |sec| next if not sec.flags.to_a.include? 'ALLOC' # check if we fit in an existing segment loadsegs = @segments.find_all { |seg_| seg_.type == 'LOAD' } if sec.addr.kind_of?(::Integer) and seg = loadsegs.find { |seg_| seg_.vaddr.kind_of?(::Integer) and seg_.vaddr <= sec.addr and seg_.vaddr + seg_.memsz >= sec.addr + sec.size } # sections is already inside a segment: we're reencoding an ELF, just patch the section in the segment seg.encoded[sec.addr - seg.vaddr, sec.size] = sec.encoded if sec.encoded next end if not seg = loadsegs.find { |seg_| sec.flags.to_a.include?('WRITE') == seg_.flags.to_a.include?('W') and #sec.flags.to_a.include?('EXECINSTR') == seg_.flags.to_a.include?('X') and not seg_.memsz and not loadsegs[loadsegs.index(seg_)+1..-1].find { |sseg| # check if another segment would overlap if we add the sec to seg_ o = Expression[sseg.vaddr, :-, [seg_.vaddr, :+, seg_.encoded.length+sec.encoded.length]].reduce o.kind_of? ::Integer and o < 0 } } # nope, create a new one seg = Segment.new seg.type = 'LOAD' seg.flags = ['R'] seg.flags << 'W' if sec.flags.include? 'WRITE' seg.align = 0x1000 seg.encoded = EncodedData.new seg.offset = new_label('segment_offset') seg.vaddr = sec.addr || new_label('segment_address') @segments << seg end seg.flags |= ['X'] if sec.flags.include? 'EXECINSTR' seg.encoded.align sec.addralign if sec.addralign sec.addr = Expression[seg.vaddr, :+, seg.encoded.length] sec.offset = Expression[seg.offset, :+, seg.encoded.length] seg.encoded << sec.encoded } end # create the relocations from the sections.encoded.reloc # create the dynamic sections # put sections/phdr in PT_LOAD segments # link # TODO support mapped PHDR, obey section-specified base address, handle NOBITS # encode ET_REL def encode(type='DYN') @header.type ||= {:bin => 'EXEC', :lib => 'DYN', :obj => 'REL'}.fetch(type, type) @header.machine ||= case @cpu.shortname when 'x64'; 'X86_64' when 'ia32'; '386' when 'mips'; 'MIPS' when 'powerpc'; 'PPC' when 'arm'; 'ARM' end if @header.type == 'REL' encode_rel else encode_elf end end def encode_elf @encoded = EncodedData.new if @header.type != 'EXEC' or @segments.find { |i| i.type == 'INTERP' } # create a .dynamic section unless we are an ET_EXEC with .nointerp automagic_symbols create_relocations encode_segments_dynamic end @segments.delete_if { |s| s.type == 'INTERP' } if not @header.entry encode_make_segments_from_sections loadsegs = @segments.find_all { |seg_| seg_.type == 'LOAD' } # ensure PT_INTERP is mapped if present if interp = @segments.find { |i| i.type == 'INTERP' } if not seg = loadsegs.find { |seg_| not seg_.memsz and interp.flags & seg_.flags == interp.flags and not loadsegs[loadsegs.index(seg_)+1..-1].find { |sseg| o = Expression[sseg.vaddr, :-, [seg_.vaddr, :+, seg_.encoded.length+interp.encoded.length]].reduce o.kind_of? ::Integer and o < 0 } } seg = Segment.new seg.type = 'LOAD' seg.flags = interp.flags.dup seg.align = 0x1000 seg.encoded = EncodedData.new seg.offset = new_label('segment_offset') seg.vaddr = new_label('segment_address') loadsegs << seg @segments << seg end interp.vaddr = Expression[seg.vaddr, :+, seg.encoded.length] interp.offset = Expression[seg.offset, :+, seg.encoded.length] seg.encoded << interp.encoded interp.encoded = nil end # ensure last PT_LOAD is writeable (used for bss) seg = loadsegs.last if not seg or not seg.flags.include? 'W' seg = Segment.new seg.type = 'LOAD' seg.flags = ['R', 'W'] seg.encoded = EncodedData.new loadsegs << seg @segments << seg end # add dynamic segment if ds = @sections.find { |sec| sec.type == 'DYNAMIC' } and ds.encoded.length > 1 ds.set_default_values self seg = Segment.new seg.type = 'DYNAMIC' seg.flags = ['R', 'W'] seg.offset = ds.offset seg.vaddr = ds.addr seg.memsz = seg.filesz = ds.size @segments << seg end # use variables in the first segment descriptor, to allow fixup later # (when we'll be able to include the program header) if first_seg = loadsegs.first first_seg_oaddr = first_seg.vaddr # section's vaddr depend on oaddr first_seg_off = first_seg.offset first_seg.vaddr = new_label('segvaddr') first_seg.offset = new_label('segoff') first_seg.memsz = new_label('segmemsz') first_seg.filesz = new_label('segfilsz') end if first_seg and not @segments.find { |seg_| seg_.type == 'PHDR' } phdr = Segment.new phdr.type = 'PHDR' phdr.flags = first_seg.flags phdr.offset = new_label('segoff') phdr.vaddr = new_label('segvaddr') phdr.filesz = phdr.memsz = new_label('segmemsz') @segments.unshift phdr end # encode section&program headers if @header.shnum != 0 st = @sections.inject(EncodedData.new) { |edata, s| edata << s.encode(self) } else @header.shoff = @header.shnum = @header.shstrndx = 0 end pt = @segments.inject(EncodedData.new) { |edata, s| edata << s.encode(self) } binding = {} @encoded << @header.encode(self) @encoded.align 8 binding[@header.phoff] = @encoded.length if phdr binding[phdr.offset] = @encoded.length pt.add_export phdr.vaddr, 0 binding[phdr.memsz] = pt.length end @encoded << pt @encoded.align 8 if first_seg # put headers into the 1st mmaped segment if first_seg_oaddr.kind_of? ::Integer # pad headers to align the 1st segment's data @encoded.virtsize += (first_seg_oaddr - @encoded.virtsize) & 0xfff addr = first_seg_oaddr - @encoded.length else addr = ((@header.type == 'EXEC') ? 0x08048000 : 0) binding[first_seg_oaddr] = addr + @encoded.length end binding[first_seg_off] = @encoded.length if not first_seg_off.kind_of? ::Integer first_seg.encoded = @encoded << first_seg.encoded @encoded = EncodedData.new binding[first_seg.memsz] = first_seg.encoded.virtsize if not first_seg.memsz.kind_of? ::Integer binding[first_seg.filesz] = first_seg.encoded.rawsize if not first_seg.filesz.kind_of? ::Integer end @segments.each { |seg_| next if not seg_.encoded if seg_.vaddr.kind_of? ::Integer raise "cannot put segment at address #{Expression[seg_.vaddr]} (now at #{Expression[addr]})" if seg_.vaddr < addr addr = seg_.vaddr else binding[seg_.vaddr] = addr end # ensure seg_.vaddr & page_size == seg_.offset & page_size @encoded.virtsize += (addr - @encoded.virtsize) & 0xfff binding.update seg_.encoded.binding(addr) binding[seg_.offset] = @encoded.length seg_.encoded.align 8 @encoded << seg_.encoded[0, seg_.encoded.rawsize] addr += seg_.encoded.length # page break for memory permission enforcement if @segments[@segments.index(seg_)+1..-1].find { |seg__| seg__.encoded and seg__.vaddr.kind_of? ::Integer } addr += 0x1000 - (addr & 0xfff) if addr & 0xfff != 0 # minimize memory size else addr += 0x1000 # minimize file size end } binding[@header.shoff] = @encoded.length if st @encoded << st @encoded.align 8 @sections.each { |sec| next if not sec.encoded or sec.flags.include? 'ALLOC' # already in a segment.encoded binding[sec.offset] = @encoded.length binding.update sec.encoded.binding @encoded << sec.encoded @encoded.align 8 } @encoded.fixup! binding @encoded.data end def encode_rel @encoded = EncodedData.new automagic_symbols create_relocations @header.phoff = @header.phnum = @header.phentsize = 0 @header.entry = 0 @sections.each { |sec| sec.addr = 0 } st = @sections.inject(EncodedData.new) { |edata, sec| edata << sec.encode(self) } binding = {} @encoded << @header.encode(self) @encoded.align 8 binding[@header.shoff] = @encoded.length @encoded << st @encoded.align 8 @sections.each { |sec| next if not sec.encoded binding[sec.offset] = @encoded.length sec.encoded.fixup sec.encoded.binding @encoded << sec.encoded @encoded.align 8 } @encoded.fixup! binding @encoded.data end def parse_init # allow the user to specify a section, falls back to .text if none specified if not defined? @cursource or not @cursource @cursource = Object.new class << @cursource attr_accessor :elf def <<(*a) t = Preprocessor::Token.new(nil) t.raw = '.text' elf.parse_parser_instruction t elf.cursource.send(:<<, *a) end end @cursource.elf = self end @segments.delete_if { |s| s.type == 'INTERP' } seg = Segment.new seg.type = 'INTERP' seg.encoded = EncodedData.new << (@bitsize == 64 ? DEFAULT_INTERP64 : DEFAULT_INTERP) << 0 seg.flags = ['R'] seg.memsz = seg.filesz = seg.encoded.length @segments.unshift seg @source ||= {} super() end # handles elf meta-instructions # # syntax: # .section "" [] [base=] # change current section (where normal instruction/data are put) # perms = list of 'w' 'x' 'alloc', may be prefixed by 'no' # 'r' ignored # defaults to 'alloc' # shortcuts: .text .data .rodata .bss # base: immediate expression representing the section base address # .entrypoint [