#    This file is part of Metasm, the Ruby assembly manipulation suite
#    Copyright (C) 2006-2009 Yoann GUILLOT
#
#    Licence is LGPL, see LICENCE in the top-level directory

# This sample creates the dynldr.so ruby shared object that allows interaction with
# native libraries
# x86 only for now

module Metasm
  # Builds (if needed) and loads a small native ruby extension, then uses it to
  # resolve symbols from native libraries, call them, and expose ruby blocks as
  # native callbacks.
  class DynLdr
    # basic C defs for ruby internals - 1.8 and 1.9 compat - x86/x64
    RUBY_H = <<EOS
#line #{__LINE__}
typedef uintptr_t VALUE;

#if defined(__PE__) && defined(__x86_64__)
// sonovabeep
#define INT2VAL(v) rb_ull2inum(v)
#define VAL2INT(v) rb_num2ull(v)
#else
#define INT2VAL(v) rb_uint2inum(v)
#define VAL2INT(v) rb_num2ulong(v)
#endif

struct rb_string_t {
    VALUE flags;
    VALUE klass;
    VALUE len;
    char *ptr;
    union {
        long capa;
        VALUE shared;
    } aux;
};
#define RString(x) ((struct rb_string_t *)(x))

struct rb_array_t {
    VALUE flags;
    VALUE klass;
    VALUE len;
    union {
        long capa;
        VALUE shared;
    } aux;
    VALUE *ptr;
};
#define RArray(x) ((struct rb_array_t *)(x))

// TODO improve autoimport to handle data imports correctly
extern VALUE *rb_cObject __attribute__((import));
extern VALUE *rb_eRuntimeError __attribute__((import));
extern VALUE *rb_eArgError __attribute__((import));

#define Qfalse ((VALUE)0)
#define Qtrue ((VALUE)2)
#define Qnil ((VALUE)4)

// allows generating a ruby1.9 dynldr.so from ruby1.8
#ifndef DYNLDR_RUBY_19
#define DYNLDR_RUBY_19 #{RUBY_VERSION >= '1.9' ? 1 : 0}
#endif

#if DYNLDR_RUBY_19
#define T_STRING 0x05
#define T_ARRAY 0x07
#define T_FIXNUM 0x15
#define T_MASK 0x1f
#define RSTRING_NOEMBED (1<<13)
#define STR_PTR(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->ptr : (char*)&RString(o)->len)
#define STR_LEN(o) ((RString(o)->flags & RSTRING_NOEMBED) ? RString(o)->len : (RString(o)->flags >> 14) & 0x1f)
#define RARRAY_EMBED (1<<13)
#define ARY_PTR(o) ((RArray(o)->flags & RARRAY_EMBED) ? (VALUE*)&RArray(o)->len : RArray(o)->ptr)
#define ARY_LEN(o) ((RArray(o)->flags & RARRAY_EMBED) ? ((RArray(o)->flags >> 15) & 3) : RArray(o)->len)
#else
#define T_STRING 0x07
#define T_ARRAY 0x09
#define T_FIXNUM 0x0a
#define T_MASK 0x3f
#define STR_PTR(o) (RString(o)->ptr)
#define STR_LEN(o) (RString(o)->len)
#define ARY_PTR(o) (RArray(o)->ptr)
#define ARY_LEN(o) (RArray(o)->len)
#endif

#define TYPE(x) (((VALUE)(x) & 1) ? T_FIXNUM : (((VALUE)(x) & 3) || ((VALUE)(x) < 7)) ? 0x40 : RString(x)->flags & T_MASK)

VALUE rb_uint2inum(VALUE);
VALUE rb_ull2inum(unsigned long long);
VALUE rb_num2ulong(VALUE);
unsigned long long rb_num2ull(VALUE);
VALUE rb_str_new(const char* ptr, long len);    // alloc + memcpy + 0term
VALUE rb_ary_new2(int len);
VALUE rb_float_new(double);

VALUE rb_intern(char *);
VALUE rb_funcall(VALUE recv, VALUE id, int nargs, ...);
VALUE rb_const_get(VALUE, VALUE);
VALUE rb_raise(VALUE, char*, ...);
void rb_define_const(VALUE, char *, VALUE);
void rb_define_method(VALUE, char *, VALUE (*)(), int);
void rb_define_singleton_method(VALUE, char *, VALUE (*)(), int);

EOS

    # generic C source for the native component, ruby glue
    DYNLDR_C = <<EOS
#{RUBY_H}
#line #{__LINE__}

#ifdef __PE__
__stdcall uintptr_t LoadLibraryA(char *);
__stdcall uintptr_t GetProcAddress(uintptr_t, char *);

#define os_load_lib(l) LoadLibraryA(l)
#define os_load_sym(l, s) GetProcAddress(l, s)
#define os_load_sym_ord(l, s) GetProcAddress(l, (char*)s)
#endif

#ifdef __ELF__
asm(".pt_gnu_stack rw");

#define RTLD_LAZY 1
uintptr_t dlopen(char*, int);
uintptr_t dlsym(uintptr_t, char*);

#define os_load_lib(l) dlopen(l, RTLD_LAZY)
#define os_load_sym(l, s) dlsym(l, s)
#define os_load_sym_ord(l, s) 0U
#endif

extern int *cb_ret_table;
extern void *callback_handler;
extern void *callback_id_0;
extern void *callback_id_1;

static VALUE dynldr;

static VALUE memory_read(VALUE self, VALUE addr, VALUE len)
{
    return rb_str_new((char*)VAL2INT(addr), (long)VAL2INT(len));
}

static VALUE memory_read_int(VALUE self, VALUE addr)
{
    return INT2VAL(*(uintptr_t*)VAL2INT(addr));
}

static VALUE memory_write(VALUE self, VALUE addr, VALUE val)
{
    if (TYPE(val) != T_STRING)
        rb_raise(*rb_eArgError, "mem_write needs a String");

    char *src = STR_PTR(val);
    char *dst = (char*)VAL2INT(addr);
    unsigned len = (unsigned)STR_LEN(val);
    while (len--)
        *dst++ = *src++;
    return val;
}

static VALUE memory_write_int(VALUE self, VALUE addr, VALUE val)
{
    *(uintptr_t *)VAL2INT(addr) = VAL2INT(val);
    return Qtrue;
}

static VALUE str_ptr(VALUE self, VALUE str)
{
    if (TYPE(str) != T_STRING)
        rb_raise(*rb_eArgError, "Invalid ptr");
    return INT2VAL((uintptr_t)STR_PTR(str));
}

// return the VALUE of an object (different of .object_id for Symbols, maybe others)
static VALUE rb_obj_to_value(VALUE self, VALUE obj)
{
    return INT2VAL((uintptr_t)obj);
}

// return the ruby object at VALUE
// USE WITH CAUTION, passing invalid values will segfault the interpreter/GC
static VALUE rb_value_to_obj(VALUE self, VALUE val)
{
    return VAL2INT(val);
}

// load a symbol from a lib byname, byordinal if integral
static VALUE sym_addr(VALUE self, VALUE lib, VALUE func)
{
    uintptr_t h, p;

    if (TYPE(lib) == T_STRING)
        h = os_load_lib(STR_PTR(lib));
    else if (TYPE(lib) == T_FIXNUM)
        h = VAL2INT(lib);
    else
        rb_raise(*rb_eArgError, "Invalid lib");

    if (TYPE(func) != T_STRING && TYPE(func) != T_FIXNUM)
        rb_raise(*rb_eArgError, "Invalid func");

    if (TYPE(func) == T_FIXNUM)
        p = os_load_sym_ord(h, VAL2INT(func));
    else
        p = os_load_sym(h, STR_PTR(func));

    return INT2VAL(p);
}

#ifdef __i386__

__int64 do_invoke_stdcall(unsigned, unsigned, unsigned*);
__int64 do_invoke_fastcall(unsigned, unsigned, unsigned*);
__int64 do_invoke(unsigned, unsigned, unsigned*);
double fake_float(void);

// invoke a symbol
// args is an array of Integers
// flags: 1 stdcall 2 fastcall 4 ret_64bits 8 ret_float
// TODO float args
static VALUE invoke(VALUE self, VALUE ptr, VALUE args, VALUE flags)
{
    if (TYPE(args) != T_ARRAY || ARY_LEN(args) > 64)
        rb_raise(*rb_eArgError, "bad args");

    uintptr_t flags_v = VAL2INT(flags);
    uintptr_t ptr_v = VAL2INT(ptr);
    unsigned i, argsz;
    uintptr_t args_c[64];
    __int64 ret;

    argsz = ARY_LEN(args);
    for (i=0U ; i<argsz ; ++i)
        args_c[i] = VAL2INT(ARY_PTR(args)[i]);

    if (flags_v & 2)
        ret = do_invoke_fastcall(ptr_v, argsz, args_c);    // supercedes stdcall
    else if (flags_v & 1)
        ret = do_invoke_stdcall(ptr_v, argsz, args_c);
    else
        ret = do_invoke(ptr_v, argsz, args_c);

    if (flags_v & 4)
        return rb_ull2inum((unsigned __int64)ret);
    else if (flags_v & 8)
        // fake_float does nothing, to allow the compiler to use ST(0)
        // which was in fact set by ptr_v()
        return rb_float_new(fake_float());

    return INT2VAL((unsigned)ret);
}

// this is the function that is called on behalf of all callbacks
// we're called through callback_handler (asm), itself called from the unique
// callback generated by callback_alloc
// heavy stack magick at work here !
// TODO float args / float retval / ret __int64
uintptr_t do_callback_handler(uintptr_t ori_retaddr, uintptr_t caller_id, uintptr_t arg0)
{
    uintptr_t *addr = &arg0;
    unsigned i, ret;

    VALUE args = rb_ary_new2(8);

    // copy our args to a ruby-accessible buffer
    for (i=0U ; i<8U ; ++i)
        ARY_PTR(args)[i] = INT2VAL(*addr++);
    RArray(args)->len = 8U;    // len == 8, no need to ARY_LEN/EMBED stuff

    ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(caller_id), args);

    // dynldr.callback will give us the arity (in bytes) of the callback in args[0]
    // we just put the stack lifting offset in caller_id for the asm stub to use
    caller_id = VAL2INT(ARY_PTR(args)[0]);

    return VAL2INT(ret);
}

#elif defined __amd64__

uintptr_t do_invoke(uintptr_t, uintptr_t, uintptr_t*);
double fake_float(void);

// invoke a symbol
// args is an array of Integers
// flags: 1 stdcall 2 fastcall 4 ret_64bits 8 ret_float
// TODO float args
static VALUE invoke(VALUE self, VALUE ptr, VALUE args, VALUE flags)
{
    if (TYPE(args) != T_ARRAY || ARY_LEN(args) > 16)
        rb_raise(*rb_eArgError, "bad args");

    uintptr_t flags_v = VAL2INT(flags);
    uintptr_t ptr_v = VAL2INT(ptr);
    int i, argsz;
    uintptr_t args_c[16];
    uintptr_t ret;
    uintptr_t (*ptr_f)(uintptr_t, ...) = (void*)ptr_v;

    argsz = (int)ARY_LEN(args);
    for (i=0 ; i<argsz ; ++i)
        args_c[i] = VAL2INT(ARY_PTR(args)[i]);

    for (i=argsz ; i<16 ; ++i)
        args_c[i] = 0;

    if (argsz <= 4)
        ret = ptr_f(args_c[0], args_c[1], args_c[2], args_c[3]);
    else
        ret = ptr_f(args_c[0], args_c[1], args_c[2], args_c[3],
                    args_c[4], args_c[5], args_c[6], args_c[7],
                    args_c[8], args_c[9], args_c[10], args_c[11],
                    args_c[12], args_c[13], args_c[14], args_c[15]);

    if (flags_v & 8)
        return rb_float_new(fake_float());

    return INT2VAL(ret);
}

uintptr_t do_callback_handler(uintptr_t cb_id __attribute__((register(rax))),
        uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
        uintptr_t arg4, uintptr_t arg5, uintptr_t arg6, uintptr_t arg7)
{
    uintptr_t ret;
    VALUE args = rb_ary_new2(8);
    VALUE *ptr = ARY_PTR(args);

    RArray(args)->len = 8;
    ptr[0] = INT2VAL(arg0);
    ptr[1] = INT2VAL(arg1);
    ptr[2] = INT2VAL(arg2);
    ptr[3] = INT2VAL(arg3);
    ptr[4] = INT2VAL(arg4);
    ptr[5] = INT2VAL(arg5);
    ptr[6] = INT2VAL(arg6);
    ptr[7] = INT2VAL(arg7);

    ret = rb_funcall(dynldr, rb_intern("callback_run"), 2, INT2VAL(cb_id), args);

    return VAL2INT(ret);
}
#endif

int Init_dynldr(void) __attribute__((export_as(Init_<insertfilenamehere>)))    // to patch before parsing to match the .so name
{
    dynldr = rb_const_get(rb_const_get(*rb_cObject, rb_intern("Metasm")), rb_intern("DynLdr"));
    rb_define_singleton_method(dynldr, "memory_read", memory_read, 2);
    rb_define_singleton_method(dynldr, "memory_read_int", memory_read_int, 1);
    rb_define_singleton_method(dynldr, "memory_write", memory_write, 2);
    rb_define_singleton_method(dynldr, "memory_write_int", memory_write_int, 2);
    rb_define_singleton_method(dynldr, "str_ptr", str_ptr, 1);
    rb_define_singleton_method(dynldr, "rb_obj_to_value", rb_obj_to_value, 1);
    rb_define_singleton_method(dynldr, "rb_value_to_obj", rb_value_to_obj, 1);
    rb_define_singleton_method(dynldr, "sym_addr", sym_addr, 2);
    rb_define_singleton_method(dynldr, "raw_invoke", invoke, 3);
    rb_define_const(dynldr, "CALLBACK_TARGET",
#ifdef __i386__
        INT2VAL((VALUE)&callback_handler));
#elif defined __amd64__
        INT2VAL((VALUE)&do_callback_handler));
#endif
    rb_define_const(dynldr, "CALLBACK_ID_0", INT2VAL((VALUE)&callback_id_0));
    rb_define_const(dynldr, "CALLBACK_ID_1", INT2VAL((VALUE)&callback_id_1));
    return 0;
}
EOS

    # see the note in compile_bin_module
    # this is a dynamic resolver for the ruby symbols we use
    DYNLDR_C_PE_HACK = <<EOS
#line #{__LINE__}

void* get_peb(void);

// check if the wstr s1 contains 'ruby' (case-insensitive)
static void *wstrcaseruby(short *s1, int len)
{
    int i = 0;
    int match = 0;

    static char *want = "ruby";    // cant contain the same letter twice

    while (i < len) {
        if (want[match] == (s1[i] | 0x20)) {    // downcase cmp
            if (match == 3)
                return s1+i-match;
        } else
            match = 0;
        if (want[match] == (s1[i] | 0x20))
            ++match;
        ++i;
    }

    return 0;
}

asm(".text");    // TODO fix compiler
#ifdef __x86_64__
asm("get_peb: mov rax, gs:[60h] ret");
#endif
#ifdef __i386__
asm("get_peb: mov eax, fs:[30h] ret");

// 1st arg for ld_rb_imp == Init retaddr
asm("Init_dynldr: call load_ruby_imports jmp Init_dynldr_real");
#endif

struct _lmodule {
    struct _lmodule *next;    // list_head
    void *; void *; void*; void*; void*;
    uintptr_t base, entry, size;
    short; short; short*;
    short len, maxlen;
    short *basename;
};

struct _peb {
    void*; void*; void*;
    struct {
        int; int; void*;
        struct _lmodule *inloadorder;    // list_head
    } *ldr;
};

// find the ruby library in the loaded modules list of the interpreter through the PEB
static uintptr_t find_ruby_module_peb(void)
{
    struct _lmodule *ptr;
    void *base;
    struct _peb *peb = get_peb();

    base = &peb->ldr->inloadorder;
    ptr = ((struct _lmodule *)base)->next;
    ptr = ptr->next;    // skip the first entry = ruby.exe
    while (ptr != base) {
        if (wstrcaseruby(ptr->basename, ptr->len/2))
            return ptr->base;
        ptr = ptr->next;
    }

    return 0;
}

// find the ruby library from an address in the ruby module (Init_dynldr retaddr)
static uintptr_t find_ruby_module_mem(uintptr_t someaddr)
{
    // could __try{}, but with no imports we're useless anyway.
    uintptr_t ptr = someaddr & (-0x10000);
    while (*((unsigned __int16 *)ptr) != 'ZM')    // XXX too weak?
        ptr -= 0x10000;
    return ptr;
}

// a table of string offsets, base = the table itself
// each entry is a ruby function, whose address is to be put inplace in the table
// last entry == 0
extern void *ruby_import_table;

__stdcall uintptr_t GetProcAddress(uintptr_t, char *);

// resolve the ruby imports found by offset in ruby_import_table
int load_ruby_imports(uintptr_t rbaddr)
{
    uintptr_t ruby_module;
    uintptr_t *ptr;
    char *table;

    static int loaded_ruby_imports = 0;
    if (loaded_ruby_imports)
        return 0;
    loaded_ruby_imports = 1;

    if (rbaddr)
        ruby_module = find_ruby_module_mem(rbaddr);
    else
        ruby_module = find_ruby_module_peb();

    if (!ruby_module)
        return 0;

    ptr = &ruby_import_table;
    table = (char*)ptr;

    while (*ptr) {
        if (!(*ptr = GetProcAddress(ruby_module, table+*ptr)))
            // TODO warning or something
            return 0;
        ptr++;
    }

    return 1;
}

#ifdef __x86_64__
#define DLL_PROCESS_ATTACH 1
__stdcall int DllMain(void *handle, int reason, void *res)
{
    if (reason == DLL_PROCESS_ATTACH)
        return load_ruby_imports(0);
    return 1;
}
#endif
EOS

    # ia32 asm source for the native component: handles ABI stuff
    DYNLDR_ASM_IA32 = <<EOS
.text
do_invoke_fastcall:
    push ebp
    mov ebp, esp

    // load ecx/edx, fix arg/argcount
    mov eax, [ebp+16]
    mov ecx, [eax]
    mov edx, [eax+4]
    add eax, 8
    mov [ebp+16], eax

    mov eax, [ebp+12]
    sub eax, 2
    jb _do_invoke_call
    jmp _do_invoke_copy

do_invoke:
do_invoke_stdcall:
    push ebp
    mov ebp, esp
    mov eax, [ebp+12]
_do_invoke_copy:
    // make room for args
    shl eax, 2
    jz _do_invoke_call
    sub esp, eax

    // copy args
    push esi
    push edi
    push ecx
    mov ecx, [ebp+12]
    mov esi, [ebp+16]
    mov edi, esp
    add edi, 12
    rep movsd
    pop ecx
    pop edi
    pop esi

    // go
_do_invoke_call:
    call dword ptr [ebp+8]
    leave
fake_float:
    ret

// entrypoint for callbacks: to the native api, give the addr of some code
// that will push a unique cb_identifier and jmp here
callback_handler:
    // stack here: cb_id_retaddr, cb_native_retaddr, cb_native_arg0, ...
    // swap caller retaddr & cb_identifier, fix cb_identifier from the stub
    pop eax    // stuff pushed by the stub
    sub eax, callback_id_1 - callback_id_0    // fixup cb_id_retaddr to get a cb id
    xchg eax, [esp]    // put on stack, retrieve original retaddr
    push eax    // push intended cb retaddr
    call do_callback_handler
    // do_cb_handler puts the nr of bytes we have to pop from the stack in its 1st arg (eg [esp+4] here)
    // stack here: cb_native_retaddr, ruby_popcount, cb_native_arg0, ...
    pop ecx    // get retaddr w/o interfering with retval (incl 64bits eax+edx)
    add esp, [esp]    // pop cb args if stdcall
    add esp, 4    // pop cb_id/popcount
    jmp ecx    // return

// those are valid callback id
// most of the time only 2 cb is used (source: meearse)
// so this prevents dynamic allocation of a whole page for the most common case
callback_id_0:
    call callback_handler
callback_id_1:
    call callback_handler
EOS

    # x86_64 asm source for the native component: handles ABI stuff
    DYNLDR_ASM_X86_64 = <<EOS
.text
fake_float:
    ret

// entrypoint for callbacks: to the native api, give the addr of some code
// that will save its address in rax and jump to do_cb_h
callback_id_0:
    lea rax, [rip-$_+callback_id_0]
    jmp do_callback_handler
callback_id_1:
    lea rax, [rip-$_+callback_id_1]
    jmp do_callback_handler
EOS

    # initialization
    # load (build if needed) the binary module
    def self.start
      # callbacks are really just a list of asm 'call', so we share them among subclasses of DynLdr
      @@callback_addrs = []    # list of all allocated callback addrs (in use or not)
      @@callback_table = {}    # addr -> cb structure (inuse only)

      binmodule = find_bin_path

      # rebuild when the module is missing or older than this source file
      # (File.exist? rather than File.exists?, which was removed in ruby 3.2)
      if not File.exist?(binmodule) or File.stat(binmodule).mtime < File.stat(__FILE__).mtime
        compile_binary_module(host_exe, host_cpu, binmodule)
      end

      require binmodule

      @@callback_addrs << CALLBACK_ID_0 << CALLBACK_ID_1
    end

    # compile the dynldr binary ruby module for a specific arch/cpu/modulename
    def self.compile_binary_module(exe, cpu, modulename)
      bin = exe.new(cpu)
      # compile the C code, but patch the Init_ export name, which must match the string used in 'require'
      module_c_src = DYNLDR_C.gsub('<insertfilenamehere>', File.basename(modulename, '.so'))
      bin.compile_c module_c_src
      # compile the Asm stuff according to the target architecture
      bin.assemble case cpu.shortname
      when 'ia32'; DYNLDR_ASM_IA32
      when 'x64'; DYNLDR_ASM_X86_64
      end

      # tweak the resulting binary linkage procedures if needed
      compile_binary_module_hack(bin)

      # save the shared library
      bin.encode_file(modulename, :lib)
    end

    # adjust how the compiled module links against the ruby library:
    # for 'elf' the NEEDED tags are dropped, for 'coff' the ruby imports are
    # replaced by a table resolved at load time by DYNLDR_C_PE_HACK;
    # any other format is left untouched
    def self.compile_binary_module_hack(bin)
      # this is a hack
      # we need the module to use ruby symbols
      # but we don't know the actual ruby lib filename (depends on ruby version,
      # platform, ...)
      case bin.shortname
      when 'elf'
        # we know the lib is already loaded by the main ruby executable, no DT_NEEDED needed
        class << bin
          def automagic_symbols(*a)
            # do the plt generation
            super(*a)
            # but remove the specific lib names
            @tag.delete 'NEEDED'
          end
        end
        return
      when 'coff'
        # the hard part, see below
      else
        # unhandled arch, dont tweak
        return
      end

      # we remove the PE IAT section related to ruby symbols, and make
      # a manual symbol resolution on module loading.

      # populate the ruby import table ourselves on module loading
      bin.imports.delete_if { |id| id.libname =~ /ruby/ }

      # we generate something like:
      #  .data
      #  ruby_import_table:
      #  rb_cObject dd str_rb_cObject - ruby_import_table
      #  riat_rb_intern dd str_rb_intern - ruby_import_table
      #  dd 0
      #
      #  .rodata
      #  str_rb_cObject db "rb_cObject", 0
      #  str_rb_intern db "rb_intern", 0
      #
      #  .text
      #  rb_intern: jmp [riat_rb_intern]
      #
      # the PE_HACK code will parse ruby_import_table and make the symbol resolution on startup

      # setup the string table and the thunks
      text = bin.sections.find { |s| s.name == '.text' }.encoded
      rb_syms = text.reloc_externals.grep(/^rb_/)

      dd = (bin.cpu.size == 64 ? 'dq' : 'dd')

      init_symbol = text.export.keys.grep(/^Init_/).first
      raise 'no Init_mname symbol found' if not init_symbol
      if bin.cpu.size == 32
        # hax to find the base of libruby under Win98 (peb sux)
        text.export[init_symbol + '_real'] = text.export.delete(init_symbol)
        bin.unique_labels_cache.delete(init_symbol)
      end

      # the C glue: getprocaddress etc
      bin.compile_c DYNLDR_C_PE_HACK.gsub('Init_dynldr', init_symbol)

      # the IAT, initialized with relative offsets to symbol names
      asm_table = ['.data', '.align 8', 'ruby_import_table:']

      # strings will be in .rodata
      bin.parse('.rodata')

      rb_syms.each { |sym|
        # raw symbol name
        str_label = bin.parse_new_label('str', "db #{sym.inspect}, 0")

        if sym !~ /^rb_[ce][A-Z]/
          # if we dont reference a data import (rb_cClass / rb_eException),
          # then create a function thunk
          i = PE::ImportDirectory::Import.new
          i.thunk = sym
          sym = i.target = 'riat_' + str_label
          bin.arch_encode_thunk(text, i)    # encode a jmp [importtable]
        end

        # update the IAT
        asm_table << "#{sym} #{dd} #{str_label} - ruby_import_table"
      }

      # IAT null-terminated
      asm_table << "#{dd} 0"

      # now parse & assemble the IAT in .data
      bin.assemble asm_table.join("\n")
    end

    # find the path of the binary module
    # if none exists, create a path writeable by the current user
    def self.find_bin_path
      fname = ['dynldr', host_arch, host_cpu.shortname, ('19' if RUBY_VERSION >= '1.9')].compact.join('-') + '.so'
      dir = File.dirname(__FILE__)
      binmodule = File.join(dir, fname)
      # File.exist? rather than File.exists?, which was removed in ruby 3.2
      if not File.exist?(binmodule) or File.stat(binmodule).mtime < File.stat(__FILE__).mtime
        if not dir = find_write_dir
          raise LoadError, "no writable dir to put the DynLdr ruby module, try to run as root"
        end
        binmodule = File.join(dir, fname)
      end
      binmodule
    end

    # find a writeable directory
    # searches this script directory, $HOME / %APPDATA% / %USERPROFILE%, or $TMP
    def self.find_write_dir
      # true if a scratch file can be created and removed in d, nil otherwise
      # (any exception, including a nil d, is swallowed on purpose)
      writable = lambda { |d|
        begin
          foo = '/_test_write_' + rand(1<<32).to_s
          true if File.writable?(d) and File.open(d+foo, 'w') { true } and File.unlink(d+foo)
        rescue
        end
      }
      dir = File.dirname(__FILE__)
      return dir if writable[dir]
      dir = ENV['HOME'] || ENV['APPDATA'] || ENV['USERPROFILE']
      if writable[dir]
        dir = File.join(dir, '.metasm')
        Dir.mkdir dir if not File.directory? dir
        return dir
      end
      ENV['TMP'] || ENV['TEMP'] || '.'
    end

    # CPU suitable for compiling code for the current running host
    def self.host_cpu
      @cpu ||= case RUBY_PLATFORM
      when /i[3-6]86/; Ia32.new
      when /x86_64|x64/i; X86_64.new
      else raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}"
      end
    end

    # returns whether we run on linux or windows
    def self.host_arch
      case RUBY_PLATFORM
      when /linux/i; :linux
      when /mswin|mingw|cygwin/i; :windows
      else raise LoadError, "Unsupported host platform #{RUBY_PLATFORM}"
      end
    end

    # ExeFormat suitable as current running host native module
    def self.host_exe
      case host_arch
      when :linux; ELF
      when :windows; PE
      end
    end

    # parse a C string into the @cp parser, create it if needed
    def self.parse_c(src)
      cp.parse(src)
    end

    # compile a C fragment into a Shellcode, honors the host ABI
    def self.compile_c(src)
      # XXX could we reuse self.cp ? (for its macros etc)
      cp = C::Parser.new(host_exe.new(host_cpu))
      cp.parse(src)
      sc = Shellcode.new(host_cpu)
      asm = host_cpu.new_ccompiler(cp, sc).compile
      sc.assemble(asm)
    end

    # retrieve the library where a symbol is to be found (uses AutoImport)
    def self.lib_from_sym(symname)
      case host_arch
      when :linux; GNUExports::EXPORT
      when :windows; WindowsExports::EXPORT
      end[symname]
    end

    # reads a bunch of C code, creates binding for those according to the prototypes
    # handles enum/defines to define constants
    # For each toplevel method prototype, it generates a ruby method in this module, the name is lowercased
    # For each numeric macro/enum, it also generates an uppercase named constant
    # When such a function is called with a lambda as argument, a callback is created for the duration of the call
    # and destroyed afterwards ; use callback_alloc_c to get a callback id with longer life span
    def self.new_api_c(proto, fromlib=nil)
      proto += "\n;"    # allow 'int foo()' and '#include <bar>'
      parse_c(proto)

      cp.toplevel.symbol.dup.each_value { |v|
        next if not v.kind_of? C::Variable    # enums
        cp.toplevel.symbol.delete v.name
        lib = fromlib || lib_from_sym(v.name)
        addr = sym_addr(lib, v.name)
        if addr == 0 or addr == -1 or addr == 0xffff_ffff or addr == 0xffffffff_ffffffff
          api_not_found(lib, v)
          next
        end

        rbname = c_func_name_to_rb(v.name)
        if not v.type.kind_of? C::Function
          # not a function, simply return the symbol address
          # TODO struct/table access through hash/array ?
          class << self ; self ; end.send(:define_method, rbname) { addr }
          next
        end
        next if v.initializer    # inline & stuff
        puts "new_api_c: load method #{rbname} from #{lib}" if $DEBUG

        new_caller_for(v, rbname, addr)
      }

      # predeclare constants from enums
      # macros are handled in const_missing (too slow to (re)do here everytime)
      # TODO #define FOO(v) (v<<1)|1 => create ruby counterpart
      cexist = constants.inject({}) { |h, c| h.update c.to_s => true }
      cp.toplevel.symbol.each { |k, v|
        if v.kind_of? ::Integer
          n = c_const_name_to_rb(k)
          const_set(n, v) if v.kind_of? Integer and not cexist[n]
        end
      }

      # avoid WTF rb warning: toplevel const TRUE referenced by WinAPI::TRUE
      # NOTE(review): macro_numeric is given the ruby-ified name n, not the macro name k - confirm intended
      cp.lexer.definition.each_key { |k|
        n = c_const_name_to_rb(k)
        if not cexist[n] and Object.const_defined?(n) and v = @cp.macro_numeric(n)
          const_set(n, v)
        end
      }
    end

    # const_missing handler: will try to find a matching #define
    def self.const_missing(c)
      # infinite loop on autorequire C..
      return super(c) if not defined? @cp or not @cp

      cs = c.to_s
      if @cp.lexer.definition[cs]
        m = cs
      else
        m = @cp.lexer.definition.keys.find { |k| c_const_name_to_rb(k) == cs }
      end

      if m and v = @cp.macro_numeric(m)
        const_set(c, v)
        v
      else
        super(c)
      end
    end

    # when defining ruby wrapper for C methods, the ruby method name is the string returned by this function from the C name
    def self.c_func_name_to_rb(name)
      n = name.to_s.gsub(/[^a-z0-9_]/i) { |c| c.unpack('H*')[0] }.downcase
      n = "m#{n}" if n !~ /^[a-z]/
      n
    end

    # when defining ruby wrapper for C constants (numeric define/enum), the ruby const name is
    # the string returned by this function from the C name. It should follow ruby standards (1st letter upcase)
    def self.c_const_name_to_rb(name)
      n = name.to_s.gsub(/[^a-z0-9_]/i) { |c| c.unpack('H*')[0] }.upcase
      n = "C#{n}" if n !~ /^[A-Z]/
      n
    end

    # called by new_api_c when sym_addr could not resolve a symbol: raises
    def self.api_not_found(lib, func)
      raise "could not find symbol #{func.name.inspect} in #{lib.inspect}"
    end

    # called whenever a native API is called through new_api_c/new_func_c/etc
    def self.trace_invoke(api, args)
      #p api
    end

    # define a new method 'name' in the current module to invoke the raw method at addr addr
    # translates ruby args to raw args using the specified prototype
    def self.new_caller_for(proto, name, addr)
      # flags as understood by the C invoke(): 1 stdcall 2 fastcall 4 ret_64bits 8 ret_float
      flags = 0
      flags |= 1 if proto.has_attribute('stdcall')
      flags |= 2 if proto.has_attribute('fastcall')
      flags |= 4 if proto.type.type.integral? and cp.sizeof(nil, proto.type.type) == 8
      flags |= 8 if proto.type.type.float?
      class << self ; self ; end.send(:define_method, name) { |*a|
        raise ArgumentError, "bad arg count for #{name}: #{a.length} for #{proto.type.args.length}" if a.length != proto.type.args.length and not proto.type.varargs

        # convert the arglist suitably for raw_invoke
        auto_cb = []    # list of automatic C callbacks generated from lambdas
        a = a.zip(proto.type.args).map { |ra, fa|
          aa = convert_rb2c(fa, ra, :cb_list => auto_cb)
          if fa and fa.type.integral? and cp.sizeof(fa) == 8 and host_cpu.size == 32
            # a 64bit arg takes two 32bit slots
            aa = [aa & 0xffff_ffff, (aa >> 32) & 0xffff_ffff]
            aa.reverse! if host_cpu.endianness != :little
          end
          aa
        }.flatten

        trace_invoke(name, a)
        # do it
        ret = raw_invoke(addr, a, flags)

        # cleanup autogenerated callbacks
        auto_cb.each { |cb| callback_free(cb) }

        # interpret return value
        ret = convert_ret_c2rb(proto, ret)
      }
    end

    # ruby object -> integer suitable as arg for raw_invoke
    def self.convert_rb2c(formal, val, opts=nil)
      case val
      when String; str_ptr(val)
      when Proc; cb = callback_alloc_cobj(formal, val) ; (opts[:cb_list] << cb if opts and opts[:cb_list]) ; cb
      when C::AllocCStruct; str_ptr(val.str) + val.stroff
      when Hash
        if not formal.type.pointed.kind_of?(C::Struct)
          raise "invalid argument #{val.inspect} for #{formal}, need a struct*"
        end
        buf = cp.alloc_c_struct(formal, val)
        val.instance_variable_set('@rb2c', buf)    # GC trick: lifetime(buf) >= lifetime(hash) (XXX or until next call to convert_rb2c)
        str_ptr(buf.str)
      #when Float; val    # TODO handle that in raw_invoke C code
      else
        v = val.to_i rescue 0    # NaN, Infinity, etc
        v = -v if v == -(1<<(cp.typesize[:ptr]*8-1))    # ruby bug... raise -0x8000_0000: out of ulong range
        v
      end
    end

    # this method is called from the C part to run the ruby code corresponding to
    # a given C callback allocated by callback_alloc_c
    def self.callback_run(id, args)
      cb = @@callback_table[id]
      raise "invalid callback #{'%x' % id} not in #{@@callback_table.keys.map { |c| c.to_s(16) }}" if not cb

      rawargs = args.dup
      ra = cb[:proto] ? cb[:proto].args.map { |fa| convert_cbargs_c2rb(fa, rawargs) } : []

      # run it
      ret = cb[:proc].call(*ra)

      # the C code expects to find in args[0] the amount of stack fixing needed for __stdcall callbacks
      args[0] = cb[:abi_stackfix] || 0
      ret
    end

    # C raw cb arg -> ruby object
    # will combine 2 32bit values for 1 64bit arg
    def self.convert_cbargs_c2rb(formal, rawargs)
      val = rawargs.shift
      if formal.type.integral? and cp.sizeof(formal) == 8 and host_cpu.size == 32
        # host_cpu (was the undefined 'host.cpu'): which half comes first depends on endianness
        if host_cpu.endianness == :little
          val |= rawargs.shift << 32
        else
          val = (val << 32) | rawargs.shift
        end
      end

      convert_c2rb(formal, val)
    end

    # interpret a raw decoded C value to a ruby value according to the C prototype
    # handles signedness etc
    # XXX val is an integer, how to decode Floats etc ? raw binary ptr ?
    def self.convert_c2rb(formal, val)
      formal = formal.type if formal.kind_of? C::Variable
      val = Expression.make_signed(val, 8*cp.sizeof(formal)) if formal.integral? and formal.signed?
      val = nil if formal.pointer? and val == 0
      val
    end

    # C raw ret -> ruby obj
    # can be overridden for system-specific calling convention (eg return 0/-1 => raise an error)
    def self.convert_ret_c2rb(fproto, ret)
      fproto = fproto.type if fproto.kind_of? C::Variable
      convert_c2rb(fproto.untypedef.type, ret)
    end

    # the C parser holding the prototypes/macros declared so far, created on first use
    def self.cp ; @cp ||= C::Parser.new(host_exe.new(host_cpu)) ; end
    # replace the C parser
    def self.cp=(c); @cp = c ; end

    # allocate a callback for a given C prototype (string)
    # accepts full C functions (with body) (only 1 at a time) or toplevel 'asm' statement
    def self.callback_alloc_c(proto, &b)
      proto += ';'    # allow 'int foo()'
      parse_c(proto)
      v = cp.toplevel.symbol.values.find_all { |v_| v_.kind_of? C::Variable and v_.type.kind_of? C::Function }.first
      if (v and v.initializer) or cp.toplevel.statements.find { |st| st.kind_of? C::Asm }
        # native code was given: compile it and return its address
        cp.toplevel.statements.delete_if { |st| st.kind_of? C::Asm }
        cp.toplevel.symbol.delete v.name if v
        sc = compile_c(proto)
        ptr = memory_alloc(sc.encoded.length)
        sc.base_addr = ptr
        # TODO fixup external calls
        memory_write ptr, sc.encode_string
        memory_perm ptr, sc.encoded.length, 'rwx'
        ptr
      elsif not v
        raise 'empty prototype'
      else
        # prototype only: bind the ruby block to a callback id
        cp.toplevel.symbol.delete v.name
        callback_alloc_cobj(v, b)
      end
    end

    # allocates a callback for a given C prototype (C variable, pointer to func accepted)
    def self.callback_alloc_cobj(proto, b)
      ori = proto
      proto = proto.type if proto and proto.kind_of? C::Variable
      proto = proto.pointed while proto and proto.pointer?
      id = callback_find_id
      cb = {}
      cb[:id] = id
      cb[:proc] = b
      cb[:proto] = proto
      # number of stack bytes the callback stub has to pop on return (callee-cleanup conventions)
      cb[:abi_stackfix] = proto.args.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('stdcall')
      cb[:abi_stackfix] = proto.args[2..-1].to_a.inject(0) { |s, a| s + [cp.sizeof(a), cp.typesize[:ptr]].max } if ori and ori.has_attribute('fastcall')    # supercedes stdcall
      @@callback_table[id] = cb
      id
    end

    # releases a callback id, so that it may be reused by a later callback_alloc
    def self.callback_free(id)
      @@callback_table.delete id
    end

    # finds a free callback id, allocates a new page if needed
    def self.callback_find_id
      if not id = @@callback_addrs.find { |a| not @@callback_table[a] }
        cb_page = memory_alloc(4096)
        sc = Shellcode.new(host_cpu, cb_page)
        case sc.cpu.shortname
        when 'ia32'
          addr = cb_page
          nrcb = 128    # TODO should be 4096/5, but the parser/compiler is really too slow
          nrcb.times {
            @@callback_addrs << addr
            sc.parse "call #{CALLBACK_TARGET}"
            addr += 5
          }
        when 'x64'
          addr = cb_page
          nrcb = 128    # same remark
          nrcb.times {
            @@callback_addrs << addr
            sc.parse "1: lea rax, [rip-$_+1b] jmp #{CALLBACK_TARGET}"
            addr += 12    # XXX approximative..
          }
        end
        sc.assemble
        memory_write cb_page, sc.encode_string
        memory_perm cb_page, 4096, 'rx'
        raise 'callback_alloc bouh' if not id = @@callback_addrs.find { |a| not @@callback_table[a] }
      end
      id
    end

    # compile a bunch of C functions, defines methods in this module to call them
    # returns the raw pointer to the code page
    # if given a block, run the block and then undefine all the C functions & free memory
    def self.new_func_c(src)
      sc = compile_c(src)
      ptr = memory_alloc(sc.encoded.length)
      sc.base_addr = ptr
      bd = sc.encoded.binding(ptr)
      sc.encoded.reloc_externals.uniq.each { |ext| bd[ext] = sym_addr(lib_from_sym(ext), ext) or raise "unknown symbol #{ext}" }
      sc.encoded.fixup(bd)
      memory_write ptr, sc.encode_string
      memory_perm ptr, sc.encoded.length, 'rwx'
      parse_c(src)    # XXX the Shellcode parser may have defined stuff / interpreted C another way...
      defs = []
      cp.toplevel.symbol.dup.each_value { |v|
        next if not v.kind_of? C::Variable
        cp.toplevel.symbol.delete v.name
        next if not v.type.kind_of? C::Function or not v.initializer
        next if not off = sc.encoded.export[v.name]
        rbname = c_func_name_to_rb(v.name)
        new_caller_for(v, rbname, ptr+off)
        defs << rbname
      }
      if block_given?
        begin
          yield
        ensure
          defs.each { |d| class << self ; self ; end.send(:remove_method, d) }
          memory_free ptr
        end
      else
        ptr
      end
    end

    # compile an asm sequence, callable with the ABI of the C prototype given
    # function name comes from the prototype
    def self.new_func_asm(proto, asm)
      proto += "\n;"
      old = cp.toplevel.symbol.keys
      parse_c(proto)
      news = cp.toplevel.symbol.keys - old
      raise "invalid proto #{proto}" if news.length != 1
      f = cp.toplevel.symbol[news.first]
      raise "invalid func proto #{proto}" if not f.name or not f.type.kind_of? C::Function or f.initializer
      cp.toplevel.symbol.delete f.name

      sc = Shellcode.assemble(host_cpu, asm)
      ptr = memory_alloc(sc.encoded.length)
      bd = sc.encoded.binding(ptr)
      sc.encoded.reloc_externals.uniq.each { |ext| bd[ext] = sym_addr(lib_from_sym(ext), ext) or raise "unknown symbol #{ext}" }
      sc.encoded.fixup(bd)
      memory_write ptr, sc.encode_string
      memory_perm ptr, sc.encoded.length, 'rwx'
      rbname = c_func_name_to_rb(f.name)
      new_caller_for(f, rbname, ptr)
      if block_given?
        begin
          yield
        ensure
          class << self ; self ; end.send(:remove_method, rbname)
          memory_free ptr
        end
      else
        ptr
      end
    end

    # allocate a C::AllocCStruct to hold a specific struct defined in a previous new_api_c
    def self.alloc_c_struct(structname, values={})
      cp.alloc_c_struct(structname, values)
    end

    # return a C::AllocCStruct mapped over the string (with optionnal offset)
    # str may be an EncodedData
    def self.decode_c_struct(structname, str, off=0)
      str = str.data if str.kind_of? EncodedData
      cp.decode_c_struct(structname, str, off)
    end

    # allocate a C::AllocCStruct holding an Array of typename variables
    # if len is an int, it holds the ary length, or it can be an array of initialisers
    # eg alloc_c_ary("int", [4, 5, 28])
    def self.alloc_c_ary(typename, len)
      cp.alloc_c_ary(typename, len)
    end

    # return a C::AllocCStruct holding an array of type typename mapped over str
    def self.decode_c_ary(typename, len, str, off=0)
      cp.decode_c_ary(typename, len, str, off)
    end

    # return an AllocCStruct holding an array of 1 element of type typename
    # access its value with obj[0]
    # useful when you need a pointer to an int that will be filled by an API: use alloc_c_ptr('int')
    def self.alloc_c_ptr(typename, init=nil)
      cp.alloc_c_ary(typename, (init ? [init] : 1))
    end

    # return the binary version of a ruby value encoded as a C variable
    # only integral types handled for now
    def self.encode_c_value(var, val)
      cp.encode_c_value(var, val)
    end

    # decode a C variable
    # only integral types handled for now
    def self.decode_c_value(str, var, off=0)
      cp.decode_c_value(str, var, off)
    end

    # read a 0-terminated string from memory
    def self.memory_read_strz(ptr, szmax=4096)
      # read up to the end of the ptr memory page
      pglim = (ptr + 0x1000) & ~0xfff
      sz = [pglim-ptr, szmax].min
      data = memory_read(ptr, sz)
      return data[0, data.index(?\0)] if data.index(?\0)
      # no terminator in the first page: retry with the full szmax
      if sz < szmax
        data = memory_read(ptr, szmax)
        data = data[0, data.index(?\0)] if data.index(?\0)
      end
      data
    end

    # read a 0-terminated wide string from memory
    def self.memory_read_wstrz(ptr, szmax=4096)
      # read up to the end of the ptr memory page
      pglim = (ptr + 0x1000) & ~0xfff
      sz = [pglim-ptr, szmax].min
      data = memory_read(ptr, sz)
      if i = data.unpack('v*').index(0)
        return data[0, 2*i]
      end
      # no terminator in the first page: retry with the full szmax
      if sz < szmax
        data = memory_read(ptr, szmax)
        data = data[0, 2*i] if i = data.unpack('v*').index(0)
      end
      data
    end

    # automatically build/load the bin module
    start

    # define memory_alloc / memory_free / memory_perm on top of the host OS API
    case host_arch
    when :windows

      new_api_c <<EOS, 'kernel32'
#define PAGE_NOACCESS 0x01
#define PAGE_READONLY 0x02
#define PAGE_READWRITE 0x04
#define PAGE_WRITECOPY 0x08
#define PAGE_EXECUTE 0x10
#define PAGE_EXECUTE_READ 0x20
#define PAGE_EXECUTE_READWRITE 0x40
#define PAGE_EXECUTE_WRITECOPY 0x80
#define PAGE_GUARD 0x100
#define PAGE_NOCACHE 0x200
#define PAGE_WRITECOMBINE 0x400

#define MEM_COMMIT 0x1000
#define MEM_RESERVE 0x2000
#define MEM_DECOMMIT 0x4000
#define MEM_RELEASE 0x8000
#define MEM_FREE 0x10000
#define MEM_PRIVATE 0x20000
#define MEM_MAPPED 0x40000
#define MEM_RESET 0x80000
#define MEM_TOP_DOWN 0x100000
#define MEM_WRITE_WATCH 0x200000
#define MEM_PHYSICAL 0x400000
#define MEM_LARGE_PAGES 0x20000000
#define MEM_4MB_PAGES 0x80000000

__stdcall uintptr_t VirtualAlloc(uintptr_t addr, uintptr_t size, int type, int prot);
__stdcall uintptr_t VirtualFree(uintptr_t addr, uintptr_t size, int freetype);
__stdcall uintptr_t VirtualProtect(uintptr_t addr, uintptr_t size, int prot, int *oldprot);
EOS

      # allocate some memory suitable for code allocation (ie VirtualAlloc)
      def self.memory_alloc(sz)
        virtualalloc(nil, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE)
      end

      # free memory allocated through memory_alloc
      def self.memory_free(addr)
        virtualfree(addr, 0, MEM_RELEASE)
      end

      # change memory permissions - perm in [r rw rx rwx]
      def self.memory_perm(addr, len, perm)
        perm = { 'r' => PAGE_READONLY, 'rw' => PAGE_READWRITE, 'rx' => PAGE_EXECUTE_READ,
                 'rwx' => PAGE_EXECUTE_READWRITE }[perm.to_s.downcase]
        # the last arg is a scratch buffer receiving the old protection
        virtualprotect(addr, len, perm, str_ptr([0].pack('C')*8))
      end

    when :linux

      new_api_c <<EOS
#define PROT_READ 0x1
#define PROT_WRITE 0x2
#define PROT_EXEC 0x4

#define MAP_PRIVATE 0x2
#define MAP_ANONYMOUS 0x20

uintptr_t mmap(uintptr_t addr, uintptr_t length, int prot, int flags, uintptr_t fd, uintptr_t offset);
uintptr_t munmap(uintptr_t addr, uintptr_t length);
uintptr_t mprotect(uintptr_t addr, uintptr_t len, int prot);
EOS

      # allocate some memory suitable for code allocation (ie mmap)
      def self.memory_alloc(sz)
        @mmaps ||= {}    # save size for mem_free
        a = mmap(nil, sz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
        @mmaps[a] = sz
        a
      end

      # free memory allocated through memory_alloc
      def self.memory_free(addr)
        munmap(addr, @mmaps[addr])
      end

      # change memory permissions - perm 'rwx'
      # on PaX-enabled systems, this may need a non-mprotect-restricted ruby interpreter
      def self.memory_perm(addr, len, perm)
        perm = perm.to_s.downcase
        # widen [addr, addr+len) to whole 4k pages
        len += (addr & 0xfff) + 0xfff
        len &= ~0xfff
        addr &= ~0xfff
        p = 0
        p |= PROT_READ if perm.include? 'r'
        p |= PROT_WRITE if perm.include? 'w'
        p |= PROT_EXEC if perm.include? 'x'
        mprotect(addr, len, p)
      end

    end
  end
end