vendor/tracemonkey/nanojit/Assembler.h in johnson-2.0.0.pre0 vs vendor/tracemonkey/nanojit/Assembler.h in johnson-2.0.0.pre1

- lines: old version (johnson-2.0.0.pre0)
+ lines: new version (johnson-2.0.0.pre1)

@@ -1,6 +1,7 @@
-/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
+/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
+/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
 /* ***** BEGIN LICENSE BLOCK *****
  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  *
  * The contents of this file are subject to the Mozilla Public License Version
  * 1.1 (the "License"); you may not use this file except in compliance with
@@ -41,335 +42,304 @@
 #define __nanojit_Assembler__
 
 
 namespace nanojit
 {
-    /**
-     * Some notes on this Assembler (Emitter).
-     *
-     * LIR_call is a generic call operation that is encoded using form [2]. The 24bit
-     * integer is used as an index into a function look-up table that contains information
-     * about the target that is to be called; including address, # parameters, etc.
-     *
-     * The class RegAlloc is essentially the register allocator from MIR
-     *
-     * The Assembler class parses the LIR instructions starting at any point and converts
-     * them to machine code. It does the translation using expression trees which are simply
-     * LIR instructions in the stream that have side-effects. Any other instruction in the
-     * stream is simply ignored.
-     * This approach is interesting in that dead code elimination occurs for 'free', strength
-     * reduction occurs fairly naturally, along with some other optimizations.
-     *
-     * A negative is that we require state as we 'push' and 'pop' nodes along the tree.
-     * Also, this is most easily performed using recursion which may not be desirable in
-     * the mobile environment.
-     *
-     */
+    /**
+     * Some notes on this Assembler (Emitter).
+     *
+     * The class RegAlloc is essentially the register allocator from MIR
+     *
+     * The Assembler class parses the LIR instructions starting at any point and converts
+     * them to machine code. It does the translation using expression trees which are simply
+     * LIR instructions in the stream that have side-effects. Any other instruction in the
+     * stream is simply ignored.
+     * This approach is interesting in that dead code elimination occurs for 'free', strength
+     * reduction occurs fairly naturally, along with some other optimizations.
+     *
+     * A negative is that we require state as we 'push' and 'pop' nodes along the tree.
+     * Also, this is most easily performed using recursion which may not be desirable in
+     * the mobile environment.
+     *
+     */
-    #define STACK_GRANULARITY sizeof(void *)
+    #define STACK_GRANULARITY sizeof(void *)
-    /**
-     * The Assembler is only concerned with transforming LIR to native instructions
-     */
-    struct Reservation
-    {
-        uint32_t arIndex:16;  /* index into stack frame. displ is -4*arIndex */
-        Register reg:15;      /* register UnkownReg implies not in register */
-        uint32_t used:1;
-    };
+    struct AR
+    {
+        LIns*    entry[ NJ_MAX_STACK_ENTRY ]; /* maps to 4B contiguous locations relative to the frame pointer */
+        uint32_t tos;                         /* current top of stack entry */
+        uint32_t lowwatermark;                /* we pre-allocate entries from 0 upto this index-1; so dynamic entries are added above this index */
+    };
-    struct AR
-    {
-        LIns*    entry[ NJ_MAX_STACK_ENTRY ]; /* maps to 4B contiguous locations relative to the frame pointer */
-        uint32_t tos;                         /* current top of stack entry */
-        uint32_t highwatermark;               /* max tos hit */
-        uint32_t lowwatermark;                /* we pre-allocate entries from 0 upto this index-1; so dynamic entries are added above this index */
-    };
+    #ifdef AVMPLUS_WIN32
+    #define AVMPLUS_ALIGN16(type) __declspec(align(16)) type
+    #else
+    #define AVMPLUS_ALIGN16(type) type __attribute__ ((aligned (16)))
+    #endif
-    #ifdef AVMPLUS_WIN32
-    #define AVMPLUS_ALIGN16(type) __declspec(align(16)) type
-    #else
-    #define AVMPLUS_ALIGN16(type) type __attribute__ ((aligned (16)))
-    #endif
-
-    struct Stats
-    {
-        counter_define(steals;)
-        counter_define(remats;)
-        counter_define(spills;)
-        counter_define(native;)
+    struct Stats
+    {
+        counter_define(steals;)
+        counter_define(remats;)
+        counter_define(spills;)
+        counter_define(native;)
         counter_define(exitnative;)
-
-        int32_t pages;
-        NIns* codeStart;
-        NIns* codeExitStart;
-        DECLARE_PLATFORM_STATS()
+        int32_t pages;
+        NIns* codeStart;
+        NIns* codeExitStart;
+
+        DECLARE_PLATFORM_STATS()
 #ifdef __GNUC__
-        // inexplicably, gnuc gives padding/alignment warnings without this. pacify it.
-        bool pad[4];
+        // inexplicably, gnuc gives padding/alignment warnings without this. pacify it.
+        bool pad[4];
#endif
-    };
+    };
-    class Fragmento;
-
-    // error codes
-    enum AssmError
-    {
-         None = 0
-        ,OutOMem
-        ,StackFull
-        ,ResvFull
-        ,RegionFull
-        ,MaxLength
-        ,MaxExit
-        ,MaxXJump
-        ,UnknownPrim
+    // error codes
+    enum AssmError
+    {
+         None = 0
+        ,StackFull
         ,UnknownBranch
-    };
+    };
-    typedef avmplus::List<NIns*, avmplus::LIST_NonGCObjects> NInsList;
-    typedef avmplus::SortedMap<LIns*,NIns*,avmplus::LIST_NonGCObjects> InsMap;
-    typedef avmplus::SortedMap<NIns*,LIns*,avmplus::LIST_NonGCObjects> NInsMap;
+    typedef SeqBuilder<NIns*> NInsList;
+    typedef HashMap<NIns*, LIns*> NInsMap;
-    class LabelState MMGC_SUBCLASS_DECL
+    class LabelState
     {
     public:
         RegAlloc regs;
         NIns *addr;
         LabelState(NIns *a, RegAlloc &r) : regs(r), addr(a)
         {}
     };
 
     class LabelStateMap
     {
-        avmplus::GC *gc;
-        avmplus::SortedMap<LIns*, LabelState*, avmplus::LIST_GCObjects> labels;
+        Allocator& alloc;
+        HashMap<LIns*, LabelState*> labels;
     public:
-        LabelStateMap(avmplus::GC *gc) : gc(gc), labels(gc)
+        LabelStateMap(Allocator& alloc) : alloc(alloc), labels(alloc)
         {}
-        ~LabelStateMap();
-        void clear();
+        void clear() { labels.clear(); }
         void add(LIns *label, NIns *addr, RegAlloc &regs);
         LabelState *get(LIns *);
     };
+
+    typedef SeqBuilder<char*> StringList;
+
+    /** map tracking the register allocation state at each bailout point
+     *  (represented by SideExit*) in a trace fragment. */
+    typedef HashMap<SideExit*, RegAlloc*> RegAllocMap;
+
     /**
-     * Information about the activation record for the method is built up
-     * as we generate machine code. As part of the prologue, we issue
-     * a stack adjustment instruction and then later patch the adjustment
-     * value. Temporary values can be placed into the AR as method calls
-     * are issued. Also MIR_alloc instructions will consume space.
-     */
-    class Assembler MMGC_SUBCLASS_DECL
-    {
-        friend class DeadCodeFilter;
-        friend class VerboseBlockReader;
-        public:
-        #ifdef NJ_VERBOSE
-        static char outline[8192];
-        static char outlineEOL[512]; // string to be added to the end of the line
-        static char* outputAlign(char* s, int col);
+     * Information about the activation record for the method is built up
+     * as we generate machine code. As part of the prologue, we issue
+     * a stack adjustment instruction and then later patch the adjustment
+     * value. Temporary values can be placed into the AR as method calls
+     * are issued. Also LIR_alloc instructions will consume space.
+     */
+    class Assembler
+    {
+        friend class VerboseBlockReader;
+        public:
+        #ifdef NJ_VERBOSE
+        static char outline[8192];
+        static char outlineEOL[512]; // string to be added to the end of the line
+        static char* outputAlign(char* s, int col);
-
-        void FASTCALL outputForEOL(const char* format, ...);
-        void FASTCALL output(const char* s);
-        void FASTCALL outputf(const char* format, ...);
-        void FASTCALL output_asm(const char* s);
-
-        bool _verbose, outputAddr, vpad[2]; // if outputAddr=true then next asm instr. will include address in output
-        void printActivationState();
+
+        void outputForEOL(const char* format, ...);
+        void output(const char* s);
+        void outputf(const char* format, ...);
+        void output_asm(const char* s);
+
+        bool outputAddr, vpad[3]; // if outputAddr=true then next asm instr. will include address in output
+        void printActivationState(const char* what);
-
-        StringList* _outputCache;
-        #endif
+
+        StringList* _outputCache;
+
+        // Log controller object. Contains what-stuff-should-we-print
+        // bits, and a sink function for debug printing
+        LogControl* _logc;
+        size_t codeBytes;
+        size_t exitBytes;
+        #endif
-
-        Assembler(Fragmento* frago);
+
+        Assembler(CodeAlloc& codeAlloc, Allocator& alloc, AvmCore* core, LogControl* logc);
         ~Assembler() {}
-        void assemble(Fragment* frag, NInsList& loopJumps);
-        void endAssembly(Fragment* frag, NInsList& loopJumps);
-        void beginAssembly(Fragment *frag, RegAllocMap* map);
-        void copyRegisters(RegAlloc* copyTo);
-        void releaseRegisters();
+        void endAssembly(Fragment* frag);
+        void assemble(Fragment* frag);
+        void beginAssembly(Fragment *frag);
+
+        void releaseRegisters();
         void patch(GuardRecord *lr);
         void patch(SideExit *exit);
 #ifdef NANOJIT_IA32
-        void patch(SideExit* exit, SwitchInfo* si);
+        void patch(SideExit *exit, SwitchInfo* si);
 #endif
-        AssmError error() { return _err; }
-        void setError(AssmError e) { _err = e; }
-        void setCallTable(const CallInfo *functions);
-        void pageReset();
-        int32_t codeBytes();
-        Page* handoverPages(bool exitPages=false);
+        AssmError error() { return _err; }
+        void setError(AssmError e) { _err = e; }
-        debug_only ( void pageValidate(); )
-        debug_only ( bool onPage(NIns* where, bool exitPages=false); )
-
-        // support calling out from a fragment ; used to debug the jit
-        debug_only( void resourceConsistencyCheck(); )
-        debug_only( void registerConsistencyCheck(); )
-
-        Stats _stats;
-        int hasLoop;
+        void reset();
-
-        private:
-
-        void gen(LirFilter* toCompile, NInsList& loopJumps);
-        NIns* genPrologue();
-        NIns* genEpilogue();
+        debug_only ( void pageValidate(); )
-        uint32_t arReserve(LIns* l);
-        void arFree(uint32_t idx);
-        void arReset();
+        // support calling out from a fragment ; used to debug the jit
+        debug_only( void resourceConsistencyCheck(); )
+        debug_only( void registerConsistencyCheck(); )
-        Register registerAlloc(RegisterMask allow);
-        void registerResetAll();
-        void evictRegs(RegisterMask regs);
+        Stats _stats;
+        CodeList* codeList; // finished blocks of code.
+
+        private:
+
+        void gen(LirFilter* toCompile);
+        NIns* genPrologue();
+        NIns* genEpilogue();
+
+        uint32_t arReserve(LIns* l);
+        void arFree(uint32_t idx);
+        void arReset();
+
+        Register registerAlloc(RegisterMask allow);
+        void registerResetAll();
+        void evictAllActiveRegs();
+        void evictSomeActiveRegs(RegisterMask regs);
         void evictScratchRegs();
-        void intersectRegisterState(RegAlloc& saved);
-        void unionRegisterState(RegAlloc& saved);
+        void intersectRegisterState(RegAlloc& saved);
+        void unionRegisterState(RegAlloc& saved);
         void assignSaved(RegAlloc &saved, RegisterMask skip);
-        LInsp findVictim(RegAlloc& regs, RegisterMask allow);
+        LInsp findVictim(RegisterMask allow);
         Register getBaseReg(LIns *i, int &d, RegisterMask allow);
-        int findMemFor(LIns* i);
-        Register findRegFor(LIns* i, RegisterMask allow);
-        void findRegFor2(RegisterMask allow, LIns* ia, Reservation* &ra, LIns *ib, Reservation* &rb);
-        Register findSpecificRegFor(LIns* i, Register w);
-        Register prepResultReg(LIns *i, RegisterMask allow);
-        void freeRsrcOf(LIns *i, bool pop);
-        void evict(Register r);
-        RegisterMask hint(LIns*i, RegisterMask allow);
+        int findMemFor(LIns* i);
+        Register findRegFor(LIns* i, RegisterMask allow);
+        void findRegFor2(RegisterMask allow, LIns* ia, Reservation* &resva, LIns *ib, Reservation* &resvb);
+        void findRegFor2b(RegisterMask allow, LIns* ia, Register &ra, LIns *ib, Register &rb);
+        Register findSpecificRegFor(LIns* i, Register w);
+        Register prepResultReg(LIns *i, RegisterMask allow);
+        void freeRsrcOf(LIns *i, bool pop);
+        void evictIfActive(Register r);
+        void evict(Register r, LIns* vic);
+        RegisterMask hint(LIns*i, RegisterMask allow);
-
-        NIns* pageAlloc(bool exitPage=false);
-        void pagesFree(Page*& list);
-        void internalReset();
+
+        void codeAlloc(NIns *&start, NIns *&end, NIns *&eip
+                       verbose_only(, size_t &nBytes));
         bool canRemat(LIns*);
-
-        Reservation* reserveAlloc(LInsp i);
-        void reserveFree(LInsp i);
-        void reserveReset();
+
+        bool isKnownReg(Register r) {
+            return r != UnknownReg;
+        }
-
-        Reservation* getresv(LIns *x) {
-            uint32_t resv_index = x->resv();
-            return resv_index ? &_resvTable[resv_index] : 0;
+
+        Reservation* getresv(LIns *x) {
+            Reservation* r = x->resv();
+            return r->used ? r : 0;
         }
-
-        DWB(Fragmento*) _frago;
-        avmplus::GC* _gc;
-        DWB(Fragment*) _thisfrag;
-        RegAllocMap* _branchStateMap;
-
-        const CallInfo *_functions;
-
-        NIns* _nIns;        // current native instruction
-        NIns* _nExitIns;    // current instruction in exit fragment page
-        NIns* _startingIns; // starting location of code compilation for error handling
-        NIns* _epilogue;
-        Page* _nativePages;     // list of NJ_PAGE_SIZE pages that have been alloc'd
-        Page* _nativeExitPages; // list of pages that have been allocated for exit code
-        AssmError _err;         // 0 = means assemble() appears ok, otherwise it failed
+
+        Allocator& alloc;
+        CodeAlloc& _codeAlloc;
+        Fragment* _thisfrag;
+        RegAllocMap _branchStateMap;
+        NInsMap _patches;
+        LabelStateMap _labels;
-
-        AR _activation;
-        RegAlloc _allocator;
+
+        NIns *codeStart, *codeEnd;  // current block we're adding code to
+        NIns *exitStart, *exitEnd;  // current block for exit stubs
+        NIns* _nIns;                // current native instruction
+        NIns* _nExitIns;            // current instruction in exit fragment page
+        NIns* _epilogue;
+        AssmError _err;             // 0 = means assemble() appears ok, otherwise it failed
-
-        LabelStateMap _labels;
-        NInsMap _patches;
-        Reservation _resvTable[ NJ_MAX_STACK_ENTRY ]; // table where we house stack and register information
-        uint32_t _resvFree;
-        bool _inExit, vpad2[3];
-        InsList pending_lives;
+
+        AR _activation;
+        RegAlloc _allocator;
-
-        void asm_cmp(LIns *cond);
-        void asm_fcmp(LIns *cond);
+
+        bool _inExit, vpad2[3];
+
+        verbose_only( void asm_inc_m32(uint32_t*); )
         void asm_setcc(Register res, LIns *cond);
         NIns * asm_jmpcc(bool brOnFalse, LIns *cond, NIns *target);
-        void asm_mmq(Register rd, int dd, Register rs, int ds);
+        void asm_mmq(Register rd, int dd, Register rs, int ds);
         NIns* asm_exit(LInsp guard);
-        NIns* asm_leave_trace(LInsp guard);
+        NIns* asm_leave_trace(LInsp guard);
         void asm_qjoin(LIns *ins);
         void asm_store32(LIns *val, int d, LIns *base);
         void asm_store64(LIns *val, int d, LIns *base);
-        void asm_restore(LInsp, Reservation*, Register);
-        void asm_load(int d, Register r);
-        void asm_spilli(LInsp i, Reservation *resv, bool pop);
-        void asm_spill(Register rr, int d, bool pop, bool quad);
-        void asm_load64(LInsp i);
-        void asm_pusharg(LInsp p);
-        void asm_quad(LInsp i);
-        void asm_loop(LInsp i, NInsList& loopJumps);
-        void asm_fcond(LInsp i);
-        void asm_cond(LInsp i);
-        void asm_arith(LInsp i);
-        void asm_neg_not(LInsp i);
-        void asm_ld(LInsp i);
-        void asm_cmov(LInsp i);
-        void asm_param(LInsp i);
-        void asm_int(LInsp i);
-        void asm_short(LInsp i);
-        void asm_qlo(LInsp i);
-        void asm_qhi(LInsp i);
-        void asm_fneg(LInsp ins);
-        void asm_fop(LInsp ins);
-        void asm_i2f(LInsp ins);
-        void asm_u2f(LInsp ins);
-        Register asm_prep_fcall(Reservation *rR, LInsp ins);
-        void asm_nongp_copy(Register r, Register s);
-        void asm_call(LInsp);
-        void asm_arg(ArgSize, LInsp, Register);
-        Register asm_binop_rhs_reg(LInsp ins);
-        NIns* asm_branch(bool branchOnFalse, LInsp cond, NIns* targ, bool isfar);
-        void asm_switch(LIns* ins, NIns* target);
-        void emitJumpTable(SwitchInfo* si, NIns* target);
+        void asm_restore(LInsp, Reservation*, Register);
+        void asm_load(int d, Register r);
+        void asm_spilli(LInsp i, bool pop);
+        void asm_spill(Register rr, int d, bool pop, bool quad);
+        void asm_load64(LInsp i);
+        void asm_ret(LInsp p);
+        void asm_quad(LInsp i);
+        void asm_fcond(LInsp i);
+        void asm_cond(LInsp i);
+        void asm_arith(LInsp i);
+        void asm_neg_not(LInsp i);
+        void asm_ld(LInsp i);
+        void asm_cmov(LInsp i);
+        void asm_param(LInsp i);
+        void asm_int(LInsp i);
+        void asm_qlo(LInsp i);
+        void asm_qhi(LInsp i);
+        void asm_fneg(LInsp ins);
+        void asm_fop(LInsp ins);
+        void asm_i2f(LInsp ins);
+        void asm_u2f(LInsp ins);
+        void asm_promote(LIns *ins);
+        Register asm_prep_fcall(Reservation *rR, LInsp ins);
+        void asm_nongp_copy(Register r, Register s);
+        void asm_call(LInsp);
+        Register asm_binop_rhs_reg(LInsp ins);
+        NIns* asm_branch(bool branchOnFalse, LInsp cond, NIns* targ);
+        void asm_switch(LIns* ins, NIns* target);
+        void emitJumpTable(SwitchInfo* si, NIns* target);
         void assignSavedRegs();
         void reserveSavedRegs();
         void assignParamRegs();
-        void handleLoopCarriedExprs();
-
-        // flag values for nMarkExecute
-        enum
-        {
-            PAGE_READ = 0x0, // here only for clarity: all permissions include READ
-            PAGE_WRITE = 0x01,
-            PAGE_EXEC = 0x02
-        };
-
-        // platform specific implementation (see NativeXXX.cpp file)
-        void nInit(AvmCore *);
-        Register nRegisterAllocFromSet(int32_t set);
-        void nRegisterResetAll(RegAlloc& a);
-        void nMarkExecute(Page* page, int flags);
-        void nFrameRestore(RegisterMask rmask);
-        NIns* nPatchBranch(NIns* branch, NIns* location);
-        void nFragExit(LIns* guard);
+        void handleLoopCarriedExprs(InsList& pending_lives);
-
-        // platform specific methods
+
+        // platform specific implementation (see NativeXXX.cpp file)
+        void nInit(AvmCore *);
+        void nBeginAssembly();
+        Register nRegisterAllocFromSet(RegisterMask set);
+        void nRegisterResetAll(RegAlloc& a);
+        static void nPatchBranch(NIns* branch, NIns* location);
+        void nFragExit(LIns* guard);
+
+        // platform specific methods
         public:
-        const static Register savedRegs[NumSavedRegs];
-        DECLARE_PLATFORM_ASSEMBLER()
+        const static Register savedRegs[NumSavedRegs];
+        DECLARE_PLATFORM_ASSEMBLER()
-
-        private:
-        debug_only( int32_t _fpuStkDepth; )
-        debug_only( int32_t _sv_fpuStkDepth; )
+
+        private:
+#ifdef NANOJIT_IA32
+        debug_only( int32_t _fpuStkDepth; )
+        debug_only( int32_t _sv_fpuStkDepth; )
-
-        // since we generate backwards the depth is negative
-        inline void fpu_push() {
-            debug_only( ++_fpuStkDepth; /*char foo[8]= "FPUSTK0"; foo[6]-=_fpuStkDepth; output_asm(foo);*/ NanoAssert(_fpuStkDepth<=0); )
-        }
-        inline void fpu_pop() {
-            debug_only( --_fpuStkDepth; /*char foo[8]= "FPUSTK0"; foo[6]-=_fpuStkDepth; output_asm(foo);*/ NanoAssert(_fpuStkDepth<=0); )
-        }
-        #ifdef AVMPLUS_PORTING_API
-        // these pointers are required to store
-        // the address range where code has been
-        // modified so we can flush the instruction cache.
-        void* _endJit2Addr;
-        #endif // AVMPLUS_PORTING_API
-        avmplus::Config &config;
-    };
+
+        // since we generate backwards the depth is negative
+        inline void fpu_push() {
+            debug_only( ++_fpuStkDepth; /*char foo[8]= "FPUSTK0"; foo[6]-=_fpuStkDepth; output_asm(foo);*/ NanoAssert(_fpuStkDepth<=0); )
+        }
+        inline void fpu_pop() {
+            debug_only( --_fpuStkDepth; /*char foo[8]= "FPUSTK0"; foo[6]-=_fpuStkDepth; output_asm(foo);*/ NanoAssert(_fpuStkDepth<=0); )
+        }
+#endif
+        avmplus::Config &config;
+    };
-
-    inline int32_t disp(Reservation* r)
-    {
-        return stack_direction((int32_t)STACK_GRANULARITY) * int32_t(r->arIndex) + NJ_STACK_OFFSET;
-    }
+
+    inline int32_t disp(Reservation* r)
+    {
+        // even on 64bit cpu's, we allocate stack area in 4byte chunks
+        return stack_direction(4 * int32_t(r->arIndex));
+    }
+    inline int32_t disp(LIns* ins)
+    {
+        // even on 64bit cpu's, we allocate stack area in 4byte chunks
+        return stack_direction(4 * int32_t(ins->getArIndex()));
+    }
 }
 
 #endif // __nanojit_Assembler__
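
A note on the final hunk: the old `disp()` scaled the activation-record index by `STACK_GRANULARITY` (pointer size, so 8 bytes on 64-bit targets) and added `NJ_STACK_OFFSET`, while the new version always uses fixed 4-byte slots, matching the "4B contiguous locations" comment on `struct AR`. The following is a minimal illustrative sketch of the new arithmetic only; `stack_direction()` here is a stand-in definition assuming a downward-growing stack, not nanojit's actual macro:

    // Sketch of the new-style spill-slot displacement (assumptions noted above).
    #include <cstdint>
    #include <cstdio>

    // Assumed stand-in: on a downward-growing stack, offsets from the
    // frame pointer are negative.
    static inline int32_t stack_direction(int32_t n) { return -n; }

    // New scheme: activation-record slots are 4 bytes even on 64-bit CPUs,
    // so index N maps to FP + stack_direction(4 * N).
    static inline int32_t disp(uint32_t arIndex) {
        return stack_direction(4 * (int32_t)arIndex);
    }

    int main() {
        printf("entry 1 -> FP%+d\n", disp(1)); // prints "entry 1 -> FP-4"
        printf("entry 3 -> FP%+d\n", disp(3)); // prints "entry 3 -> FP-12"
        return 0;
    }

Under the old formula the same indices would land 8 bytes apart on a 64-bit build; fixing the slot size at 4 bytes means a 64-bit value simply occupies two consecutive `AR::entry` indices.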