lua-hooks/ext/luajit/src/vm_mips.dasc in immunio-1.1.2 vs lua-hooks/ext/luajit/src/vm_mips.dasc in immunio-1.1.5
- old
+ new
@@ -1,8 +1,11 @@
|// Low-level VM code for MIPS CPUs.
|// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
+|//
+|// MIPS soft-float support contributed by Djordje Kovacevic and
+|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
|
|.arch mips
|.section code_op, code_sub
|
|.actionlist build_actionlist
@@ -16,24 +19,32 @@
|//-----------------------------------------------------------------------
|
|// Fixed register assignments for the interpreter.
|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
|
+|.macro .FPU, a, b // Assemble the statement "a, b" only when FPU is set.
+|.if FPU
+| a, b
+|.endif
+|.endmacro
+|
|// The following must be C callee-save (but BASE is often refetched).
|.define BASE, r16 // Base of current Lua stack frame.
|.define KBASE, r17 // Constants of current Lua function.
|.define PC, r18 // Next PC.
|.define DISPATCH, r19 // Opcode dispatch table.
|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
-|// NYI: r22 currently unused.
|
|.define JGL, r30 // On-trace: global_State + 32768.
|
|// Constants for type-comparisons, stores and conversions. C callee-save.
+|.define TISNUM, r22
|.define TISNIL, r30
+|.if FPU
|.define TOBIT, f30 // 2^52 + 2^51.
+|.endif
|
|// The following temporaries are not saved across C calls, except for RA.
|.define RA, r23 // Callee-save.
|.define RB, r8
|.define RC, r9
@@ -54,26 +65,60 @@
|.define CARG4, r7
|
|.define CRET1, r2
|.define CRET2, r3
|
+|.if ENDIAN_LE // Little-endian: low word in the first register of each pair.
+|.define SFRETLO, CRET1 // 64 bit value held in the CRET1/CRET2 pair.
+|.define SFRETHI, CRET2
+|.define SFARG1LO, CARG1 // 64 bit value held in the CARG1/CARG2 pair.
+|.define SFARG1HI, CARG2
+|.define SFARG2LO, CARG3 // 64 bit value held in the CARG3/CARG4 pair.
+|.define SFARG2HI, CARG4
+|.else // Big-endian: high word in the first register of each pair.
+|.define SFRETLO, CRET2
+|.define SFRETHI, CRET1
+|.define SFARG1LO, CARG2
+|.define SFARG1HI, CARG1
+|.define SFARG2LO, CARG4
+|.define SFARG2HI, CARG3
+|.endif
+|
+|.if FPU
|.define FARG1, f12
|.define FARG2, f14
|
|.define FRET1, f0
|.define FRET2, f2
+|.endif
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.if FPU // MIPS32 hard-float.
+|
|.define CFRAME_SPACE, 112 // Delta for sp.
|
|.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
|.define SAVE_NRES, 120(sp)
|.define SAVE_CFRAME, 116(sp)
|.define SAVE_L, 112(sp)
|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves.
|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves.
+|
+|.else // MIPS32 soft-float
+|
+|.define CFRAME_SPACE, 64 // Delta for sp (no FPR save area, so 6*8 bytes less).
+|
+|.define SAVE_ERRF, 76(sp) // 32 bit C frame info.
+|.define SAVE_NRES, 72(sp)
+|.define SAVE_CFRAME, 68(sp)
+|.define SAVE_L, 64(sp)
+|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
+|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves.
+|
+|.endif // The slots defined below this point are shared by both layouts.
+|
|.define SAVE_PC, 20(sp)
|.define ARG5, 16(sp)
|.define CSAVE_4, 12(sp)
|.define CSAVE_3, 8(sp)
|.define CSAVE_2, 4(sp)
@@ -81,47 +126,49 @@
|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by callee.
|
|.define ARG5_OFS, 16
|.define SAVE_MULTRES, ARG5
|
+|//-----------------------------------------------------------------------
+|
|.macro saveregs // Allocate the C frame, then save ra, r30, r16-r23 (plus f20-f30 if FPU).
| addiu sp, sp, -CFRAME_SPACE
| sw ra, SAVE_GPR_+9*4(sp)
| sw r30, SAVE_GPR_+8*4(sp)
-| sdc1 f30, SAVE_FPR_+5*8(sp)
+| .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
| sw r23, SAVE_GPR_+7*4(sp)
| sw r22, SAVE_GPR_+6*4(sp)
-| sdc1 f28, SAVE_FPR_+4*8(sp)
+| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
| sw r21, SAVE_GPR_+5*4(sp)
| sw r20, SAVE_GPR_+4*4(sp)
-| sdc1 f26, SAVE_FPR_+3*8(sp)
+| .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
| sw r19, SAVE_GPR_+3*4(sp)
| sw r18, SAVE_GPR_+2*4(sp)
-| sdc1 f24, SAVE_FPR_+2*8(sp)
+| .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
| sw r17, SAVE_GPR_+1*4(sp)
| sw r16, SAVE_GPR_+0*4(sp)
-| sdc1 f22, SAVE_FPR_+1*8(sp)
-| sdc1 f20, SAVE_FPR_+0*8(sp)
+| .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
+| .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
|.endmacro
|
|.macro restoreregs_ret // Reload ra, r30, r16-r23 (plus f20-f30 if FPU) and return.
| lw ra, SAVE_GPR_+9*4(sp)
| lw r30, SAVE_GPR_+8*4(sp)
-| ldc1 f30, SAVE_FPR_+5*8(sp)
+| .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
| lw r23, SAVE_GPR_+7*4(sp)
| lw r22, SAVE_GPR_+6*4(sp)
-| ldc1 f28, SAVE_FPR_+4*8(sp)
+| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
| lw r21, SAVE_GPR_+5*4(sp)
| lw r20, SAVE_GPR_+4*4(sp)
-| ldc1 f26, SAVE_FPR_+3*8(sp)
+| .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
| lw r19, SAVE_GPR_+3*4(sp)
| lw r18, SAVE_GPR_+2*4(sp)
-| ldc1 f24, SAVE_FPR_+2*8(sp)
+| .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
| lw r17, SAVE_GPR_+1*4(sp)
| lw r16, SAVE_GPR_+0*4(sp)
-| ldc1 f22, SAVE_FPR_+1*8(sp)
-| ldc1 f20, SAVE_FPR_+0*8(sp)
+| .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
+| .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
| jr ra
| addiu sp, sp, CFRAME_SPACE // Executes in the delay slot of jr: releases the C frame.
|.endmacro
|
|// Type definitions. Some of these are only used for documentation.
@@ -151,17 +198,27 @@
|.macro ., a,b,c; a,b,c; .endmacro
|
|//-----------------------------------------------------------------------
|
|// Endian-specific defines.
-|.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8)
-|.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4)
-|.define HI, LJ_ENDIAN_SELECT(4,0)
-|.define LO, LJ_ENDIAN_SELECT(0,4)
-|.define OFS_RD, LJ_ENDIAN_SELECT(2,0)
-|.define OFS_RA, LJ_ENDIAN_SELECT(1,2)
-|.define OFS_OP, LJ_ENDIAN_SELECT(0,3)
+|.if ENDIAN_LE // Little-endian byte offsets.
+|.define FRAME_PC, -4
+|.define FRAME_FUNC, -8
+|.define HI, 4 // Offset of the high 32 bit word inside an 8 byte slot.
+|.define LO, 0 // Offset of the low 32 bit word inside an 8 byte slot.
+|.define OFS_RD, 2
+|.define OFS_RA, 1
+|.define OFS_OP, 0
+|.else // Big-endian byte offsets.
+|.define FRAME_PC, -8
+|.define FRAME_FUNC, -4
+|.define HI, 0
+|.define LO, 4
+|.define OFS_RD, 0
+|.define OFS_RA, 2
+|.define OFS_OP, 3
+|.endif
|
|// Instruction decode.
|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
|.macro decode_OP4a, dst, ins; andi dst, ins, 0xff; .endmacro
|.macro decode_OP4b, dst; sll dst, dst, 2; .endmacro
@@ -352,13 +409,15 @@
| st_vmstate
| beqz TMP1, >2
|. sll TMP2, TMP2, 3
|1:
| addiu TMP1, TMP1, -8
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| addiu RA, RA, 8
- | sdc1 f0, 0(BASE)
+ | sw SFRETHI, HI(BASE)
+ | sw SFRETLO, LO(BASE)
| bnez TMP1, <1
|. addiu BASE, BASE, 8
|
|2:
| bne TMP2, RD, >6
@@ -423,19 +482,20 @@
| // (void *cframe)
| li AT, -4
| and sp, CARG1, AT
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| lw L, SAVE_L
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
| li TISNIL, LJ_TNIL
| lw BASE, L->base
| lw DISPATCH, L->glref // Setup pointer to dispatch table.
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| li TMP1, LJ_TFALSE
| li_vmstate INTERP
| lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| addiu RA, BASE, -8 // Results start at BASE-8.
| addiu DISPATCH, DISPATCH, GG_G2DISP
| sw TMP1, HI(RA) // Prepend false to error message.
| st_vmstate
| b ->vm_returnc
@@ -494,17 +554,18 @@
|
| // Resume after yield (like a return).
| sw L, DISPATCH_GL(cur_L)(DISPATCH)
| move RA, BASE
| lw BASE, L->base
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lw TMP1, L->top
| lw PC, FRAME_PC(BASE)
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| subu RD, TMP1, BASE
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| sb r0, L->status
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| li_vmstate INTERP
| addiu RD, RD, 8
| st_vmstate
| move MULTRES, RD
| andi TMP0, PC, FRAME_TYPE
@@ -538,17 +599,18 @@
| sw sp, L->cframe // Add our C frame to cframe chain.
|
|3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
| sw L, DISPATCH_GL(cur_L)(DISPATCH)
| lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lw TMP1, L->top
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| addu PC, PC, BASE
| subu NARGS8:RC, TMP1, BASE
| subu PC, PC, TMP2 // PC = frame delta + frame type
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| li_vmstate INTERP
| li TISNIL, LJ_TNIL
| st_vmstate
|
|->vm_call_dispatch:
@@ -626,23 +688,26 @@
|.endif
|
|->cont_cat: // RA = resultptr, RB = meta base
| lw INS, -4(PC)
| addiu CARG2, RB, -16
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA) // Fetch the result as two 32 bit words.
+ | lw SFRETLO, LO(RA)
| decode_RB8a MULTRES, INS
| decode_RA8a RA, INS
| decode_RB8b MULTRES
| decode_RA8b RA
| addu TMP1, BASE, MULTRES
| sw BASE, L->base
| subu CARG3, CARG2, TMP1
+ | sw SFRETHI, HI(CARG2) // The result is stored at CARG2 on both paths.
| bne TMP1, CARG2, ->BC_CAT_Z
- |. sdc1 f0, 0(CARG2)
+ |. sw SFRETLO, LO(CARG2)
| addu RA, BASE, RA
+ | sw SFRETHI, HI(RA) // TMP1 == CARG2: also copy the result to BASE+RA.
| b ->cont_nop
- |. sdc1 f0, 0(RA)
+ |. sw SFRETLO, LO(RA)
|
|//-- Table indexing metamethods -----------------------------------------
|
|->vmeta_tgets1:
| addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
@@ -661,14 +726,13 @@
| sw STR:RC, LO(CARG3)
| b >1
|. sw TMP1, HI(CARG3)
|
|->vmeta_tgetb: // TMP0 = index
- | mtc1 TMP0, f0
- | cvt.d.w f0, f0
| addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
- | sdc1 f0, 0(CARG3)
+ | sw TMP0, LO(CARG3) // Key in tmptv: the index word tagged with TISNUM.
+ | sw TISNUM, HI(CARG3)
|
|->vmeta_tgetv:
|1:
| load_got lj_meta_tget
| sw BASE, L->base
| sw PC, SAVE_PC
| call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
|. move CARG1, L
| // Returns TValue * (finished) or NULL (metamethod).
| beqz CRET1, >3
|. addiu TMP1, BASE, -FRAME_CONT
- | ldc1 f0, 0(CRET1)
+ | lw SFARG1HI, HI(CRET1) // Copy the returned TValue to RA word by word.
+ | lw SFARG2HI, LO(CRET1) // SFARG2HI only serves as a scratch register for the low word.
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFARG1HI, HI(RA)
+ | sw SFARG2HI, LO(RA)
| ins_next2
|
|3: // Call __index metamethod.
| // BASE = base, L->top = new base, stack = cont/func/t/k
| lw BASE, L->top
@@ -695,14 +761,15 @@
|->vmeta_tgetr:
| load_got lj_tab_getinth
| call_intern lj_tab_getinth // (GCtab *t, int32_t key)
|. nop
| // Returns cTValue * or NULL.
- | beqz CRET1, >1
- |. nop
+ | beqz CRET1, ->BC_TGETR_Z // NULL: continue with the nil tag in SFARG2HI.
+ |. move SFARG2HI, TISNIL
+ | lw SFARG2HI, HI(CRET1) // Else pass the found value in SFARG2HI/SFARG2LO.
| b ->BC_TGETR_Z
- |. ldc1 f0, 0(CRET1)
+ |. lw SFARG2LO, LO(CRET1)
|
|//-----------------------------------------------------------------------
|
|->vmeta_tsets1:
| addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
@@ -721,38 +788,40 @@
| sw STR:RC, LO(CARG3)
| b >1
|. sw TMP1, HI(CARG3)
|
|->vmeta_tsetb: // TMP0 = index
- | mtc1 TMP0, f0
- | cvt.d.w f0, f0
| addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
- | sdc1 f0, 0(CARG3)
+ | sw TMP0, LO(CARG3) // Key in tmptv: the index word tagged with TISNUM.
+ | sw TISNUM, HI(CARG3)
|
|->vmeta_tsetv:
|1:
| load_got lj_meta_tset
| sw BASE, L->base
| sw PC, SAVE_PC
| call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
|. move CARG1, L
| // Returns TValue * (finished) or NULL (metamethod).
+ | lw SFARG1HI, HI(RA) // Load the value to store; both paths below use it.
| beqz CRET1, >3
- |. ldc1 f0, 0(RA)
+ |. lw SFARG1LO, LO(RA)
| // NOBARRIER: lj_meta_tset ensures the table is not black.
| ins_next1
- | sdc1 f0, 0(CRET1)
+ | sw SFARG1HI, HI(CRET1) // Finished: store the value into the returned slot.
+ | sw SFARG1LO, LO(CRET1)
| ins_next2
|
|3: // Call __newindex metamethod.
| // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
| addiu TMP1, BASE, -FRAME_CONT
| lw BASE, L->top
| sw PC, -16+HI(BASE) // [cont|PC]
| subu PC, BASE, TMP1
| lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | sdc1 f0, 16(BASE) // Copy value to third argument.
+ | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument.
+ | sw SFARG1LO, 16+LO(BASE)
| b ->vm_call_dispatch_f
|. li NARGS8:RC, 24 // 3 args for func(t, k, v)
|
|->vmeta_tsetr:
| load_got lj_tab_setinth
@@ -765,11 +834,13 @@
|. nop
|
|//-- Comparison metamethods ---------------------------------------------
|
|->vmeta_comp:
- | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT.
+ | // RA/RD point to o1/o2.
+ | move CARG2, RA
+ | move CARG3, RD
| load_got lj_meta_comp
| addiu PC, PC, -4
| sw BASE, L->base
| sw PC, SAVE_PC
| decode_OP1 CARG4, INS
@@ -791,15 +862,17 @@
|->cont_nop:
| ins_next
|
|->cont_ra: // RA = resultptr
| lbu TMP1, -4+OFS_RA(PC)
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA) // Fetch the result as two 32 bit words.
+ | lw SFRETLO, LO(RA)
| sll TMP1, TMP1, 3 // Scale the loaded byte by 8 (one slot is 8 bytes).
| addu TMP1, BASE, TMP1
+ | sw SFRETHI, HI(TMP1) // Copy the result to the slot at BASE + offset.
| b ->cont_nop
- |. sdc1 f0, 0(TMP1)
+ |. sw SFRETLO, LO(TMP1)
|
|->cont_condt: // RA = resultptr
| lw TMP0, HI(RA)
| sltiu AT, TMP0, LJ_TISTRUECOND
| b <4
@@ -810,12 +883,15 @@
| sltiu AT, TMP0, LJ_TISTRUECOND
| b <4
|. addiu TMP2, AT, -1 // Branch if result is false.
|
|->vmeta_equal:
- | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
+ | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1.
| load_got lj_meta_equal
+ | move CARG2, SFARG1LO
+ | move CARG3, SFARG2LO
+ | move CARG4, TMP0
| addiu PC, PC, -4
| sw BASE, L->base
| sw PC, SAVE_PC
| call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
|. move CARG1, L
@@ -850,18 +926,20 @@
|. nop
|
|//-- Arithmetic metamethods ---------------------------------------------
|
|->vmeta_unm:
- | move CARG4, CARG3
+ | move RC, RB
|
|->vmeta_arith:
| load_got lj_meta_arith
| decode_OP1 TMP0, INS
| sw BASE, L->base
- | sw PC, SAVE_PC
| move CARG2, RA
+ | sw PC, SAVE_PC
+ | move CARG3, RB
+ | move CARG4, RC
| sw TMP0, ARG5
| call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
|. move CARG1, L
| // Returns NULL (finished) or TValue * (metamethod).
| beqz CRET1, ->cont_nop
@@ -965,44 +1043,56 @@
|->ff_ .. name:
|.endmacro
|
|.macro .ffunc_1, name // Entry for 1+ args: arg 1 in SFARG1HI/SFARG1LO.
|->ff_ .. name:
+ | lw SFARG1HI, HI(BASE)
| beqz NARGS8:RC, ->fff_fallback // No arguments: use the fallback.
- |. lw CARG3, HI(BASE)
- | lw CARG1, LO(BASE)
+ |. lw SFARG1LO, LO(BASE)
|.endmacro
|
|.macro .ffunc_2, name // Entry for 2+ args: args in SFARG1HI/LO and SFARG2HI/LO.
|->ff_ .. name:
| sltiu AT, NARGS8:RC, 16 // AT = 1 if fewer than two arguments (2*8 bytes).
- | lw CARG3, HI(BASE)
+ | lw SFARG1HI, HI(BASE)
| bnez AT, ->fff_fallback
- |. lw CARG4, 8+HI(BASE)
- | lw CARG1, LO(BASE)
- | lw CARG2, 8+LO(BASE)
+ |. lw SFARG2HI, 8+HI(BASE)
+ | lw SFARG1LO, LO(BASE)
+ | lw SFARG2LO, 8+LO(BASE)
|.endmacro
|
|.macro .ffunc_n, name // Caveat: has delay slot!
|->ff_ .. name:
- | lw CARG3, HI(BASE)
+ | lw SFARG1HI, HI(BASE)
+ |.if FPU
+ | ldc1 FARG1, 0(BASE) // With FPU: the argument is loaded into FARG1.
+ |.else
+ | lw SFARG1LO, LO(BASE) // Without FPU: low word completes SFARG1HI/SFARG1LO.
+ |.endif
| beqz NARGS8:RC, ->fff_fallback
- |. ldc1 FARG1, 0(BASE)
- | sltiu AT, CARG3, LJ_TISNUM
+ |. sltiu AT, SFARG1HI, LJ_TISNUM // AT = 1 if the tag is below LJ_TISNUM.
| beqz AT, ->fff_fallback // The line following the macro use fills this delay slot.
|.endmacro
|
|.macro .ffunc_nn, name // Caveat: has delay slot!
|->ff_ .. name:
| sltiu AT, NARGS8:RC, 16 // AT = 1 if fewer than two arguments (2*8 bytes).
- | lw CARG3, HI(BASE)
+ | lw SFARG1HI, HI(BASE)
| bnez AT, ->fff_fallback
- |. lw CARG4, 8+HI(BASE)
- | ldc1 FARG1, 0(BASE)
- | ldc1 FARG2, 8(BASE)
- | sltiu TMP0, CARG3, LJ_TISNUM
- | sltiu TMP1, CARG4, LJ_TISNUM
+ |. lw SFARG2HI, 8+HI(BASE)
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM // TMP0 = 1 if the tag of arg 1 is below LJ_TISNUM.
+ |.if FPU
+ | ldc1 FARG1, 0(BASE)
+ |.else
+ | lw SFARG1LO, LO(BASE)
+ |.endif
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM // TMP1 = 1 if the tag of arg 2 is below LJ_TISNUM.
+ |.if FPU
+ | ldc1 FARG2, 8(BASE)
+ |.else
+ | lw SFARG2LO, 8+LO(BASE)
+ |.endif
| and TMP0, TMP0, TMP1 // Both flags must be set to stay on the fast path.
| beqz TMP0, ->fff_fallback // The line following the macro use fills this delay slot.
|.endmacro
|
|// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot!
@@ -1014,56 +1104,58 @@
|.endmacro
|
|//-- Base library: checks -----------------------------------------------
|
|.ffunc_1 assert
- | sltiu AT, CARG3, LJ_TISTRUECOND
+ | sltiu AT, SFARG1HI, LJ_TISTRUECOND // AT = 1 if the tag is below LJ_TISTRUECOND.
| beqz AT, ->fff_fallback
|. addiu RA, BASE, -8
| lw PC, FRAME_PC(BASE)
| addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
| addu TMP2, RA, NARGS8:RC
- | sw CARG3, HI(RA)
+ | sw SFARG1HI, HI(RA) // The first argument becomes the first result.
| addiu TMP1, BASE, 8
| beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
- |. sw CARG1, LO(RA)
+ |. sw SFARG1LO, LO(RA)
|1:
- | ldc1 f0, 0(TMP1)
- | sdc1 f0, -8(TMP1)
+ | lw SFRETHI, HI(TMP1) // Move each remaining argument down by one slot.
+ | lw SFRETLO, LO(TMP1)
+ | sw SFRETHI, -8+HI(TMP1)
+ | sw SFRETLO, -8+LO(TMP1)
| bne TMP1, TMP2, <1
|. addiu TMP1, TMP1, 8
| b ->fff_res
|. nop
|
|.ffunc type
- | lw CARG3, HI(BASE)
- | li TMP1, LJ_TISNUM
+ | lw SFARG1HI, HI(BASE)
| beqz NARGS8:RC, ->fff_fallback
- |. sltiu TMP0, CARG3, LJ_TISNUM
- | movz TMP1, CARG3, TMP0
- | not TMP1, TMP1
+ |. sltiu TMP0, SFARG1HI, LJ_TISNUM
+ | movn SFARG1HI, TISNUM, TMP0 // Map every tag below LJ_TISNUM to TISNUM.
+ | not TMP1, SFARG1HI // The inverted tag selects the upvalue.
| sll TMP1, TMP1, 3
| addu TMP1, CFUNC:RB, TMP1
- | b ->fff_resn
- |. ldc1 FRET1, CFUNC:TMP1->upvalue
+ | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi // Return the selected upvalue.
+ | b ->fff_restv
+ |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo
|
|//-- Base library: getters and setters ---------------------------------
|
|.ffunc_1 getmetatable
| li AT, LJ_TTAB
- | bne CARG3, AT, >6
+ | bne SFARG1HI, AT, >6
|. li AT, LJ_TUDATA
|1: // Field metatable must be at same offset for GCtab and GCudata!
- | lw TAB:CARG1, TAB:CARG1->metatable
+ | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable
|2:
| lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
- | beqz TAB:CARG1, ->fff_restv
- |. li CARG3, LJ_TNIL
- | lw TMP0, TAB:CARG1->hmask
- | li CARG3, LJ_TTAB // Use metatable as default result.
+ | beqz TAB:SFARG1LO, ->fff_restv
+ |. li SFARG1HI, LJ_TNIL
+ | lw TMP0, TAB:SFARG1LO->hmask
+ | li SFARG1HI, LJ_TTAB // Use metatable as default result.
| lw TMP1, STR:RC->hash
- | lw NODE:TMP2, TAB:CARG1->node
+ | lw NODE:TMP2, TAB:SFARG1LO->node
| and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
| sll TMP0, TMP1, 5
| sll TMP1, TMP1, 3
| subu TMP1, TMP0, TMP1
| addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
@@ -1071,49 +1163,48 @@
|3: // Rearranged logic, because we expect _not_ to find the key.
| lw CARG4, offsetof(Node, key)+HI(NODE:TMP2)
| lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
| lw NODE:TMP3, NODE:TMP2->next
| bne CARG4, AT, >4
- |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2)
+ |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2)
| beq TMP0, STR:RC, >5
|. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2)
|4:
| beqz NODE:TMP3, ->fff_restv // Not found, keep default result.
|. move NODE:TMP2, NODE:TMP3
| b <3
|. nop
|5:
- | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value.
+ | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value.
|. nop
- | move CARG3, CARG2 // Return value of mt.__metatable.
+ | move SFARG1HI, CARG3 // Return value of mt.__metatable.
| b ->fff_restv
- |. move CARG1, TMP1
+ |. move SFARG1LO, TMP1
|
|6:
- | beq CARG3, AT, <1
- |. sltiu TMP0, CARG3, LJ_TISNUM
- | li TMP1, LJ_TISNUM
- | movz TMP1, CARG3, TMP0
- | not TMP1, TMP1
+ | beq SFARG1HI, AT, <1
+ |. sltu AT, TISNUM, SFARG1HI
+ | movz SFARG1HI, TISNUM, AT
+ | not TMP1, SFARG1HI
| sll TMP1, TMP1, 2
| addu TMP1, DISPATCH, TMP1
| b <2
- |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
+ |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
|
|.ffunc_2 setmetatable
| // Fast path: no mt for table yet and not clearing the mt.
| li AT, LJ_TTAB
- | bne CARG3, AT, ->fff_fallback
- |. addiu CARG4, CARG4, -LJ_TTAB
- | lw TAB:TMP1, TAB:CARG1->metatable
- | lbu TMP3, TAB:CARG1->marked
- | or AT, CARG4, TAB:TMP1
+ | bne SFARG1HI, AT, ->fff_fallback
+ |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB // Zero iff the tag of arg 2 is LJ_TTAB.
+ | lw TAB:TMP1, TAB:SFARG1LO->metatable
+ | lbu TMP3, TAB:SFARG1LO->marked
+ | or AT, SFARG2HI, TAB:TMP1 // Nonzero if arg 2 is no table or a mt is already set.
| bnez AT, ->fff_fallback
|. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
| beqz AT, ->fff_restv
- |. sw TAB:CARG2, TAB:CARG1->metatable
- | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv
+ |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable
+ | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv
|
|.ffunc rawget
| lw CARG4, HI(BASE)
| sltiu AT, NARGS8:RC, 16
| lw TAB:CARG2, LO(BASE)
@@ -1123,48 +1214,48 @@
| bnez AT, ->fff_fallback
| addiu CARG3, BASE, 8
| call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
|. move CARG1, L
| // Returns cTValue *.
- | b ->fff_resn
- |. ldc1 FRET1, 0(CRET1)
+ | lw SFARG1HI, HI(CRET1)
+ | b ->fff_restv
+ |. lw SFARG1LO, LO(CRET1)
|
|//-- Base library: conversions ------------------------------------------
|
|.ffunc tonumber
| // Only handles the number case inline (without a base argument).
| lw CARG1, HI(BASE)
- | xori AT, NARGS8:RC, 8
- | sltiu CARG1, CARG1, LJ_TISNUM
- | movn CARG1, r0, AT
- | beqz CARG1, ->fff_fallback // Exactly one number argument.
- |. ldc1 FRET1, 0(BASE)
- | b ->fff_resn
- |. nop
+ | xori AT, NARGS8:RC, 8 // Exactly one number argument.
+ | sltu TMP0, TISNUM, CARG1 // TMP0 = 1 if the tag is above TISNUM.
+ | or AT, AT, TMP0 // Fall back if either check failed.
+ | bnez AT, ->fff_fallback
+ |. lw SFARG1HI, HI(BASE)
+ | b ->fff_restv // Return the argument unchanged.
+ |. lw SFARG1LO, LO(BASE)
|
|.ffunc_1 tostring
| // Only handles the string or number case inline.
| li AT, LJ_TSTR
| // A __tostring method in the string base metatable is ignored.
- | beq CARG3, AT, ->fff_restv // String key?
+ | beq SFARG1HI, AT, ->fff_restv // String argument? Return it unchanged.
| // Handle numbers inline, unless a number base metatable is present.
|. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
- | sltiu TMP0, CARG3, LJ_TISNUM
- | sltiu TMP1, TMP1, 1
- | and TMP0, TMP0, TMP1
- | beqz TMP0, ->fff_fallback
+ | sltu TMP0, TISNUM, SFARG1HI // TMP0 = 1 if the tag is above TISNUM.
+ | or TMP0, TMP0, TMP1 // Nonzero if that flag is set or a number base mt exists.
+ | bnez TMP0, ->fff_fallback
|. sw BASE, L->base // Add frame since C call can throw.
| ffgccheck
|. sw PC, SAVE_PC // Redundant (but a defined value).
- | load_got lj_strfmt_num
+ | load_got lj_strfmt_number
| move CARG1, L
- | call_intern lj_strfmt_num // (lua_State *L, lua_Number *np)
+ | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
|. move CARG2, BASE
| // Returns GCstr *.
- | li CARG3, LJ_TSTR
+ | li SFARG1HI, LJ_TSTR
| b ->fff_restv
- |. move CARG1, CRET1
+ |. move SFARG1LO, CRET1
|
|//-- Base library: iterators -------------------------------------------
|
|.ffunc next
| lw CARG1, HI(BASE)
@@ -1182,102 +1273,109 @@
| sw PC, SAVE_PC
| call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
|. move CARG1, L
| // Returns 0 at end of traversal.
| beqz CRET1, ->fff_restv // End of traversal: return nil.
- |. li CARG3, LJ_TNIL
- | ldc1 f0, 8(BASE) // Copy key and value to results.
+ |. li SFARG1HI, LJ_TNIL
+ | lw TMP0, 8+HI(BASE)
+ | lw TMP1, 8+LO(BASE)
| addiu RA, BASE, -8
- | ldc1 f2, 16(BASE)
- | li RD, (2+1)*8
- | sdc1 f0, 0(RA)
+ | lw TMP2, 16+HI(BASE)
+ | lw TMP3, 16+LO(BASE)
+ | sw TMP0, HI(RA)
+ | sw TMP1, LO(RA)
+ | sw TMP2, 8+HI(RA)
+ | sw TMP3, 8+LO(RA)
| b ->fff_res
- |. sdc1 f2, 8(RA)
+ |. li RD, (2+1)*8
|
|.ffunc_1 pairs
| li AT, LJ_TTAB
- | bne CARG3, AT, ->fff_fallback
+ | bne SFARG1HI, AT, ->fff_fallback
|. lw PC, FRAME_PC(BASE)
#if LJ_52
- | lw TAB:TMP2, TAB:CARG1->metatable
- | ldc1 f0, CFUNC:RB->upvalue[0]
+ | lw TAB:TMP2, TAB:SFARG1LO->metatable
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi // TMP0/TMP1 = upvalue[0], returned as result 1.
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
| bnez TAB:TMP2, ->fff_fallback
#else
- | ldc1 f0, CFUNC:RB->upvalue[0]
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi // TMP0/TMP1 = upvalue[0], returned as result 1.
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
#endif
|. addiu RA, BASE, -8
| sw TISNIL, 8+HI(BASE) // Result 3 gets the nil tag.
- | li RD, (3+1)*8
+ | sw TMP0, HI(RA)
+ | sw TMP1, LO(RA)
| b ->fff_res
- |. sdc1 f0, 0(RA)
+ |. li RD, (3+1)*8
|
|.ffunc ipairs_aux
| sltiu AT, NARGS8:RC, 16 // AT = 1 if fewer than two arguments (2*8 bytes).
| lw CARG3, HI(BASE)
| lw TAB:CARG1, LO(BASE)
| lw CARG4, 8+HI(BASE)
| bnez AT, ->fff_fallback
- |. ldc1 FARG2, 8(BASE)
- | addiu CARG3, CARG3, -LJ_TTAB
- | sltiu AT, CARG4, LJ_TISNUM
- | li TMP0, 1
- | movn AT, r0, CARG3
- | mtc1 TMP0, FARG1
- | beqz AT, ->fff_fallback
+ |. addiu CARG3, CARG3, -LJ_TTAB // CARG3 == 0 iff the tag of arg 1 is LJ_TTAB.
+ | xor CARG4, CARG4, TISNUM // CARG4 == 0 iff the tag of arg 2 is TISNUM.
+ | or AT, CARG3, CARG4 // Both must be zero. An AND would let a single mismatch pass.
+ | bnez AT, ->fff_fallback
|. lw PC, FRAME_PC(BASE)
- | trunc.w.d FRET1, FARG2
- | cvt.d.w FARG1, FARG1
+ | lw TMP2, 8+LO(BASE) // TMP2 = current integer index.
| lw TMP0, TAB:CARG1->asize
| lw TMP1, TAB:CARG1->array
- | mfc1 TMP2, FRET1
- | addiu RA, BASE, -8
- | add.d FARG2, FARG2, FARG1
| addiu TMP2, TMP2, 1 // Advance to the next index.
+ | sw TISNUM, -8+HI(BASE) // Result 1 = next index, tagged with TISNUM.
| sltu AT, TMP2, TMP0 // AT = 1 if the index is below asize.
+ | sw TMP2, -8+LO(BASE)
+ | beqz AT, >2 // Not in array part?
+ |. addiu RA, BASE, -8
| sll TMP3, TMP2, 3
| addu TMP3, TMP1, TMP3
- | beqz AT, >2 // Not in array part?
- |. sdc1 FARG2, 0(RA)
- | lw TMP2, HI(TMP3)
- | ldc1 f0, 0(TMP3)
+ | lw TMP1, HI(TMP3) // TMP1/TMP2 = array slot contents (tag/payload).
+ | lw TMP2, LO(TMP3)
|1:
- | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results.
+ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
|. li RD, (0+1)*8
- | li RD, (2+1)*8
+ | sw TMP1, 8+HI(RA) // Result 2 = the value found.
+ | sw TMP2, 8+LO(RA)
| b ->fff_res
- |. sdc1 f0, 8(RA)
+ |. li RD, (2+1)*8
+ |
|2: // Check for empty hash part first. Otherwise call C function.
| lw TMP0, TAB:CARG1->hmask
| load_got lj_tab_getinth
| beqz TMP0, ->fff_res
|. li RD, (0+1)*8
| call_intern lj_tab_getinth // (GCtab *t, int32_t key)
|. move CARG2, TMP2
| // Returns cTValue * or NULL.
| beqz CRET1, ->fff_res
|. li RD, (0+1)*8
- | lw TMP2, HI(CRET1)
+ | lw TMP1, HI(CRET1)
| b <1
- |. ldc1 f0, 0(CRET1)
+ |. lw TMP2, LO(CRET1)
|
|.ffunc_1 ipairs
| li AT, LJ_TTAB
- | bne CARG3, AT, ->fff_fallback
+ | bne SFARG1HI, AT, ->fff_fallback
|. lw PC, FRAME_PC(BASE)
#if LJ_52
- | lw TAB:TMP2, TAB:CARG1->metatable
- | ldc1 f0, CFUNC:RB->upvalue[0]
+ | lw TAB:TMP2, TAB:SFARG1LO->metatable
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi // TMP0/TMP1 = upvalue[0], returned as result 1.
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
| bnez TAB:TMP2, ->fff_fallback
#else
- | ldc1 f0, CFUNC:RB->upvalue[0]
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi // TMP0/TMP1 = upvalue[0], returned as result 1.
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
#endif
|. addiu RA, BASE, -8
- | sw r0, 8+HI(BASE)
+ | sw TISNUM, 8+HI(BASE) // Result 3 = zero payload tagged with TISNUM.
| sw r0, 8+LO(BASE)
- | li RD, (3+1)*8
+ | sw TMP0, HI(RA)
+ | sw TMP1, LO(RA)
| b ->fff_res
- |. sdc1 f0, 0(RA)
+ |. li RD, (3+1)*8
|
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
| lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
@@ -1293,31 +1391,37 @@
|
|.ffunc xpcall
| sltiu AT, NARGS8:RC, 16 // AT = 1 if fewer than two arguments (2*8 bytes).
| lw CARG4, 8+HI(BASE)
| bnez AT, ->fff_fallback
- |. ldc1 FARG2, 8(BASE)
- | ldc1 FARG1, 0(BASE)
+ |. lw CARG3, 8+LO(BASE)
+ | lw CARG1, LO(BASE)
+ | lw CARG2, HI(BASE)
| lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) // Must be TMP3: the srl below reads it.
| li AT, LJ_TFUNC
| bne CARG4, AT, ->fff_fallback // Traceback must be a function.
|. move TMP2, BASE // Delay slot must not modify BASE: the fallback path still runs.
| addiu BASE, BASE, 16
| // Remember active hook before pcall.
| srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
- | sdc1 FARG2, 0(TMP2) // Swap function and traceback.
+ | sw CARG3, LO(TMP2) // Swap function and traceback.
+ | sw CARG4, HI(TMP2)
| andi TMP3, TMP3, 1
- | sdc1 FARG1, 8(TMP2)
+ | sw CARG1, 8+LO(TMP2)
+ | sw CARG2, 8+HI(TMP2)
| addiu PC, TMP3, 16+FRAME_PCALL // PC = frame delta + frame type + hook-active bit.
| b ->vm_call_dispatch
|. addiu NARGS8:RC, NARGS8:RC, -16
|
|//-- Coroutine library --------------------------------------------------
|
|.macro coroutine_resume_wrap, resume
|.if resume
- |.ffunc_1 coroutine_resume
+ |.ffunc coroutine_resume
+ | lw CARG3, HI(BASE)
+ | beqz NARGS8:RC, ->fff_fallback
+ |. lw CARG1, LO(BASE)
| li AT, LJ_TTHREAD
| bne CARG3, AT, ->fff_fallback
|.else
|.ffunc coroutine_wrap_aux
| lw L:CARG1, CFUNC:RB->upvalue[0].gcr
@@ -1348,15 +1452,17 @@
| sw TMP2, L:CARG1->top
| addu TMP1, BASE, NARGS8:RC
| move CARG3, CARG2
| sw BASE, L->top
|2: // Move args to coroutine.
- | ldc1 f0, 0(BASE)
+ | lw SFRETHI, HI(BASE)
+ | lw SFRETLO, LO(BASE)
| sltu AT, BASE, TMP1
| beqz AT, >3
|. addiu BASE, BASE, 8
- | sdc1 f0, 0(CARG3)
+ | sw SFRETHI, HI(CARG3)
+ | sw SFRETLO, LO(CARG3)
| b <2
|. addiu CARG3, CARG3, 8
|3:
| bal ->vm_resume // (lua_State *L, TValue *base, 0, 0)
|. move L:RA, L:CARG1
@@ -1378,14 +1484,16 @@
| bnez AT, >9 // Need to grow stack?
|. addu TMP3, TMP2, RD
| sw TMP2, L:RA->top // Clear coroutine stack.
| move TMP1, BASE
|5: // Move results from coroutine.
- | ldc1 f0, 0(TMP2)
+ | lw SFRETHI, HI(TMP2)
+ | lw SFRETLO, LO(TMP2)
| addiu TMP2, TMP2, 8
| sltu AT, TMP2, TMP3
- | sdc1 f0, 0(TMP1)
+ | sw SFRETHI, HI(TMP1)
+ | sw SFRETLO, LO(TMP1)
| bnez AT, <5
|. addiu TMP1, TMP1, 8
|6:
| andi TMP0, PC, FRAME_TYPE
|.if resume
@@ -1406,16 +1514,18 @@
|
|8: // Coroutine returned with error (at co->top-1).
|.if resume
| addiu TMP3, TMP3, -8
| li TMP1, LJ_TFALSE
- | ldc1 f0, 0(TMP3)
+ | lw SFRETHI, HI(TMP3)
+ | lw SFRETLO, LO(TMP3)
| sw TMP3, L:RA->top // Remove error from coroutine stack.
| li RD, (2+1)*8
| sw TMP1, -8+HI(BASE) // Prepend false to results.
| addiu RA, BASE, -8
- | sdc1 f0, 0(BASE) // Copy error message.
+ | sw SFRETHI, HI(BASE) // Copy error message.
+ | sw SFRETLO, LO(BASE)
| b <7
|. andi TMP0, PC, FRAME_TYPE
|.else
| load_got lj_ffh_coroutine_wrap_err
| move CARG2, L:RA
@@ -1447,24 +1557,33 @@
| b ->vm_leave_unw
|. sb CRET1, L->status
|
|//-- Math library -------------------------------------------------------
|
- |.ffunc_n math_abs
- |. abs.d FRET1, FARG1
- |->fff_resn:
- | lw PC, FRAME_PC(BASE)
- | addiu RA, BASE, -8
- | b ->fff_res1
- |. sdc1 FRET1, -8(BASE)
+ |.ffunc_1 math_abs
+ | bne SFARG1HI, TISNUM, >1 // Tag is not TISNUM: try the other number path.
+ |. sra TMP0, SFARG1LO, 31 // TMP0 = sign mask (0 or -1).
+ | xor TMP1, SFARG1LO, TMP0 // abs(x) = (x ^ mask) - mask.
+ | subu SFARG1LO, TMP1, TMP0
+ | bgez SFARG1LO, ->fff_restv // Result is non-negative: done.
+ |. nop
+ | lui SFARG1HI, 0x41e0 // 2^31 as a double.
+ | b ->fff_restv
+ |. li SFARG1LO, 0
+ |1:
+ | sltiu AT, SFARG1HI, LJ_TISNUM
+ | beqz AT, ->fff_fallback
+ |. sll SFARG1HI, SFARG1HI, 1 // Shift the top bit of the high word out ...
+ | srl SFARG1HI, SFARG1HI, 1 // ... and shift a zero back in.
+ |// fallthrough
|
|->fff_restv:
- | // CARG3/CARG1 = TValue result.
+ | // SFARG1LO/SFARG1HI = TValue result.
| lw PC, FRAME_PC(BASE)
- | sw CARG3, -8+HI(BASE)
+ | sw SFARG1HI, -8+HI(BASE)
| addiu RA, BASE, -8
- | sw CARG1, -8+LO(BASE)
+ | sw SFARG1LO, -8+LO(BASE)
|->fff_res1:
| // RA = results, PC = return.
| li RD, (1+1)*8
|->fff_res:
| // RA = results, RD = (nresults+1)*8, PC = return.
@@ -1489,19 +1608,23 @@
| addiu RD, RD, 8
| b <5
|. sw TISNIL, -8+HI(TMP1)
|
|.macro math_extern, func // ff_math_<func>: check one argument, call func, return via fff_resn.
- |->ff_math_ .. func:
- | lw CARG3, HI(BASE)
+ | .ffunc math_ .. func
+ | lw SFARG1HI, HI(BASE)
| beqz NARGS8:RC, ->fff_fallback
|. load_got func
- | sltiu AT, CARG3, LJ_TISNUM
+ | sltiu AT, SFARG1HI, LJ_TISNUM // AT = 1 if the tag is below LJ_TISNUM.
| beqz AT, ->fff_fallback
- |. nop
- | call_extern
+ |.if FPU
|. ldc1 FARG1, 0(BASE)
+ |.else
+ |. lw SFARG1LO, LO(BASE) // Without FPU: low word completes SFARG1HI/SFARG1LO.
+ |.endif
+ | call_extern
+ |. nop
| b ->fff_resn
|. nop
|.endmacro
|
|.macro math_extern2, func
@@ -1511,32 +1634,48 @@
|. nop
| b ->fff_resn
|. nop
|.endmacro
|
+ |// TODO: Return integer type if result is integer (own sf implementation).
|.macro math_round, func
- | .ffunc_n math_ .. func
- |. nop
+ |->ff_math_ .. func:
+ | lw SFARG1HI, HI(BASE)
+ | beqz NARGS8:RC, ->fff_fallback
+ |. lw SFARG1LO, LO(BASE)
+ | beq SFARG1HI, TISNUM, ->fff_restv // Tag is TISNUM: return the argument unchanged.
+ |. sltu AT, SFARG1HI, TISNUM // AT = 1 if the tag is below TISNUM.
+ | beqz AT, ->fff_fallback
+ |.if FPU
+ |. ldc1 FARG1, 0(BASE)
| bal ->vm_ .. func
+ |.else
+ |. load_got func
+ | call_extern
+ |.endif
|. nop
| b ->fff_resn
|. nop
|.endmacro
|
| math_round floor
| math_round ceil
|
|.ffunc math_log
- | lw CARG3, HI(BASE)
| li AT, 8
| bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
- |. load_got log
- | sltiu AT, CARG3, LJ_TISNUM
+ |. lw SFARG1HI, HI(BASE)
+ | sltiu AT, SFARG1HI, LJ_TISNUM // AT = 1 if the tag is below LJ_TISNUM.
| beqz AT, ->fff_fallback
- |. nop
+ |. load_got log
+ |.if FPU
| call_extern
|. ldc1 FARG1, 0(BASE)
+ |.else
+ | call_extern
+ |. lw SFARG1LO, LO(BASE) // Without FPU: low word completes SFARG1HI/SFARG1LO.
+ |.endif
| b ->fff_resn
|. nop
|
| math_extern log10
| math_extern exp
@@ -1551,77 +1690,169 @@
| math_extern tanh
| math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
+ |.if FPU // Hard-float: inline sqrt.d; soft-float: go through the C library.
|.ffunc_n math_sqrt
|. sqrt.d FRET1, FARG1
- | b ->fff_resn
- |. nop
+ |// fallthrough to ->fff_resn
+ |.else
+ | math_extern sqrt // The macro itself ends with a branch to ->fff_resn.
+ |.endif
|
- |.ffunc_nn math_ldexp
- | trunc.w.d FARG2, FARG2
+ |->fff_resn: // Return one number: FRET1 (FPU) or SFRETHI/SFRETLO (soft-float).
+ | lw PC, FRAME_PC(BASE) // Reload the PC word saved in the current frame.
+ | addiu RA, BASE, -8 // RA = result slot at BASE-8.
+ |.if FPU
+ | b ->fff_res1
+ |. sdc1 FRET1, -8(BASE) // Store the double in the delay slot.
+ |.else
+ | sw SFRETHI, -8+HI(BASE)
+ | b ->fff_res1
+ |. sw SFRETLO, -8+LO(BASE) // Second word stored in the delay slot.
+ |.endif
+ |
+ |
+ |.ffunc math_ldexp
+ | sltiu AT, NARGS8:RC, 16
+ | lw SFARG1HI, HI(BASE)
+ | bnez AT, ->fff_fallback
+ |. lw CARG4, 8+HI(BASE)
+ | bne CARG4, TISNUM, ->fff_fallback
| load_got ldexp
- | mfc1 CARG3, FARG2
+ |. sltu AT, SFARG1HI, TISNUM
+ | beqz AT, ->fff_fallback
+ |.if FPU
+ |. ldc1 FARG1, 0(BASE)
+ |.else
+ |. lw SFARG1LO, LO(BASE)
+ |.endif
| call_extern
- |. nop
+ |. lw CARG3, 8+LO(BASE)
| b ->fff_resn
|. nop
|
|.ffunc_n math_frexp // Returns two results: the value returned by frexp and the exponent.
| load_got frexp
| lw PC, FRAME_PC(BASE)
| call_extern
|. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) // Pointer argument: frexp writes the exponent into tmptv.
| lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) // TMP1 = exponent written by frexp.
| addiu RA, BASE, -8 // RA = first result slot.
+ |.if FPU
| mtc1 TMP1, FARG2
| sdc1 FRET1, 0(RA) // Result 1: value returned by frexp.
| cvt.d.w FARG2, FARG2 // Exponent converted to a double.
| sdc1 FARG2, 8(RA) // Result 2.
+ |.else
+ | sw SFRETLO, LO(RA) // Result 1: value returned by frexp.
+ | sw SFRETHI, HI(RA)
+ | sw TMP1, 8+LO(RA) // Result 2: exponent stored as a tagged integer.
+ | sw TISNUM, 8+HI(RA)
+ |.endif
| b ->fff_res
|. li RD, (2+1)*8 // Two results.
|
|.ffunc_n math_modf // Returns two results; modf writes the first one through the CARG3 pointer.
| load_got modf
| lw PC, FRAME_PC(BASE)
| call_extern
|. addiu CARG3, BASE, -8 // Pointer argument = first result slot (BASE-8).
| addiu RA, BASE, -8 // RA = first result slot.
+ |.if FPU
| sdc1 FRET1, 0(BASE) // Returned value becomes the second result.
+ |.else
+ | sw SFRETLO, LO(BASE) // Returned value becomes the second result.
+ | sw SFRETHI, HI(BASE)
+ |.endif
| b ->fff_res
|. li RD, (2+1)*8 // Two results.
|
- |.macro math_minmax, name, ismax
- |->ff_ .. name:
- | lw CARG3, HI(BASE)
- | beqz NARGS8:RC, ->fff_fallback
- |. ldc1 FRET1, 0(BASE)
- | sltiu AT, CARG3, LJ_TISNUM
+ |.macro math_minmax, name, intins, fpins // intins/fpins: conditional moves that select the winner.
+ | .ffunc_1 name
+ | addu TMP3, BASE, NARGS8:RC // TMP3 = end of the argument slots.
+ | bne SFARG1HI, TISNUM, >5 // First argument is not an integer.
+ |. addiu TMP2, BASE, 8 // TMP2 = slot of the next argument.
+ |1: // Handle integers.
+ |. lw SFARG2HI, HI(TMP2)
+ | beq TMP2, TMP3, ->fff_restv // All arguments consumed: return the integer result.
+ |. lw SFARG2LO, LO(TMP2)
+ | bne SFARG2HI, TISNUM, >3 // Next argument is not an integer.
+ |. slt AT, SFARG1LO, SFARG2LO // AT = (current < next), signed.
+ | intins SFARG1LO, SFARG2LO, AT // Conditionally replace the current result.
+ | b <1
+ |. addiu TMP2, TMP2, 8
+ |
+ |3: // Convert intermediate result to number and continue with number loop.
+ | sltiu AT, SFARG2HI, LJ_TISNUM
| beqz AT, ->fff_fallback
- |. addu TMP2, BASE, NARGS8:RC
- | addiu TMP1, BASE, 8
- | beq TMP1, TMP2, ->fff_resn
- |1:
- |. lw CARG3, HI(TMP1)
- | ldc1 FARG1, 0(TMP1)
- | addiu TMP1, TMP1, 8
- | sltiu AT, CARG3, LJ_TISNUM
+ |.if FPU
+ |. mtc1 SFARG1LO, FRET1
+ | cvt.d.w FRET1, FRET1 // Integer result so far -> double in FRET1.
+ | b >7
+ |. ldc1 FARG1, 0(TMP2) // Next argument as a double.
+ |.else
+ |. nop
+ | bal ->vm_sfi2d_1 // Soft-float: convert SFARG1 in place.
+ |. nop
+ | b >7
+ |. nop
+ |.endif
+ |
+ |5: // First argument is not an integer: it must be a number.
+ |. sltiu AT, SFARG1HI, LJ_TISNUM
| beqz AT, ->fff_fallback
- |.if ismax
- |. c.olt.d FARG1, FRET1
+ |.if FPU
+ |. ldc1 FRET1, 0(BASE) // Current result = first argument.
+ |.endif
+ |
+ |6: // Handle numbers.
+ |. lw SFARG2HI, HI(TMP2)
+ |.if FPU
+ | beq TMP2, TMP3, ->fff_resn // Done: the result is in FRET1.
|.else
- |. c.olt.d FRET1, FARG1
+ | beq TMP2, TMP3, ->fff_restv // Done: the result is in SFARG1HI/SFARG1LO.
|.endif
- | bne TMP1, TMP2, <1
- |. movf.d FRET1, FARG1
- | b ->fff_resn
+ |. sltiu AT, SFARG2HI, LJ_TISNUM
+ | beqz AT, >8 // Next argument is not a double; 8 checks for an integer.
+ |.if FPU
+ |. ldc1 FARG1, 0(TMP2)
+ |.else
+ |. lw SFARG2LO, LO(TMP2)
+ |.endif
+ |7: // Compare the current result with the next argument.
+ |.if FPU
+ | c.olt.d FRET1, FARG1
+ | fpins FRET1, FARG1
+ |.else
+ | bal ->vm_sfcmpolt // CRET1 = (SFARG1 < SFARG2).
|. nop
+ | intins SFARG1LO, SFARG2LO, CRET1 // Both words are selected with the same condition.
+ | intins SFARG1HI, SFARG2HI, CRET1
+ |.endif
+ | b <6
+ |. addiu TMP2, TMP2, 8
+ |
+ |8: // Convert integer to number and continue with number loop.
+ | bne SFARG2HI, TISNUM, ->fff_fallback
+ |.if FPU
+ |. lwc1 FARG1, LO(TMP2)
+ | b <7
+ |. cvt.d.w FARG1, FARG1
+ |.else
+ |. nop
+ | bal ->vm_sfi2d_2 // Soft-float: convert SFARG2 in place.
+ |. nop
+ | b <7
+ |. nop
+ |.endif
+ |
|.endmacro
|
- | math_minmax math_min, 0
- | math_minmax math_max, 1
+ | math_minmax math_min, movz, movf.d
+ | math_minmax math_max, movn, movt.d
|
|//-- String library -----------------------------------------------------
|
|.ffunc string_byte // Only handle the 1-arg case here.
| lw CARG3, HI(BASE)
@@ -1630,73 +1861,65 @@
| addiu CARG3, CARG3, -LJ_TSTR
| or AT, AT, CARG3
| bnez AT, ->fff_fallback // Need exactly 1 string argument.
|. nop
| lw TMP0, STR:CARG1->len
- | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
| addiu RA, BASE, -8
+ | lw PC, FRAME_PC(BASE)
| sltu RD, r0, TMP0
- | mtc1 TMP1, f0
+ | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
| addiu RD, RD, 1
- | cvt.d.w f0, f0
- | lw PC, FRAME_PC(BASE)
| sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
+ | sw TISNUM, HI(RA)
| b ->fff_res
- |. sdc1 f0, 0(RA)
+ |. sw TMP1, LO(RA)
|
|.ffunc string_char // Only handle the 1-arg case here.
| ffgccheck
- | lw CARG3, HI(BASE)
- | ldc1 FARG1, 0(BASE)
- | li AT, 8
- | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
- |. sltiu AT, CARG3, LJ_TISNUM
- | beqz AT, ->fff_fallback
+ |. lw CARG3, HI(BASE)
+ | lw CARG1, LO(BASE)
+ | li TMP1, 255
+ | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
+ | xor TMP0, CARG3, TISNUM // Integer.
+ | sltu TMP1, TMP1, CARG1 // !(255 < n).
+ | or AT, AT, TMP0 // AT != 0 if any of the three checks failed.
+ | or AT, AT, TMP1
+ | bnez AT, ->fff_fallback
|. li CARG3, 1 // String length = 1.
- | trunc.w.d FARG1, FARG1
| addiu CARG2, sp, ARG5_OFS // String data pointer = address of the ARG5 stack slot.
- | sltiu AT, TMP0, 256
- | mfc1 TMP0, FARG1
- | beqz AT, ->fff_fallback
- |. sw TMP0, ARG5
+ | sb CARG1, ARG5 // Store the character as a single byte at that address.
|->fff_newstr: // In: CARG2 = data pointer, CARG3 = length.
| load_got lj_str_new
| sw BASE, L->base
| sw PC, SAVE_PC
| call_intern lj_str_new // (lua_State *L, char *str, size_t l)
|. move CARG1, L
| // Returns GCstr *.
| lw BASE, L->base
|->fff_resstr: // Return the GCstr in CRET1 as a string TValue.
- | move CARG1, CRET1
+ | move SFARG1LO, CRET1
| b ->fff_restv
- |. li CARG3, LJ_TSTR
+ |. li SFARG1HI, LJ_TSTR // String type tag, set in the delay slot.
|
|.ffunc string_sub
| ffgccheck
- | addiu AT, NARGS8:RC, -16
+ |. addiu AT, NARGS8:RC, -16
| lw CARG3, 16+HI(BASE)
- | ldc1 f0, 16(BASE)
| lw TMP0, HI(BASE)
| lw STR:CARG1, LO(BASE)
| bltz AT, ->fff_fallback
- | lw CARG2, 8+HI(BASE)
- | ldc1 f2, 8(BASE)
+ |. lw CARG2, 8+HI(BASE)
| beqz AT, >1
|. li CARG4, -1
- | trunc.w.d f0, f0
- | sltiu AT, CARG3, LJ_TISNUM
- | beqz AT, ->fff_fallback
- |. mfc1 CARG4, f0
+ | bne CARG3, TISNUM, ->fff_fallback
+ |. lw CARG4, 16+LO(BASE)
|1:
- | sltiu AT, CARG2, LJ_TISNUM
- | beqz AT, ->fff_fallback
+ | bne CARG2, TISNUM, ->fff_fallback
|. li AT, LJ_TSTR
- | trunc.w.d f2, f2
| bne TMP0, AT, ->fff_fallback
- |. lw CARG2, STR:CARG1->len
- | mfc1 CARG3, f2
+ |. lw CARG3, 8+LO(BASE)
+ | lw CARG2, STR:CARG1->len
| // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
| slt AT, CARG4, r0
| addiu TMP0, CARG2, 1
| addu TMP1, CARG4, TMP0
| slt TMP3, CARG3, r0
@@ -1714,18 +1937,18 @@
| subu CARG3, CARG4, CARG3 // len = end - start
| addiu CARG2, CARG2, sizeof(GCstr)-1
| bgez CARG3, ->fff_newstr
|. addiu CARG3, CARG3, 1 // len++
|->fff_emptystr: // Return empty string.
- | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
+ | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty) // Address of the strempty field in the global state.
| b ->fff_restv
- |. li CARG3, LJ_TSTR
+ |. li SFARG1HI, LJ_TSTR // String type tag, set in the delay slot.
|
|.macro ffstring_op, name
| .ffunc string_ .. name
| ffgccheck
- | lw CARG3, HI(BASE)
+ |. lw CARG3, HI(BASE)
| lw STR:CARG2, LO(BASE)
| beqz NARGS8:RC, ->fff_fallback
|. li AT, LJ_TSTR
| bne CARG3, AT, ->fff_fallback
|. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
@@ -1747,31 +1970,100 @@
|ffstring_op lower
|ffstring_op upper
|
|//-- Bit library --------------------------------------------------------
|
+ |->vm_tobit_fb: // Number -> 32 bit integer in CRET1. Callers pass TMP1 = 0 if the argument is not a number.
+ | beqz TMP1, ->fff_fallback // Not a number: fallback.
+ |.if FPU
+ |. ldc1 FARG1, 0(BASE) // Note: always reloads the slot at BASE.
+ | add.d FARG1, FARG1, TOBIT // Add the 2^52+2^51 bias constant.
+ | jr ra
+ |. mfc1 CRET1, FARG1 // CRET1 = low word of the biased double.
+ |.else
+ |// FP number to bit conversion for soft-float.
+ |->vm_tobit: // In: SFARG1HI/SFARG1LO. Out: CRET1. Modifies AT, TMP0, TMP1, SFARG1LO.
+ | sll TMP0, SFARG1HI, 1 // Drop the sign bit; exponent field now in bits 31..21.
+ | lui AT, 0x0020
+ | addu TMP0, TMP0, AT // Exponent field + 1.
+ | slt AT, TMP0, r0 // AT = 1 iff 0x3ff <= biased exponent <= 0x7fe.
+ | movz SFARG1LO, r0, AT // Otherwise the result is 0.
+ | beqz AT, >2
+ |. li TMP1, 0x3e0
+ | not TMP1, TMP1 // TMP1 = ~0x3e0.
+ | sra TMP0, TMP0, 21 // Sign-extended (exponent field + 1).
+ | subu TMP0, TMP1, TMP0 // TMP0 = 31 - unbiased exponent.
+ | slt AT, TMP0, r0
+ | bnez AT, >1 // Unbiased exponent > 31: handled at 1.
+ |. sll TMP1, SFARG1HI, 11 // High mantissa bits moved up to bits 30..11.
+ | lui AT, 0x8000
+ | or TMP1, TMP1, AT // Force bit 31 (implicit leading 1).
+ | srl AT, SFARG1LO, 21
+ | or TMP1, TMP1, AT // Append the top 11 bits of the low word.
+ | slt AT, SFARG1HI, r0 // AT = sign bit of the input.
+ | beqz AT, >2
+ |. srlv SFARG1LO, TMP1, TMP0 // Shift the mantissa down by 31 - exponent.
+ | subu SFARG1LO, r0, SFARG1LO // Negate for negative inputs.
+ |2:
+ | jr ra
+ |. move CRET1, SFARG1LO
+ |1: // Unbiased exponent > 31: combine bits from both mantissa words.
+ | addiu TMP0, TMP0, 21 // TMP0 = 52 - unbiased exponent.
+ | srlv TMP1, SFARG1LO, TMP0
+ | li AT, 20
+ | subu TMP0, AT, TMP0 // TMP0 = unbiased exponent - 32.
+ | sll SFARG1LO, SFARG1HI, 12
+ | sllv AT, SFARG1LO, TMP0
+ | or SFARG1LO, TMP1, AT
+ | slt AT, SFARG1HI, r0 // AT = sign bit of the input.
+ | beqz AT, <2
+ |. nop
+ | jr ra
+ |. subu CRET1, r0, SFARG1LO // Negative input: return the negated value.
+ |.endif
+ |
|.macro .ffunc_bit, name // Prologue for bit.* functions: first argument ends up as an integer in CRET1.
- | .ffunc_n bit_..name
- |. add.d FARG1, FARG1, TOBIT
- | mfc1 CRET1, FARG1
+ | .ffunc_1 bit_..name
+ | beq SFARG1HI, TISNUM, >6 // Already an integer: the low word is the value.
+ |. move CRET1, SFARG1LO
+ | bal ->vm_tobit_fb // Otherwise convert, or fall back if it is not a number.
+ |. sltu TMP1, SFARG1HI, TISNUM // TMP1 = 1 if the high word is below TISNUM.
+ |6:
|.endmacro
|
|.macro .ffunc_bit_op, name, ins // Folds all arguments into CRET1 with the instruction ins.
| .ffunc_bit name
- | addiu TMP1, BASE, 8
- | addu TMP2, BASE, NARGS8:RC
+ | addiu TMP2, BASE, 8 // TMP2 = slot of the next argument.
+ | addu TMP3, BASE, NARGS8:RC // TMP3 = end of the argument slots.
|1:
- | lw CARG4, HI(TMP1)
- | beq TMP1, TMP2, ->fff_resi
- |. ldc1 FARG1, 0(TMP1)
- | sltiu AT, CARG4, LJ_TISNUM
- | beqz AT, ->fff_fallback
- | add.d FARG1, FARG1, TOBIT
- | mfc1 CARG2, FARG1
- | ins CRET1, CRET1, CARG2
+ | lw SFARG1HI, HI(TMP2)
+ | beq TMP2, TMP3, ->fff_resi // All arguments consumed: return CRET1 as an integer.
+ |. lw SFARG1LO, LO(TMP2)
+ |.if FPU
+ | bne SFARG1HI, TISNUM, >2 // Not an integer: convert at 2.
+ |. addiu TMP2, TMP2, 8 // Advance in the delay slot (hence the -8 offset below).
| b <1
- |. addiu TMP1, TMP1, 8
+ |. ins CRET1, CRET1, SFARG1LO
+ |2:
+ | ldc1 FARG1, -8(TMP2) // Reload the argument just inspected as a double.
+ | sltu TMP1, SFARG1HI, TISNUM
+ | beqz TMP1, ->fff_fallback // Not a number: fallback.
+ |. add.d FARG1, FARG1, TOBIT
+ | mfc1 SFARG1LO, FARG1 // Low word of the biased double.
+ | b <1
+ |. ins CRET1, CRET1, SFARG1LO
+ |.else
+ | beq SFARG1HI, TISNUM, >2
+ |. move CRET2, CRET1 // Keep the accumulator; the helper returns in CRET1.
+ | bal ->vm_tobit_fb
+ |. sltu TMP1, SFARG1HI, TISNUM
+ | move SFARG1LO, CRET2 // Operands swapped: CRET1 = converted value, SFARG1LO = accumulator.
+ |2:
+ | ins CRET1, CRET1, SFARG1LO
+ | b <1
+ |. addiu TMP2, TMP2, 8
+ |.endif
|.endmacro
|
|.ffunc_bit_op band, and
|.ffunc_bit_op bor, or
|.ffunc_bit_op bxor, xor
@@ -1791,28 +2083,32 @@
|.ffunc_bit bnot
| b ->fff_resi
|. not CRET1, CRET1
|
|.macro .ffunc_bit_sh, name, ins, shmod // shmod: 0 = plain shift via ins, 1 and 2 = rotate built from two shifts.
- | .ffunc_nn bit_..name
- |. add.d FARG1, FARG1, TOBIT
- | add.d FARG2, FARG2, TOBIT
- | mfc1 CARG1, FARG1
- | mfc1 CARG2, FARG2
+ | .ffunc_2 bit_..name
+ | beq SFARG1HI, TISNUM, >1 // Value is already an integer.
+ |. nop
+ | bal ->vm_tobit_fb // Convert the value, or fall back if it is not a number.
+ |. sltu TMP1, SFARG1HI, TISNUM
+ | move SFARG1LO, CRET1
+ |1:
+ | bne SFARG2HI, TISNUM, ->fff_fallback // The shift count must carry the integer tag.
+ |. nop
|.if shmod == 1
| li AT, 32
- | subu TMP0, AT, CARG2
- | sllv CARG2, CARG1, CARG2
- | srlv CARG1, CARG1, TMP0
+ | subu TMP0, AT, SFARG2LO // TMP0 = 32 - n.
+ | sllv SFARG2LO, SFARG1LO, SFARG2LO // x << n.
+ | srlv SFARG1LO, SFARG1LO, TMP0 // x >> (32 - n).
|.elif shmod == 2
| li AT, 32
- | subu TMP0, AT, CARG2
- | srlv CARG2, CARG1, CARG2
- | sllv CARG1, CARG1, TMP0
+ | subu TMP0, AT, SFARG2LO // TMP0 = 32 - n.
+ | srlv SFARG2LO, SFARG1LO, SFARG2LO // x >> n.
+ | sllv SFARG1LO, SFARG1LO, TMP0 // x << (32 - n).
|.endif
| b ->fff_resi
- |. ins CRET1, CARG1, CARG2
+ |. ins CRET1, SFARG1LO, SFARG2LO // Shift itself, or the combination of both halves for rotates.
|.endmacro
|
|.ffunc_bit_sh lshift, sllv, 0
|.ffunc_bit_sh rshift, srlv, 0
|.ffunc_bit_sh arshift, srav, 0
@@ -1820,13 +2116,15 @@
|.ffunc_bit_sh rol, or, 1
|.ffunc_bit_sh ror, or, 2
|
|.ffunc_bit tobit // bit.tobit: the prologue already leaves the result in CRET1.
|->fff_resi: // Return the integer in CRET1 as the single result.
- | mtc1 CRET1, FRET1
- | b ->fff_resn
- |. cvt.d.w FRET1, FRET1
+ | lw PC, FRAME_PC(BASE) // Reload the PC word saved in the current frame.
+ | addiu RA, BASE, -8 // RA = result slot at BASE-8.
+ | sw TISNUM, -8+HI(BASE) // Tag the result as an integer.
+ | b ->fff_res1
+ |. sw CRET1, -8+LO(BASE) // Integer value, stored in the delay slot.
|
|//-----------------------------------------------------------------------
|
|->fff_fallback: // Call fast function fallback handler.
| // BASE = new base, RB = CFUNC, RC = nargs*8
@@ -2013,40 +2311,38 @@
|
|->cont_stitch: // Trace stitching.
|.if JIT
| // RA = resultptr, RB = meta base
| lw INS, -4(PC)
- | lw TMP3, -24+LO(RB) // Save previous trace number.
+ | lw TMP2, -24+LO(RB) // Save previous trace.
| decode_RA8a RC, INS
| addiu AT, MULTRES, -8
| decode_RA8b RC
| beqz AT, >2
|. addu RC, BASE, RC // Call base.
|1: // Move results down.
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| addiu AT, AT, -8
| addiu RA, RA, 8
- | sdc1 f0, 0(RC)
+ | sw SFRETHI, HI(RC)
+ | sw SFRETLO, LO(RC)
| bnez AT, <1
|. addiu RC, RC, 8
|2:
| decode_RA8a RA, INS
| decode_RB8a RB, INS
| decode_RA8b RA
| decode_RB8b RB
| addu RA, RA, RB
- | lw TMP1, DISPATCH_J(trace)(DISPATCH)
| addu RA, BASE, RA
|3:
| sltu AT, RC, RA
| bnez AT, >9 // More results wanted?
- |. sll TMP2, TMP3, 2
+ |. nop
|
- | addu TMP2, TMP1, TMP2
- | lw TRACE:TMP2, 0(TMP2)
- | beqz TRACE:TMP2, ->cont_nop
- |. nop
+ | lhu TMP3, TRACE:TMP2->traceno
| lhu RD, TRACE:TMP2->link
| beq RD, TMP3, ->cont_nop // Blacklisted.
|. load_got lj_dispatch_stitch
| bnez RD, =>BC_JLOOP // Jump to stitched trace.
|. sll RD, RD, 3
@@ -2084,18 +2380,27 @@
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
|.macro savex_, a, b // Save registers numbered a and b into the exit-state area on the stack.
+ |.if FPU
| sdc1 f..a, 16+a*8(sp) // 8 byte FPR store at 16+a*8.
| sw r..a, 16+32*8+a*4(sp) // GPR area starts after the 32*8 byte FPR area.
| sw r..b, 16+32*8+b*4(sp)
+ |.else
+ | sw r..a, 16+a*4(sp) // Soft-float: no FPR area, GPRs start at offset 16.
+ | sw r..b, 16+b*4(sp)
+ |.endif
|.endmacro
|
|->vm_exit_handler:
|.if JIT
+ |.if FPU
| addiu sp, sp, -(16+32*8+32*4)
+ |.else
+ | addiu sp, sp, -(16+32*4)
+ |.endif
| savex_ 0, 1
| savex_ 2, 3
| savex_ 4, 5
| savex_ 6, 7
| savex_ 8, 9
@@ -2106,21 +2411,29 @@
| savex_ 18, 19
| savex_ 20, 21
| savex_ 22, 23
| savex_ 24, 25
| savex_ 26, 27
+ |.if FPU
| sdc1 f28, 16+28*8(sp)
- | sw r28, 16+32*8+28*4(sp)
| sdc1 f30, 16+30*8(sp)
+ | sw r28, 16+32*8+28*4(sp)
| sw r30, 16+32*8+30*4(sp)
| sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
+ | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
+ | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
+ |.else
+ | sw r28, 16+28*4(sp)
+ | sw r30, 16+30*4(sp)
+ | sw r0, 16+31*4(sp) // Clear RID_TMP.
+ | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
+ | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
+ |.endif
| li_vmstate EXIT
- | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
| addiu DISPATCH, JGL, -GG_DISP2G-32768
| lw TMP1, 0(TMP2) // Load exit number.
| st_vmstate
- | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP.
| lw L, DISPATCH_GL(cur_L)(DISPATCH)
| lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
| load_got lj_trace_exit
| sw L, DISPATCH_J(L)(DISPATCH)
| sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
@@ -2146,27 +2459,28 @@
| addiu DISPATCH, JGL, -GG_DISP2G-32768
| sw BASE, L->base
|1:
| bltz CRET1, >9 // Check for error from exit.
|. lw LFUNC:RB, FRAME_FUNC(BASE)
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| sll MULTRES, CRET1, 3
| li TISNIL, LJ_TNIL
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
| sw MULTRES, SAVE_MULTRES
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| lw TMP1, LFUNC:RB->pc
| sw r0, DISPATCH_GL(jit_base)(DISPATCH)
| lw KBASE, PC2PROTO(k)(TMP1)
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| // Modified copy of ins_next which handles function header dispatch, too.
| lw INS, 0(PC)
| addiu PC, PC, 4
| // Assumes TISNIL == ~LJ_VMST_INTERP == -1
| sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
| decode_OP4a TMP1, INS
| decode_OP4b TMP1
- | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header?
+ | sltiu TMP2, TMP1, BC_FUNCF*4
| addu TMP0, DISPATCH, TMP1
| decode_RD8a RD, INS
| lw AT, 0(TMP0)
| decode_RA8a RA, INS
| beqz TMP2, >2
@@ -2203,12 +2517,13 @@
|
|//-----------------------------------------------------------------------
|//-- Math helper functions ----------------------------------------------
|//-----------------------------------------------------------------------
|
+ |// Hard-float round to integer.
|// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
- |.macro vm_round, func
+ |.macro vm_round_hf, func
| lui TMP0, 0x4330 // Hiword of 2^52 (double).
| mtc1 r0, f4
| mtc1 TMP0, f5
| abs.d FRET2, FARG1 // |x|
| mfc1 AT, f13
@@ -2246,19 +2561,197 @@
|1:
| jr ra
|. mov.d FRET1, FARG1
|.endmacro
|
+ |.macro vm_round, func // Expands to the hard-float rounding code; emits nothing without an FPU.
+ |.if FPU
+ | vm_round_hf, func
+ |.endif
+ |.endmacro
+ |
|->vm_floor: // Label is always defined; its body is empty in soft-float builds.
| vm_round floor
|->vm_ceil:
| vm_round ceil
|->vm_trunc:
|.if JIT // The trunc variant is only emitted for JIT builds.
| vm_round trunc
|.endif
|
+ |// Soft-float integer to number conversion.
+ |.macro sfi2d, AHI, ALO // In: ALO = 32 bit integer. Out: AHI/ALO = words of the double.
+ |.if not FPU
+ | beqz ALO, >9 // Handle zero first.
+ |. sra TMP0, ALO, 31 // TMP0 = 0 or -1 according to the sign (runs on both paths).
+ | xor TMP1, ALO, TMP0
+ | subu TMP1, TMP1, TMP0 // Absolute value in TMP1.
+ | clz AHI, TMP1 // AHI = number of leading zero bits.
+ | andi TMP0, TMP0, 0x800 // Mask sign bit.
+ | li AT, 0x3ff+31-1
+ | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1.
+ | subu AHI, AT, AHI // Exponent - 1 in AHI.
+ | sll ALO, TMP1, 21 // Low 11 bits of the aligned mantissa form the low word.
+ | or AHI, AHI, TMP0 // Sign | Exponent.
+ | srl TMP1, TMP1, 11 // Remaining 21 bits, leading 1 now at bit 20.
+ | sll AHI, AHI, 20 // Align left.
+ | jr ra
+ |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent.
+ |9:
+ | jr ra
+ |. li AHI, 0 // Zero: ALO is already 0, so only the high word is cleared.
+ |.endif
+ |.endmacro
+ |
+ |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfi2d_1:
+ | sfi2d SFARG1HI, SFARG1LO
+ |
+ |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfi2d_2:
+ | sfi2d SFARG2HI, SFARG2LO
+ |
+ |// Soft-float comparison. Equivalent to c.eq.d.
+ |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfcmpeq:
+ |.if not FPU
+ | sll AT, SFARG1HI, 1 // High words with the sign bit shifted out.
+ | sll TMP0, SFARG2HI, 1
+ | or CRET1, SFARG1LO, SFARG2LO
+ | or TMP1, AT, TMP0
+ | or TMP1, TMP1, CRET1 // Zero only if every bit except the two signs is clear.
+ | beqz TMP1, >8 // Both args +-0: return 1.
+ |. sltu CRET1, r0, SFARG1LO // CRET1 = (low word 1 != 0).
+ | lui TMP1, 0xffe0 // Exponent all ones, after the shift by 1.
+ | addu AT, AT, CRET1 // Fold a non-zero low word into the compare value.
+ | sltu CRET1, r0, SFARG2LO
+ | sltu AT, TMP1, AT // AT = 1 if arg 1 is above the all-ones exponent pattern (NaN).
+ | addu TMP0, TMP0, CRET1
+ | sltu TMP0, TMP1, TMP0 // Same test for arg 2.
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0;
+ |. xor TMP0, SFARG1HI, SFARG2HI
+ | xor TMP1, SFARG1LO, SFARG2LO
+ | or AT, TMP0, TMP1 // Zero iff both words are identical.
+ | jr ra
+ |. sltiu CRET1, AT, 1 // Same values: return 1.
+ |8:
+ | jr ra
+ |. li CRET1, 1
+ |9:
+ | jr ra
+ |. li CRET1, 0
+ |.endif
+ |
+ |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
+ |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
+ |->vm_sfcmpult: // Unordered variant: a NaN operand yields 1.
+ |.if not FPU
+ | b >1
+ |. li CRET2, 1 // CRET2 = result to return for NaN operands.
+ |.endif
+ |
+ |->vm_sfcmpolt: // Ordered variant: a NaN operand yields 0.
+ |.if not FPU
+ | li CRET2, 0
+ |1: // Shared body; CRET2 holds the NaN result.
+ | sll AT, SFARG1HI, 1 // High words with the sign bit shifted out.
+ | sll TMP0, SFARG2HI, 1
+ | or CRET1, SFARG1LO, SFARG2LO
+ | or TMP1, AT, TMP0
+ | or TMP1, TMP1, CRET1
+ | beqz TMP1, >8 // Both args +-0: return 0.
+ |. sltu CRET1, r0, SFARG1LO // Also yields CRET1 = 0 on the +-0 path, where the low word is 0.
+ | lui TMP1, 0xffe0
+ | addu AT, AT, CRET1
+ | sltu CRET1, r0, SFARG2LO
+ | sltu AT, TMP1, AT // AT = 1 if arg 1 is NaN.
+ | addu TMP0, TMP0, CRET1
+ | sltu TMP0, TMP1, TMP0 // TMP0 = 1 if arg 2 is NaN.
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
+ |. and AT, SFARG1HI, SFARG2HI // Sign bit set only if both signs are set.
+ | bltz AT, >5 // Both args negative?
+ |. nop
+ | beq SFARG1HI, SFARG2HI, >8 // Equal high words: the low words decide.
+ |. sltu CRET1, SFARG1LO, SFARG2LO
+ | jr ra
+ |. slt CRET1, SFARG1HI, SFARG2HI // Otherwise a signed compare of the high words decides.
+ |5: // Swap conditions if both operands are negative.
+ | beq SFARG1HI, SFARG2HI, >8
+ |. sltu CRET1, SFARG2LO, SFARG1LO
+ | jr ra
+ |. slt CRET1, SFARG2HI, SFARG1HI
+ |8: // Return CRET1 as computed in the preceding delay slot.
+ | jr ra
+ |. nop
+ |9:
+ | jr ra
+ |. move CRET1, CRET2
+ |.endif
+ |
+ |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
+ |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfcmpolex: // NOTE(review): operands with identical words return 0 here, while +-0 vs +-0 returns 1 -- confirm against the callers.
+ |.if not FPU
+ | sll AT, SFARG1HI, 1 // High words with the sign bit shifted out.
+ | sll TMP0, SFARG2HI, 1
+ | or CRET1, SFARG1LO, SFARG2LO
+ | or TMP1, AT, TMP0
+ | or TMP1, TMP1, CRET1
+ | beqz TMP1, >8 // Both args +-0: return 1.
+ |. sltu CRET1, r0, SFARG1LO
+ | lui TMP1, 0xffe0
+ | addu AT, AT, CRET1
+ | sltu CRET1, r0, SFARG2LO
+ | sltu AT, TMP1, AT // AT = 1 if arg 1 is NaN.
+ | addu TMP0, TMP0, CRET1
+ | sltu TMP0, TMP1, TMP0 // TMP0 = 1 if arg 2 is NaN.
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0;
+ |. and AT, SFARG1HI, SFARG2HI
+ | xor AT, AT, TMP3 // Sign bit of TMP3 flips which branch is taken; presumably selects the operand order -- verify against callers.
+ | bltz AT, >5 // Both args negative?
+ |. nop
+ | beq SFARG1HI, SFARG2HI, >6 // Equal high words: the low words decide.
+ |. sltu CRET1, SFARG2LO, SFARG1LO
+ | jr ra
+ |. slt CRET1, SFARG2HI, SFARG1HI
+ |5: // Swap conditions if both operands are negative.
+ | beq SFARG1HI, SFARG2HI, >6
+ |. sltu CRET1, SFARG1LO, SFARG2LO
+ | slt CRET1, SFARG1HI, SFARG2HI // No jr here: falls through to the return at 6.
+ |6:
+ | jr ra
+ |. nop
+ |8:
+ | jr ra
+ |. li CRET1, 1
+ |9:
+ | jr ra
+ |. li CRET1, 0
+ |.endif
+ |
+ |.macro sfmin_max, name, intins // Defines vm_sf<name>: In SFARG1/SFARG2, Out SFRETHI/SFRETLO.
+ |->vm_sf .. name:
+ |.if JIT and not FPU
+ | move TMP2, ra // Keep the return address across the nested bal.
+ | bal ->vm_sfcmpolt // CRET1 = (SFARG1 < SFARG2).
+ |. nop
+ | move TMP0, CRET1 // Copy the condition before the result registers are written.
+ | move SFRETHI, SFARG1HI // Start with the first operand as the result.
+ | move SFRETLO, SFARG1LO
+ | move ra, TMP2
+ | intins SFRETHI, SFARG2HI, TMP0 // Conditionally replace it with the second operand.
+ | jr ra
+ |. intins SFRETLO, SFARG2LO, TMP0
+ |.endif
+ |.endmacro
+ |
+ | sfmin_max min, movz
+ | sfmin_max max, movn
+ |
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
|//-----------------------------------------------------------------------
@@ -2274,32 +2767,33 @@
| addiu DISPATCH, r2, GG_G2DISP
| load_got lj_ccallback_enter
| sw r1, CTSTATE->cb.slot
| sw CARG1, CTSTATE->cb.gpr[0]
| sw CARG2, CTSTATE->cb.gpr[1]
- | sdc1 FARG1, CTSTATE->cb.fpr[0]
+ | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
| sw CARG3, CTSTATE->cb.gpr[2]
| sw CARG4, CTSTATE->cb.gpr[3]
- | sdc1 FARG2, CTSTATE->cb.fpr[1]
+ | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
| addiu TMP0, sp, CFRAME_SPACE+16
| sw TMP0, CTSTATE->cb.stack
| sw r0, SAVE_PC // Any value outside of bytecode is ok.
| move CARG2, sp
| call_intern lj_ccallback_enter // (CTState *cts, void *cf)
|. move CARG1, CTSTATE
| // Returns lua_State *.
| lw BASE, L:CRET1->base
| lw RC, L:CRET1->top
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
| move L, CRET1
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lw LFUNC:RB, FRAME_FUNC(BASE)
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| li_vmstate INTERP
| li TISNIL, LJ_TNIL
| subu RC, RC, BASE
| st_vmstate
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| ins_callt
|.endif
|
|->cont_ffi_callback: // Return from FFI callback.
|.if FFI
@@ -2309,15 +2803,15 @@
| sw RB, L->top
| sw L, CTSTATE->L
| move CARG2, RA
| call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
|. move CARG1, CTSTATE
+ | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
| lw CRET1, CTSTATE->cb.gpr[0]
- | ldc1 FRET1, CTSTATE->cb.fpr[0]
- | lw CRET2, CTSTATE->cb.gpr[1]
+ | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
| b ->vm_leave_unw
- |. ldc1 FRET2, CTSTATE->cb.fpr[1]
+ |. lw CRET2, CTSTATE->cb.gpr[1]
|.endif
|
|->vm_ffi_call: // Call C function via FFI.
| // Caveat: needs special frame unwinding, see below.
|.if FFI
@@ -2345,21 +2839,26 @@
|2:
| lw CFUNCADDR, CCSTATE->func
| lw CARG2, CCSTATE->gpr[1]
| lw CARG3, CCSTATE->gpr[2]
| lw CARG4, CCSTATE->gpr[3]
- | ldc1 FARG1, CCSTATE->fpr[0]
- | ldc1 FARG2, CCSTATE->fpr[1]
+ | .FPU ldc1 FARG1, CCSTATE->fpr[0]
+ | .FPU ldc1 FARG2, CCSTATE->fpr[1]
| jalr CFUNCADDR
|. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
| lw CCSTATE:TMP1, -12(r16)
| lw TMP2, -8(r16)
| lw ra, -4(r16)
| sw CRET1, CCSTATE:TMP1->gpr[0]
| sw CRET2, CCSTATE:TMP1->gpr[1]
+ |.if FPU
| sdc1 FRET1, CCSTATE:TMP1->fpr[0]
| sdc1 FRET2, CCSTATE:TMP1->fpr[1]
+ |.else
+ | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part.
+ | sw CARG2, CCSTATE:TMP1->gpr[3]
+ |.endif
| move sp, r16
| jr ra
|. move r16, TMP2
|.endif
|// Note: vm_ffi_call must be the last function in this object file!
@@ -2379,102 +2878,166 @@
/* Remember: all ops branch for a true comparison, fall through otherwise. */
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1*8, RD = src2*8, JMP with RD = target
- | addu CARG2, BASE, RA
- | addu CARG3, BASE, RD
- | lw TMP0, HI(CARG2)
- | lw TMP1, HI(CARG3)
- | ldc1 f0, 0(CARG2)
- | ldc1 f2, 0(CARG3)
- | sltiu TMP0, TMP0, LJ_TISNUM
- | sltiu TMP1, TMP1, LJ_TISNUM
+ |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp // Operand registers are parameters so ISLE/ISGT can swap them.
+ | addu RA, BASE, RA
+ | addu RD, BASE, RD
+ | lw RAHI, HI(RA)
+ | lw RDHI, HI(RD)
| lhu TMP2, OFS_RD(PC)
- | and TMP0, TMP0, TMP1
| addiu PC, PC, 4
- | beqz TMP0, ->vmeta_comp
- |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
- | decode_RD4b TMP2
- | addu TMP2, TMP2, TMP1
- if (op == BC_ISLT || op == BC_ISGE) {
- | c.olt.d f0, f2
- } else {
- | c.ole.d f0, f2
- }
- if (op == BC_ISLT || op == BC_ISLE) {
- | movf TMP2, r0
- } else {
- | movt TMP2, r0
- }
- | addu PC, PC, TMP2
+ | bne RAHI, TISNUM, >2
+ |. lw RALO, LO(RA)
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | lw RDLO, LO(RD)
+ | bne RDHI, TISNUM, >5
+ |. decode_RD4b TMP2
+ | slt AT, SFARG1LO, SFARG2LO // Both integers: signed compare, always SFARG1 against SFARG2.
+ | addu TMP2, TMP2, TMP3
+ | movop TMP2, r0, AT // Zero the jump offset when the branch is not taken.
|1:
+ | addu PC, PC, TMP2
| ins_next
+ |
+ |2: // RA is not an integer.
+ | sltiu AT, RAHI, LJ_TISNUM
+ | beqz AT, ->vmeta_comp
+ |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | sltiu AT, RDHI, LJ_TISNUM
+ |.if FPU
+ | ldc1 FRA, 0(RA)
+ | ldc1 FRD, 0(RD)
+ |.else
+ | lw RDLO, LO(RD)
+ |.endif
+ | beqz AT, >4
+ |. decode_RD4b TMP2
+ |3: // RA and RD are both numbers.
+ |.if FPU
+ | fcomp f20, f22 // Always f20 against f22; FRA/FRD decide which operand is in which.
+ | addu TMP2, TMP2, TMP3
+ | b <1
+ |. fmovop TMP2, r0
+ |.else
+ | bal sfcomp // Soft-float compare helper, result in CRET1.
+ |. addu TMP2, TMP2, TMP3
+ | b <1
+ |. movop TMP2, r0, CRET1
+ |.endif
+ |
+ |4: // RA is a number, RD is not a number.
+ | bne RDHI, TISNUM, ->vmeta_comp
+ | // RA is a number, RD is an integer. Convert RD to a number.
+ |.if FPU
+ |. lwc1 FRD, LO(RD)
+ | b <3
+ |. cvt.d.w FRD, FRD
+ |.else
+ |. nop
+ |.if "RDHI" == "SFARG1HI"
+ | bal ->vm_sfi2d_1
+ |.else
+ | bal ->vm_sfi2d_2
+ |.endif
+ |. nop
+ | b <3
+ |. nop
+ |.endif
+ |
+ |5: // RA is an integer, RD is not an integer
+ | sltiu AT, RDHI, LJ_TISNUM
+ | beqz AT, ->vmeta_comp
+ | // RA is an integer, RD is a number. Convert RA to a number.
+ |.if FPU
+ |. mtc1 RALO, FRA
+ | ldc1 FRD, 0(RD)
+ | b <3
+ | cvt.d.w FRA, FRA // Directly follows the branch (delay slot), although the line has no '.' marker.
+ |.else
+ |. nop
+ |.if "RAHI" == "SFARG1HI"
+ | bal ->vm_sfi2d_1
+ |.else
+ | bal ->vm_sfi2d_2
+ |.endif
+ |. nop
+ | b <3
+ |. nop
+ |.endif
+ |.endmacro
+ |
+ if (op == BC_ISLT) {
+ | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt
+ } else if (op == BC_ISGE) {
+ | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt
+ } else if (op == BC_ISLE) {
+ | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult
+ } else {
+ | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult
+ }
break;
case BC_ISEQV: case BC_ISNEV:
vk = op == BC_ISEQV;
| // RA = src1*8, RD = src2*8, JMP with RD = target
| addu RA, BASE, RA
- | addiu PC, PC, 4
- | lw TMP0, HI(RA)
- | ldc1 f0, 0(RA)
+ | addiu PC, PC, 4
| addu RD, BASE, RD
+ | lw SFARG1HI, HI(RA)
| lhu TMP2, -4+OFS_RD(PC)
- | lw TMP1, HI(RD)
- | ldc1 f2, 0(RD)
+ | lw SFARG2HI, HI(RD)
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
- | sltiu AT, TMP0, LJ_TISNUM
- | sltiu CARG1, TMP1, LJ_TISNUM
- | decode_RD4b TMP2
- | and AT, AT, CARG1
- | beqz AT, >5
- |. addu TMP2, TMP2, TMP3
- | c.eq.d f0, f2
+ | sltu AT, TISNUM, SFARG1HI // AT = 1 if RA's high word is above TISNUM (neither integer nor number).
+ | sltu TMP0, TISNUM, SFARG2HI // Same test for RD.
+ | or AT, AT, TMP0
if (vk) {
- | movf TMP2, r0
+ | beqz AT, ->BC_ISEQN_Z // Both operands numeric: continue in the BC_ISEQN code.
} else {
- | movt TMP2, r0
+ | beqz AT, ->BC_ISNEN_Z // Both operands numeric: continue in the BC_ISNEN code.
}
- |1:
- | addu PC, PC, TMP2
- | ins_next
- |5: // Either or both types are not numbers.
- | lw CARG2, LO(RA)
- | lw CARG3, LO(RD)
+ |. decode_RD4b TMP2
+ | // Either or both types are not numbers.
+ | lw SFARG1LO, LO(RA)
+ | lw SFARG2LO, LO(RD)
+ | addu TMP2, TMP2, TMP3 // TMP2 = jump offset, cleared below if no jump is taken.
|.if FFI
| li TMP3, LJ_TCDATA
- | beq TMP0, TMP3, ->vmeta_equal_cd
+ | beq SFARG1HI, TMP3, ->vmeta_equal_cd
|.endif
- |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive?
+ |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive?
|.if FFI
- | beq TMP1, TMP3, ->vmeta_equal_cd
+ | beq SFARG2HI, TMP3, ->vmeta_equal_cd
|.endif
- |. xor TMP3, CARG2, CARG3 // Same tv?
- | xor TMP1, TMP1, TMP0 // Same type?
- | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata?
+ |. xor TMP3, SFARG1LO, SFARG2LO // Same tv?
+ | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type?
+ | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata?
| movz TMP3, r0, AT // Ignore tv if primitive.
- | movn CARG1, r0, TMP1 // Tab/ud and same type?
- | or AT, TMP1, TMP3 // Same type && (pri||same tv).
- | movz CARG1, r0, AT
- | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv.
+ | movn TMP0, r0, SFARG2HI // Tab/ud and same type?
+ | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv).
+ | movz TMP0, r0, AT
+ | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv.
if (vk) {
|. movn TMP2, r0, AT
} else {
|. movz TMP2, r0, AT
}
| // Different tables or userdatas. Need to check __eq metamethod.
| // Field metatable must be at same offset for GCtab and GCudata!
- | lw TAB:TMP1, TAB:CARG2->metatable
- | beqz TAB:TMP1, <1 // No metatable?
+ | lw TAB:TMP1, TAB:SFARG1LO->metatable
+ | beqz TAB:TMP1, >1 // No metatable?
|. nop
| lbu TMP1, TAB:TMP1->nomm
| andi TMP1, TMP1, 1<<MM_eq
- | bnez TMP1, <1 // Or 'no __eq' flag set?
+ | bnez TMP1, >1 // Or 'no __eq' flag set?
|. nop
| b ->vmeta_equal // Handle __eq metamethod.
- |. li CARG4, 1-vk // ne = 0 or 1.
+ |. li TMP0, 1-vk // ne = 0 or 1.
+ |1: // Common exit: apply the (possibly zeroed) jump offset and dispatch.
+ | addu PC, PC, TMP2
+ | ins_next
break;
case BC_ISEQS: case BC_ISNES:
vk = op == BC_ISEQS;
| // RA = src*8, RD = str_const*8 (~), JMP with RD = target
@@ -2507,44 +3070,130 @@
case BC_ISEQN: case BC_ISNEN:
vk = op == BC_ISEQN;
| // RA = src*8, RD = num_const*8, JMP with RD = target
| addu RA, BASE, RA
- | addiu PC, PC, 4
- | lw TMP0, HI(RA)
- | ldc1 f0, 0(RA)
- | addu RD, KBASE, RD
- | lhu TMP2, -4+OFS_RD(PC)
- | ldc1 f2, 0(RD)
+ | addu RD, KBASE, RD
+ | lw SFARG1HI, HI(RA)
+ | lw SFARG2HI, HI(RD)
+ | lhu TMP2, OFS_RD(PC)
+ | addiu PC, PC, 4
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
- | sltiu AT, TMP0, LJ_TISNUM
| decode_RD4b TMP2
- |.if FFI
- | beqz AT, >5
- |.else
- | beqz AT, >1
- |.endif
- |. addu TMP2, TMP2, TMP3
- | c.eq.d f0, f2
if (vk) {
- | movf TMP2, r0
- | addu PC, PC, TMP2
+ |->BC_ISEQN_Z: // Entry point used by BC_ISEQV when both operands are numeric.
+ } else {
+ |->BC_ISNEN_Z: // Entry point used by BC_ISNEV when both operands are numeric.
+ }
+ | bne SFARG1HI, TISNUM, >3
+ |. lw SFARG1LO, LO(RA)
+ | lw SFARG2LO, LO(RD)
+ | addu TMP2, TMP2, TMP3
+ | bne SFARG2HI, TISNUM, >6
+ |. xor AT, SFARG1LO, SFARG2LO // AT = 0 iff the two integers are equal.
+ if (vk) {
+ | movn TMP2, r0, AT // Not equal: cancel the jump.
|1:
+ | addu PC, PC, TMP2
+ |2: // "Not equal" exits from the type checks land here, after the jump.
} else {
- | movt TMP2, r0
+ | movz TMP2, r0, AT // Equal: cancel the jump.
|1:
+ |2: // "Not equal" exits from the type checks land here and take the jump.
| addu PC, PC, TMP2
}
| ins_next
+ |
+ |3: // RA is not an integer.
+ | sltiu AT, SFARG1HI, LJ_TISNUM
|.if FFI
- |5:
- | li AT, LJ_TCDATA
- | beq TMP0, AT, ->vmeta_equal_cd
+ | beqz AT, >8
+ |.else
+ | beqz AT, <2
+ |.endif
+ |. addu TMP2, TMP2, TMP3
+ | sltiu AT, SFARG2HI, LJ_TISNUM
+ |.if FPU
+ | ldc1 f20, 0(RA)
+ | ldc1 f22, 0(RD)
+ |.endif
+ | beqz AT, >5
+ |. lw SFARG2LO, LO(RD)
+ |4: // RA and RD are both numbers.
+ |.if FPU
+ | c.eq.d f20, f22
+ | b <1
+ if (vk) {
+ |. movf TMP2, r0
+ } else {
+ |. movt TMP2, r0
+ }
+ |.else
+ | bal ->vm_sfcmpeq // CRET1 = 1 if equal.
|. nop
| b <1
+ if (vk) {
+ |. movz TMP2, r0, CRET1
+ } else {
+ |. movn TMP2, r0, CRET1
+ }
+ |.endif
+ |
+ |5: // RA is a number, RD is not a number.
+ |.if FFI
+ | bne SFARG2HI, TISNUM, >9
+ |.else
+ | bne SFARG2HI, TISNUM, <2
+ |.endif
+ | // RA is a number, RD is an integer. Convert RD to a number.
+ |.if FPU
+ |. lwc1 f22, LO(RD)
+ | b <4
+ |. cvt.d.w f22, f22
+ |.else
|. nop
+ | bal ->vm_sfi2d_2
+ |. nop
+ | b <4
+ |. nop
|.endif
+ |
+ |6: // RA is an integer, RD is not an integer
+ | sltiu AT, SFARG2HI, LJ_TISNUM
+ |.if FFI
+ | beqz AT, >9
+ |.else
+ | beqz AT, <2
+ |.endif
+ | // RA is an integer, RD is a number. Convert RA to a number.
+ |.if FPU
+ |. mtc1 SFARG1LO, f20
+ | ldc1 f22, 0(RD)
+ | b <4
+ | cvt.d.w f20, f20 // Directly follows the branch (delay slot), although the line has no '.' marker.
+ |.else
+ |. nop
+ | bal ->vm_sfi2d_1
+ |. nop
+ | b <4
+ |. nop
+ |.endif
+ |
+ |.if FFI
+ |8: // RA is not a number: only cdata needs the metamethod path.
+ | li AT, LJ_TCDATA
+ | bne SFARG1HI, AT, <2
+ |. nop
+ | b ->vmeta_equal_cd
+ |. nop
+ |9: // RD is not a number: same check for the constant operand.
+ | li AT, LJ_TCDATA
+ | bne SFARG2HI, AT, <2
+ |. nop
+ | b ->vmeta_equal_cd
+ |. nop
+ |.endif
break;
case BC_ISEQP: case BC_ISNEP:
vk = op == BC_ISEQP;
| // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
@@ -2590,21 +3239,23 @@
| movn TMP2, r0, TMP0
}
| addu PC, PC, TMP2
} else {
| sltiu TMP0, TMP0, LJ_TISTRUECOND
- | ldc1 f0, 0(RD)
+ | lw SFRETHI, HI(RD)
+ | lw SFRETLO, LO(RD)
if (op == BC_ISTC) {
| beqz TMP0, >1
} else {
| bnez TMP0, >1
}
|. addu RA, BASE, RA
| decode_RD4b TMP2
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
| addu TMP2, TMP2, TMP3
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| addu PC, PC, TMP2
|1:
}
| ins_next
break;
@@ -2632,14 +3283,16 @@
/* -- Unary ops --------------------------------------------------------- */
case BC_MOV:
| // RA = dst*8, RD = src*8
| addu RD, BASE, RD // RD = address of the source slot.
- | addu RA, BASE, RA
- | ldc1 f0, 0(RD)
+ | addu RA, BASE, RA // RA = address of the destination slot.
+ | lw SFRETHI, HI(RD) // Load the slot as two 32-bit words (replaces the ldc1 via f0).
+ | lw SFRETLO, LO(RD)
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA) // Store both words to the destination (replaces the sdc1).
+ | sw SFRETLO, LO(RA)
| ins_next2
break;
case BC_NOT:
| // RA = dst*8, RD = src*8
| addu RD, BASE, RD
@@ -2652,36 +3305,44 @@
| sw TMP1, HI(RA)
| ins_next2
break;
case BC_UNM:
| // RA = dst*8, RD = src*8
- | addu CARG3, BASE, RD
+ | addu RB, BASE, RD // RB = address of the source slot.
+ | lw SFARG1HI, HI(RB) // High word = type tag of the operand.
| addu RA, BASE, RA
- | lw TMP0, HI(CARG3)
- | ldc1 f0, 0(CARG3)
- | sltiu AT, TMP0, LJ_TISNUM
- | beqz AT, ->vmeta_unm
- |. neg.d f0, f0
+ | bne SFARG1HI, TISNUM, >2 // Tag differs from TISNUM: handle at 2.
+ |. lw SFARG1LO, LO(RB) // Delay slot: low word is loaded on both paths.
+ | lui TMP1, 0x8000 // TMP1 = 0x80000000.
+ | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
+ |. negu SFARG1LO, SFARG1LO // Delay slot: negate the low word.
+ |1: // Common store of the result held in SFARG1HI/SFARG1LO.
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFARG1HI, HI(RA)
+ | sw SFARG1LO, LO(RA)
| ins_next2
+ |2: // Tag is not TISNUM.
+ | sltiu AT, SFARG1HI, LJ_TISNUM
+ | beqz AT, ->vmeta_unm // Tag not below LJ_TISNUM: leave it to the meta handler.
+ |. lui TMP1, 0x8000 // Delay slot: TMP1 = 0x80000000.
+ | b <1
+ |. xor SFARG1HI, SFARG1HI, TMP1 // Delay slot: flip bit 31 of the high word.
break;
case BC_LEN:
| // RA = dst*8, RD = src*8
| addu CARG2, BASE, RD
| addu RA, BASE, RA
| lw TMP0, HI(CARG2)
| lw CARG1, LO(CARG2)
| li AT, LJ_TSTR
| bne TMP0, AT, >2
|. li AT, LJ_TTAB
- | lw CRET1, STR:CARG1->len
+ | lw CRET1, STR:CARG1->len
|1:
- | mtc1 CRET1, f0
- | cvt.d.w f0, f0
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw TISNUM, HI(RA)
+ | sw CRET1, LO(RA)
| ins_next2
|2:
| bne TMP0, AT, ->vmeta_len
|. nop
#if LJ_52
@@ -2708,108 +3369,236 @@
#endif
break;
/* -- Binary ops -------------------------------------------------------- */
- |.macro ins_arithpre
+ |.macro fpmod, a, b, c
+ | bal ->vm_floor // floor(b/c)
+ |. div.d FARG1, b, c // Delay slot: FARG1 = b/c, the value passed to vm_floor.
+ | mul.d a, FRET1, c // a = floor(b/c)*c
+ | sub.d a, b, a // b - floor(b/c)*c
+ |.endmacro
+
+ |.macro sfpmod
+ | addiu sp, sp, -16 // Reserve 16 bytes. NOTE(review): under O32 a callee may spill a0-a3 into 0..15(sp); confirm the saves below survive the calls.
+ |
+ | load_got __divdf3 // Step 1: quotient = arg1 / arg2.
+ | sw SFARG1HI, HI(sp) // Save arg1 at 0(sp).
+ | sw SFARG1LO, LO(sp)
+ | sw SFARG2HI, 8+HI(sp) // Save arg2 at 8(sp).
+ | call_extern
+ |. sw SFARG2LO, 8+LO(sp)
+ |
+ | load_got floor // Step 2: floor(quotient).
+ | move SFARG1HI, SFRETHI // Previous result becomes the first argument.
+ | call_extern
+ |. move SFARG1LO, SFRETLO
+ |
+ | load_got __muldf3 // Step 3: floor(quotient) * arg2.
+ | move SFARG1HI, SFRETHI
+ | move SFARG1LO, SFRETLO
+ | lw SFARG2HI, 8+HI(sp) // Reload the saved arg2.
+ | call_extern
+ |. lw SFARG2LO, 8+LO(sp)
+ |
+ | load_got __subdf3 // Step 4: arg1 - product.
+ | lw SFARG1HI, HI(sp) // Reload the saved arg1.
+ | lw SFARG1LO, LO(sp)
+ | move SFARG2HI, SFRETHI // Product becomes the second argument.
+ | call_extern
+ |. move SFARG2LO, SFRETLO
+ |
+ | addiu sp, sp, 16 // Release the temporary stack space.
+ |.endmacro
+
+ |.macro ins_arithpre, label
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- | decode_RB8a RB, INS
- | decode_RB8b RB
- | decode_RDtoRC8 RC, RD
| // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
||switch (vk) {
||case 0:
- | addu CARG3, BASE, RB
- | addu CARG4, KBASE, RC
- | lw TMP1, HI(CARG3)
- | ldc1 f20, 0(CARG3)
- | ldc1 f22, 0(CARG4)
- | sltiu AT, TMP1, LJ_TISNUM
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | // RA = dst*8, RB = src1*8, RC = num_const*8
+ | addu RB, BASE, RB // RB = address of the stack operand.
+ |.if "label" ~= "none"
+ | b label // Continue in the shared body emitted elsewhere.
+ |.endif
+ |. addu RC, KBASE, RC // RC = address of the constant (delay slot when a label is given).
|| break;
||case 1:
- | addu CARG4, BASE, RB
- | addu CARG3, KBASE, RC
- | lw TMP1, HI(CARG4)
- | ldc1 f22, 0(CARG4)
- | ldc1 f20, 0(CARG3)
- | sltiu AT, TMP1, LJ_TISNUM
+ | decode_RB8a RC, INS
+ | decode_RB8b RC
+ | decode_RDtoRC8 RB, RD
+ | // RA = dst*8, RB = num_const*8, RC = src1*8
+ | addu RC, BASE, RC // RC = address of the stack operand.
+ |.if "label" ~= "none"
+ | b label // Continue in the shared body emitted elsewhere.
+ |.endif
+ |. addu RB, KBASE, RB // RB = address of the constant (delay slot when a label is given).
|| break;
||default:
- | addu CARG3, BASE, RB
- | addu CARG4, BASE, RC
- | lw TMP1, HI(CARG3)
- | lw TMP2, HI(CARG4)
- | ldc1 f20, 0(CARG3)
- | ldc1 f22, 0(CARG4)
- | sltiu AT, TMP1, LJ_TISNUM
- | sltiu TMP0, TMP2, LJ_TISNUM
- | and AT, AT, TMP0
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | // RA = dst*8, RB = src1*8, RC = src2*8
+ | addu RB, BASE, RB // RB = address of the first stack operand.
+ |.if "label" ~= "none"
+ | b label // Continue in the shared body emitted elsewhere.
+ |.endif
+ |. addu RC, BASE, RC // RC = address of the second stack operand (delay slot when a label is given).
|| break;
||}
- | beqz AT, ->vmeta_arith
- |. addu RA, BASE, RA
|.endmacro
|
- |.macro fpmod, a, b, c
- |->BC_MODVN_Z:
- | bal ->vm_floor // floor(b/c)
- |. div.d FARG1, b, c
- | mul.d a, FRET1, c
- | sub.d a, b, a // b - floor(b/c)*c
- |.endmacro
+ |.macro ins_arith, intins, fpins, fpcall, label
+ | ins_arithpre none // Set RB/RC to the operand addresses; no branch is emitted.
|
- |.macro ins_arith, ins
- | ins_arithpre
- |.if "ins" == "fpmod_"
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- |. nop
+ |.if "label" ~= "none"
+ |label:
+ |.endif
+ |
+ | lw SFARG1HI, HI(RB) // High words (type tags) of both operands.
+ | lw SFARG2HI, HI(RC)
+ |
+ |.if "intins" ~= "div"
+ |
+ | // Check for two integers.
+ | lw SFARG1LO, LO(RB)
+ | bne SFARG1HI, TISNUM, >5 // Operand 1 is not tagged TISNUM: try the number path.
+ |. lw SFARG2LO, LO(RC)
+ | bne SFARG2HI, TISNUM, >5 // Operand 2 is not tagged TISNUM: try the number path.
+ |
+ |.if "intins" == "addu"
+ |. intins CRET1, SFARG1LO, SFARG2LO
+ | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow.
+ | xor TMP2, CRET1, SFARG2LO
+ | and TMP1, TMP1, TMP2
+ | bltz TMP1, ->vmeta_arith
+ |. addu RA, BASE, RA
+ |.elif "intins" == "subu"
+ |. intins CRET1, SFARG1LO, SFARG2LO
+ | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow.
+ | xor TMP2, SFARG1LO, SFARG2LO
+ | and TMP1, TMP1, TMP2
+ | bltz TMP1, ->vmeta_arith
+ |. addu RA, BASE, RA
+ |.elif "intins" == "mult"
+ |. intins SFARG1LO, SFARG2LO
+ | mflo CRET1 // Low 32 bits of the product.
+ | mfhi TMP2 // High 32 bits of the product.
+ | sra TMP1, CRET1, 31 // Sign extension of the low word.
+ | bne TMP1, TMP2, ->vmeta_arith // High word is not the sign extension: overflow.
+ |. addu RA, BASE, RA
|.else
- | ins f0, f20, f22
+ |. load_got lj_vm_modi
+ | beqz SFARG2LO, ->vmeta_arith // A zero divisor is left to vmeta_arith.
+ |. addu RA, BASE, RA
+ |.if ENDIAN_BE
+ | move CARG1, SFARG1LO
+ |.endif
+ | call_extern
+ |. move CARG2, SFARG2LO
+ |.endif
+ |
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw TISNUM, HI(RA) // Integer result: TISNUM tag in the high word, value in the low word.
+ | sw CRET1, LO(RA)
+ |3: // Shared dispatch tail, also reached from the number path below.
| ins_next2
+ |
+ |.elif not FPU
+ |
+ | lw SFARG1LO, LO(RB) // Soft-float division: load the low words for the call.
+ | lw SFARG2LO, LO(RC)
+ |
|.endif
+ |
+ |5: // Check for two numbers.
+ | .FPU ldc1 f20, 0(RB)
+ | sltiu AT, SFARG1HI, LJ_TISNUM
+ | sltiu TMP0, SFARG2HI, LJ_TISNUM
+ | .FPU ldc1 f22, 0(RC)
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_arith // At least one tag is not below LJ_TISNUM.
+ |. addu RA, BASE, RA
+ |
+ |.if FPU
+ | fpins FRET1, f20, f22
+ |.elif "fpcall" == "sfpmod"
+ | sfpmod
+ |.else
+ | load_got fpcall
+ | call_extern
+ |. nop
+ |.endif
+ |
+ | ins_next1
+ |.if not FPU
+ | sw SFRETHI, HI(RA)
+ |.endif
+ |.if "intins" ~= "div"
+ | b <3 // Reuse the ins_next2 emitted on the integer path.
+ |.endif
+ |.if FPU
+ |. sdc1 FRET1, 0(RA)
+ |.else
+ |. sw SFRETLO, LO(RA)
+ |.endif
+ |.if "intins" == "div"
+ | ins_next2
+ |.endif
+ |
|.endmacro
case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
- | ins_arith add.d
+ | ins_arith addu, add.d, __adddf3, none // Arguments: integer op, FPU op, soft-float routine, shared label.
break;
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
- | ins_arith sub.d
+ | ins_arith subu, sub.d, __subdf3, none
break;
case BC_MULVN: case BC_MULNV: case BC_MULVV:
- | ins_arith mul.d
+ | ins_arith mult, mul.d, __muldf3, none
break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | ins_arith div.d
+ case BC_DIVVN:
+ | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z // "div" selects no integer fast path in ins_arith.
break;
+ case BC_DIVNV: case BC_DIVVV:
+ | ins_arithpre ->BC_DIVVN_Z // Only set up RB/RC, then branch to the body emitted for BC_DIVVN.
+ break;
case BC_MODVN:
- | ins_arith fpmod
+ | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
break;
case BC_MODNV: case BC_MODVV:
- | ins_arith fpmod_
+ | ins_arithpre ->BC_MODVN_Z // Only set up RB/RC, then branch to the body emitted for BC_MODVN.
break;
case BC_POW:
- | decode_RB8a RB, INS
- | decode_RB8b RB
- | decode_RDtoRC8 RC, RD
- | addu CARG3, BASE, RB
- | addu CARG4, BASE, RC
- | lw TMP1, HI(CARG3)
- | lw TMP2, HI(CARG4)
- | ldc1 FARG1, 0(CARG3)
- | ldc1 FARG2, 0(CARG4)
- | sltiu AT, TMP1, LJ_TISNUM
- | sltiu TMP0, TMP2, LJ_TISNUM
+ | ins_arithpre none // RB/RC = operand addresses; no branch is emitted.
+ | lw SFARG1HI, HI(RB) // High words (type tags) of both operands.
+ | lw SFARG2HI, HI(RC)
+ | sltiu AT, SFARG1HI, LJ_TISNUM
+ | sltiu TMP0, SFARG2HI, LJ_TISNUM
| and AT, AT, TMP0
| load_got pow
| beqz AT, ->vmeta_arith
|. addu RA, BASE, RA
+ |.if FPU
+ | ldc1 FARG1, 0(RB) // Hard-float: both operands go in FP argument registers.
+ | ldc1 FARG2, 0(RC)
+ |.else
+ | lw SFARG1LO, LO(RB) // Soft-float: complete the HI/LO pairs loaded above.
+ | lw SFARG2LO, LO(RC)
+ |.endif
| call_extern
|. nop
| ins_next1
+ |.if FPU
| sdc1 FRET1, 0(RA)
+ |.else
+ | sw SFRETHI, HI(RA) // Soft-float result is returned as a HI/LO register pair.
+ | sw SFRETLO, LO(RA)
+ |.endif
| ins_next2
break;
case BC_CAT:
| // RA = dst*8, RB = src_start*8, RC = src_end*8
@@ -2828,14 +3617,16 @@
|. move CARG1, L
| // Returns NULL (finished) or TValue * (metamethod).
| bnez CRET1, ->vmeta_binop
|. lw BASE, L->base
| addu RB, BASE, MULTRES
- | ldc1 f0, 0(RB)
+ | lw SFRETHI, HI(RB)
+ | lw SFRETLO, LO(RB)
| addu RA, BASE, RA
| ins_next1
- | sdc1 f0, 0(RA) // Copy result from RB to RA.
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
break;
/* -- Constant ops ------------------------------------------------------ */
@@ -2866,24 +3657,25 @@
|.endif
break;
case BC_KSHORT:
| // RA = dst*8, RD = int16_literal*8
| sra RD, INS, 16 // RD = sign-extended upper 16 bits of the instruction word.
- | mtc1 RD, f0
| addu RA, BASE, RA
- | cvt.d.w f0, f0
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw TISNUM, HI(RA) // Store as a tagged integer instead of converting to a double.
+ | sw RD, LO(RA)
| ins_next2
break;
case BC_KNUM:
| // RA = dst*8, RD = num_const*8
| addu RD, KBASE, RD // RD = address of the constant.
| addu RA, BASE, RA // RA = address of the destination slot.
- | ldc1 f0, 0(RD)
+ | lw SFRETHI, HI(RD) // Copy the constant as two 32-bit words (replaces the ldc1 via f0).
+ | lw SFRETLO, LO(RD)
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
break;
case BC_KPRI:
| // RA = dst*8, RD = primitive_type*8 (~)
| srl TMP1, RD, 3
@@ -2915,41 +3707,44 @@
| srl RD, RD, 1
| addu RD, RD, LFUNC:RB
| lw UPVAL:RB, LFUNC:RD->uvptr
| ins_next1
| lw TMP1, UPVAL:RB->v
- | ldc1 f0, 0(TMP1)
+ | lw SFRETHI, HI(TMP1)
+ | lw SFRETLO, LO(TMP1)
| addu RA, BASE, RA
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
break;
case BC_USETV:
| // RA = uvnum*8, RD = src*8
| lw LFUNC:RB, FRAME_FUNC(BASE)
| srl RA, RA, 1
| addu RD, BASE, RD
| addu RA, RA, LFUNC:RB
- | ldc1 f0, 0(RD)
| lw UPVAL:RB, LFUNC:RA->uvptr
+ | lw SFRETHI, HI(RD)
+ | lw SFRETLO, LO(RD)
| lbu TMP3, UPVAL:RB->marked
| lw CARG2, UPVAL:RB->v
| andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
| lbu TMP0, UPVAL:RB->closed
- | lw TMP2, HI(RD)
- | sdc1 f0, 0(CARG2)
+ | sw SFRETHI, HI(CARG2)
+ | sw SFRETLO, LO(CARG2)
| li AT, LJ_GC_BLACK|1
| or TMP3, TMP3, TMP0
| beq TMP3, AT, >2 // Upvalue is closed and black?
- |. addiu TMP2, TMP2, -(LJ_TNUMX+1)
+ |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1)
|1:
| ins_next
|
|2: // Check if new value is collectable.
| sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
| beqz AT, <1 // tvisgcv(v)
- |. lw TMP1, LO(RD)
- | lbu TMP3, GCOBJ:TMP1->gch.marked
+ |. nop
+ | lbu TMP3, GCOBJ:SFRETLO->gch.marked
| andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
| beqz TMP3, <1
|. load_got lj_gc_barrieruv
| // Crossed a write barrier. Move the barrier forward.
| call_intern lj_gc_barrieruv // (global_State *g, TValue *tv)
@@ -2993,28 +3788,30 @@
| // RA = uvnum*8, RD = num_const*8
| lw LFUNC:RB, FRAME_FUNC(BASE)
| srl RA, RA, 1
| addu RD, KBASE, RD
| addu RA, RA, LFUNC:RB
- | ldc1 f0, 0(RD)
- | lw UPVAL:RB, LFUNC:RA->uvptr
+ | lw UPVAL:RB, LFUNC:RA->uvptr
+ | lw SFRETHI, HI(RD)
+ | lw SFRETLO, LO(RD)
+ | lw TMP1, UPVAL:RB->v
| ins_next1
- | lw TMP1, UPVAL:RB->v
- | sdc1 f0, 0(TMP1)
+ | sw SFRETHI, HI(TMP1)
+ | sw SFRETLO, LO(TMP1)
| ins_next2
break;
case BC_USETP:
| // RA = uvnum*8, RD = primitive_type*8 (~)
| lw LFUNC:RB, FRAME_FUNC(BASE)
| srl RA, RA, 1 // RA = uvnum*4.
| srl TMP0, RD, 3 // TMP0 = primitive_type (still complemented).
| addu RA, RA, LFUNC:RB
| not TMP0, TMP0 // Undo the complement to get the tag value.
- | lw UPVAL:RB, LFUNC:RA->uvptr
+ | lw UPVAL:RB, LFUNC:RA->uvptr
| ins_next1
- | lw TMP1, UPVAL:RB->v
- | sw TMP0, HI(TMP1)
+ | lw TMP1, UPVAL:RB->v
+ | sw TMP0, HI(TMP1) // Only the high (tag) word of the upvalue is written.
| ins_next2
break;
case BC_UCLO:
| // RA = level*8, RD = target
@@ -3046,12 +3843,12 @@
| // Returns GCfuncL *.
| lw BASE, L->base
| li TMP0, LJ_TFUNC
| ins_next1
| addu RA, BASE, RA
- | sw TMP0, HI(RA)
| sw LFUNC:CRET1, LO(RA)
+ | sw TMP0, HI(RA)
| ins_next2
break;
/* -- Table ops --------------------------------------------------------- */
@@ -3128,35 +3925,27 @@
| addu CARG3, BASE, RC
| lw TMP1, HI(CARG2)
| lw TMP2, HI(CARG3)
| lw TAB:RB, LO(CARG2)
| li AT, LJ_TTAB
- | ldc1 f0, 0(CARG3)
| bne TMP1, AT, ->vmeta_tgetv
|. addu RA, BASE, RA
- | sltiu AT, TMP2, LJ_TISNUM
- | beqz AT, >5
- |. li AT, LJ_TSTR
- |
- | // Convert number key to integer, check for integerness and range.
- | cvt.w.d f2, f0
- | lw TMP0, TAB:RB->asize
- | mfc1 TMP2, f2
- | cvt.d.w f4, f2
+ | bne TMP2, TISNUM, >5
+ |. lw RC, LO(CARG3)
+ | lw TMP0, TAB:RB->asize
| lw TMP1, TAB:RB->array
- | c.eq.d f0, f4
- | sltu AT, TMP2, TMP0
- | movf AT, r0
- | sll TMP2, TMP2, 3
+ | sltu AT, RC, TMP0
+ | sll TMP2, RC, 3
| beqz AT, ->vmeta_tgetv // Integer key and in array part?
|. addu TMP2, TMP1, TMP2
- | lw TMP0, HI(TMP2)
- | beq TMP0, TISNIL, >2
- |. ldc1 f0, 0(TMP2)
+ | lw SFRETHI, HI(TMP2)
+ | beq SFRETHI, TISNIL, >2
+ |. lw SFRETLO, LO(TMP2)
|1:
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
|
|2: // Check for __index if table value is nil.
| lw TAB:TMP2, TAB:RB->metatable
| beqz TAB:TMP2, <1 // No metatable: done.
@@ -3167,12 +3956,13 @@
|. nop
| b ->vmeta_tgetv
|. nop
|
|5:
+ | li AT, LJ_TSTR
| bne TMP2, AT, ->vmeta_tgetv
- |. lw STR:RC, LO(CARG3)
+ |. nop
| b ->BC_TGETS_Z // String key?
|. nop
break;
case BC_TGETS:
| // RA = dst*8, RB = table*8, RC = str_const*4 (~)
@@ -3200,32 +3990,32 @@
| addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
|1:
| lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
| lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
| lw NODE:TMP1, NODE:TMP2->next
- | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2)
+ | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2)
| addiu CARG1, CARG1, -LJ_TSTR
| xor TMP0, TMP0, STR:RC
| or AT, CARG1, TMP0
| bnez AT, >4
|. lw TAB:TMP3, TAB:RB->metatable
- | beq CARG2, TISNIL, >5 // Key found, but nil value?
- |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2)
+ | beq SFRETHI, TISNIL, >5 // Key found, but nil value?
+ |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2)
|3:
| ins_next1
- | sw CARG2, HI(RA)
- | sw CARG1, LO(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
|
|4: // Follow hash chain.
| bnez NODE:TMP1, <1
|. move NODE:TMP2, NODE:TMP1
| // End of hash chain: key not found, nil result.
|
|5: // Check for __index if table value is nil.
| beqz TAB:TMP3, <3 // No metatable: done.
- |. li CARG2, LJ_TNIL
+ |. li SFRETHI, LJ_TNIL
| lbu TMP0, TAB:TMP3->nomm
| andi TMP0, TMP0, 1<<MM_index
| bnez TMP0, <3 // 'no __index' flag set: done.
|. nop
| b ->vmeta_tgets
@@ -3246,51 +4036,52 @@
| lw TMP1, TAB:RB->asize
| lw TMP2, TAB:RB->array
| sltu AT, TMP0, TMP1
| beqz AT, ->vmeta_tgetb
|. addu RC, TMP2, RC
- | lw TMP1, HI(RC)
- | beq TMP1, TISNIL, >5
- |. ldc1 f0, 0(RC)
+ | lw SFRETHI, HI(RC)
+ | beq SFRETHI, TISNIL, >5
+ |. lw SFRETLO, LO(RC)
|1:
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
|
|5: // Check for __index if table value is nil.
| lw TAB:TMP2, TAB:RB->metatable
| beqz TAB:TMP2, <1 // No metatable: done.
|. nop
| lbu TMP1, TAB:TMP2->nomm
| andi TMP1, TMP1, 1<<MM_index
| bnez TMP1, <1 // 'no __index' flag set: done.
|. nop
- | b ->vmeta_tgetb // Caveat: preserve TMP0!
+ | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
|. nop
break;
case BC_TGETR:
| // RA = dst*8, RB = table*8, RC = key*8
| decode_RB8a RB, INS
| decode_RB8b RB
| decode_RDtoRC8 RC, RD
- | addu CARG2, BASE, RB
- | addu CARG3, BASE, RC
- | lw TAB:CARG1, LO(CARG2)
- | ldc1 f0, 0(CARG3)
- | trunc.w.d f2, f0
- | lw TMP0, TAB:CARG1->asize
- | mfc1 CARG2, f2
+ | addu RB, BASE, RB
+ | addu RC, BASE, RC
+ | lw TAB:CARG1, LO(RB) // CARG1 = table.
+ | lw CARG2, LO(RC) // CARG2 = low word of the key, used directly as the index.
+ | addu RA, BASE, RA // Computed before BC_TGETR_Z, so code entering at that label must already have RA set.
+ | lw TMP0, TAB:CARG1->asize
| lw TMP1, TAB:CARG1->array
| sltu AT, CARG2, TMP0
| sll TMP2, CARG2, 3
| beqz AT, ->vmeta_tgetr // In array part?
- |. addu TMP2, TMP1, TMP2
- | ldc1 f0, 0(TMP2)
+ |. addu CRET1, TMP1, TMP2 // Delay slot: CRET1 = address of the array slot.
+ | lw SFARG2HI, HI(CRET1)
+ | lw SFARG2LO, LO(CRET1)
|->BC_TGETR_Z: // Stores the value held in SFARG2HI/SFARG2LO to the slot at RA.
- | addu RA, BASE, RA
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFARG2HI, HI(RA)
+ | sw SFARG2LO, LO(RA)
| ins_next2
break;
case BC_TSETV:
| // RA = src*8, RB = table*8, RC = key*8
@@ -3301,37 +4092,30 @@
| addu CARG3, BASE, RC
| lw TMP1, HI(CARG2)
| lw TMP2, HI(CARG3)
| lw TAB:RB, LO(CARG2)
| li AT, LJ_TTAB
- | ldc1 f0, 0(CARG3)
| bne TMP1, AT, ->vmeta_tsetv
|. addu RA, BASE, RA
- | sltiu AT, TMP2, LJ_TISNUM
- | beqz AT, >5
- |. li AT, LJ_TSTR
- |
- | // Convert number key to integer, check for integerness and range.
- | cvt.w.d f2, f0
- | lw TMP0, TAB:RB->asize
- | mfc1 TMP2, f2
- | cvt.d.w f4, f2
+ | bne TMP2, TISNUM, >5
+ |. lw RC, LO(CARG3)
+ | lw TMP0, TAB:RB->asize
| lw TMP1, TAB:RB->array
- | c.eq.d f0, f4
- | sltu AT, TMP2, TMP0
- | movf AT, r0
- | sll TMP2, TMP2, 3
+ | sltu AT, RC, TMP0
+ | sll TMP2, RC, 3
| beqz AT, ->vmeta_tsetv // Integer key and in array part?
|. addu TMP1, TMP1, TMP2
- | lbu TMP3, TAB:RB->marked
| lw TMP0, HI(TMP1)
+ | lbu TMP3, TAB:RB->marked
+ | lw SFRETHI, HI(RA)
| beq TMP0, TISNIL, >3
- |. ldc1 f0, 0(RA)
+ |. lw SFRETLO, LO(RA)
|1:
- | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
- | bnez AT, >7
- |. sdc1 f0, 0(TMP1)
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | sw SFRETHI, HI(TMP1)
+ | bnez AT, >7
+ |. sw SFRETLO, LO(TMP1)
|2:
| ins_next
|
|3: // Check for __newindex if previous value is nil.
| lw TAB:TMP2, TAB:RB->metatable
@@ -3343,12 +4127,13 @@
|. nop
| b ->vmeta_tsetv
|. nop
|
|5:
+ | li AT, LJ_TSTR
| bne TMP2, AT, ->vmeta_tsetv
- |. lw STR:RC, LO(CARG3)
+ |. nop
| b ->BC_TSETS_Z // String key?
|. nop
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:RB, TMP3, TMP0, <2
@@ -3376,11 +4161,16 @@
| and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
| sll TMP0, TMP1, 5
| sll TMP1, TMP1, 3
| subu TMP1, TMP0, TMP1
| addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
+ |.if FPU
| ldc1 f20, 0(RA)
+ |.else
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
+ |.endif
|1:
| lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
| lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
| li AT, LJ_TSTR
| lw NODE:TMP1, NODE:TMP2->next
@@ -3390,12 +4180,18 @@
|. lbu TMP3, TAB:RB->marked
| beq CARG2, TISNIL, >4 // Key found, but nil value?
|. lw TAB:TMP0, TAB:RB->metatable
|2:
| andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| bnez AT, >7
|. sdc1 f20, NODE:TMP2->val
+ |.else
+ | sw SFRETHI, NODE:TMP2->val.u32.hi
+ | bnez AT, >7
+ |. sw SFRETLO, NODE:TMP2->val.u32.lo
+ |.endif
|3:
| ins_next
|
|4: // Check for __newindex if previous value is nil.
| beqz TAB:TMP0, <2 // No metatable: done.
@@ -3429,12 +4225,20 @@
| sw PC, SAVE_PC
| call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k
|. move CARG1, L
| // Returns TValue *.
| lw BASE, L->base
+ |.if FPU
| b <3 // No 2nd write barrier needed.
|. sdc1 f20, 0(CRET1)
+ |.else
+ | lw SFARG1HI, HI(RA)
+ | lw SFARG1LO, LO(RA)
+ | sw SFARG1HI, HI(CRET1)
+ | b <3 // No 2nd write barrier needed.
+ |. sw SFARG1LO, LO(CRET1)
+ |.endif
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:RB, TMP3, TMP0, <3
break;
case BC_TSETB:
@@ -3455,15 +4259,17 @@
| beqz AT, ->vmeta_tsetb
|. addu RC, TMP2, RC
| lw TMP1, HI(RC)
| lbu TMP3, TAB:RB->marked
| beq TMP1, TISNIL, >5
- |. ldc1 f0, 0(RA)
|1:
+ |. lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | sw SFRETHI, HI(RC)
| bnez AT, >7
- |. sdc1 f0, 0(RC)
+ |. sw SFRETLO, LO(RC)
|2:
| ins_next
|
|5: // Check for __newindex if previous value is nil.
| lw TAB:TMP2, TAB:RB->metatable
@@ -3471,11 +4277,11 @@
|. nop
| lbu TMP1, TAB:TMP2->nomm
| andi TMP1, TMP1, 1<<MM_newindex
| bnez TMP1, <1 // 'no __newindex' flag set: done.
|. nop
- | b ->vmeta_tsetb // Caveat: preserve TMP0!
+ | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
|. nop
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:RB, TMP3, TMP0, <2
break;
@@ -3484,36 +4290,35 @@
| decode_RB8a RB, INS
| decode_RB8b RB
| decode_RDtoRC8 RC, RD
| addu CARG1, BASE, RB
| addu CARG3, BASE, RC
- | lw TAB:CARG2, LO(CARG1)
- | ldc1 f0, 0(CARG3)
- | trunc.w.d f2, f0
- | lbu TMP3, TAB:CARG2->marked
+ | lw TAB:CARG2, LO(CARG1)
+ | lw CARG3, LO(CARG3)
+ | lbu TMP3, TAB:CARG2->marked
| lw TMP0, TAB:CARG2->asize
- | mfc1 CARG3, f2
- | lw TMP1, TAB:CARG2->array
+ | lw TMP1, TAB:CARG2->array
| andi AT, TMP3, LJ_GC_BLACK // isblack(table)
| bnez AT, >7
|. addu RA, BASE, RA
|2:
| sltu AT, CARG3, TMP0
| sll TMP2, CARG3, 3
| beqz AT, ->vmeta_tsetr // In array part?
- |. ldc1 f20, 0(RA)
- | addu CRET1, TMP1, TMP2
+ |. addu CRET1, TMP1, TMP2
|->BC_TSETR_Z:
+ | lw SFARG1HI, HI(RA)
+ | lw SFARG1LO, LO(RA)
| ins_next1
- | sdc1 f20, 0(CRET1)
+ | sw SFARG1HI, HI(CRET1)
+ | sw SFARG1LO, LO(CRET1)
| ins_next2
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:CARG2, TMP3, CRET1, <2 // Table is in CARG2 (RB is only the operand offset); TMP0 (asize) must survive for 2:, so CRET1 is the scratch.
break;
-
case BC_TSETM:
| // RA = base*8 (table at base-1), RD = num_const*8 (start index)
| addu RA, BASE, RA
|1:
| addu TMP3, KBASE, RD
@@ -3531,14 +4336,16 @@
| bnez AT, >5
|. addu TMP2, RA, TMP0
| addu TMP1, TMP1, CARG1
| andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
|3: // Copy result slots to table.
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| addiu RA, RA, 8
| sltu AT, RA, TMP2
- | sdc1 f0, 0(TMP1)
+ | sw SFRETHI, HI(TMP1)
+ | sw SFRETLO, LO(TMP1)
| bnez AT, <3
|. addiu TMP1, TMP1, 8
| bnez TMP0, >7
|. nop
|4:
@@ -3609,14 +4416,16 @@
| sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function?
| move TMP2, BASE
| beqz NARGS8:RC, >3
|. move TMP3, NARGS8:RC
|2:
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| addiu RA, RA, 8
| addiu TMP3, TMP3, -8
- | sdc1 f0, 0(TMP2)
+ | sw SFRETHI, HI(TMP2)
+ | sw SFRETLO, LO(TMP2)
| bnez TMP3, <2
|. addiu TMP2, TMP2, 8
|3:
| or TMP0, TMP0, AT
| beqz TMP0, >5
@@ -3649,16 +4458,20 @@
| move TMP2, BASE
| addu BASE, BASE, RA
| li AT, LJ_TFUNC
| lw TMP1, -24+HI(BASE)
| lw LFUNC:RB, -24+LO(BASE)
- | ldc1 f2, -8(BASE)
- | ldc1 f0, -16(BASE)
+ | lw SFARG1HI, -16+HI(BASE)
+ | lw SFARG1LO, -16+LO(BASE)
+ | lw SFARG2HI, -8+HI(BASE)
+ | lw SFARG2LO, -8+LO(BASE)
| sw TMP1, HI(BASE) // Copy callable.
| sw LFUNC:RB, LO(BASE)
- | sdc1 f2, 16(BASE) // Copy control var.
- | sdc1 f0, 8(BASE) // Copy state.
+ | sw SFARG1HI, 8+HI(BASE) // Copy state.
+ | sw SFARG1LO, 8+LO(BASE)
+ | sw SFARG2HI, 16+HI(BASE) // Copy control var.
+ | sw SFARG2LO, 16+LO(BASE)
| addiu BASE, BASE, 8
| bne TMP1, AT, ->vmeta_call
|. li NARGS8:RC, 16 // Iterators get 2 arguments.
| ins_call
break;
@@ -3677,24 +4490,24 @@
|1: // Traverse array part.
| sltu AT, RC, TMP0
| beqz AT, >5 // Index points after array part?
|. sll TMP3, RC, 3
| addu TMP3, TMP1, TMP3
- | lw TMP2, HI(TMP3)
- | ldc1 f0, 0(TMP3)
- | mtc1 RC, f2
+ | lw SFARG1HI, HI(TMP3)
+ | lw SFARG1LO, LO(TMP3)
| lhu RD, -4+OFS_RD(PC)
- | beq TMP2, TISNIL, <1 // Skip holes in array part.
+ | sw TISNUM, HI(RA)
+ | sw RC, LO(RA)
+ | beq SFARG1HI, TISNIL, <1 // Skip holes in array part.
|. addiu RC, RC, 1
- | cvt.d.w f2, f2
+ | sw SFARG1HI, 8+HI(RA)
+ | sw SFARG1LO, 8+LO(RA)
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
- | sdc1 f0, 8(RA)
| decode_RD4b RD
| addu RD, RD, TMP3
| sw RC, -8+LO(RA) // Update control var.
| addu PC, PC, RD
- | sdc1 f2, 0(RA)
|3:
| ins_next
|
|5: // Traverse hash part.
| lw TMP1, TAB:RB->hmask
@@ -3705,22 +4518,25 @@
| bnez AT, <3
|. sll TMP3, RC, 5
| sll RB, RC, 3
| subu TMP3, TMP3, RB
| addu NODE:TMP3, TMP3, TMP2
- | lw RB, HI(NODE:TMP3)
- | ldc1 f0, 0(NODE:TMP3)
+ | lw SFARG1HI, NODE:TMP3->val.u32.hi
+ | lw SFARG1LO, NODE:TMP3->val.u32.lo
| lhu RD, -4+OFS_RD(PC)
- | beq RB, TISNIL, <6 // Skip holes in hash part.
+ | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part.
|. addiu RC, RC, 1
- | ldc1 f2, NODE:TMP3->key
+ | lw SFARG2HI, NODE:TMP3->key.u32.hi
+ | lw SFARG2LO, NODE:TMP3->key.u32.lo
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
- | sdc1 f0, 8(RA)
+ | sw SFARG1HI, 8+HI(RA)
+ | sw SFARG1LO, 8+LO(RA)
| addu RC, RC, TMP0
| decode_RD4b RD
| addu RD, RD, TMP3
- | sdc1 f2, 0(RA)
+ | sw SFARG2HI, HI(RA)
+ | sw SFARG2LO, LO(RA)
| addu PC, PC, RD
| b <3
|. sw RC, -8+LO(RA) // Update control var.
break;
@@ -3796,13 +4612,15 @@
| addu TMP2, RA, TMP1
| sltu AT, TMP0, TMP2
| bnez AT, >7
|. addiu MULTRES, TMP1, 8
|6:
- | ldc1 f0, 0(RC)
+ | lw SFRETHI, HI(RC)
+ | lw SFRETLO, LO(RC)
| addiu RC, RC, 8
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| sltu AT, RC, TMP3
| bnez AT, <6 // More vararg slots?
|. addiu RA, RA, 8
| b <3
|. nop
@@ -3854,14 +4672,16 @@
| decode_RB8b RB
| addu TMP3, TMP2, RB
| beqz RC, >3
|. subu BASE, TMP2, TMP0
|2:
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| addiu RA, RA, 8
| addiu RC, RC, -8
- | sdc1 f0, 0(TMP2)
+ | sw SFRETHI, HI(TMP2)
+ | sw SFRETLO, LO(TMP2)
| bnez RC, <2
|. addiu TMP2, TMP2, 8
|3:
| addiu TMP3, TMP3, -8
|5:
@@ -3898,18 +4718,20 @@
|. xori TMP1, PC, FRAME_VARG
|
| lw INS, -4(PC)
| addiu TMP2, BASE, -8
if (op == BC_RET1) {
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
}
| decode_RB8a RB, INS
| decode_RA8a RA, INS
| decode_RB8b RB
| decode_RA8b RA
if (op == BC_RET1) {
- | sdc1 f0, 0(TMP2)
+ | sw SFRETHI, HI(TMP2)
+ | sw SFRETLO, LO(TMP2)
}
| subu BASE, TMP2, RA
|5:
| sltu AT, RD, RB
| bnez AT, >6
@@ -3947,73 +4769,151 @@
case BC_FORI:
case BC_IFORL:
| // RA = base*8, RD = target (after end of loop or start of loop)
vk = (op == BC_IFORL || op == BC_JFORL);
| addu RA, BASE, RA
- if (vk) {
- | ldc1 f0, FORL_IDX*8(RA)
- | ldc1 f4, FORL_STEP*8(RA)
- | ldc1 f2, FORL_STOP*8(RA)
- | lw TMP3, FORL_STEP*8+HI(RA)
- | add.d f0, f0, f4
- | sdc1 f0, FORL_IDX*8(RA)
- } else {
- | lw TMP1, FORL_IDX*8+HI(RA)
- | lw TMP3, FORL_STEP*8+HI(RA)
- | lw TMP2, FORL_STOP*8+HI(RA)
- | sltiu TMP1, TMP1, LJ_TISNUM
- | sltiu TMP0, TMP3, LJ_TISNUM
- | sltiu TMP2, TMP2, LJ_TISNUM
- | and TMP1, TMP1, TMP0
- | and TMP1, TMP1, TMP2
- | ldc1 f0, FORL_IDX*8(RA)
- | beqz TMP1, ->vmeta_for
- |. ldc1 f2, FORL_STOP*8(RA)
- }
+ | lw SFARG1HI, FORL_IDX*8+HI(RA)
+ | lw SFARG1LO, FORL_IDX*8+LO(RA)
if (op != BC_JFORL) {
| srl RD, RD, 1
- | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535)
+ | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
+ | addu TMP2, RD, TMP2
}
- | c.le.d 0, f0, f2
- | c.le.d 1, f2, f0
- | sdc1 f0, FORL_EXT*8(RA)
+ if (!vk) {
+ | lw SFARG2HI, FORL_STOP*8+HI(RA)
+ | lw SFARG2LO, FORL_STOP*8+LO(RA)
+ | bne SFARG1HI, TISNUM, >5
+ |. lw SFRETHI, FORL_STEP*8+HI(RA)
+ | xor AT, SFARG2HI, TISNUM
+ | lw SFRETLO, FORL_STEP*8+LO(RA)
+ | xor TMP0, SFRETHI, TISNUM
+ | or AT, AT, TMP0
+ | bnez AT, ->vmeta_for
+ |. slt AT, SFRETLO, r0
+ | slt CRET1, SFARG2LO, SFARG1LO
+ | slt TMP1, SFARG1LO, SFARG2LO
+ | movn CRET1, TMP1, AT
+ } else {
+ | bne SFARG1HI, TISNUM, >5
+ |. lw SFARG2LO, FORL_STEP*8+LO(RA)
+ | lw SFRETLO, FORL_STOP*8+LO(RA)
+ | move TMP3, SFARG1LO
+ | addu SFARG1LO, SFARG1LO, SFARG2LO
+ | xor TMP0, SFARG1LO, TMP3
+ | xor TMP1, SFARG1LO, SFARG2LO
+ | and TMP0, TMP0, TMP1
+ | slt TMP1, SFARG1LO, SFRETLO
+ | slt CRET1, SFRETLO, SFARG1LO
+ | slt AT, SFARG2LO, r0
+ | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
+ | movn CRET1, TMP1, AT
+ | or CRET1, CRET1, TMP0
+ }
+ |1:
+ if (op == BC_FORI) {
+ | movz TMP2, r0, CRET1
+ | addu PC, PC, TMP2
+ } else if (op == BC_JFORI) {
+ | addu PC, PC, TMP2
+ | lhu RD, -4+OFS_RD(PC)
+ } else if (op == BC_IFORL) {
+ | movn TMP2, r0, CRET1
+ | addu PC, PC, TMP2
+ }
+ if (vk) {
+ | sw SFARG1HI, FORL_IDX*8+HI(RA)
+ | sw SFARG1LO, FORL_IDX*8+LO(RA)
+ }
+ | ins_next1
+ | sw SFARG1HI, FORL_EXT*8+HI(RA)
+ | sw SFARG1LO, FORL_EXT*8+LO(RA)
+ |2:
if (op == BC_JFORI) {
- | li TMP1, 1
- | li TMP2, 1
- | addu TMP0, RD, TMP0
- | slt TMP3, TMP3, r0
- | movf TMP1, r0, 0
- | addu PC, PC, TMP0
- | movf TMP2, r0, 1
- | lhu RD, -4+OFS_RD(PC)
- | movn TMP1, TMP2, TMP3
- | bnez TMP1, =>BC_JLOOP
+ | beqz CRET1, =>BC_JLOOP
|. decode_RD8b RD
} else if (op == BC_JFORL) {
- | li TMP1, 1
- | li TMP2, 1
- | slt TMP3, TMP3, r0
- | movf TMP1, r0, 0
- | movf TMP2, r0, 1
- | movn TMP1, TMP2, TMP3
- | bnez TMP1, =>BC_JLOOP
+ | beqz CRET1, =>BC_JLOOP
+ }
+ | ins_next2
+ |
+ |5: // FP loop.
+ |.if FPU
+ if (!vk) {
+ | ldc1 f0, FORL_IDX*8(RA)
+ | ldc1 f2, FORL_STOP*8(RA)
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
+ | sltiu AT, SFRETHI, LJ_TISNUM
+ | and TMP0, TMP0, TMP1
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_for
+ |. slt TMP3, SFRETHI, r0
+ | c.ole.d 0, f0, f2
+ | c.ole.d 1, f2, f0
+ | li CRET1, 1
+ | movt CRET1, r0, 0
+ | movt AT, r0, 1
+ | b <1
+ |. movn CRET1, AT, TMP3
+ } else {
+ | ldc1 f0, FORL_IDX*8(RA)
+ | ldc1 f4, FORL_STEP*8(RA)
+ | ldc1 f2, FORL_STOP*8(RA)
+ | lw SFARG2HI, FORL_STEP*8+HI(RA)
+ | add.d f0, f0, f4
+ | c.ole.d 0, f0, f2
+ | c.ole.d 1, f2, f0
+ | slt TMP3, SFARG2HI, r0
+ | li CRET1, 1
+ | li AT, 1
+ | movt CRET1, r0, 0
+ | movt AT, r0, 1
+ | movn CRET1, AT, TMP3
+ if (op == BC_IFORL) {
+ | movn TMP2, r0, CRET1
+ | addu PC, PC, TMP2
+ }
+ | sdc1 f0, FORL_IDX*8(RA)
+ | ins_next1
+ | b <2
+ |. sdc1 f0, FORL_EXT*8(RA)
+ }
+ |.else
+ if (!vk) {
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
+ | sltiu AT, SFRETHI, LJ_TISNUM
+ | and TMP0, TMP0, TMP1
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_for
|. nop
+ | bal ->vm_sfcmpolex
+ |. move TMP3, SFRETHI
+ | b <1
+ |. nop
} else {
- | addu TMP1, RD, TMP0
- | slt TMP3, TMP3, r0
- | move TMP2, TMP1
- if (op == BC_FORI) {
- | movt TMP1, r0, 0
- | movt TMP2, r0, 1
+ | lw SFARG2HI, FORL_STEP*8+HI(RA)
+ | load_got __adddf3
+ | call_extern
+ |. sw TMP2, ARG5
+ | lw SFARG2HI, FORL_STOP*8+HI(RA)
+ | lw SFARG2LO, FORL_STOP*8+LO(RA)
+ | move SFARG1HI, SFRETHI
+ | move SFARG1LO, SFRETLO
+ | bal ->vm_sfcmpolex
+ |. lw TMP3, FORL_STEP*8+HI(RA)
+ if ( op == BC_JFORL ) {
+ | lhu RD, -4+OFS_RD(PC)
+ | lw TMP2, ARG5
+ | b <1
+ |. decode_RD8b RD
} else {
- | movf TMP1, r0, 0
- | movf TMP2, r0, 1
+ | b <1
+ |. lw TMP2, ARG5
}
- | movn TMP1, TMP2, TMP3
- | addu PC, PC, TMP1
}
- | ins_next
+ |.endif
break;
case BC_ITERL:
|.if JIT
| hotloop
@@ -4258,12 +5158,14 @@
"\t.byte 0x9f\n\t.sleb128 1\n"
"\t.byte 0x9e\n\t.sleb128 2\n",
fcofs, CFRAME_SIZE);
for (i = 23; i >= 16; i--)
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
+#if !LJ_SOFTFP
for (i = 30; i >= 20; i -= 2)
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
+#endif
fprintf(ctx->fp,
"\t.align 2\n"
".LEFDE0:\n\n");
#if LJ_HASFFI
fprintf(ctx->fp,
@@ -4277,10 +5179,11 @@
"\t.byte 0x90\n\t.uleb128 2\n"
"\t.byte 0xd\n\t.uleb128 0x10\n"
"\t.align 2\n"
".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
#endif
+#if !LJ_NO_UNWIND
fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
fprintf(ctx->fp,
"\t.globl lj_err_unwind_dwarf\n"
".Lframe1:\n"
"\t.4byte .LECIE1-.LSCIE1\n"
@@ -4310,12 +5213,14 @@
"\t.byte 0x9f\n\t.sleb128 1\n"
"\t.byte 0x9e\n\t.sleb128 2\n",
fcofs, CFRAME_SIZE);
for (i = 23; i >= 16; i--)
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
+#if !LJ_SOFTFP
for (i = 30; i >= 20; i -= 2)
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
+#endif
fprintf(ctx->fp,
"\t.align 2\n"
".LEFDE2:\n\n");
#if LJ_HASFFI
fprintf(ctx->fp,
@@ -4344,9 +5249,10 @@
"\t.byte 0x9f\n\t.uleb128 1\n"
"\t.byte 0x90\n\t.uleb128 2\n"
"\t.byte 0xd\n\t.uleb128 0x10\n"
"\t.align 2\n"
".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
#endif
break;
default:
break;
}