;; aarch64 instruction selection and CLIF-to-MachInst lowering. ;; The main lowering constructor term: takes a clif `Inst` and returns the ;; register(s) within which the lowered instruction's result values live. (decl partial lower (Inst) InstOutput) ;; Variant of the main lowering constructor term, which receives an ;; additional argument (a vector of branch targets to be used) for ;; implementing branches. ;; For two-branch instructions, the first target is `taken` and the second ;; `not_taken`, even if it is a Fallthrough instruction: because we reorder ;; blocks while we lower, the fallthrough in the new order is not (necessarily) ;; the same as the fallthrough in CLIF. So, we use the explicitly-provided ;; target. (decl partial lower_branch (Inst VecMachLabel) Unit) ;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (iconst (u64_from_imm64 n)))) (imm ty (ImmExtend.Zero) n)) ;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (null))) (imm ty (ImmExtend.Zero) 0)) ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (f32const (u64_from_ieee32 n))) (constant_f32 n)) ;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (f64const (u64_from_ieee64 n))) (constant_f64 n)) ;;;; Rules for `nop` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (nop)) (invalid_reg)) ;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller ;; Base case, simply adding things in registers. (rule -1 (lower (has_type (fits_in_64 ty) (iadd x y))) (add ty x y)) ;; Special cases for when one operand is an immediate that fits in 12 bits. (rule 4 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y)))) (add_imm ty x y)) (rule 5 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y))) (add_imm ty y x)) ;; Same as the previous special cases, except we can switch the addition to a ;; subtraction if the negated immediate fits in 12 bits. (rule 2 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_negated_value y)))) (sub_imm ty x y)) (rule 3 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_negated_value x) y))) (sub_imm ty y x)) ;; Special cases for when we're adding an extended register where the extending ;; operation can get folded into the add itself. (rule 0 (lower (has_type (fits_in_64 ty) (iadd x (extended_value_from_value y)))) (add_extend ty x y)) (rule 1 (lower (has_type (fits_in_64 ty) (iadd (extended_value_from_value x) y))) (add_extend ty y x)) ;; Special cases for when we're adding the shift of a different ;; register by a constant amount and the shift can get folded into the add. (rule 7 (lower (has_type (fits_in_64 ty) (iadd x (ishl y (iconst k))))) (if-let amt (lshl_from_imm64 ty k)) (add_shift ty x y amt)) (rule 6 (lower (has_type (fits_in_64 ty) (iadd (ishl x (iconst k)) y))) (if-let amt (lshl_from_imm64 ty k)) (add_shift ty y x amt)) ;; Fold an `iadd` and `imul` combination into a `madd` instruction. (rule 7 (lower (has_type (fits_in_64 ty) (iadd x (imul y z)))) (madd ty y z x)) (rule 6 (lower (has_type (fits_in_64 ty) (iadd (imul x y) z))) (madd ty x y z)) ;; Fold an `isub` and `imul` combination into a `msub` instruction. 
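;; (`madd rd, rn, rm, ra` computes `ra + rn * rm` and `msub rd, rn, rm, ra` computes
;; `ra - rn * rm`, so each fused form needs only a single instruction; e.g.
;; `(isub x (imul y z))` becomes one `msub` with `ra = x`.)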
(rule (lower (has_type (fits_in_64 ty) (isub x (imul y z)))) (msub ty y z x)) ;; vectors (rule -2 (lower (has_type ty @ (multi_lane _ _) (iadd x y))) (add_vec x y (vector_size ty))) ;; `i128` (rule -3 (lower (has_type $I128 (iadd x y))) (let ;; Get the high/low registers for `x`. ((x_regs ValueRegs x) (x_lo Reg (value_regs_get x_regs 0)) (x_hi Reg (value_regs_get x_regs 1)) ;; Get the high/low registers for `y`. (y_regs ValueRegs y) (y_lo Reg (value_regs_get y_regs 0)) (y_hi Reg (value_regs_get y_regs 1))) ;; the actual addition is `adds` followed by `adc` which comprises the ;; low/high bits of the result (with_flags (add_with_flags_paired $I64 x_lo y_lo) (adc_paired $I64 x_hi y_hi)))) ;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (shuffle rn rn2 (u128_from_immediate mask)))) (let ((mask_reg Reg (constant_f128 mask))) (vec_tbl2 rn rn2 mask_reg ty))) ;;;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type vec_i128_ty (swizzle rn rm))) (vec_tbl rn rm)) ;;;; Rules for `isplit` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (isplit x @ (value_type $I128))) (let ((x_regs ValueRegs x) (x_lo ValueRegs (value_regs_get x_regs 0)) (x_hi ValueRegs (value_regs_get x_regs 1))) (output_pair x_lo x_hi))) ;;;; Rules for `iconcat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I128 (iconcat lo hi))) (output (value_regs lo hi))) ;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $F32X4 (scalar_to_vector x))) (fpu_extend x (ScalarSize.Size32))) (rule (lower (has_type $F64X2 (scalar_to_vector x))) (fpu_extend x (ScalarSize.Size64))) (rule -1 (lower (scalar_to_vector x @ (value_type $I64))) (mov_to_fpu x (ScalarSize.Size64))) (rule -2 (lower (scalar_to_vector x @ (value_type (int_fits_in_32 _)))) (mov_to_fpu (put_in_reg_zext32 x) (ScalarSize.Size32))) ;;;; Rules for `vall_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; cmeq vtmp.2d, vm.2d, #0 ;; addp dtmp, vtmp.2d ;; fcmp dtmp, dtmp ;; cset xd, eq ;; ;; Note that after the ADDP the value of the temporary register will be either ;; 0 when all input elements are true, i.e. non-zero, or a NaN otherwise ;; (either -1 or -2 when represented as an integer); NaNs are the only ;; floating-point numbers that compare unequal to themselves. (rule (lower (vall_true x @ (value_type (multi_lane 64 2)))) (let ((x1 Reg (cmeq0 x (VectorSize.Size64x2))) (x2 Reg (addp x1 x1 (VectorSize.Size64x2)))) (with_flags (fpu_cmp (ScalarSize.Size64) x2 x2) (materialize_bool_result (Cond.Eq))))) (rule (lower (vall_true x @ (value_type (multi_lane 32 2)))) (let ((x1 Reg (mov_from_vec x 0 (ScalarSize.Size64)))) (with_flags (cmp_rr_shift (OperandSize.Size64) (zero_reg) x1 32) (ccmp_imm (OperandSize.Size32) x1 (u8_into_uimm5 0) (nzcv $false $true $false $false) (Cond.Ne))))) ;; This operation is implemented by using uminv to create a scalar value, which ;; is then compared against zero. 
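;; (`uminv` computes the unsigned minimum across all lanes, so the resulting scalar
;; is nonzero exactly when every lane is nonzero, i.e. when all lanes are true.)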
;; ;; uminv bn, vm.16b ;; mov xm, vn.d[0] ;; cmp xm, #0 ;; cset xm, ne (rule -1 (lower (vall_true x @ (value_type (lane_fits_in_32 ty)))) (if (not_vec32x2 ty)) (let ((x1 Reg (vec_lanes (VecLanesOp.Uminv) x (vector_size ty))) (x2 Reg (mov_from_vec x1 0 (ScalarSize.Size64)))) (with_flags (cmp_imm (OperandSize.Size64) x2 (u8_into_imm12 0)) (materialize_bool_result (Cond.Ne))))) ;;;; Rules for `vany_true` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (vany_true x @ (value_type in_ty))) (with_flags (vanytrue x in_ty) (materialize_bool_result (Cond.Ne)))) ;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; special case for the `i16x8.extadd_pairwise_i8x16_s` wasm instruction (rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low x) (swiden_high x)))) (saddlp8 x)) ;; special case for the `i32x4.extadd_pairwise_i16x8_s` wasm instruction (rule (lower (has_type $I32X4 (iadd_pairwise (swiden_low x) (swiden_high x)))) (saddlp16 x)) ;; special case for the `i16x8.extadd_pairwise_i8x16_u` wasm instruction (rule (lower (has_type $I16X8 (iadd_pairwise (uwiden_low x) (uwiden_high x)))) (uaddlp8 x)) ;; special case for the `i32x4.extadd_pairwise_i16x8_u` wasm instruction (rule (lower (has_type $I32X4 (iadd_pairwise (uwiden_low x) (uwiden_high x)))) (uaddlp16 x)) (rule -1 (lower (has_type ty (iadd_pairwise x y))) (addp x y (vector_size ty))) ;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty @ (multi_lane _ _) (iabs x))) (vec_abs x (vector_size ty))) (rule 2 (lower (has_type $I64 (iabs x))) (abs (OperandSize.Size64) x)) (rule 1 (lower (has_type (fits_in_32 ty) (iabs x))) (abs (OperandSize.Size32) (put_in_reg_sext32 x))) ;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I64X2 (avg_round x y))) (let ((one Reg (splat_const 1 (VectorSize.Size64x2))) (c Reg (orr_vec x y (VectorSize.Size64x2))) (c Reg (and_vec c one (VectorSize.Size64x2))) (x Reg (vec_shift_imm (VecShiftImmOp.Ushr) 1 x (VectorSize.Size64x2))) (y Reg (vec_shift_imm (VecShiftImmOp.Ushr) 1 y (VectorSize.Size64x2))) (sum Reg (add_vec x y (VectorSize.Size64x2)))) (add_vec c sum (VectorSize.Size64x2)))) (rule -1 (lower (has_type (lane_fits_in_32 ty) (avg_round x y))) (vec_rrr (VecALUOp.Urhadd) x y (vector_size ty))) ;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty @ (multi_lane _ _) (sqmul_round_sat x y))) (vec_rrr (VecALUOp.Sqrdmulh) x y (vector_size ty))) ;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (fadd rn rm))) (vec_rrr (VecALUOp.Fadd) rn rm (vector_size ty))) (rule (lower (has_type (ty_scalar_float ty) (fadd rn rm))) (fpu_rrr (FPUOp2.Add) rn rm (scalar_size ty))) ;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (fsub rn rm))) (vec_rrr (VecALUOp.Fsub) rn rm (vector_size ty))) (rule (lower (has_type (ty_scalar_float ty) (fsub rn rm))) (fpu_rrr (FPUOp2.Sub) rn rm (scalar_size ty))) ;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (fmul rn rm))) (vec_rrr (VecALUOp.Fmul) rn rm (vector_size ty))) (rule (lower (has_type (ty_scalar_float ty) (fmul rn rm))) (fpu_rrr (FPUOp2.Mul) rn rm (scalar_size ty))) ;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 
(lower (has_type ty @ (multi_lane _ _) (fdiv rn rm))) (vec_rrr (VecALUOp.Fdiv) rn rm (vector_size ty))) (rule (lower (has_type (ty_scalar_float ty) (fdiv rn rm))) (fpu_rrr (FPUOp2.Div) rn rm (scalar_size ty))) ;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (fmin rn rm))) (vec_rrr (VecALUOp.Fmin) rn rm (vector_size ty))) (rule (lower (has_type (ty_scalar_float ty) (fmin rn rm))) (fpu_rrr (FPUOp2.Min) rn rm (scalar_size ty))) ;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (fmax rn rm))) (vec_rrr (VecALUOp.Fmax) rn rm (vector_size ty))) (rule (lower (has_type (ty_scalar_float ty) (fmax rn rm))) (fpu_rrr (FPUOp2.Max) rn rm (scalar_size ty))) ;;;; Rules for `fmin_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (fmin_pseudo rm rn))) (bsl ty (vec_rrr (VecALUOp.Fcmgt) rm rn (vector_size ty)) rn rm)) (rule (lower (has_type (ty_scalar_float ty) (fmin_pseudo rm rn))) (with_flags (fpu_cmp (scalar_size ty) rm rn) (fpu_csel ty (Cond.Gt) rn rm))) ;;;; Rules for `fmax_pseudo` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (fmax_pseudo rm rn))) (bsl ty (vec_rrr (VecALUOp.Fcmgt) rn rm (vector_size ty)) rn rm)) (rule (lower (has_type (ty_scalar_float ty) (fmax_pseudo rm rn))) (with_flags (fpu_cmp (scalar_size ty) rn rm) (fpu_csel ty (Cond.Gt) rn rm))) ;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (sqrt x))) (vec_misc (VecMisc2.Fsqrt) x (vector_size ty))) (rule (lower (has_type (ty_scalar_float ty) (sqrt x))) (fpu_rr (FPUOp1.Sqrt) x (scalar_size ty))) ;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (fneg x))) (vec_misc (VecMisc2.Fneg) x (vector_size ty))) (rule (lower (has_type (ty_scalar_float ty) (fneg x))) (fpu_rr (FPUOp1.Neg) x (scalar_size ty))) ;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (fabs x))) (vec_misc (VecMisc2.Fabs) x (vector_size ty))) (rule (lower (has_type (ty_scalar_float ty) (fabs x))) (fpu_rr (FPUOp1.Abs) x (scalar_size ty))) ;;;; Rules for `fpromote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $F64 (fpromote x))) (fpu_rr (FPUOp1.Cvt32To64) x (ScalarSize.Size32))) ;;;; Rules for `fdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $F32 (fdemote x))) (fpu_rr (FPUOp1.Cvt64To32) x (ScalarSize.Size64))) ;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (ceil x))) (vec_misc (VecMisc2.Frintp) x (vector_size ty))) (rule (lower (has_type $F32 (ceil x))) (fpu_round (FpuRoundMode.Plus32) x)) (rule (lower (has_type $F64 (ceil x))) (fpu_round (FpuRoundMode.Plus64) x)) ;;;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (floor x))) (vec_misc (VecMisc2.Frintm) x (vector_size ty))) (rule (lower (has_type $F32 (floor x))) (fpu_round (FpuRoundMode.Minus32) x)) (rule (lower (has_type $F64 (floor x))) (fpu_round (FpuRoundMode.Minus64) x)) ;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ 
(multi_lane _ _) (trunc x))) (vec_misc (VecMisc2.Frintz) x (vector_size ty))) (rule (lower (has_type $F32 (trunc x))) (fpu_round (FpuRoundMode.Zero32) x)) (rule (lower (has_type $F64 (trunc x))) (fpu_round (FpuRoundMode.Zero64) x)) ;;;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane _ _) (nearest x))) (vec_misc (VecMisc2.Frintn) x (vector_size ty))) (rule (lower (has_type $F32 (nearest x))) (fpu_round (FpuRoundMode.Nearest32) x)) (rule (lower (has_type $F64 (nearest x))) (fpu_round (FpuRoundMode.Nearest64) x)) ;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty @ (multi_lane _ _) (fma x y z))) (vec_rrr_mod (VecALUModOp.Fmla) z x y (vector_size ty))) (rule 1 (lower (has_type ty @ (multi_lane _ _) (fma (fneg x) y z))) (vec_rrr_mod (VecALUModOp.Fmls) z x y (vector_size ty))) (rule 2 (lower (has_type ty @ (multi_lane _ _) (fma x (fneg y) z))) (vec_rrr_mod (VecALUModOp.Fmls) z x y (vector_size ty))) (rule 3 (lower (has_type (ty_scalar_float ty) (fma x y z))) (fpu_rrrr (FPUOp3.MAdd) (scalar_size ty) x y z)) ;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (fcopysign x y))) (fcopy_sign x y ty)) ;;;; Rules for `fcvt_to_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F32)))) (fpu_to_int_cvt (FpuToIntOp.F32ToU32) x $false $F32 out_ty)) (rule 1 (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F32)))) (fpu_to_int_cvt (FpuToIntOp.F32ToU64) x $false $F32 $I64)) (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint x @ (value_type $F64)))) (fpu_to_int_cvt (FpuToIntOp.F64ToU32) x $false $F64 out_ty)) (rule 1 (lower (has_type $I64 (fcvt_to_uint x @ (value_type $F64)))) (fpu_to_int_cvt (FpuToIntOp.F64ToU64) x $false $F64 $I64)) ;;;; Rules for `fcvt_to_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F32)))) (fpu_to_int_cvt (FpuToIntOp.F32ToI32) x $true $F32 out_ty)) (rule 1 (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F32)))) (fpu_to_int_cvt (FpuToIntOp.F32ToI64) x $true $F32 $I64)) (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint x @ (value_type $F64)))) (fpu_to_int_cvt (FpuToIntOp.F64ToI32) x $true $F64 out_ty)) (rule 1 (lower (has_type $I64 (fcvt_to_sint x @ (value_type $F64)))) (fpu_to_int_cvt (FpuToIntOp.F64ToI64) x $true $F64 $I64)) ;;;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_uint x @ (value_type (multi_lane 32 _))))) (vec_misc (VecMisc2.Ucvtf) x (vector_size ty))) (rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_uint x @ (value_type (multi_lane 64 _))))) (vec_misc (VecMisc2.Ucvtf) x (vector_size ty))) (rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type (fits_in_32 _))))) (int_to_fpu (IntToFpuOp.U32ToF32) (put_in_reg_zext32 x))) (rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type (fits_in_32 _))))) (int_to_fpu (IntToFpuOp.U32ToF64) (put_in_reg_zext32 x))) (rule 1 (lower (has_type $F32 (fcvt_from_uint x @ (value_type $I64)))) (int_to_fpu (IntToFpuOp.U64ToF32) x)) (rule 1 (lower (has_type $F64 (fcvt_from_uint x @ (value_type $I64)))) (int_to_fpu (IntToFpuOp.U64ToF64) x)) ;;;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_from_sint x 
@ (value_type (multi_lane 32 _))))) (vec_misc (VecMisc2.Scvtf) x (vector_size ty))) (rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_from_sint x @ (value_type (multi_lane 64 _))))) (vec_misc (VecMisc2.Scvtf) x (vector_size ty))) (rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type (fits_in_32 _))))) (int_to_fpu (IntToFpuOp.I32ToF32) (put_in_reg_sext32 x))) (rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type (fits_in_32 _))))) (int_to_fpu (IntToFpuOp.I32ToF64) (put_in_reg_sext32 x))) (rule 1 (lower (has_type $F32 (fcvt_from_sint x @ (value_type $I64)))) (int_to_fpu (IntToFpuOp.I64ToF32) x)) (rule 1 (lower (has_type $F64 (fcvt_from_sint x @ (value_type $I64)))) (int_to_fpu (IntToFpuOp.I64ToF64) x)) ;;;; Rules for `fcvt_to_uint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 32 _))))) (vec_misc (VecMisc2.Fcvtzu) x (vector_size ty))) (rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_uint_sat x @ (value_type (multi_lane 64 _))))) (vec_misc (VecMisc2.Fcvtzu) x (vector_size ty))) (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F32)))) (fpu_to_int_cvt_sat (FpuToIntOp.F32ToU32) x $false out_ty)) (rule 1 (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F32)))) (fpu_to_int_cvt_sat (FpuToIntOp.F32ToU64) x $false $I64)) (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_uint_sat x @ (value_type $F64)))) (fpu_to_int_cvt_sat (FpuToIntOp.F64ToU32) x $false out_ty)) (rule 1 (lower (has_type $I64 (fcvt_to_uint_sat x @ (value_type $F64)))) (fpu_to_int_cvt_sat (FpuToIntOp.F64ToU64) x $false $I64)) ;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty @ (multi_lane 32 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 32 _))))) (vec_misc (VecMisc2.Fcvtzs) x (vector_size ty))) (rule -1 (lower (has_type ty @ (multi_lane 64 _) (fcvt_to_sint_sat x @ (value_type (multi_lane 64 _))))) (vec_misc (VecMisc2.Fcvtzs) x (vector_size ty))) (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F32)))) (fpu_to_int_cvt_sat (FpuToIntOp.F32ToI32) x $true out_ty)) (rule 1 (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F32)))) (fpu_to_int_cvt_sat (FpuToIntOp.F32ToI64) x $true $I64)) (rule (lower (has_type (fits_in_32 out_ty) (fcvt_to_sint_sat x @ (value_type $F64)))) (fpu_to_int_cvt_sat (FpuToIntOp.F64ToI32) x $true out_ty)) (rule 1 (lower (has_type $I64 (fcvt_to_sint_sat x @ (value_type $F64)))) (fpu_to_int_cvt_sat (FpuToIntOp.F64ToI64) x $true $I64)) ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller ;; Base case, simply subtracting things in registers. (rule -4 (lower (has_type (fits_in_64 ty) (isub x y))) (sub ty x y)) ;; Special case for when one operand is an immediate that fits in 12 bits. (rule 0 (lower (has_type (fits_in_64 ty) (isub x (imm12_from_value y)))) (sub_imm ty x y)) ;; Same as the previous special case, except we can switch the subtraction to an ;; addition if the negated immediate fits in 12 bits. (rule 2 (lower (has_type (fits_in_64 ty) (isub x (imm12_from_negated_value y)))) (add_imm ty x y)) ;; Special cases for when we're subtracting an extended register where the ;; extending operation can get folded into the sub itself. 
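;; For example (illustrative), `(isub x (uextend y))` with an 8-bit `y` can be emitted
;; as a single `sub xd, xn, wm, uxtb`; the same applies to the other extend forms.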
(rule 1 (lower (has_type (fits_in_64 ty) (isub x (extended_value_from_value y)))) (sub_extend ty x y)) ;; Finally a special case for when we're subtracting the shift of a different ;; register by a constant amount and the shift can get folded into the sub. (rule -3 (lower (has_type (fits_in_64 ty) (isub x (ishl y (iconst k))))) (if-let amt (lshl_from_imm64 ty k)) (sub_shift ty x y amt)) ;; vectors (rule -2 (lower (has_type ty @ (multi_lane _ _) (isub x y))) (sub_vec x y (vector_size ty))) ;; `i128` (rule -1 (lower (has_type $I128 (isub x y))) (sub_i128 x y)) ;;;; Rules for `uadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (ty_vec128 ty) (uadd_sat x y))) (uqadd x y (vector_size ty))) ;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (ty_vec128 ty) (sadd_sat x y))) (sqadd x y (vector_size ty))) ;;;; Rules for `usub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (ty_vec128 ty) (usub_sat x y))) (uqsub x y (vector_size ty))) ;;;; Rules for `ssub_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (ty_vec128 ty) (ssub_sat x y))) (sqsub x y (vector_size ty))) ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. (rule 1 (lower (has_type (fits_in_64 ty) (ineg x))) (sub ty (zero_reg) x)) ;; `i128` (rule 2 (lower (has_type $I128 (ineg x))) (sub_i128 (value_regs_zero) x)) ;; vectors. (rule (lower (has_type (ty_vec128 ty) (ineg x))) (neg x (vector_size ty))) ;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. (rule -3 (lower (has_type (fits_in_64 ty) (imul x y))) (madd ty x y (zero_reg))) ;; `i128`. (rule -1 (lower (has_type $I128 (imul x y))) (let ;; Get the high/low registers for `x`. ((x_regs ValueRegs x) (x_lo Reg (value_regs_get x_regs 0)) (x_hi Reg (value_regs_get x_regs 1)) ;; Get the high/low registers for `y`. (y_regs ValueRegs y) (y_lo Reg (value_regs_get y_regs 0)) (y_hi Reg (value_regs_get y_regs 1)) ;; 128bit mul formula: ;; dst_lo = x_lo * y_lo ;; dst_hi = umulhi(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo) ;; ;; We can convert the above formula into the following ;; umulh dst_hi, x_lo, y_lo ;; madd dst_hi, x_lo, y_hi, dst_hi ;; madd dst_hi, x_hi, y_lo, dst_hi ;; madd dst_lo, x_lo, y_lo, zero (dst_hi1 Reg (umulh $I64 x_lo y_lo)) (dst_hi2 Reg (madd $I64 x_lo y_hi dst_hi1)) (dst_hi Reg (madd $I64 x_hi y_lo dst_hi2)) (dst_lo Reg (madd $I64 x_lo y_lo (zero_reg)))) (value_regs dst_lo dst_hi))) ;; Case for i8x16, i16x8, and i32x4. (rule -2 (lower (has_type (ty_vec128 ty @ (not_i64x2)) (imul x y))) (mul x y (vector_size ty))) ;; Special lowering for i64x2. ;; ;; This I64X2 multiplication is performed with several 32-bit ;; operations. ;; ;; 64-bit numbers x and y, can be represented as: ;; x = a + 2^32(b) ;; y = c + 2^32(d) ;; ;; A 64-bit multiplication is: ;; x * y = ac + 2^32(ad + bc) + 2^64(bd) ;; note: `2^64(bd)` can be ignored, the value is too large to fit in ;; 64 bits. 
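;; (All arithmetic here wraps modulo 2^64, so the `bd` term and any carries out of
;; bit 63 are simply discarded, matching the semantics of a 64-bit multiply.)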
;; ;; This sequence implements a I64X2 multiply, where the registers ;; `rn` and `rm` are split up into 32-bit components: ;; rn = |d|c|b|a| ;; rm = |h|g|f|e| ;; ;; rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)| ;; ;; The sequence is: ;; rev64 rd.4s, rm.4s ;; mul rd.4s, rd.4s, rn.4s ;; xtn tmp1.2s, rn.2d ;; addp rd.4s, rd.4s, rd.4s ;; xtn tmp2.2s, rm.2d ;; shll rd.2d, rd.2s, #32 ;; umlal rd.2d, tmp2.2s, tmp1.2s (rule -1 (lower (has_type $I64X2 (imul x y))) (let ((rn Reg x) (rm Reg y) ;; Reverse the 32-bit elements in the 64-bit words. ;; rd = |g|h|e|f| (rev Reg (rev64 rm (VectorSize.Size32x4))) ;; Calculate the high half components. ;; rd = |dg|ch|be|af| ;; ;; Note that this 32-bit multiply of the high half ;; discards the bits that would overflow, same as ;; if 64-bit operations were used. Also the Shll ;; below would shift out the overflow bits anyway. (mul Reg (mul rev rn (VectorSize.Size32x4))) ;; Extract the low half components of rn. ;; tmp1 = |c|a| (tmp1 Reg (xtn rn (ScalarSize.Size32))) ;; Sum the respective high half components. ;; rd = |dg+ch|be+af||dg+ch|be+af| (sum Reg (addp mul mul (VectorSize.Size32x4))) ;; Extract the low half components of rm. ;; tmp2 = |g|e| (tmp2 Reg (xtn rm (ScalarSize.Size32))) ;; Shift the high half components, into the high half. ;; rd = |dg+ch << 32|be+af << 32| (shift Reg (shll32 sum $false)) ;; Multiply the low components together, and accumulate with the high ;; half. ;; rd = |rd[1] + cg|rd[0] + ae| (result Reg (umlal32 shift tmp2 tmp1 $false))) result)) ;; Special case for `i16x8.extmul_low_i8x16_s`. (rule (lower (has_type $I16X8 (imul (swiden_low x @ (value_type $I8X16)) (swiden_low y @ (value_type $I8X16))))) (smull8 x y $false)) ;; Special case for `i16x8.extmul_high_i8x16_s`. (rule (lower (has_type $I16X8 (imul (swiden_high x @ (value_type $I8X16)) (swiden_high y @ (value_type $I8X16))))) (smull8 x y $true)) ;; Special case for `i16x8.extmul_low_i8x16_u`. (rule (lower (has_type $I16X8 (imul (uwiden_low x @ (value_type $I8X16)) (uwiden_low y @ (value_type $I8X16))))) (umull8 x y $false)) ;; Special case for `i16x8.extmul_high_i8x16_u`. (rule (lower (has_type $I16X8 (imul (uwiden_high x @ (value_type $I8X16)) (uwiden_high y @ (value_type $I8X16))))) (umull8 x y $true)) ;; Special case for `i32x4.extmul_low_i16x8_s`. (rule (lower (has_type $I32X4 (imul (swiden_low x @ (value_type $I16X8)) (swiden_low y @ (value_type $I16X8))))) (smull16 x y $false)) ;; Special case for `i32x4.extmul_high_i16x8_s`. (rule (lower (has_type $I32X4 (imul (swiden_high x @ (value_type $I16X8)) (swiden_high y @ (value_type $I16X8))))) (smull16 x y $true)) ;; Special case for `i32x4.extmul_low_i16x8_u`. (rule (lower (has_type $I32X4 (imul (uwiden_low x @ (value_type $I16X8)) (uwiden_low y @ (value_type $I16X8))))) (umull16 x y $false)) ;; Special case for `i32x4.extmul_high_i16x8_u`. (rule (lower (has_type $I32X4 (imul (uwiden_high x @ (value_type $I16X8)) (uwiden_high y @ (value_type $I16X8))))) (umull16 x y $true)) ;; Special case for `i64x2.extmul_low_i32x4_s`. (rule (lower (has_type $I64X2 (imul (swiden_low x @ (value_type $I32X4)) (swiden_low y @ (value_type $I32X4))))) (smull32 x y $false)) ;; Special case for `i64x2.extmul_high_i32x4_s`. (rule (lower (has_type $I64X2 (imul (swiden_high x @ (value_type $I32X4)) (swiden_high y @ (value_type $I32X4))))) (smull32 x y $true)) ;; Special case for `i64x2.extmul_low_i32x4_u`. 
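;; (In all of these extmul special cases the trailing `$false`/`$true` argument selects
;; the `smull`/`umull` vs. `smull2`/`umull2` form, i.e. whether the low or the high
;; halves of the inputs are multiplied together.)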
(rule (lower (has_type $I64X2 (imul (uwiden_low x @ (value_type $I32X4)) (uwiden_low y @ (value_type $I32X4))))) (umull32 x y $false)) ;; Special case for `i64x2.extmul_high_i32x4_u`. (rule (lower (has_type $I64X2 (imul (uwiden_high x @ (value_type $I32X4)) (uwiden_high y @ (value_type $I32X4))))) (umull32 x y $true)) ;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (has_type $I64 (smulhi x y))) (smulh $I64 x y)) (rule (lower (has_type (fits_in_32 ty) (smulhi x y))) (let ((x64 Reg (put_in_reg_sext64 x)) (y64 Reg (put_in_reg_sext64 y)) (mul Reg (madd $I64 x64 y64 (zero_reg))) (result Reg (asr_imm $I64 mul (imm_shift_from_u8 (ty_bits ty))))) result)) ;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (has_type $I64 (umulhi x y))) (umulh $I64 x y)) (rule (lower (has_type (fits_in_32 ty) (umulhi x y))) (let ( (x64 Reg (put_in_reg_zext64 x)) (y64 Reg (put_in_reg_zext64 y)) (mul Reg (madd $I64 x64 y64 (zero_reg))) (result Reg (lsr_imm $I64 mul (imm_shift_from_u8 (ty_bits ty)))) ) (value_reg result))) ;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; TODO: Add UDiv32 to implement 32-bit directly, rather ;; than extending the input. ;; ;; Note that aarch64's `udiv` doesn't trap so to respect the semantics of ;; CLIF's `udiv` the check for zero needs to be manually performed. (rule (lower (has_type (fits_in_64 ty) (udiv x y))) (a64_udiv $I64 (put_in_reg_zext64 x) (put_nonzero_in_reg_zext64 y))) ;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero. (decl put_nonzero_in_reg_zext64 (Value) Reg) (rule -1 (put_nonzero_in_reg_zext64 val) (trap_if_zero_divisor (put_in_reg_zext64 val))) ;; Special case where if a `Value` is known to be nonzero we can trivially ;; move it into a register. (rule (put_nonzero_in_reg_zext64 (and (value_type ty) (iconst (nonzero_u64_from_imm64 n)))) (imm ty (ImmExtend.Zero) n)) ;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; TODO: Add SDiv32 to implement 32-bit directly, rather ;; than extending the input. ;; ;; The sequence of checks here should look like: ;; ;; cbnz rm, #8 ;; udf ; divide by zero ;; cmn rm, 1 ;; ccmp rn, 1, #nzcv, eq ;; b.vc #8 ;; udf ; signed overflow ;; ;; Note The div instruction does not trap on divide by zero or overflow, so ;; checks need to be manually inserted. ;; ;; TODO: if `y` is -1 then a check that `x` is not INT_MIN is all that's ;; necessary, but right now `y` is checked to not be -1 as well. (rule (lower (has_type (fits_in_64 ty) (sdiv x y))) (let ((x64 Reg (put_in_reg_sext64 x)) (y64 Reg (put_nonzero_in_reg_sext64 y)) (valid_x64 Reg (trap_if_div_overflow ty x64 y64)) (result Reg (a64_sdiv $I64 valid_x64 y64))) result)) ;; Helper for extracting an immediate that's not 0 and not -1 from an imm64. (decl safe_divisor_from_imm64 (u64) Imm64) (extern extractor safe_divisor_from_imm64 safe_divisor_from_imm64) ;; Special case for `sdiv` where no checks are needed due to division by a ;; constant meaning the checks are always passed. (rule 1 (lower (has_type (fits_in_64 ty) (sdiv x (iconst (safe_divisor_from_imm64 y))))) (a64_sdiv $I64 (put_in_reg_sext64 x) (imm ty (ImmExtend.Sign) y))) ;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero. 
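;; As with the zero-extending helper above, the zero check materializes as a
;; `cbnz`-over-`udf` pair, per the sequence sketched in the `sdiv` comment.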
(decl put_nonzero_in_reg_sext64 (Value) Reg) (rule -1 (put_nonzero_in_reg_sext64 val) (trap_if_zero_divisor (put_in_reg_sext64 val))) ;; Note that this has a special case where if the `Value` is a constant that's ;; not zero we can skip the zero check. (rule (put_nonzero_in_reg_sext64 (and (value_type ty) (iconst (nonzero_u64_from_imm64 n)))) (imm ty (ImmExtend.Sign) n)) ;;;; Rules for `urem` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Remainder (x % y) is implemented as: ;; ;; tmp = x / y ;; result = x - (tmp*y) ;; ;; use 'result' for tmp and you have: ;; ;; cbnz y, #8 ; branch over trap ;; udf ; divide by zero ;; div rd, x, y ; rd = x / y ;; msub rd, rd, y, x ; rd = x - rd * y (rule (lower (has_type (fits_in_64 ty) (urem x y))) (let ((x64 Reg (put_in_reg_zext64 x)) (y64 Reg (put_nonzero_in_reg_zext64 y)) (div Reg (a64_udiv $I64 x64 y64)) (result Reg (msub $I64 div y64 x64))) result)) (rule (lower (has_type (fits_in_64 ty) (srem x y))) (let ((x64 Reg (put_in_reg_sext64 x)) (y64 Reg (put_nonzero_in_reg_sext64 y)) (div Reg (a64_sdiv $I64 x64 y64)) (result Reg (msub $I64 div y64 x64))) result)) ;;; Rules for integer min/max: umin, smin, umax, smax ;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. ;; cmp $x, $y ;; csel .., $x, $y, $cc (decl cmp_and_choose (Type Cond bool Value Value) ValueRegs) (rule (cmp_and_choose (fits_in_64 ty) cc _ x y) (let ((x Reg (put_in_reg x)) (y Reg (put_in_reg y))) (with_flags_reg (cmp (operand_size ty) x y) (csel cc x y)))) ;; `i16` and `i8` min/max require sign extension as ;; the comparison operates on (at least) 32 bits. (rule 1 (cmp_and_choose (fits_in_16 ty) cc signed x y) (let ((x Reg (extend (put_in_reg x) signed (ty_bits ty) 32)) (y Reg (extend (put_in_reg y) signed (ty_bits ty) 32))) (with_flags_reg (cmp (operand_size ty) x y) (csel cc x y)))) (rule 2 (lower (has_type (and (fits_in_64 ty) (ty_int _)) (umin x y))) (cmp_and_choose ty (Cond.Lo) $false x y)) (rule 2 (lower (has_type (and (fits_in_64 ty) (ty_int _)) (smin x y))) (cmp_and_choose ty (Cond.Lt) $true x y)) (rule 2 (lower (has_type (and (fits_in_64 ty) (ty_int _)) (umax x y))) (cmp_and_choose ty (Cond.Hi) $false x y)) (rule 2 (lower (has_type (and (fits_in_64 ty) (ty_int _)) (smax x y))) (cmp_and_choose ty (Cond.Gt) $true x y)) ;; Vector types. (rule (lower (has_type ty @ (not_i64x2) (smin x y))) (vec_rrr (VecALUOp.Smin) x y (vector_size ty))) (rule 1 (lower (has_type $I64X2 (smin x y))) (bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) y x (VectorSize.Size64x2)) x y)) (rule (lower (has_type ty @ (not_i64x2) (umin x y))) (vec_rrr (VecALUOp.Umin) x y (vector_size ty))) (rule 1 (lower (has_type $I64X2 (umin x y))) (bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) y x (VectorSize.Size64x2)) x y)) (rule (lower (has_type ty @ (not_i64x2) (smax x y))) (vec_rrr (VecALUOp.Smax) x y (vector_size ty))) (rule 1 (lower (has_type $I64X2 (smax x y))) (bsl $I64X2 (vec_rrr (VecALUOp.Cmgt) x y (VectorSize.Size64x2)) x y)) (rule (lower (has_type ty @ (not_i64x2) (umax x y))) (vec_rrr (VecALUOp.Umax) x y (vector_size ty))) (rule 1 (lower (has_type $I64X2 (umax x y))) (bsl $I64X2 (vec_rrr (VecALUOp.Cmhi) x y (VectorSize.Size64x2)) x y)) ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; General rule for extending input to an output which fits in a single ;; register. 
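;; For example (illustrative), a `uextend` from `i8` to `i32` needs only a single
;; `uxtb wd, wn`, and a `uextend` from `i32` to `i64` needs only a 32-bit `mov`,
;; since writing a W register zeroes the upper 32 bits.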
(rule -2 (lower (has_type (fits_in_64 out) (uextend x @ (value_type in)))) (extend x $false (ty_bits in) (ty_bits out))) ;; Extraction of a vector lane automatically extends as necessary, so we can ;; skip an explicit extending instruction. (rule 1 (lower (has_type (fits_in_64 out) (uextend (extractlane vec @ (value_type in) (u8_from_uimm8 lane))))) (mov_from_vec (put_in_reg vec) lane (lane_size in))) ;; Atomic loads will also automatically zero their upper bits so the `uextend` ;; instruction can effectively get skipped here. (rule 1 (lower (has_type (fits_in_64 out) (uextend x @ (and (value_type in) (atomic_load flags _))))) (if-let mem_op (is_sinkable_inst x)) (load_acquire in flags (sink_atomic_load mem_op))) ;; Conversion to 128-bit needs a zero-extension of the lower bits and the upper ;; bits are all zero. (rule -1 (lower (has_type $I128 (uextend x))) (value_regs (put_in_reg_zext64 x) (imm $I64 (ImmExtend.Zero) 0))) ;; Like above where vector extraction automatically zero-extends extending to ;; i128 only requires generating a 0 constant for the upper bits. (rule (lower (has_type $I128 (uextend (extractlane vec @ (value_type in) (u8_from_uimm8 lane))))) (value_regs (mov_from_vec (put_in_reg vec) lane (lane_size in)) (imm $I64 (ImmExtend.Zero) 0))) ;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; General rule for extending input to an output which fits in a single ;; register. (rule -4 (lower (has_type (fits_in_64 out) (sextend x @ (value_type in)))) (extend x $true (ty_bits in) (ty_bits out))) ;; Extraction of a vector lane automatically extends as necessary, so we can ;; skip an explicit extending instruction. (rule -3 (lower (has_type (fits_in_64 out) (sextend (extractlane vec @ (value_type in) (u8_from_uimm8 lane))))) (mov_from_vec_signed (put_in_reg vec) lane (vector_size in) (size_from_ty out))) ;; 64-bit to 128-bit only needs to sign-extend the input to the upper bits. (rule -2 (lower (has_type $I128 (sextend x))) (let ((lo Reg (put_in_reg_sext64 x)) (hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63)))) (value_regs lo hi))) ;; Like above where vector extraction automatically zero-extends extending to ;; i128 only requires generating a 0 constant for the upper bits. ;; ;; Note that `mov_from_vec_signed` doesn't exist for i64x2, so that's ;; specifically excluded here. (rule (lower (has_type $I128 (sextend (extractlane vec @ (value_type in @ (not_i64x2)) (u8_from_uimm8 lane))))) (let ((lo Reg (mov_from_vec_signed (put_in_reg vec) lane (vector_size in) (size_from_ty $I64))) (hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63)))) (value_regs lo hi))) ;; Extension from an extraction of i64x2 into i128. (rule -1 (lower (has_type $I128 (sextend (extractlane vec @ (value_type $I64X2) (u8_from_uimm8 lane))))) (let ((lo Reg (mov_from_vec (put_in_reg vec) lane (ScalarSize.Size64))) (hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63)))) (value_regs lo hi))) ;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Base case using `orn` between two registers. ;; ;; Note that bitwise negation is implemented here as ;; ;; NOT rd, rm ==> ORR_NOT rd, zero, rm (rule -1 (lower (has_type (fits_in_64 ty) (bnot x))) (orr_not ty (zero_reg) x)) ;; Special case to use `orr_not_shift` if it's a `bnot` of a const-left-shifted ;; value. (rule 1 (lower (has_type (fits_in_64 ty) (bnot (ishl x (iconst k))))) (if-let amt (lshl_from_imm64 ty k)) (orr_not_shift ty (zero_reg) x amt)) ;; Implementation of `bnot` for `i128`. 
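;; Each 64-bit half is inverted independently, roughly:
;;
;;   orn dst_lo, xzr, x_lo   ; i.e. mvn
;;   orn dst_hi, xzr, x_hi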
(rule (lower (has_type $I128 (bnot x))) (let ((x_regs ValueRegs x) (x_lo Reg (value_regs_get x_regs 0)) (x_hi Reg (value_regs_get x_regs 1)) (new_lo Reg (orr_not $I64 (zero_reg) x_lo)) (new_hi Reg (orr_not $I64 (zero_reg) x_hi))) (value_regs new_lo new_hi))) ;; Implementation of `bnot` for vector types. (rule -2 (lower (has_type (ty_vec128 ty) (bnot x))) (not x (vector_size ty))) ;; Special-cases for fusing a bnot with bxor (rule 2 (lower (has_type (fits_in_64 ty) (bnot (bxor x y)))) (alu_rs_imm_logic (ALUOp.EorNot) ty x y)) (rule 3 (lower (has_type $I128 (bnot (bxor x y)))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y)) ;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type (fits_in_64 ty) (band x y))) (alu_rs_imm_logic_commutative (ALUOp.And) ty x y)) (rule (lower (has_type $I128 (band x y))) (i128_alu_bitop (ALUOp.And) $I64 x y)) (rule -2 (lower (has_type (ty_vec128 ty) (band x y))) (and_vec x y (vector_size ty))) ;; Specialized lowerings for `(band x (bnot y))` which is additionally produced ;; by Cranelift's `band_not` instruction that is legalized into the simpler ;; forms early on. (rule 1 (lower (has_type (fits_in_64 ty) (band x (bnot y)))) (alu_rs_imm_logic (ALUOp.AndNot) ty x y)) (rule 2 (lower (has_type (fits_in_64 ty) (band (bnot y) x))) (alu_rs_imm_logic (ALUOp.AndNot) ty x y)) (rule 3 (lower (has_type $I128 (band x (bnot y)))) (i128_alu_bitop (ALUOp.AndNot) $I64 x y)) (rule 4 (lower (has_type $I128 (band (bnot y) x))) (i128_alu_bitop (ALUOp.AndNot) $I64 x y)) (rule 5 (lower (has_type (ty_vec128 ty) (band x (bnot y)))) (bic_vec x y (vector_size ty))) (rule 6 (lower (has_type (ty_vec128 ty) (band (bnot y) x))) (bic_vec x y (vector_size ty))) ;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type (fits_in_64 ty) (bor x y))) (alu_rs_imm_logic_commutative (ALUOp.Orr) ty x y)) (rule (lower (has_type $I128 (bor x y))) (i128_alu_bitop (ALUOp.Orr) $I64 x y)) (rule -2 (lower (has_type (ty_vec128 ty) (bor x y))) (orr_vec x y (vector_size ty))) ;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced ;; by Cranelift's `bor_not` instruction that is legalized into the simpler ;; forms early on. (rule 1 (lower (has_type (fits_in_64 ty) (bor x (bnot y)))) (alu_rs_imm_logic (ALUOp.OrrNot) ty x y)) (rule 2 (lower (has_type (fits_in_64 ty) (bor (bnot y) x))) (alu_rs_imm_logic (ALUOp.OrrNot) ty x y)) (rule 3 (lower (has_type $I128 (bor x (bnot y)))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y)) (rule 4 (lower (has_type $I128 (bor (bnot y) x))) (i128_alu_bitop (ALUOp.OrrNot) $I64 x y)) ;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type (fits_in_64 ty) (bxor x y))) (alu_rs_imm_logic_commutative (ALUOp.Eor) ty x y)) (rule (lower (has_type $I128 (bxor x y))) (i128_alu_bitop (ALUOp.Eor) $I64 x y)) (rule -2 (lower (has_type (ty_vec128 ty) (bxor x y))) (eor_vec x y (vector_size ty))) ;; Specialized lowerings for `(bxor x (bnot y))` which is additionally produced ;; by Cranelift's `bxor_not` instruction that is legalized into the simpler ;; forms early on. 
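;; (`ALUOp.EorNot` is the `eon` instruction: exclusive-OR with the bitwise complement
;; of the second operand, which saves a separate `mvn`.)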
(rule 1 (lower (has_type (fits_in_64 ty) (bxor x (bnot y)))) (alu_rs_imm_logic (ALUOp.EorNot) ty x y)) (rule 2 (lower (has_type (fits_in_64 ty) (bxor (bnot y) x))) (alu_rs_imm_logic (ALUOp.EorNot) ty x y)) (rule 3 (lower (has_type $I128 (bxor x (bnot y)))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y)) (rule 4 (lower (has_type $I128 (bxor (bnot y) x))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y)) ;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Shift for i8/i16/i32. (rule -1 (lower (has_type (fits_in_32 ty) (ishl x y))) (do_shift (ALUOp.Lsl) ty x y)) ;; Shift for i64. (rule (lower (has_type $I64 (ishl x y))) (do_shift (ALUOp.Lsl) $I64 x y)) ;; Shift for i128. (rule (lower (has_type $I128 (ishl x y))) (lower_shl128 x (value_regs_get y 0))) ;; lsl lo_lshift, src_lo, amt ;; lsl hi_lshift, src_hi, amt ;; mvn inv_amt, amt ;; lsr lo_rshift, src_lo, #1 ;; lsr lo_rshift, lo_rshift, inv_amt ;; orr maybe_hi, hi_lshift, lo_rshift ;; tst amt, #0x40 ;; csel dst_hi, lo_lshift, maybe_hi, ne ;; csel dst_lo, xzr, lo_lshift, ne (decl lower_shl128 (ValueRegs Reg) ValueRegs) (rule (lower_shl128 src amt) (let ((src_lo Reg (value_regs_get src 0)) (src_hi Reg (value_regs_get src 1)) (lo_lshift Reg (lsl $I64 src_lo amt)) (hi_lshift Reg (lsl $I64 src_hi amt)) (inv_amt Reg (orr_not $I32 (zero_reg) amt)) (lo_rshift Reg (lsr $I64 (lsr_imm $I64 src_lo (imm_shift_from_u8 1)) inv_amt)) (maybe_hi Reg (orr $I64 hi_lshift lo_rshift)) ) (with_flags (tst_imm $I64 amt (u64_into_imm_logic $I64 64)) (consumes_flags_concat (csel (Cond.Ne) (zero_reg) lo_lshift) (csel (Cond.Ne) lo_lshift maybe_hi))))) ;; Shift for vector types. (rule -2 (lower (has_type (ty_vec128 ty) (ishl x y))) (let ((size VectorSize (vector_size ty)) (masked_shift_amt Reg (and_imm $I32 y (shift_mask ty))) (shift Reg (vec_dup masked_shift_amt size))) (sshl x shift size))) ;; Helper function to emit a shift operation with the opcode specified and ;; the output type specified. The `Reg` provided is shifted by the `Value` ;; given. ;; ;; Note that this automatically handles the clif semantics of masking the ;; shift amount where necessary. (decl do_shift (ALUOp Type Reg Value) Reg) ;; 8/16-bit shift base case. ;; ;; When shifting for amounts larger than the size of the type, the CLIF shift ;; instructions implement a "wrapping" behaviour, such that an i8 << 8 is ;; equivalent to i8 << 0 ;; ;; On i32 and i64 types this matches what the aarch64 spec does, but on smaller ;; types (i16, i8) we need to do this manually, so we wrap the shift amount ;; with an AND instruction (rule -1 (do_shift op (fits_in_16 ty) x y) (let ((shift_amt Reg (value_regs_get y 0)) (masked_shift_amt Reg (and_imm $I32 shift_amt (shift_mask ty)))) (alu_rrr op $I32 x masked_shift_amt))) (decl shift_mask (Type) ImmLogic) (extern constructor shift_mask shift_mask) ;; 32/64-bit shift base cases. (rule (do_shift op $I32 x y) (alu_rrr op $I32 x (value_regs_get y 0))) (rule (do_shift op $I64 x y) (alu_rrr op $I64 x (value_regs_get y 0))) ;; Special case for shifting by a constant value where the value can fit into an ;; `ImmShift`. ;; ;; Note that this rule explicitly has a higher priority than the others ;; to ensure it's attempted first, otherwise the type-based filters on the ;; previous rules seem to take priority over this rule. (rule 1 (do_shift op ty x (iconst k)) (if-let shift (imm_shift_from_imm64 ty k)) (alu_rr_imm_shift op ty x shift)) ;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Shift for i8/i16/i32. 
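;; The narrow input is zero-extended to 32 bits first so that the bits shifted in
;; from above the value are zeroes rather than stale register contents.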
(rule -1 (lower (has_type (fits_in_32 ty) (ushr x y))) (do_shift (ALUOp.Lsr) ty (put_in_reg_zext32 x) y)) ;; Shift for i64. (rule (lower (has_type $I64 (ushr x y))) (do_shift (ALUOp.Lsr) $I64 (put_in_reg_zext64 x) y)) ;; Shift for i128. (rule (lower (has_type $I128 (ushr x y))) (lower_ushr128 x (value_regs_get y 0))) ;; Vector shifts. (rule -2 (lower (has_type (ty_vec128 ty) (ushr x y))) (let ((size VectorSize (vector_size ty)) (masked_shift_amt Reg (and_imm $I32 y (shift_mask ty))) (shift Reg (vec_dup (sub $I64 (zero_reg) masked_shift_amt) size))) (ushl x shift size))) ;; lsr lo_rshift, src_lo, amt ;; lsr hi_rshift, src_hi, amt ;; mvn inv_amt, amt ;; lsl hi_lshift, src_hi, #1 ;; lsl hi_lshift, hi_lshift, inv_amt ;; tst amt, #0x40 ;; orr maybe_lo, lo_rshift, hi_lshift ;; csel dst_hi, xzr, hi_rshift, ne ;; csel dst_lo, hi_rshift, maybe_lo, ne (decl lower_ushr128 (ValueRegs Reg) ValueRegs) (rule (lower_ushr128 src amt) (let ((src_lo Reg (value_regs_get src 0)) (src_hi Reg (value_regs_get src 1)) (lo_rshift Reg (lsr $I64 src_lo amt)) (hi_rshift Reg (lsr $I64 src_hi amt)) (inv_amt Reg (orr_not $I32 (zero_reg) amt)) (hi_lshift Reg (lsl $I64 (lsl_imm $I64 src_hi (imm_shift_from_u8 1)) inv_amt)) (maybe_lo Reg (orr $I64 lo_rshift hi_lshift)) ) (with_flags (tst_imm $I64 amt (u64_into_imm_logic $I64 64)) (consumes_flags_concat (csel (Cond.Ne) hi_rshift maybe_lo) (csel (Cond.Ne) (zero_reg) hi_rshift))))) ;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Shift for i8/i16/i32. (rule -2 (lower (has_type (fits_in_32 ty) (sshr x y))) (do_shift (ALUOp.Asr) ty (put_in_reg_sext32 x) y)) ;; Shift for i64. (rule (lower (has_type $I64 (sshr x y))) (do_shift (ALUOp.Asr) $I64 (put_in_reg_sext64 x) y)) ;; Shift for i128. (rule (lower (has_type $I128 (sshr x y))) (lower_sshr128 x (value_regs_get y 0))) ;; Vector shifts. ;; ;; Note that right shifts are implemented with a negative left shift. (rule -1 (lower (has_type (ty_vec128 ty) (sshr x y))) (let ((size VectorSize (vector_size ty)) (masked_shift_amt Reg (and_imm $I32 y (shift_mask ty))) (shift Reg (vec_dup (sub $I64 (zero_reg) masked_shift_amt) size))) (sshl x shift size))) ;; lsr lo_rshift, src_lo, amt ;; asr hi_rshift, src_hi, amt ;; mvn inv_amt, amt ;; lsl hi_lshift, src_hi, #1 ;; lsl hi_lshift, hi_lshift, inv_amt ;; asr hi_sign, src_hi, #63 ;; orr maybe_lo, lo_rshift, hi_lshift ;; tst amt, #0x40 ;; csel dst_hi, hi_sign, hi_rshift, ne ;; csel dst_lo, hi_rshift, maybe_lo, ne (decl lower_sshr128 (ValueRegs Reg) ValueRegs) (rule (lower_sshr128 src amt) (let ((src_lo Reg (value_regs_get src 0)) (src_hi Reg (value_regs_get src 1)) (lo_rshift Reg (lsr $I64 src_lo amt)) (hi_rshift Reg (asr $I64 src_hi amt)) (inv_amt Reg (orr_not $I32 (zero_reg) amt)) (hi_lshift Reg (lsl $I64 (lsl_imm $I64 src_hi (imm_shift_from_u8 1)) inv_amt)) (hi_sign Reg (asr_imm $I64 src_hi (imm_shift_from_u8 63))) (maybe_lo Reg (orr $I64 lo_rshift hi_lshift)) ) (with_flags (tst_imm $I64 amt (u64_into_imm_logic $I64 64)) (consumes_flags_concat (csel (Cond.Ne) hi_rshift maybe_lo) (csel (Cond.Ne) hi_sign hi_rshift))))) ;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; General 8/16-bit case. (rule -2 (lower (has_type (fits_in_16 ty) (rotl x y))) (let ((amt Reg (value_regs_get y 0)) (neg_shift Reg (sub $I32 (zero_reg) amt))) (small_rotr ty (put_in_reg_zext32 x) neg_shift))) ;; Specialization for the 8/16-bit case when the rotation amount is an immediate. 
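;; (The negation of the amount happens at compile time via `negate_imm_shift`, so no
;; run-time `sub` is needed; see the right-rotation trick explained just below.)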
(rule -1 (lower (has_type (fits_in_16 ty) (rotl x (iconst k)))) (if-let n (imm_shift_from_imm64 ty k)) (small_rotr_imm ty (put_in_reg_zext32 x) (negate_imm_shift ty n))) ;; aarch64 doesn't have a left-rotate instruction, but a left rotation of K ;; places is effectively a right rotation of N - K places, if N is the integer's ;; bit size. We implement left rotations with this trick. ;; ;; Note that when negating the shift amount here the upper bits are ignored ;; by the rotr instruction, meaning that we'll still left-shift by the desired ;; amount. ;; General 32-bit case. (rule (lower (has_type $I32 (rotl x y))) (let ((amt Reg (value_regs_get y 0)) (neg_shift Reg (sub $I32 (zero_reg) amt))) (a64_rotr $I32 x neg_shift))) ;; General 64-bit case. (rule (lower (has_type $I64 (rotl x y))) (let ((amt Reg (value_regs_get y 0)) (neg_shift Reg (sub $I64 (zero_reg) amt))) (a64_rotr $I64 x neg_shift))) ;; Specialization for the 32-bit case when the rotation amount is an immediate. (rule 1 (lower (has_type $I32 (rotl x (iconst k)))) (if-let n (imm_shift_from_imm64 $I32 k)) (a64_rotr_imm $I32 x (negate_imm_shift $I32 n))) ;; Specialization for the 64-bit case when the rotation amount is an immediate. (rule 1 (lower (has_type $I64 (rotl x (iconst k)))) (if-let n (imm_shift_from_imm64 $I64 k)) (a64_rotr_imm $I64 x (negate_imm_shift $I64 n))) (decl negate_imm_shift (Type ImmShift) ImmShift) (extern constructor negate_imm_shift negate_imm_shift) ;; General 128-bit case. ;; ;; TODO: much better codegen is possible with a constant amount. (rule (lower (has_type $I128 (rotl x y))) (let ((val ValueRegs x) (amt Reg (value_regs_get y 0)) (neg_amt Reg (sub $I64 (imm $I64 (ImmExtend.Zero) 128) amt)) (lshift ValueRegs (lower_shl128 val amt)) (rshift ValueRegs (lower_ushr128 val neg_amt))) (value_regs (orr $I64 (value_regs_get lshift 0) (value_regs_get rshift 0)) (orr $I64 (value_regs_get lshift 1) (value_regs_get rshift 1))))) ;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; General 8/16-bit case. (rule -3 (lower (has_type (fits_in_16 ty) (rotr x y))) (small_rotr ty (put_in_reg_zext32 x) (value_regs_get y 0))) ;; General 32-bit case. (rule -1 (lower (has_type $I32 (rotr x y))) (a64_rotr $I32 x (value_regs_get y 0))) ;; General 64-bit case. (rule -1 (lower (has_type $I64 (rotr x y))) (a64_rotr $I64 x (value_regs_get y 0))) ;; Specialization for the 8/16-bit case when the rotation amount is an immediate. (rule -2 (lower (has_type (fits_in_16 ty) (rotr x (iconst k)))) (if-let n (imm_shift_from_imm64 ty k)) (small_rotr_imm ty (put_in_reg_zext32 x) n)) ;; Specialization for the 32-bit case when the rotation amount is an immediate. (rule (lower (has_type $I32 (rotr x (iconst k)))) (if-let n (imm_shift_from_imm64 $I32 k)) (a64_rotr_imm $I32 x n)) ;; Specialization for the 64-bit case when the rotation amount is an immediate. 
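;; For example (illustrative), `rotl.i64 v, 3` is emitted as `ror xd, xn, #61`.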
(rule (lower (has_type $I64 (rotr x (iconst k))))
      (if-let n (imm_shift_from_imm64 $I64 k))
      (a64_rotr_imm $I64 x n))

;; For a < 32-bit rotate-right, we synthesize this as:
;;
;;    rotr rd, val, amt
;;
;; =>
;;
;;    and     masked_amt, amt, <bitwidth - 1>
;;    sub     tmp_sub, masked_amt, <bitwidth>
;;    sub     neg_amt, zero, tmp_sub  ; neg
;;    lsr     val_rshift, val, masked_amt
;;    lsl     val_lshift, val, neg_amt
;;    orr     rd, val_lshift, val_rshift
(decl small_rotr (Type Reg Reg) Reg)
(rule (small_rotr ty val amt)
      (let ((masked_amt Reg (and_imm $I32 amt (rotr_mask ty)))
            (tmp_sub Reg (sub_imm $I32 masked_amt (u8_into_imm12 (ty_bits ty))))
            (neg_amt Reg (sub $I32 (zero_reg) tmp_sub))
            (val_rshift Reg (lsr $I32 val masked_amt))
            (val_lshift Reg (lsl $I32 val neg_amt)))
        (orr $I32 val_lshift val_rshift)))

(decl rotr_mask (Type) ImmLogic)
(extern constructor rotr_mask rotr_mask)

;; For a constant amount, we can instead do:
;;
;;    rotr rd, val, #amt
;;
;; =>
;;
;;    lsr     val_rshift, val, #<amt>
;;    lsl     val_lshift, val, <bitwidth - amt>
;;    orr     rd, val_lshift, val_rshift
(decl small_rotr_imm (Type Reg ImmShift) Reg)
(rule (small_rotr_imm ty val amt)
      (let ((val_rshift Reg (lsr_imm $I32 val amt))
            (val_lshift Reg (lsl_imm $I32 val (rotr_opposite_amount ty amt))))
        (orr $I32 val_lshift val_rshift)))

(decl rotr_opposite_amount (Type ImmShift) ImmShift)
(extern constructor rotr_opposite_amount rotr_opposite_amount)

;; General 128-bit case.
;;
;; TODO: much better codegen is possible with a constant amount.
(rule (lower (has_type $I128 (rotr x y)))
      (let ((val ValueRegs x)
            (amt Reg (value_regs_get y 0))
            (neg_amt Reg (sub $I64 (imm $I64 (ImmExtend.Zero) 128) amt))
            (rshift ValueRegs (lower_ushr128 val amt))
            (lshift ValueRegs (lower_shl128 val neg_amt))
            (hi Reg (orr $I64 (value_regs_get rshift 1) (value_regs_get lshift 1)))
            (lo Reg (orr $I64 (value_regs_get rshift 0) (value_regs_get lshift 0))))
        (value_regs lo hi)))

;;;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Reversing an 8-bit value with a 32-bit bitrev instruction will place
;; the reversed result in the highest 8 bits, so we need to shift them down into
;; place.
(rule (lower (has_type $I8 (bitrev x)))
      (lsr_imm $I32 (rbit $I32 x) (imm_shift_from_u8 24)))

;; Reversing a 16-bit value with a 32-bit bitrev instruction will place
;; the reversed result in the highest 16 bits, so we need to shift them down into
;; place.
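;; For example (illustrative): `rbit wd, wn` followed by `lsr wd, wd, #16`.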
(rule (lower (has_type $I16 (bitrev x))) (lsr_imm $I32 (rbit $I32 x) (imm_shift_from_u8 16))) (rule (lower (has_type $I128 (bitrev x))) (let ((val ValueRegs x) (lo_rev Reg (rbit $I64 (value_regs_get val 0))) (hi_rev Reg (rbit $I64 (value_regs_get val 1)))) (value_regs hi_rev lo_rev))) (rule -1 (lower (has_type ty (bitrev x))) (rbit ty x)) ;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (clz x))) (sub_imm $I32 (a64_clz $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24))) (rule (lower (has_type $I16 (clz x))) (sub_imm $I32 (a64_clz $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16))) (rule (lower (has_type $I128 (clz x))) (lower_clz128 x)) (rule -1 (lower (has_type ty (clz x))) (a64_clz ty x)) ;; clz hi_clz, hi ;; clz lo_clz, lo ;; lsr tmp, hi_clz, #6 ;; madd dst_lo, lo_clz, tmp, hi_clz ;; mov dst_hi, 0 (decl lower_clz128 (ValueRegs) ValueRegs) (rule (lower_clz128 val) (let ((hi_clz Reg (a64_clz $I64 (value_regs_get val 1))) (lo_clz Reg (a64_clz $I64 (value_regs_get val 0))) (tmp Reg (lsr_imm $I64 hi_clz (imm_shift_from_u8 6)))) (value_regs (madd $I64 lo_clz tmp hi_clz) (imm $I64 (ImmExtend.Zero) 0)))) ;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Note that all `ctz` instructions are implemented by reversing the bits and ;; then using a `clz` instruction since the tail zeros are the same as the ;; leading zeros of the reversed value. (rule (lower (has_type $I8 (ctz x))) (a64_clz $I32 (orr_imm $I32 (rbit $I32 x) (u64_into_imm_logic $I32 0x800000)))) (rule (lower (has_type $I16 (ctz x))) (a64_clz $I32 (orr_imm $I32 (rbit $I32 x) (u64_into_imm_logic $I32 0x8000)))) (rule (lower (has_type $I128 (ctz x))) (let ((val ValueRegs x) (lo Reg (rbit $I64 (value_regs_get val 0))) (hi Reg (rbit $I64 (value_regs_get val 1)))) (lower_clz128 (value_regs hi lo)))) (rule -1 (lower (has_type ty (ctz x))) (a64_clz ty (rbit ty x))) ;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (cls x))) (sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 24))) (rule (lower (has_type $I16 (cls x))) (sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 16))) ;; cls lo_cls, lo ;; cls hi_cls, hi ;; eon sign_eq_eor, hi, lo ;; lsr sign_eq, sign_eq_eor, #63 ;; madd lo_sign_bits, out_lo, sign_eq, sign_eq ;; cmp hi_cls, #63 ;; csel maybe_lo, lo_sign_bits, xzr, eq ;; add out_lo, maybe_lo, hi_cls ;; mov out_hi, 0 (rule (lower (has_type $I128 (cls x))) (let ((val ValueRegs x) (lo Reg (value_regs_get val 0)) (hi Reg (value_regs_get val 1)) (lo_cls Reg (a64_cls $I64 lo)) (hi_cls Reg (a64_cls $I64 hi)) (sign_eq_eon Reg (eon $I64 hi lo)) (sign_eq Reg (lsr_imm $I64 sign_eq_eon (imm_shift_from_u8 63))) (lo_sign_bits Reg (madd $I64 lo_cls sign_eq sign_eq)) (maybe_lo Reg (with_flags_reg (cmp64_imm hi_cls (u8_into_imm12 63)) (csel (Cond.Eq) lo_sign_bits (zero_reg))))) (value_regs (add $I64 maybe_lo hi_cls) (imm $I64 (ImmExtend.Zero) 0)))) (rule -1 (lower (has_type ty (cls x))) (a64_cls ty x)) ;;;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I16 (bswap x))) (a64_rev16 $I16 x)) (rule (lower (has_type $I32 (bswap x))) (a64_rev32 $I32 x)) (rule (lower (has_type $I64 (bswap x))) (a64_rev64 $I64 x)) (rule (lower (has_type $I128 (bswap x))) (value_regs (a64_rev64 $I64 (value_regs_get x 1)) (a64_rev64 $I64 (value_regs_get x 0)))) ;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Bmask tests 
the value against zero, and uses `csetm` to assert the result. (rule (lower (has_type out_ty (bmask x @ (value_type in_ty)))) (lower_bmask out_ty in_ty x)) ;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; The implementation of `popcnt` for scalar types is done by moving the value ;; into a vector register, using the `cnt` instruction, and then collating the ;; result back into a normal register. ;; ;; The general sequence emitted here is ;; ;; fmov tmp, in_lo ;; if ty == i128: ;; mov tmp.d[1], in_hi ;; ;; cnt tmp.16b, tmp.16b / cnt tmp.8b, tmp.8b ;; addv tmp, tmp.16b / addv tmp, tmp.8b / addp tmp.8b, tmp.8b, tmp.8b / (no instruction for 8-bit inputs) ;; ;; umov out_lo, tmp.b[0] ;; if ty == i128: ;; mov out_hi, 0 (rule (lower (has_type $I8 (popcnt x))) (let ((tmp Reg (mov_to_fpu x (ScalarSize.Size32))) (nbits Reg (vec_cnt tmp (VectorSize.Size8x8)))) (mov_from_vec nbits 0 (ScalarSize.Size8)))) ;; Note that this uses `addp` instead of `addv` as it's usually cheaper. (rule (lower (has_type $I16 (popcnt x))) (let ((tmp Reg (mov_to_fpu x (ScalarSize.Size32))) (nbits Reg (vec_cnt tmp (VectorSize.Size8x8))) (added Reg (addp nbits nbits (VectorSize.Size8x8)))) (mov_from_vec added 0 (ScalarSize.Size8)))) (rule (lower (has_type $I32 (popcnt x))) (let ((tmp Reg (mov_to_fpu x (ScalarSize.Size32))) (nbits Reg (vec_cnt tmp (VectorSize.Size8x8))) (added Reg (addv nbits (VectorSize.Size8x8)))) (mov_from_vec added 0 (ScalarSize.Size8)))) (rule (lower (has_type $I64 (popcnt x))) (let ((tmp Reg (mov_to_fpu x (ScalarSize.Size64))) (nbits Reg (vec_cnt tmp (VectorSize.Size8x8))) (added Reg (addv nbits (VectorSize.Size8x8)))) (mov_from_vec added 0 (ScalarSize.Size8)))) (rule (lower (has_type $I128 (popcnt x))) (let ((val ValueRegs x) (tmp_half Reg (mov_to_fpu (value_regs_get val 0) (ScalarSize.Size64))) (tmp Reg (mov_to_vec tmp_half (value_regs_get val 1) 1 (VectorSize.Size64x2))) (nbits Reg (vec_cnt tmp (VectorSize.Size8x16))) (added Reg (addv nbits (VectorSize.Size8x16)))) (value_regs (mov_from_vec added 0 (ScalarSize.Size8)) (imm $I64 (ImmExtend.Zero) 0)))) (rule (lower (has_type $I8X16 (popcnt x))) (vec_cnt x (VectorSize.Size8x16))) ;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (bitselect c x y))) (if (ty_int_ref_scalar_64 ty)) (let ((tmp1 Reg (and_reg ty x c)) (tmp2 Reg (bic ty y c))) (orr ty tmp1 tmp2))) (rule 1 (lower (has_type (ty_vec128 ty) (bitselect c x y))) (bsl ty c x y)) ;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (ty_vec128 ty) (vselect c x y))) (bsl ty c x y)) ;;;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; T -> I{64,32,16,8}: We can simply pass through the value: values ;; are always stored with high bits undefined, so we can just leave ;; them be. 
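;; For example, an `ireduce` from `i64` to `i32` emits no instructions at all;
;; consumers of the narrow value only look at its low 32 bits.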
(rule (lower (has_type ty (ireduce src))) (if (ty_int_ref_scalar_64 ty)) (value_regs_get src 0)) ;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 4 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y))) (if (zero_value y)) (let ((rn Reg x) (vec_size VectorSize (vector_size ty))) (value_reg (not (fcmeq0 rn vec_size) vec_size)))) (rule 3 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x y))) (if (zero_value y)) (let ((rn Reg x) (vec_size VectorSize (vector_size ty))) (value_reg (float_cmp_zero cond rn vec_size)))) (rule 2 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond_not_eq cond) x y))) (if (zero_value x)) (let ((rn Reg y) (vec_size VectorSize (vector_size ty))) (value_reg (not (fcmeq0 rn vec_size) vec_size)))) (rule 1 (lower (has_type ty @ (multi_lane _ _) (fcmp (fcmp_zero_cond cond) x y))) (if (zero_value x)) (let ((rn Reg y) (vec_size VectorSize (vector_size ty))) (value_reg (float_cmp_zero_swap cond rn vec_size)))) (rule 0 (lower (has_type out_ty (fcmp cond x @ (value_type (ty_scalar_float in_ty)) y))) (with_flags (fpu_cmp (scalar_size in_ty) x y) (materialize_bool_result (fp_cond_code cond)))) (rule -1 (lower (has_type out_ty (fcmp cond x @ (value_type in_ty) y))) (if (ty_vector_float in_ty)) (vec_cmp x y in_ty (fp_cond_code cond))) ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 3 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x y))) (if (zero_value y)) (let ((rn Reg x) (vec_size VectorSize (vector_size ty))) (value_reg (not (cmeq0 rn vec_size) vec_size)))) (rule 2 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x y))) (if (zero_value y)) (let ((rn Reg x) (vec_size VectorSize (vector_size ty))) (value_reg (int_cmp_zero cond rn vec_size)))) (rule 1 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond_not_eq cond) x y))) (if (zero_value x)) (let ((rn Reg y) (vec_size VectorSize (vector_size ty))) (value_reg (not (cmeq0 rn vec_size) vec_size)))) (rule 0 (lower (has_type ty @ (multi_lane _ _) (icmp (icmp_zero_cond cond) x y))) (if (zero_value x)) (let ((rn Reg y) (vec_size VectorSize (vector_size ty))) (value_reg (int_cmp_zero_swap cond rn vec_size)))) (rule -1 (lower (icmp cond x @ (value_type in_ty) y)) (lower_icmp_into_reg cond x y in_ty $I8)) ;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (trap trap_code)) (side_effect (udf trap_code))) ;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (resumable_trap trap_code)) (side_effect (udf trap_code))) ;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (select (maybe_uextend (icmp cc x @ (value_type in_ty) y)) rn rm))) (let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y in_ty))) (lower_select (flags_and_cc_flags comparison) (cond_code (flags_and_cc_cc comparison)) ty rn rm))) (rule (lower (has_type ty (select (maybe_uextend (fcmp cc x @ (value_type in_ty) y)) rn rm))) (let ((cond Cond (fp_cond_code cc))) (lower_select (fpu_cmp (scalar_size in_ty) x y) cond ty rn rm))) (rule -1 (lower (has_type ty (select rcond @ (value_type $I8) rn rm))) (let ((rcond Reg rcond)) (lower_select (tst_imm $I32 rcond (u64_into_imm_logic $I32 255)) (Cond.Ne) ty rn rm))) (rule -2 (lower (has_type ty (select rcond @ (value_type (fits_in_32 _)) rn rm))) (let ((rcond Reg (put_in_reg_zext32 rcond))) (lower_select (cmp 
(OperandSize.Size32) rcond (zero_reg)) (Cond.Ne) ty rn rm))) (rule -3 (lower (has_type ty (select rcond @ (value_type (fits_in_64 _)) rn rm))) (let ((rcond Reg (put_in_reg_zext64 rcond))) (lower_select (cmp (OperandSize.Size64) rcond (zero_reg)) (Cond.Ne) ty rn rm))) (rule -4 (lower (has_type ty (select rcond @ (value_type $I128) rn rm))) (let ((c ValueRegs (put_in_regs rcond)) (c_lo Reg (value_regs_get c 0)) (c_hi Reg (value_regs_get c 1)) (rt Reg (orr $I64 c_lo c_hi))) (lower_select (cmp (OperandSize.Size64) rt (zero_reg)) (Cond.Ne) ty rn rm))) ;;;; Rules for `select_spectre_guard` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (select_spectre_guard (maybe_uextend (icmp cc x @ (value_type in_ty) y)) if_true if_false))) (let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y in_ty)) (dst ValueRegs (lower_select (flags_and_cc_flags comparison) (cond_code (flags_and_cc_cc comparison)) ty if_true if_false)) (_ InstOutput (side_effect (csdb)))) dst)) (rule -1 (lower (has_type ty (select_spectre_guard rcond @ (value_type (fits_in_64 _)) rn rm))) (let ((rcond Reg (put_in_reg_zext64 rcond))) (lower_select (cmp (OperandSize.Size64) rcond (zero_reg)) (Cond.Ne) ty rn rm))) (rule -2 (lower (has_type ty (select_spectre_guard rcond @ (value_type $I128) rn rm))) (let ((c ValueRegs (put_in_regs rcond)) (c_lo Reg (value_regs_get c 0)) (c_hi Reg (value_regs_get c 1)) (rt Reg (orr $I64 c_lo c_hi))) (lower_select (cmp (OperandSize.Size64) rt (zero_reg)) (Cond.Ne) ty rn rm))) ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (ty_vec128 _) (vconst (u128_from_constant x)))) (constant_f128 x)) (rule 1 (lower (has_type ty (vconst (u64_from_constant x)))) (if (ty_vec64 ty)) (constant_f64 x)) ;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule -1 (lower (has_type ty (splat x @ (value_type in_ty)))) (if (ty_int_ref_scalar_64 in_ty)) (vec_dup x (vector_size ty))) (rule -2 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _))))) (vec_dup_from_fpu x (vector_size ty))) (rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n))))) (splat_const n (vector_size ty))) (rule (lower (has_type ty (splat (f64const (u64_from_ieee64 n))))) (splat_const n (vector_size ty))) (rule (lower (has_type ty (splat (iconst (u64_from_imm64 n))))) (splat_const n (vector_size ty))) (rule (lower (has_type ty (splat (ireduce (iconst (u64_from_imm64 n)))))) (splat_const n (vector_size ty))) (rule (lower (has_type ty (splat x @ (load flags _ _)))) (if-let mem_op (is_sinkable_inst x)) (let ((addr AMode (sink_load_into_amode (lane_type ty) mem_op)) (address Reg (load_addr addr))) (ld1r address (vector_size ty) flags))) ;;;; Rules for `AtomicLoad` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (valid_atomic_transaction ty) (atomic_load flags addr))) (load_acquire ty flags addr)) ;;;; Rules for `AtomicStore` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (atomic_store flags src @ (value_type (valid_atomic_transaction ty)) addr)) (side_effect (store_release ty flags src addr))) ;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Add) addr src)))) (lse_atomic_rmw (AtomicRMWOp.Add) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Xor) addr src)))) (lse_atomic_rmw (AtomicRMWOp.Eor) 
addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Or) addr src)))) (lse_atomic_rmw (AtomicRMWOp.Set) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Smax) addr src)))) (lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Smin) addr src)))) (lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Umax) addr src)))) (lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Umin) addr src)))) (lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr src)))) (lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty flags)) (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.And) addr src)))) (lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Add) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.Add) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Sub) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.Sub) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.And) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.And) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Nand) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.Nand) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Or) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.Orr) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Xor) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.Eor) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Smin) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.Smin) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Smax) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.Smax) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Umin) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.Umin) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Umax) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.Umax) addr src ty flags)) (rule (lower (has_type (valid_atomic_transaction ty) (atomic_rmw flags (AtomicRmwOp.Xchg) addr src))) (atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty flags)) ;;;; Rules for `AtomicCAS` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (and (use_lse) (has_type (valid_atomic_transaction ty) (atomic_cas flags addr src1 src2)))) (lse_atomic_cas addr src1 src2 ty flags)) (rule (lower (and (has_type (valid_atomic_transaction ty) (atomic_cas flags addr src1 src2)))) (atomic_cas_loop addr src1 src2 ty flags)) ;;;; Rules for 'fvdemote' ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (fvdemote x)) (fcvtn x (ScalarSize.Size32))) 
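;; A sketch of the code expected for `fvdemote` (register names illustrative):
;;
;;   fcvtn vd.2s, vn.2d
;;
;; i.e. each f64 lane is rounded to f32 and written to the low half of the
;; destination, with the upper 64 bits of the destination cleared.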
;;;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (has_type (ty_vec128_int ty) (snarrow x y))) (if (zero_value y)) (sqxtn x (lane_size ty))) (rule 2 (lower (has_type (ty_vec64_int ty) (snarrow x y))) (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2)))) (sqxtn dst (lane_size ty)))) (rule 0 (lower (has_type (ty_vec128_int ty) (snarrow x y))) (let ((low_half Reg (sqxtn x (lane_size ty))) (result Reg (sqxtn2 low_half y (lane_size ty)))) result)) ;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (has_type (ty_vec128_int ty) (unarrow x y))) (if (zero_value y)) (sqxtun x (lane_size ty))) (rule 2 (lower (has_type (ty_vec64_int ty) (unarrow x y))) (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2)))) (sqxtun dst (lane_size ty)))) (rule 0 (lower (has_type (ty_vec128_int ty) (unarrow x y))) (let ((low_half Reg (sqxtun x (lane_size ty))) (result Reg (sqxtun2 low_half y (lane_size ty)))) result)) ;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (has_type (ty_vec128_int ty) (uunarrow x y))) (if (zero_value y)) (uqxtn x (lane_size ty))) (rule 2 (lower (has_type (ty_vec64_int ty) (uunarrow x y))) (let ((dst Reg (mov_vec_elem x y 1 0 (VectorSize.Size64x2)))) (uqxtn dst (lane_size ty)))) (rule 0 (lower (has_type (ty_vec128_int ty) (uunarrow x y))) (let ((low_half Reg (uqxtn x (lane_size ty))) (result Reg (uqxtn2 low_half y (lane_size ty)))) result)) ;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (swiden_low x))) (vec_extend (VecExtendOp.Sxtl) x $false (lane_size ty))) ;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (has_type (ty_vec128 ty) (swiden_high x))) (vec_extend (VecExtendOp.Sxtl) x $true (lane_size ty))) (rule (lower (has_type ty (swiden_high x))) (if (ty_vec64 ty)) (let ((tmp Reg (fpu_move_from_vec x 1 (VectorSize.Size32x2)))) (vec_extend (VecExtendOp.Sxtl) tmp $false (lane_size ty)))) ;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (uwiden_low x))) (vec_extend (VecExtendOp.Uxtl) x $false (lane_size ty))) ;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (has_type (ty_vec128 ty) (uwiden_high x))) (vec_extend (VecExtendOp.Uxtl) x $true (lane_size ty))) (rule (lower (has_type ty (uwiden_high x))) (if (ty_vec64 ty)) (let ((tmp Reg (fpu_move_from_vec x 1 (VectorSize.Size32x2)))) (vec_extend (VecExtendOp.Uxtl) tmp $false (lane_size ty)))) ;;;; Rules for `Fence` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (fence)) (side_effect (aarch64_fence))) ;;;; Rules for `IsNull` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (is_null x @ (value_type ty))) (with_flags (cmp_imm (operand_size ty) x (u8_into_imm12 0)) (materialize_bool_result (Cond.Eq)))) ;;;; Rules for `IsInvalid` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (is_invalid x @ (value_type ty))) (with_flags (cmn_imm (operand_size ty) x (u8_into_imm12 1)) (materialize_bool_result (Cond.Eq)))) ;;;; Rules for `Debugtrap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (debugtrap)) (side_effect (brk))) ;;;; Rules for `func_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (func_addr (func_ref_data _ extname _))) (load_ext_name (box_external_name extname) 0)) ;;;; Rules for 
`symbol_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (symbol_value (symbol_value_data extname _ offset))) (load_ext_name (box_external_name extname) offset))

;;; Rules for `get_{frame,stack}_pointer` and `get_return_address` ;;;;;;;;;;;;;

(rule (lower (get_frame_pointer)) (aarch64_fp))
(rule (lower (get_stack_pointer)) (aarch64_sp))
(rule (lower (get_return_address)) (aarch64_link))

;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (call (func_ref_data sig_ref extname dist) inputs)) (gen_call sig_ref extname dist inputs))
(rule (lower (call_indirect sig_ref val inputs)) (gen_call_indirect sig_ref val inputs))

;;;; Rules for `return` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; N.B.: the Ret itself is generated by the ABI.
(rule (lower (return args)) (lower_return (range 0 (value_slice_len args)) args))

;;;; Rules for loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I8 (load flags address offset))) (aarch64_uload8 (amode $I8 address offset) flags))
(rule (lower (has_type $I16 (load flags address offset))) (aarch64_uload16 (amode $I16 address offset) flags))
(rule (lower (has_type $I32 (load flags address offset))) (aarch64_uload32 (amode $I32 address offset) flags))
(rule (lower (has_type $I64 (load flags address offset))) (aarch64_uload64 (amode $I64 address offset) flags))
(rule (lower (has_type $R64 (load flags address offset))) (aarch64_uload64 (amode $I64 address offset) flags))
(rule (lower (has_type $F32 (load flags address offset))) (aarch64_fpuload32 (amode $F32 address offset) flags))
(rule (lower (has_type $F64 (load flags address offset))) (aarch64_fpuload64 (amode $F64 address offset) flags))
(rule (lower (has_type $I128 (load flags address offset))) (aarch64_loadp64 (pair_amode address offset) flags))
(rule -1 (lower (has_type (ty_vec64 _) (load flags address offset))) (aarch64_fpuload64 (amode $F64 address offset) flags))
(rule -3 (lower (has_type (ty_vec128 _) (load flags address offset))) (aarch64_fpuload128 (amode $I8X16 address offset) flags))
(rule -2 (lower (has_type (ty_dyn_vec64 _) (load flags address offset))) (aarch64_fpuload64 (amode $F64 address offset) flags))
(rule -4 (lower (has_type (ty_dyn_vec128 _) (load flags address offset))) (aarch64_fpuload128 (amode $I8X16 address offset) flags))
(rule (lower (uload8 flags address offset)) (aarch64_uload8 (amode $I8 address offset) flags))
(rule (lower (sload8 flags address offset)) (aarch64_sload8 (amode $I8 address offset) flags))
(rule (lower (uload16 flags address offset)) (aarch64_uload16 (amode $I16 address offset) flags))
(rule (lower (sload16 flags address offset)) (aarch64_sload16 (amode $I16 address offset) flags))
(rule (lower (uload32 flags address offset)) (aarch64_uload32 (amode $I32 address offset) flags))
(rule (lower (sload32 flags address offset)) (aarch64_sload32 (amode $I32 address offset) flags))
(rule (lower (sload8x8 flags address offset)) (vec_extend (VecExtendOp.Sxtl) (aarch64_fpuload64 (amode $F64 address offset) flags) $false (ScalarSize.Size16)))
(rule (lower (uload8x8 flags address offset)) (vec_extend (VecExtendOp.Uxtl) (aarch64_fpuload64 (amode $F64 address offset) flags) $false (ScalarSize.Size16)))
(rule (lower (sload16x4 flags address offset)) (vec_extend (VecExtendOp.Sxtl) (aarch64_fpuload64 (amode $F64 address offset) flags) $false (ScalarSize.Size32)))
(rule (lower (uload16x4 flags address offset)) (vec_extend (VecExtendOp.Uxtl) (aarch64_fpuload64 (amode $F64
address offset) flags) $false (ScalarSize.Size32))) (rule (lower (sload32x2 flags address offset)) (vec_extend (VecExtendOp.Sxtl) (aarch64_fpuload64 (amode $F64 address offset) flags) $false (ScalarSize.Size64))) (rule (lower (uload32x2 flags address offset)) (vec_extend (VecExtendOp.Uxtl) (aarch64_fpuload64 (amode $F64 address offset) flags) $false (ScalarSize.Size64))) ;;;; Rules for stores ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (store flags value @ (value_type $I8) address offset)) (side_effect (aarch64_store8 (amode $I8 address offset) flags value))) (rule (lower (store flags value @ (value_type $I16) address offset)) (side_effect (aarch64_store16 (amode $I16 address offset) flags value))) (rule (lower (store flags value @ (value_type $I32) address offset)) (side_effect (aarch64_store32 (amode $I32 address offset) flags value))) (rule (lower (store flags value @ (value_type $I64) address offset)) (side_effect (aarch64_store64 (amode $I64 address offset) flags value))) (rule (lower (store flags value @ (value_type $R64) address offset)) (side_effect (aarch64_store64 (amode $I64 address offset) flags value))) (rule (lower (istore8 flags value address offset)) (side_effect (aarch64_store8 (amode $I8 address offset) flags value))) (rule (lower (istore16 flags value address offset)) (side_effect (aarch64_store16 (amode $I16 address offset) flags value))) (rule (lower (istore32 flags value address offset)) (side_effect (aarch64_store32 (amode $I32 address offset) flags value))) (rule (lower (store flags value @ (value_type $F32) address offset)) (side_effect (aarch64_fpustore32 (amode $F32 address offset) flags value))) (rule (lower (store flags value @ (value_type $F64) address offset)) (side_effect (aarch64_fpustore64 (amode $F64 address offset) flags value))) (rule (lower (store flags value @ (value_type $I128) address offset)) (side_effect (aarch64_storep64 (pair_amode address offset) flags (value_regs_get value 0) (value_regs_get value 1)))) (rule -1 (lower (store flags value @ (value_type (ty_vec64 _)) address offset)) (side_effect (aarch64_fpustore64 (amode $F64 address offset) flags value))) (rule -3 (lower (store flags value @ (value_type (ty_vec128 _)) address offset)) (side_effect (aarch64_fpustore128 (amode $I8X16 address offset) flags value))) (rule -2 (lower (store flags value @ (value_type (ty_dyn_vec64 _)) address offset)) (side_effect (aarch64_fpustore64 (amode $F64 address offset) flags value))) (rule -4 (lower (store flags value @ (value_type (ty_dyn_vec128 _)) address offset)) (side_effect (aarch64_fpustore128 (amode $I8X16 address offset) flags value))) ;;; Rules for `{get,set}_pinned_reg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (get_pinned_reg)) (mov_from_preg (preg_pinned))) (rule (lower (set_pinned_reg val)) (side_effect (write_pinned_reg val))) ;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; SIMD&FP <=> SIMD&FP (rule 5 (lower (has_type (ty_float_or_vec _) (bitcast _ x @ (value_type (ty_float_or_vec _))))) x) ; GPR => SIMD&FP (rule 4 (lower (has_type (ty_float_or_vec _) (bitcast _ x @ (value_type in_ty)))) (if (ty_int_ref_scalar_64 in_ty)) (mov_to_fpu x (scalar_size in_ty))) ; SIMD&FP => GPR (rule 3 (lower (has_type out_ty (bitcast _ x @ (value_type (fits_in_64 (ty_float_or_vec _)))))) (if (ty_int_ref_scalar_64 out_ty)) (mov_from_vec x 0 (scalar_size out_ty))) ; GPR <=> GPR (rule 2 (lower (has_type out_ty (bitcast _ x @ (value_type in_ty)))) (if (ty_int_ref_scalar_64 out_ty)) (if 
(ty_int_ref_scalar_64 in_ty)) x) (rule 1 (lower (has_type $I128 (bitcast _ x @ (value_type $I128)))) x) ;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; extractlane with lane 0 can pass through the value unchanged; upper ;; bits are undefined when a narrower type is in a wider register. (rule 2 (lower (has_type (ty_scalar_float _) (extractlane val (u8_from_uimm8 0)))) val) (rule 0 (lower (has_type (ty_int ty) (extractlane val (u8_from_uimm8 lane)))) (mov_from_vec val lane (scalar_size ty))) (rule 1 (lower (has_type (ty_scalar_float ty) (extractlane val @ (value_type vty) (u8_from_uimm8 lane)))) (fpu_move_from_vec val lane (vector_size vty))) ;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 1 (lower (insertlane vec @ (value_type vty) val @ (value_type (ty_int _)) (u8_from_uimm8 lane))) (mov_to_vec vec val lane (vector_size vty))) (rule (lower (insertlane vec @ (value_type vty) val @ (value_type (ty_scalar_float _)) (u8_from_uimm8 lane))) (mov_vec_elem vec val lane 0 (vector_size vty))) ;;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (stack_addr stack_slot offset)) (compute_stack_addr stack_slot offset)) ;;; Rules for `vhigh_bits` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; All three sequences use one integer temporary and two vector ;; temporaries. The shift is done early so as to give the register ;; allocator the possibility of using the same reg for `tmp_v1` and ;; `src_v` in the case that this is the last use of `src_v`. See ;; https://github.com/WebAssembly/simd/pull/201 for the background and ;; derivation of these sequences. Alternative sequences are discussed ;; in https://github.com/bytecodealliance/wasmtime/issues/2296, ;; although they are not used here. (rule (lower (vhigh_bits vec @ (value_type $I8X16))) (let ( ;; Replicate the MSB of each of the 16 byte lanes across ;; the whole lane (sshr is an arithmetic right shift). (shifted Reg (vec_shift_imm (VecShiftImmOp.Sshr) 7 vec (VectorSize.Size8x16))) ;; Bitwise-and with a mask ;; `0x80402010_08040201_80402010_08040201` to get the bit ;; in the proper location for each group of 8 lanes. (anded Reg (and_vec shifted (constant_f128 0x80402010_08040201_80402010_08040201) (VectorSize.Size8x16))) ;; Produce a version of `anded` with upper 8 lanes and ;; lower 8 lanes swapped. (anded_swapped Reg (vec_extract anded anded 8)) ;; Zip together the two; with the above this produces the lane permutation: ;; 15 7 14 6 13 5 12 4 11 3 10 2 9 1 8 0 (zipped Reg (zip1 anded anded_swapped (VectorSize.Size8x16))) ;; Add 16-bit lanes together ("add across vector"), so we ;; get, in the low 16 bits, 15+14+...+8 in the high byte ;; and 7+6+...+0 in the low byte. This effectively puts ;; the 16 MSBs together, giving our results. ;; ;; N.B.: `Size16x8` is not a typo! (result Reg (addv zipped (VectorSize.Size16x8)))) (mov_from_vec result 0 (ScalarSize.Size16)))) (rule (lower (vhigh_bits vec @ (value_type $I16X8))) (let ( ;; Replicate the MSB of each of the 8 16-bit lanes across ;; the whole lane (sshr is an arithmetic right shift). (shifted Reg (vec_shift_imm (VecShiftImmOp.Sshr) 15 vec (VectorSize.Size16x8))) ;; Bitwise-and with a mask ;; `0x0080_0040_0020_0010_0008_0004_0002_0001` to get the ;; bit in the proper location for each group of 4 lanes. (anded Reg (and_vec shifted (constant_f128 0x0080_0040_0020_0010_0008_0004_0002_0001) (VectorSize.Size16x8))) ;; Add lanes together to get the 8 MSBs in the low byte. 
(result Reg (addv anded (VectorSize.Size16x8)))) (mov_from_vec result 0 (ScalarSize.Size16))))

(rule (lower (vhigh_bits vec @ (value_type $I32X4)))
  (let (
         ;; Replicate the MSB of each of the 4 32-bit lanes across
         ;; the whole lane (sshr is an arithmetic right shift).
         (shifted Reg (vec_shift_imm (VecShiftImmOp.Sshr) 31 vec (VectorSize.Size32x4)))
         ;; Bitwise-and with a mask
         ;; `0x00000008_00000004_00000002_00000001` to get the bit
         ;; in the proper location for each group of 4 lanes.
         (anded Reg (and_vec shifted (constant_f128 0x00000008_00000004_00000002_00000001) (VectorSize.Size32x4)))
         ;; Add lanes together to get the 4 MSBs in the low byte.
         (result Reg (addv anded (VectorSize.Size32x4))))
    (mov_from_vec result 0 (ScalarSize.Size32))))

(rule (lower (vhigh_bits vec @ (value_type $I64X2)))
  (let (
         ;; Grab the MSB out of each of the lanes, right-shift to
         ;; LSB, and add with a left-shift of upper lane's MSB back
         ;; to bit 1.
         (upper_msb Reg (mov_from_vec vec 1 (ScalarSize.Size64)))
         (lower_msb Reg (mov_from_vec vec 0 (ScalarSize.Size64)))
         (upper_msb Reg (lsr_imm $I64 upper_msb (imm_shift_from_u8 63)))
         (lower_msb Reg (lsr_imm $I64 lower_msb (imm_shift_from_u8 63))))
    (add_shift $I64 lower_msb upper_msb (lshl_from_u64 $I64 1))))

;;; Rules for `iadd_cout` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; For values smaller than a register, we do a normal `add` with both arguments
;; sign extended. We then check whether the result differs from its own
;; sign-extension, which indicates that the addition overflowed the narrow type.

(rule 0 (lower (has_type (fits_in_16 ty) (iadd_cout a b)))
  (let ((extend ExtendOp (lower_extend_op ty $true))
        ;; Instead of emitting two `sxt{b,h}` we do one as an instruction and
        ;; the other as an extend operation in the `add` instruction.
        ;;
        ;; sxtb a_sext, a
        ;; add out, a_sext, b, sxtb
        ;; cmp out, out, sxtb
        ;; cset out_carry, ne
        (a_sext Reg (put_in_reg_sext32 a))
        (out Reg (add_extend_op ty a_sext b extend))
        (out_carry Reg (with_flags_reg (cmp_extend (OperandSize.Size32) out out extend) (cset (Cond.Ne)))))
    (output_pair (value_reg out) (value_reg out_carry))))

;; For register-sized adds we just emit an adds+cset, without further masking.
;; ;; adds out, a, b ;; cset carry, vs (rule 1 (lower (has_type (ty_32_or_64 ty) (iadd_cout a b))) (let ((out ValueRegs (with_flags (add_with_flags_paired ty a b) (cset_paired (Cond.Vs))))) (output_pair (value_regs_get out 0) (value_regs_get out 1)))) ;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (fits_in_64 ty) (uadd_overflow_trap a b tc))) (trap_if_overflow (add_with_flags_paired ty a b) tc)) ;;; Rules for `tls_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (tls_value (symbol_value_data name _ _))) (if (tls_model_is_elf_gd)) (elf_tls_get_addr name)) ;;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $F64X2 (fcvt_low_from_sint val))) (let ((extended Reg (vec_extend (VecExtendOp.Sxtl) val $false (ScalarSize.Size64))) (converted Reg (vec_misc (VecMisc2.Scvtf) extended (VectorSize.Size64x2)))) converted)) ;;; Rules for `fvpromote_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (fvpromote_low val)) (vec_rr_long (VecRRLongOp.Fcvtl32) val $false)) ;;; Rules for `brif` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `brif` following `icmp` (rule (lower_branch (brif (maybe_uextend (icmp cc x @ (value_type ty) y)) _ _) targets) (let ((comparison FlagsAndCC (lower_icmp_into_flags cc x y ty)) (cond Cond (cond_code (flags_and_cc_cc comparison))) (taken BranchTarget (branch_target targets 0)) (not_taken BranchTarget (branch_target targets 1))) (emit_side_effect (with_flags_side_effect (flags_and_cc_flags comparison) (cond_br taken not_taken (cond_br_cond cond)))))) ;; `brif` following `fcmp` (rule (lower_branch (brif (maybe_uextend (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets) (let ((cond Cond (fp_cond_code cc)) (taken BranchTarget (branch_target targets 0)) (not_taken BranchTarget (branch_target targets 1))) (emit_side_effect (with_flags_side_effect (fpu_cmp (scalar_size ty) x y) (cond_br taken not_taken (cond_br_cond cond)))))) ;; standard `brif` (rule -1 (lower_branch (brif c @ (value_type $I128) _ _) targets) (let ((flags ProducesFlags (flags_to_producesflags c)) (c ValueRegs (put_in_regs c)) (c_lo Reg (value_regs_get c 0)) (c_hi Reg (value_regs_get c 1)) (rt Reg (orr $I64 c_lo c_hi)) (taken BranchTarget (branch_target targets 0)) (not_taken BranchTarget (branch_target targets 1))) (emit_side_effect (with_flags_side_effect flags (cond_br taken not_taken (cond_br_not_zero rt)))))) (rule -2 (lower_branch (brif c @ (value_type ty) _ _) targets) (if (ty_int_ref_scalar_64 ty)) (let ((flags ProducesFlags (flags_to_producesflags c)) (rt Reg (put_in_reg_zext64 c)) (taken BranchTarget (branch_target targets 0)) (not_taken BranchTarget (branch_target targets 1))) (emit_side_effect (with_flags_side_effect flags (cond_br taken not_taken (cond_br_not_zero rt)))))) ;;; Rules for `jump` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower_branch (jump _) targets) (emit_side_effect (aarch64_jump (branch_target targets 0)))) ;;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `targets` contains the default target with the list of branch targets ;; concatenated. (rule (lower_branch (br_table idx _) targets) (let ((jt_size u32 (targets_jt_size targets)) (_ InstOutput (side_effect (emit_island (targets_jt_space targets)))) (ridx Reg (put_in_reg_zext32 idx))) (br_table_impl (u32_as_u64 jt_size) ridx targets)))
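;; A rough sketch of the code produced for `br_table` (details and register
;; names are illustrative, not taken verbatim from the emitter):
;;
;;   cmp   w_idx, #jt_size          ;; bounds-check the index
;;   b.hs  default_target           ;; out-of-range indices use the default
;;   csel  w_idx, wzr, w_idx, hs    ;; clamp the index (Spectre guard)
;;   csdb
;;   adr   x_base, <jump table>
;;   ldrsw x_off, [x_base, w_idx, uxtw #2]
;;   add   x_base, x_base, x_off
;;   br    x_base
;;
;; The table entries are 32-bit offsets emitted inline, which is why the rule
;; above asks `emit_island` for `targets_jt_space` bytes up front.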