//! This module defines aarch64-specific machine instruction types. use crate::binemit::{Addend, CodeOffset, Reloc}; use crate::ir::types::{F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64}; use crate::ir::{types, ExternalName, MemFlags, Opcode, Type}; use crate::isa::{CallConv, FunctionAlignment}; use crate::machinst::*; use crate::{settings, CodegenError, CodegenResult}; use crate::machinst::{PrettyPrint, Reg, RegClass, Writable}; use alloc::vec::Vec; use regalloc2::{PRegSet, VReg}; use smallvec::{smallvec, SmallVec}; use std::fmt::Write; use std::string::{String, ToString}; pub(crate) mod regs; pub(crate) use self::regs::*; pub mod imms; pub use self::imms::*; pub mod args; pub use self::args::*; pub mod emit; pub(crate) use self::emit::*; use crate::isa::aarch64::abi::AArch64MachineDeps; pub(crate) mod unwind; #[cfg(test)] mod emit_tests; //============================================================================= // Instructions (top level): definition pub use crate::isa::aarch64::lower::isle::generated_code::{ ALUOp, ALUOp3, AMode, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp, VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp, VecRRPairLongOp, VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp, }; /// A floating-point unit (FPU) operation with two args, a register and an immediate. #[derive(Copy, Clone, Debug)] pub enum FPUOpRI { /// Unsigned right shift. Rd = Rn << #imm UShr32(FPURightShiftImm), /// Unsigned right shift. Rd = Rn << #imm UShr64(FPURightShiftImm), } /// A floating-point unit (FPU) operation with two args, a register and /// an immediate that modifies its dest (so takes that input value as a /// separate virtual register). #[derive(Copy, Clone, Debug)] pub enum FPUOpRIMod { /// Shift left and insert. Rd |= Rn << #imm Sli32(FPULeftShiftImm), /// Shift left and insert. Rd |= Rn << #imm Sli64(FPULeftShiftImm), } impl BitOp { /// Get the assembly mnemonic for this opcode. pub fn op_str(&self) -> &'static str { match self { BitOp::RBit => "rbit", BitOp::Clz => "clz", BitOp::Cls => "cls", BitOp::Rev16 => "rev16", BitOp::Rev32 => "rev32", BitOp::Rev64 => "rev64", } } } /// Additional information for (direct) Call instructions, left out of line to lower the size of /// the Inst enum. #[derive(Clone, Debug)] pub struct CallInfo { /// Call destination. pub dest: ExternalName, /// Arguments to the call instruction. pub uses: CallArgList, /// Return values from the call instruction. pub defs: CallRetList, /// Clobbers register set. pub clobbers: PRegSet, /// Instruction opcode. pub opcode: Opcode, /// Caller calling convention. pub caller_callconv: CallConv, /// Callee calling convention. pub callee_callconv: CallConv, /// The number of bytes that the callee will pop from the stack for the /// caller, if any. (Used for popping stack arguments with the `tail` /// calling convention.) pub callee_pop_size: u32, } /// Additional information for CallInd instructions, left out of line to lower the size of the Inst /// enum. #[derive(Clone, Debug)] pub struct CallIndInfo { /// Function pointer for indirect call. pub rn: Reg, /// Arguments to the call instruction. pub uses: SmallVec<[CallArgPair; 8]>, /// Return values from the call instruction. pub defs: SmallVec<[CallRetPair; 8]>, /// Clobbers register set. pub clobbers: PRegSet, /// Instruction opcode. pub opcode: Opcode, /// Caller calling convention. 
pub caller_callconv: CallConv, /// Callee calling convention. pub callee_callconv: CallConv, /// The number of bytes that the callee will pop from the stack for the /// caller, if any. (Used for popping stack arguments with the `tail` /// calling convention.) pub callee_pop_size: u32, } /// Additional information for `return_call[_ind]` instructions, left out of /// line to lower the size of the `Inst` enum. #[derive(Clone, Debug)] pub struct ReturnCallInfo { /// Arguments to the call instruction. pub uses: CallArgList, /// Instruction opcode. pub opcode: Opcode, /// The size of the current/old stack frame's stack arguments. pub old_stack_arg_size: u32, /// The size of the new stack frame's stack arguments. This is necessary /// for copying the frame over our current frame. It must already be /// allocated on the stack. pub new_stack_arg_size: u32, /// API key to use to restore the return address, if any. pub key: Option<APIKey>, } fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize { let mut count = 0; for _ in 0..num_half_words { if value & 0xffff == 0 { count += 1; } value >>= 16; } count } #[test] fn inst_size_test() { // This test will help with unintentionally growing the size // of the Inst enum. assert_eq!(32, std::mem::size_of::<Inst>()); } impl Inst { /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN, /// logical immediate, or constant pool). pub fn load_constant<F: FnMut(Type) -> Writable<Reg>>( rd: Writable<Reg>, value: u64, alloc_tmp: &mut F, ) -> SmallVec<[Inst; 4]> { // NB: this is duplicated in `lower/isle.rs` and `inst.isle` right now, // if modifications are made here before this is deleted after moving to // ISLE then those locations should be updated as well. if let Some(imm) = MoveWideConst::maybe_from_u64(value) { // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ smallvec![Inst::MovWide { op: MoveWideOp::MovZ, rd, imm, size: OperandSize::Size64 }] } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) { // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN smallvec![Inst::MovWide { op: MoveWideOp::MovN, rd, imm, size: OperandSize::Size64 }] } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) { // Weird logical-instruction immediate in ORR using the zero register smallvec![Inst::AluRRImmLogic { alu_op: ALUOp::Orr, size: OperandSize::Size64, rd, rn: zero_reg(), imml, }] } else { let mut insts = smallvec![]; // If the top 32 bits are zero, use 32-bit `mov` operations. let (num_half_words, size, negated) = if value >> 32 == 0 { (2, OperandSize::Size32, (!value << 32) >> 32) } else { (4, OperandSize::Size64, !value) }; // If the number of 0xffff half words is greater than the number of 0x0000 half words, // it is more efficient to use `movn` for the first instruction. let first_is_inverted = count_zero_half_words(negated, num_half_words) > count_zero_half_words(value, num_half_words); // Either 0xffff or 0x0000 half words can be skipped, depending on the first // instruction used.
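// For example, 0x0012_3456_0000_9abc is not a single shifted half-word, its complement is not
// one either, and it is not a valid logical immediate, so it reaches this fallback; its zero
// half-word (bits 16..32) is skipped, giving one `movz` plus two `movk`s. By contrast,
// 0xffff_1234_ffff_5678 has more 0xffff half-words than 0x0000 half-words, so the sequence
// starts with `movn` and patches the remaining half-word with `movk`.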
let ignored_halfword = if first_is_inverted { 0xffff } else { 0 }; let halfwords: SmallVec<[_; 4]> = (0..num_half_words) .filter_map(|i| { let imm16 = (value >> (16 * i)) & 0xffff; if imm16 == ignored_halfword { None } else { Some((i, imm16)) } }) .collect(); let mut prev_result = None; let last_index = halfwords.last().unwrap().0; for (i, imm16) in halfwords { let shift = i * 16; let rd = if i == last_index { rd } else { alloc_tmp(I16) }; if let Some(rn) = prev_result { let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap(); insts.push(Inst::MovK { rd, rn, imm, size }); } else { if first_is_inverted { let imm = MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, shift) .unwrap(); insts.push(Inst::MovWide { op: MoveWideOp::MovN, rd, imm, size, }); } else { let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap(); insts.push(Inst::MovWide { op: MoveWideOp::MovZ, rd, imm, size, }); } } prev_result = Some(rd.to_reg()); } assert!(prev_result.is_some()); insts } } /// Generic constructor for a load (zero-extending where appropriate). pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst { match ty { I8 => Inst::ULoad8 { rd: into_reg, mem, flags, }, I16 => Inst::ULoad16 { rd: into_reg, mem, flags, }, I32 | R32 => Inst::ULoad32 { rd: into_reg, mem, flags, }, I64 | R64 => Inst::ULoad64 { rd: into_reg, mem, flags, }, F32 => Inst::FpuLoad32 { rd: into_reg, mem, flags, }, F64 => Inst::FpuLoad64 { rd: into_reg, mem, flags, }, _ => { if ty.is_vector() { let bits = ty_bits(ty); let rd = into_reg; if bits == 128 { Inst::FpuLoad128 { rd, mem, flags } } else { assert_eq!(bits, 64); Inst::FpuLoad64 { rd, mem, flags } } } else { unimplemented!("gen_load({})", ty); } } } } /// Generic constructor for a store. pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { match ty { I8 => Inst::Store8 { rd: from_reg, mem, flags, }, I16 => Inst::Store16 { rd: from_reg, mem, flags, }, I32 | R32 => Inst::Store32 { rd: from_reg, mem, flags, }, I64 | R64 => Inst::Store64 { rd: from_reg, mem, flags, }, F32 => Inst::FpuStore32 { rd: from_reg, mem, flags, }, F64 => Inst::FpuStore64 { rd: from_reg, mem, flags, }, _ => { if ty.is_vector() { let bits = ty_bits(ty); let rd = from_reg; if bits == 128 { Inst::FpuStore128 { rd, mem, flags } } else { assert_eq!(bits, 64); Inst::FpuStore64 { rd, mem, flags } } } else { unimplemented!("gen_store({})", ty); } } } } } //============================================================================= // Instructions: get_regs fn memarg_operands<F: Fn(VReg) -> VReg>(memarg: &AMode, collector: &mut OperandCollector<'_, F>) { // This should match `AMode::with_allocs()`. match memarg { &AMode::Unscaled { rn, .. } | &AMode::UnsignedOffset { rn, .. } => { collector.reg_use(rn); } &AMode::RegReg { rn, rm, .. } | &AMode::RegScaled { rn, rm, .. } | &AMode::RegScaledExtended { rn, rm, .. } | &AMode::RegExtended { rn, rm, .. } => { collector.reg_use(rn); collector.reg_use(rm); } &AMode::Label { .. } => {} &AMode::SPPreIndexed { .. } | &AMode::SPPostIndexed { .. } => {} &AMode::FPOffset { .. } => {} &AMode::SPOffset { .. } | &AMode::NominalSPOffset { .. } => {} &AMode::RegOffset { rn, .. } => { collector.reg_use(rn); } &AMode::Const { .. } => {} } } fn pairmemarg_operands<F: Fn(VReg) -> VReg>( pairmemarg: &PairAMode, collector: &mut OperandCollector<'_, F>, ) { // This should match `PairAMode::with_allocs()`. match pairmemarg { &PairAMode::SignedOffset { reg, .. } => { collector.reg_use(reg); } &PairAMode::SPPreIndexed { ..
} | &PairAMode::SPPostIndexed { .. } => {} } } fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) { match inst { &Inst::AluRRR { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::AluRRRR { rd, rn, rm, ra, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); collector.reg_use(ra); } &Inst::AluRRImm12 { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::AluRRImmLogic { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::AluRRImmShift { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::AluRRRShift { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::AluRRRExtend { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::BitRR { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::ULoad8 { rd, ref mem, .. } | &Inst::SLoad8 { rd, ref mem, .. } | &Inst::ULoad16 { rd, ref mem, .. } | &Inst::SLoad16 { rd, ref mem, .. } | &Inst::ULoad32 { rd, ref mem, .. } | &Inst::SLoad32 { rd, ref mem, .. } | &Inst::ULoad64 { rd, ref mem, .. } => { collector.reg_def(rd); memarg_operands(mem, collector); } &Inst::Store8 { rd, ref mem, .. } | &Inst::Store16 { rd, ref mem, .. } | &Inst::Store32 { rd, ref mem, .. } | &Inst::Store64 { rd, ref mem, .. } => { collector.reg_use(rd); memarg_operands(mem, collector); } &Inst::StoreP64 { rt, rt2, ref mem, .. } => { collector.reg_use(rt); collector.reg_use(rt2); pairmemarg_operands(mem, collector); } &Inst::LoadP64 { rt, rt2, ref mem, .. } => { collector.reg_def(rt); collector.reg_def(rt2); pairmemarg_operands(mem, collector); } &Inst::Mov { rd, rm, .. } => { collector.reg_def(rd); collector.reg_use(rm); } &Inst::MovFromPReg { rd, rm } => { debug_assert!(rd.to_reg().is_virtual()); collector.reg_def(rd); collector.reg_fixed_nonallocatable(rm); } &Inst::MovToPReg { rd, rm } => { debug_assert!(rm.is_virtual()); collector.reg_fixed_nonallocatable(rd); collector.reg_use(rm); } &Inst::MovK { rd, rn, .. } => { collector.reg_use(rn); collector.reg_reuse_def(rd, 0); // `rn` == `rd`. } &Inst::MovWide { rd, .. } => { collector.reg_def(rd); } &Inst::CSel { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::CSNeg { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => { collector.reg_def(rd); } &Inst::CCmp { rn, rm, .. } => { collector.reg_use(rn); collector.reg_use(rm); } &Inst::CCmpImm { rn, .. } => { collector.reg_use(rn); } &Inst::AtomicRMWLoop { op, addr, operand, oldval, scratch1, scratch2, .. } => { collector.reg_fixed_use(addr, xreg(25)); collector.reg_fixed_use(operand, xreg(26)); collector.reg_fixed_def(oldval, xreg(27)); collector.reg_fixed_def(scratch1, xreg(24)); if op != AtomicRMWLoopOp::Xchg { collector.reg_fixed_def(scratch2, xreg(28)); } } &Inst::AtomicRMW { rs, rt, rn, .. } => { collector.reg_use(rs); collector.reg_def(rt); collector.reg_use(rn); } &Inst::AtomicCAS { rd, rs, rt, rn, .. } => { collector.reg_reuse_def(rd, 1); // reuse `rs`. collector.reg_use(rs); collector.reg_use(rt); collector.reg_use(rn); } &Inst::AtomicCASLoop { addr, expected, replacement, oldval, scratch, ..
} => { collector.reg_fixed_use(addr, xreg(25)); collector.reg_fixed_use(expected, xreg(26)); collector.reg_fixed_use(replacement, xreg(28)); collector.reg_fixed_def(oldval, xreg(27)); collector.reg_fixed_def(scratch, xreg(24)); } &Inst::LoadAcquire { rt, rn, .. } => { collector.reg_use(rn); collector.reg_def(rt); } &Inst::StoreRelease { rt, rn, .. } => { collector.reg_use(rn); collector.reg_use(rt); } &Inst::Fence {} | &Inst::Csdb {} => {} &Inst::FpuMove64 { rd, rn } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::FpuMove128 { rd, rn } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::FpuMoveFromVec { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::FpuExtend { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::FpuRR { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::FpuRRR { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::FpuRRI { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::FpuRRIMod { rd, ri, rn, .. } => { collector.reg_reuse_def(rd, 1); // reuse `ri`. collector.reg_use(ri); collector.reg_use(rn); } &Inst::FpuRRRR { rd, rn, rm, ra, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); collector.reg_use(ra); } &Inst::VecMisc { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecLanes { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecShiftImm { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecShiftImmMod { rd, ri, rn, .. } => { collector.reg_reuse_def(rd, 1); // `rd` == `ri`. collector.reg_use(ri); collector.reg_use(rn); } &Inst::VecExtract { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::VecTbl { rd, rn, rm } => { collector.reg_use(rn); collector.reg_use(rm); collector.reg_def(rd); } &Inst::VecTblExt { rd, ri, rn, rm } => { collector.reg_use(rn); collector.reg_use(rm); collector.reg_reuse_def(rd, 3); // `rd` == `ri`. collector.reg_use(ri); } &Inst::VecTbl2 { rd, rn, rn2, rm } => { // Constrain to v30 / v31 so that we satisfy the "adjacent // registers" constraint without use of pinned vregs in // lowering. collector.reg_fixed_use(rn, vreg(30)); collector.reg_fixed_use(rn2, vreg(31)); collector.reg_use(rm); collector.reg_def(rd); } &Inst::VecTbl2Ext { rd, ri, rn, rn2, rm, } => { // Constrain to v30 / v31 so that we satisfy the "adjacent // registers" constraint without use of pinned vregs in // lowering. collector.reg_fixed_use(rn, vreg(30)); collector.reg_fixed_use(rn2, vreg(31)); collector.reg_use(rm); collector.reg_reuse_def(rd, 4); // `rd` == `ri`. collector.reg_use(ri); } &Inst::VecLoadReplicate { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecCSel { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::FpuCmp { rn, rm, .. } => { collector.reg_use(rn); collector.reg_use(rm); } &Inst::FpuLoad32 { rd, ref mem, .. } => { collector.reg_def(rd); memarg_operands(mem, collector); } &Inst::FpuLoad64 { rd, ref mem, .. } => { collector.reg_def(rd); memarg_operands(mem, collector); } &Inst::FpuLoad128 { rd, ref mem, .. } => { collector.reg_def(rd); memarg_operands(mem, collector); } &Inst::FpuStore32 { rd, ref mem, .. } => { collector.reg_use(rd); memarg_operands(mem, collector); } &Inst::FpuStore64 { rd, ref mem, .. 
} => { collector.reg_use(rd); memarg_operands(mem, collector); } &Inst::FpuStore128 { rd, ref mem, .. } => { collector.reg_use(rd); memarg_operands(mem, collector); } &Inst::FpuLoadP64 { rt, rt2, ref mem, .. } => { collector.reg_def(rt); collector.reg_def(rt2); pairmemarg_operands(mem, collector); } &Inst::FpuStoreP64 { rt, rt2, ref mem, .. } => { collector.reg_use(rt); collector.reg_use(rt2); pairmemarg_operands(mem, collector); } &Inst::FpuLoadP128 { rt, rt2, ref mem, .. } => { collector.reg_def(rt); collector.reg_def(rt2); pairmemarg_operands(mem, collector); } &Inst::FpuStoreP128 { rt, rt2, ref mem, .. } => { collector.reg_use(rt); collector.reg_use(rt2); pairmemarg_operands(mem, collector); } &Inst::FpuToInt { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::IntToFpu { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::FpuRound { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::MovToFpu { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::FpuMoveFPImm { rd, .. } => { collector.reg_def(rd); } &Inst::MovToVec { rd, ri, rn, .. } => { collector.reg_reuse_def(rd, 1); // `rd` == `ri`. collector.reg_use(ri); collector.reg_use(rn); } &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecDup { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecDupFromFpu { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecDupFPImm { rd, .. } => { collector.reg_def(rd); } &Inst::VecDupImm { rd, .. } => { collector.reg_def(rd); } &Inst::VecExtend { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecMovElement { rd, ri, rn, .. } => { collector.reg_reuse_def(rd, 1); // `rd` == `ri`. collector.reg_use(ri); collector.reg_use(rn); } &Inst::VecRRLong { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecRRNarrowLow { rd, rn, .. } => { collector.reg_use(rn); collector.reg_def(rd); } &Inst::VecRRNarrowHigh { rd, ri, rn, .. } => { collector.reg_use(rn); collector.reg_reuse_def(rd, 2); // `rd` == `ri`. collector.reg_use(ri); } &Inst::VecRRPair { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecRRRLong { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => { collector.reg_reuse_def(rd, 1); // `rd` == `ri`. collector.reg_use(ri); collector.reg_use(rn); collector.reg_use(rm); } &Inst::VecRRPairLong { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::VecRRR { rd, rn, rm, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } &Inst::VecRRRMod { rd, ri, rn, rm, .. } | &Inst::VecFmlaElem { rd, ri, rn, rm, .. } => { collector.reg_reuse_def(rd, 1); // `rd` == `ri`. collector.reg_use(ri); collector.reg_use(rn); collector.reg_use(rm); } &Inst::MovToNZCV { rn } => { collector.reg_use(rn); } &Inst::MovFromNZCV { rd } => { collector.reg_def(rd); } &Inst::Extend { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } &Inst::Args { ref args } => { for arg in args { collector.reg_fixed_def(arg.vreg, arg.preg); } } &Inst::Rets { ref rets } => { for ret in rets { collector.reg_fixed_use(ret.vreg, ret.preg); } } &Inst::Ret { .. 
} | &Inst::AuthenticatedRet { .. } => {} &Inst::Jump { .. } => {} &Inst::Call { ref info, .. } => { for u in &info.uses { collector.reg_fixed_use(u.vreg, u.preg); } for d in &info.defs { collector.reg_fixed_def(d.vreg, d.preg); } collector.reg_clobbers(info.clobbers); } &Inst::CallInd { ref info, .. } => { if info.callee_callconv == CallConv::Tail { // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): // This shouldn't be a fixed register constraint. collector.reg_fixed_use(info.rn, xreg(1)); } else { collector.reg_use(info.rn); } for u in &info.uses { collector.reg_fixed_use(u.vreg, u.preg); } for d in &info.defs { collector.reg_fixed_def(d.vreg, d.preg); } collector.reg_clobbers(info.clobbers); } &Inst::ReturnCall { ref info, callee: _, } => { for u in &info.uses { collector.reg_fixed_use(u.vreg, u.preg); } } &Inst::ReturnCallInd { ref info, callee } => { collector.reg_use(callee); for u in &info.uses { collector.reg_fixed_use(u.vreg, u.preg); } } &Inst::CondBr { ref kind, .. } => match kind { CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { collector.reg_use(*rt); } CondBrKind::Cond(_) => {} }, &Inst::TestBitAndBranch { rn, .. } => { collector.reg_use(rn); } &Inst::IndirectBr { rn, .. } => { collector.reg_use(rn); } &Inst::Nop0 | Inst::Nop4 => {} &Inst::Brk => {} &Inst::Udf { .. } => {} &Inst::TrapIf { ref kind, .. } => match kind { CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { collector.reg_use(*rt); } CondBrKind::Cond(_) => {} }, &Inst::Adr { rd, .. } | &Inst::Adrp { rd, .. } => { collector.reg_def(rd); } &Inst::Word4 { .. } | &Inst::Word8 { .. } => {} &Inst::JTSequence { ridx, rtmp1, rtmp2, .. } => { collector.reg_use(ridx); collector.reg_early_def(rtmp1); collector.reg_early_def(rtmp2); } &Inst::LoadExtName { rd, .. } => { collector.reg_def(rd); } &Inst::LoadAddr { rd, ref mem } => { collector.reg_def(rd); memarg_operands(mem, collector); } &Inst::Paci { .. } | &Inst::Xpaclri => { // Neither LR nor SP is an allocatable register, so there is no need // to do anything. } &Inst::Bti { .. } => {} &Inst::VirtualSPOffsetAdj { .. } => {} &Inst::ElfTlsGetAddr { rd, tmp, .. } => { // TLSDESC has a very neat calling convention. It is required to preserve // all registers except x0 and x30. X30 is non allocatable in cranelift since // its the link register. // // Additionally we need a second register as a temporary register for the // TLSDESC sequence. This register can be any register other than x0 (and x30). collector.reg_fixed_def(rd, regs::xreg(0)); collector.reg_early_def(tmp); } &Inst::MachOTlsGetAddr { rd, .. } => { collector.reg_fixed_def(rd, regs::xreg(0)); let mut clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::AppleAarch64); clobbers.remove(regs::xreg_preg(0)); collector.reg_clobbers(clobbers); } &Inst::Unwind { .. } => {} &Inst::EmitIsland { .. } => {} &Inst::DummyUse { reg } => { collector.reg_use(reg); } &Inst::StackProbeLoop { start, end, .. } => { collector.reg_early_def(start); collector.reg_use(end); } } } //============================================================================= // Instructions: misc functions and external interface impl MachInst for Inst { type ABIMachineSpec = AArch64MachineDeps; type LabelUse = LabelUse; // "CLIF" in hex, to make the trap recognizable during // debugging. 
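// Emitted little-endian, this is the 4-byte sequence `1f c1 00 00`, matching the fixed
// 4-byte width of AArch64 instructions.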
const TRAP_OPCODE: &'static [u8] = &0xc11f_u32.to_le_bytes(); fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) { aarch64_get_operands(self, collector); } fn is_move(&self) -> Option<(Writable<Reg>, Reg)> { match self { &Inst::Mov { size: OperandSize::Size64, rd, rm, } => Some((rd, rm)), &Inst::FpuMove64 { rd, rn } => Some((rd, rn)), &Inst::FpuMove128 { rd, rn } => Some((rd, rn)), _ => None, } } fn is_included_in_clobbers(&self) -> bool { let (caller_callconv, callee_callconv) = match self { Inst::Args { .. } => return false, Inst::Call { info } => (info.caller_callconv, info.callee_callconv), Inst::CallInd { info } => (info.caller_callconv, info.callee_callconv), _ => return true, }; // We exclude call instructions from the clobber-set when caller and // callee clobber the same set of registers (e.g. because they use the // same or similar ABIs). Such calls cannot possibly force any // new registers to be saved in the prologue, because anything that the // callee clobbers, the caller is also allowed to clobber. This both // saves work and enables us to more precisely follow the // half-caller-save, half-callee-save SysV ABI for some vector // registers. // // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for // more information on this ABI-implementation hack. let caller_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(caller_callconv); let callee_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(callee_callconv); let mut all_clobbers = caller_clobbers; all_clobbers.union_from(callee_clobbers); all_clobbers != caller_clobbers } fn is_trap(&self) -> bool { match self { Self::Udf { .. } => true, _ => false, } } fn is_args(&self) -> bool { match self { Self::Args { .. } => true, _ => false, } } fn is_term(&self) -> MachTerminator { match self { &Inst::Rets { .. } => MachTerminator::Ret, &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall, &Inst::Jump { .. } => MachTerminator::Uncond, &Inst::CondBr { .. } => MachTerminator::Cond, &Inst::TestBitAndBranch { .. } => MachTerminator::Cond, &Inst::IndirectBr { .. } => MachTerminator::Indirect, &Inst::JTSequence { .. } => MachTerminator::Indirect, _ => MachTerminator::None, } } fn is_mem_access(&self) -> bool { match self { &Inst::ULoad8 { .. } | &Inst::SLoad8 { .. } | &Inst::ULoad16 { .. } | &Inst::SLoad16 { .. } | &Inst::ULoad32 { .. } | &Inst::SLoad32 { .. } | &Inst::ULoad64 { .. } | &Inst::LoadP64 { .. } | &Inst::FpuLoad32 { .. } | &Inst::FpuLoad64 { .. } | &Inst::FpuLoad128 { .. } | &Inst::FpuLoadP64 { .. } | &Inst::FpuLoadP128 { .. } | &Inst::Store8 { .. } | &Inst::Store16 { .. } | &Inst::Store32 { .. } | &Inst::Store64 { .. } | &Inst::StoreP64 { .. } | &Inst::FpuStore32 { .. } | &Inst::FpuStore64 { .. } | &Inst::FpuStore128 { .. } => true, // TODO: verify this carefully _ => false, } } fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst { let bits = ty.bits(); assert!(bits <= 128); assert!(to_reg.to_reg().class() == from_reg.class()); match from_reg.class() { RegClass::Int => Inst::Mov { size: OperandSize::Size64, rd: to_reg, rm: from_reg, }, RegClass::Float => { if bits > 64 { Inst::FpuMove128 { rd: to_reg, rn: from_reg, } } else { Inst::FpuMove64 { rd: to_reg, rn: from_reg, } } } RegClass::Vector => unreachable!(), } } fn is_safepoint(&self) -> bool { match self { &Inst::Call { .. } | &Inst::CallInd { .. } | &Inst::TrapIf { .. } | &Inst::Udf { ..
} => true, _ => false, } } fn gen_dummy_use(reg: Reg) -> Inst { Inst::DummyUse { reg } } fn gen_nop(preferred_size: usize) -> Inst { if preferred_size == 0 { return Inst::Nop0; } // We can't give a NOP (or any insn) < 4 bytes. assert!(preferred_size >= 4); Inst::Nop4 } fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { match ty { I8 => Ok((&[RegClass::Int], &[I8])), I16 => Ok((&[RegClass::Int], &[I16])), I32 => Ok((&[RegClass::Int], &[I32])), I64 => Ok((&[RegClass::Int], &[I64])), R32 => panic!("32-bit reftype pointer should never be seen on AArch64"), R64 => Ok((&[RegClass::Int], &[R64])), F32 => Ok((&[RegClass::Float], &[F32])), F64 => Ok((&[RegClass::Float], &[F64])), I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), _ if ty.is_vector() => { assert!(ty.bits() <= 128); Ok((&[RegClass::Float], &[I8X16])) } _ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])), _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", ty ))), } } fn canonical_type_for_rc(rc: RegClass) -> Type { match rc { RegClass::Float => types::I8X16, RegClass::Int => types::I64, RegClass::Vector => unreachable!(), } } fn gen_jump(target: MachLabel) -> Inst { Inst::Jump { dest: BranchTarget::Label(target), } } fn worst_case_size() -> CodeOffset { // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of // an 8-instruction sequence (saturating int-to-float conversions) with three embedded // 64-bit f64 constants. // // Note that inline jump-tables handle island/pool insertion separately, so we do not need // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not // feasible for other reasons). 44 } fn ref_type_regclass(_: &settings::Flags) -> RegClass { RegClass::Int } fn gen_block_start( is_indirect_branch_target: bool, is_forward_edge_cfi_enabled: bool, ) -> Option<Inst> { if is_indirect_branch_target && is_forward_edge_cfi_enabled { Some(Inst::Bti { targets: BranchTargetType::J, }) } else { None } } fn function_alignment() -> FunctionAlignment { // We use 32-byte alignment for performance reasons, but for correctness // we would only need 4-byte alignment. FunctionAlignment { minimum: 4, preferred: 32, } } } //============================================================================= // Pretty-printing of instructions.
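// Illustrative sketch of the `is_included_in_clobbers` rule above, using plain `u64` bit
// masks as a stand-in for `regalloc2::PRegSet` (an assumption made only for this example):
// a call stays out of the clobber set exactly when the callee's clobbers add nothing to the
// caller's own clobbers, i.e. when the union of the two sets equals the caller's set.
#[cfg(test)]
mod clobber_union_sketch {
    /// Mirrors `all_clobbers != caller_clobbers` from `is_included_in_clobbers`.
    fn call_included_in_clobbers(caller_clobbers: u64, callee_clobbers: u64) -> bool {
        (caller_clobbers | callee_clobbers) != caller_clobbers
    }

    #[test]
    fn same_or_smaller_callee_clobber_set_is_excluded() {
        // Callee clobbers a subset of what the caller already clobbers: excluded.
        assert!(!call_included_in_clobbers(0b1111, 0b0101));
        // Callee clobbers a register the caller preserves: must be included.
        assert!(call_included_in_clobbers(0b1111, 0b1_0000));
    }
}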
fn mem_finalize_for_show(mem: &AMode, state: &EmitState) -> (String, AMode) { let (mem_insts, mem) = mem_finalize(None, mem, state); let mut mem_str = mem_insts .into_iter() .map(|inst| { inst.print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])) }) .collect::<Vec<_>>() .join(" ; "); if !mem_str.is_empty() { mem_str += " ; "; } (mem_str, mem) } impl Inst { fn print_with_state(&self, state: &mut EmitState, allocs: &mut AllocationConsumer) -> String { let mut empty_allocs = AllocationConsumer::default(); fn op_name(alu_op: ALUOp) -> &'static str { match alu_op { ALUOp::Add => "add", ALUOp::Sub => "sub", ALUOp::Orr => "orr", ALUOp::And => "and", ALUOp::AndS => "ands", ALUOp::Eor => "eor", ALUOp::AddS => "adds", ALUOp::SubS => "subs", ALUOp::SMulH => "smulh", ALUOp::UMulH => "umulh", ALUOp::SDiv => "sdiv", ALUOp::UDiv => "udiv", ALUOp::AndNot => "bic", ALUOp::OrrNot => "orn", ALUOp::EorNot => "eon", ALUOp::RotR => "ror", ALUOp::Lsr => "lsr", ALUOp::Asr => "asr", ALUOp::Lsl => "lsl", ALUOp::Adc => "adc", ALUOp::AdcS => "adcs", ALUOp::Sbc => "sbc", ALUOp::SbcS => "sbcs", } } // N.B.: order of `allocs` consumption (via register // pretty-printing or memarg.with_allocs()) needs to match the // order in `aarch64_get_operands` above. match self { &Inst::Nop0 => "nop-zero-len".to_string(), &Inst::Nop4 => "nop".to_string(), &Inst::AluRRR { alu_op, size, rd, rn, rm, } => { let op = op_name(alu_op); let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let rn = pretty_print_ireg(rn, size, allocs); let rm = pretty_print_ireg(rm, size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::AluRRRR { alu_op, size, rd, rn, rm, ra, } => { let (op, da_size) = match alu_op { ALUOp3::MAdd => ("madd", size), ALUOp3::MSub => ("msub", size), ALUOp3::UMAddL => ("umaddl", OperandSize::Size64), ALUOp3::SMAddL => ("smaddl", OperandSize::Size64), }; let rd = pretty_print_ireg(rd.to_reg(), da_size, allocs); let rn = pretty_print_ireg(rn, size, allocs); let rm = pretty_print_ireg(rm, size, allocs); let ra = pretty_print_ireg(ra, da_size, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) } &Inst::AluRRImm12 { alu_op, size, rd, rn, ref imm12, } => { let op = op_name(alu_op); let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let rn = pretty_print_ireg(rn, size, allocs); if imm12.bits == 0 && alu_op == ALUOp::Add && size.is64() { // special-case MOV (used for moving into SP).
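// (The 64-bit `add` with a zero immediate is the architectural alias for `mov` to/from SP;
// the `orr`-based register `mov` cannot name SP, so this form is printed instead.)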
format!("mov {}, {}", rd, rn) } else { let imm12 = imm12.pretty_print(0, allocs); format!("{} {}, {}, {}", op, rd, rn, imm12) } } &Inst::AluRRImmLogic { alu_op, size, rd, rn, ref imml, } => { let op = op_name(alu_op); let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let rn = pretty_print_ireg(rn, size, allocs); let imml = imml.pretty_print(0, allocs); format!("{} {}, {}, {}", op, rd, rn, imml) } &Inst::AluRRImmShift { alu_op, size, rd, rn, ref immshift, } => { let op = op_name(alu_op); let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let rn = pretty_print_ireg(rn, size, allocs); let immshift = immshift.pretty_print(0, allocs); format!("{} {}, {}, {}", op, rd, rn, immshift) } &Inst::AluRRRShift { alu_op, size, rd, rn, rm, ref shiftop, } => { let op = op_name(alu_op); let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let rn = pretty_print_ireg(rn, size, allocs); let rm = pretty_print_ireg(rm, size, allocs); let shiftop = shiftop.pretty_print(0, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop) } &Inst::AluRRRExtend { alu_op, size, rd, rn, rm, ref extendop, } => { let op = op_name(alu_op); let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let rn = pretty_print_ireg(rn, size, allocs); let rm = pretty_print_ireg(rm, size, allocs); let extendop = extendop.pretty_print(0, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop) } &Inst::BitRR { op, size, rd, rn } => { let op = op.op_str(); let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let rn = pretty_print_ireg(rn, size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::ULoad8 { rd, ref mem, .. } | &Inst::SLoad8 { rd, ref mem, .. } | &Inst::ULoad16 { rd, ref mem, .. } | &Inst::SLoad16 { rd, ref mem, .. } | &Inst::ULoad32 { rd, ref mem, .. } | &Inst::SLoad32 { rd, ref mem, .. } | &Inst::ULoad64 { rd, ref mem, .. } => { let is_unscaled = match &mem { &AMode::Unscaled { .. } => true, _ => false, }; let (op, size) = match (self, is_unscaled) { (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32), (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32), (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64), (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64), (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32), (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32), (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64), (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64), (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32), (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32), (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64), (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64), (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64), (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64), _ => unreachable!(), }; let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show(&mem, state); let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, op, rd, mem) } &Inst::Store8 { rd, ref mem, .. } | &Inst::Store16 { rd, ref mem, .. } | &Inst::Store32 { rd, ref mem, .. } | &Inst::Store64 { rd, ref mem, .. } => { let is_unscaled = match &mem { &AMode::Unscaled { .. } => true, _ => false, }; let (op, size) = match (self, is_unscaled) { (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32), (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32), (&Inst::Store16 { .. 
}, false) => ("strh", OperandSize::Size32), (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32), (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32), (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32), (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64), (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64), _ => unreachable!(), }; let rd = pretty_print_ireg(rd, size, allocs); let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show(&mem, state); let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, op, rd, mem) } &Inst::StoreP64 { rt, rt2, ref mem, .. } => { let rt = pretty_print_ireg(rt, OperandSize::Size64, allocs); let rt2 = pretty_print_ireg(rt2, OperandSize::Size64, allocs); let mem = mem.with_allocs(allocs); let mem = mem.pretty_print_default(); format!("stp {}, {}, {}", rt, rt2, mem) } &Inst::LoadP64 { rt, rt2, ref mem, .. } => { let rt = pretty_print_ireg(rt.to_reg(), OperandSize::Size64, allocs); let rt2 = pretty_print_ireg(rt2.to_reg(), OperandSize::Size64, allocs); let mem = mem.with_allocs(allocs); let mem = mem.pretty_print_default(); format!("ldp {}, {}, {}", rt, rt2, mem) } &Inst::Mov { size, rd, rm } => { let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let rm = pretty_print_ireg(rm, size, allocs); format!("mov {}, {}", rd, rm) } &Inst::MovFromPReg { rd, rm } => { let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); allocs.next_fixed_nonallocatable(rm); let rm = show_ireg_sized(rm.into(), OperandSize::Size64); format!("mov {}, {}", rd, rm) } &Inst::MovToPReg { rd, rm } => { allocs.next_fixed_nonallocatable(rd); let rd = show_ireg_sized(rd.into(), OperandSize::Size64); let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs); format!("mov {}, {}", rd, rm) } &Inst::MovWide { op, rd, ref imm, size, } => { let op_str = match op { MoveWideOp::MovZ => "movz", MoveWideOp::MovN => "movn", }; let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let imm = imm.pretty_print(0, allocs); format!("{} {}, {}", op_str, rd, imm) } &Inst::MovK { rd, rn, ref imm, size, } => { let rn = pretty_print_ireg(rn, size, allocs); let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let imm = imm.pretty_print(0, allocs); format!("movk {}, {}, {}", rd, rn, imm) } &Inst::CSel { rd, rn, rm, cond } => { let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs); let cond = cond.pretty_print(0, allocs); format!("csel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::CSNeg { rd, rn, rm, cond } => { let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs); let cond = cond.pretty_print(0, allocs); format!("csneg {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::CSet { rd, cond } => { let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); let cond = cond.pretty_print(0, allocs); format!("cset {}, {}", rd, cond) } &Inst::CSetm { rd, cond } => { let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); let cond = cond.pretty_print(0, allocs); format!("csetm {}, {}", rd, cond) } &Inst::CCmp { size, rn, rm, nzcv, cond, } => { let rn = pretty_print_ireg(rn, size, allocs); let rm = pretty_print_ireg(rm, size, allocs); let nzcv = nzcv.pretty_print(0, allocs); let cond = cond.pretty_print(0, allocs); format!("ccmp {}, {}, 
{}, {}", rn, rm, nzcv, cond) } &Inst::CCmpImm { size, rn, imm, nzcv, cond, } => { let rn = pretty_print_ireg(rn, size, allocs); let imm = imm.pretty_print(0, allocs); let nzcv = nzcv.pretty_print(0, allocs); let cond = cond.pretty_print(0, allocs); format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond) } &Inst::AtomicRMW { rs, rt, rn, ty, op, .. } => { let op = match op { AtomicRMWOp::Add => "ldaddal", AtomicRMWOp::Clr => "ldclral", AtomicRMWOp::Eor => "ldeoral", AtomicRMWOp::Set => "ldsetal", AtomicRMWOp::Smax => "ldsmaxal", AtomicRMWOp::Umax => "ldumaxal", AtomicRMWOp::Smin => "ldsminal", AtomicRMWOp::Umin => "lduminal", AtomicRMWOp::Swp => "swpal", }; let size = OperandSize::from_ty(ty); let rs = pretty_print_ireg(rs, size, allocs); let rt = pretty_print_ireg(rt.to_reg(), size, allocs); let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); let ty_suffix = match ty { I8 => "b", I16 => "h", _ => "", }; format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn) } &Inst::AtomicRMWLoop { ty, op, addr, operand, oldval, scratch1, scratch2, .. } => { let op = match op { AtomicRMWLoopOp::Add => "add", AtomicRMWLoopOp::Sub => "sub", AtomicRMWLoopOp::Eor => "eor", AtomicRMWLoopOp::Orr => "orr", AtomicRMWLoopOp::And => "and", AtomicRMWLoopOp::Nand => "nand", AtomicRMWLoopOp::Smin => "smin", AtomicRMWLoopOp::Smax => "smax", AtomicRMWLoopOp::Umin => "umin", AtomicRMWLoopOp::Umax => "umax", AtomicRMWLoopOp::Xchg => "xchg", }; let addr = pretty_print_ireg(addr, OperandSize::Size64, allocs); let operand = pretty_print_ireg(operand, OperandSize::Size64, allocs); let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64, allocs); let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64, allocs); let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64, allocs); format!( "atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}", op, ty.bits(), addr, operand, oldval, scratch1, scratch2, ) } &Inst::AtomicCAS { rd, rs, rt, rn, ty, .. } => { let op = match ty { I8 => "casalb", I16 => "casalh", I32 | I64 => "casal", _ => panic!("Unsupported type: {}", ty), }; let size = OperandSize::from_ty(ty); let rd = pretty_print_ireg(rd.to_reg(), size, allocs); let rs = pretty_print_ireg(rs, size, allocs); let rt = pretty_print_ireg(rt, size, allocs); let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); format!("{} {}, {}, {}, [{}]", op, rd, rs, rt, rn) } &Inst::AtomicCASLoop { ty, addr, expected, replacement, oldval, scratch, .. } => { let addr = pretty_print_ireg(addr, OperandSize::Size64, allocs); let expected = pretty_print_ireg(expected, OperandSize::Size64, allocs); let replacement = pretty_print_ireg(replacement, OperandSize::Size64, allocs); let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64, allocs); let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64, allocs); format!( "atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}", ty.bits(), addr, expected, replacement, oldval, scratch, ) } &Inst::LoadAcquire { access_ty, rt, rn, .. } => { let (op, ty) = match access_ty { I8 => ("ldarb", I32), I16 => ("ldarh", I32), I32 => ("ldar", I32), I64 => ("ldar", I64), _ => panic!("Unsupported type: {}", access_ty), }; let size = OperandSize::from_ty(ty); let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); let rt = pretty_print_ireg(rt.to_reg(), size, allocs); format!("{} {}, [{}]", op, rt, rn) } &Inst::StoreRelease { access_ty, rt, rn, .. 
} => { let (op, ty) = match access_ty { I8 => ("stlrb", I32), I16 => ("stlrh", I32), I32 => ("stlr", I32), I64 => ("stlr", I64), _ => panic!("Unsupported type: {}", access_ty), }; let size = OperandSize::from_ty(ty); let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); let rt = pretty_print_ireg(rt, size, allocs); format!("{} {}, [{}]", op, rt, rn) } &Inst::Fence {} => { format!("dmb ish") } &Inst::Csdb {} => { format!("csdb") } &Inst::FpuMove64 { rd, rn } => { let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs); format!("fmov {}, {}", rd, rn) } &Inst::FpuMove128 { rd, rn } => { let rd = pretty_print_reg(rd.to_reg(), allocs); let rn = pretty_print_reg(rn, allocs); format!("mov {}.16b, {}.16b", rd, rn) } &Inst::FpuMoveFromVec { rd, rn, idx, size } => { let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size(), allocs); let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size(), allocs); format!("mov {}, {}", rd, rn) } &Inst::FpuExtend { rd, rn, size } => { let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); let rn = pretty_print_vreg_scalar(rn, size, allocs); format!("fmov {}, {}", rd, rn) } &Inst::FpuRR { fpu_op, size, rd, rn, } => { let op = match fpu_op { FPUOp1::Abs => "fabs", FPUOp1::Neg => "fneg", FPUOp1::Sqrt => "fsqrt", FPUOp1::Cvt32To64 | FPUOp1::Cvt64To32 => "fcvt", }; let dst_size = match fpu_op { FPUOp1::Cvt32To64 => ScalarSize::Size64, FPUOp1::Cvt64To32 => ScalarSize::Size32, _ => size, }; let rd = pretty_print_vreg_scalar(rd.to_reg(), dst_size, allocs); let rn = pretty_print_vreg_scalar(rn, size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::FpuRRR { fpu_op, size, rd, rn, rm, } => { let op = match fpu_op { FPUOp2::Add => "fadd", FPUOp2::Sub => "fsub", FPUOp2::Mul => "fmul", FPUOp2::Div => "fdiv", FPUOp2::Max => "fmax", FPUOp2::Min => "fmin", }; let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); let rn = pretty_print_vreg_scalar(rn, size, allocs); let rm = pretty_print_vreg_scalar(rm, size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::FpuRRI { fpu_op, rd, rn } => { let (op, imm, vector) = match fpu_op { FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0, allocs), true), FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0, allocs), false), }; let (rd, rn) = if vector { ( pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2, allocs), pretty_print_vreg_vector(rn, VectorSize::Size32x2, allocs), ) } else { ( pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs), pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs), ) }; format!("{} {}, {}, {}", op, rd, rn, imm) } &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => { let (op, imm, vector) = match fpu_op { FPUOpRIMod::Sli32(imm) => ("sli", imm.pretty_print(0, allocs), true), FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0, allocs), false), }; let (rd, ri, rn) = if vector { ( pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2, allocs), pretty_print_vreg_vector(ri, VectorSize::Size32x2, allocs), pretty_print_vreg_vector(rn, VectorSize::Size32x2, allocs), ) } else { ( pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs), pretty_print_vreg_scalar(ri, ScalarSize::Size64, allocs), pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs), ) }; format!("{} {}, {}, {}, {}", op, rd, ri, rn, imm) } &Inst::FpuRRRR { fpu_op, size, rd, rn, rm, ra, } => { let op = match fpu_op { FPUOp3::MAdd => "fmadd", }; let rd = pretty_print_vreg_scalar(rd.to_reg(), size, 
allocs); let rn = pretty_print_vreg_scalar(rn, size, allocs); let rm = pretty_print_vreg_scalar(rm, size, allocs); let ra = pretty_print_vreg_scalar(ra, size, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) } &Inst::FpuCmp { size, rn, rm } => { let rn = pretty_print_vreg_scalar(rn, size, allocs); let rm = pretty_print_vreg_scalar(rm, size, allocs); format!("fcmp {}, {}", rn, rm) } &Inst::FpuLoad32 { rd, ref mem, .. } => { let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32, allocs); let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show(&mem, state); let mem = mem.pretty_print_default(); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuLoad64 { rd, ref mem, .. } => { let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show(&mem, state); let mem = mem.pretty_print_default(); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuLoad128 { rd, ref mem, .. } => { let rd = pretty_print_reg(rd.to_reg(), allocs); let rd = "q".to_string() + &rd[1..]; let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show(&mem, state); let mem = mem.pretty_print_default(); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuStore32 { rd, ref mem, .. } => { let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32, allocs); let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show(&mem, state); let mem = mem.pretty_print_default(); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuStore64 { rd, ref mem, .. } => { let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size64, allocs); let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show(&mem, state); let mem = mem.pretty_print_default(); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuStore128 { rd, ref mem, .. } => { let rd = pretty_print_reg(rd, allocs); let rd = "q".to_string() + &rd[1..]; let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show(&mem, state); let mem = mem.pretty_print_default(); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuLoadP64 { rt, rt2, ref mem, .. } => { let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size64, allocs); let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size64, allocs); let mem = mem.with_allocs(allocs); let mem = mem.pretty_print_default(); format!("ldp {}, {}, {}", rt, rt2, mem) } &Inst::FpuStoreP64 { rt, rt2, ref mem, .. } => { let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size64, allocs); let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size64, allocs); let mem = mem.with_allocs(allocs); let mem = mem.pretty_print_default(); format!("stp {}, {}, {}", rt, rt2, mem) } &Inst::FpuLoadP128 { rt, rt2, ref mem, .. } => { let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size128, allocs); let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size128, allocs); let mem = mem.with_allocs(allocs); let mem = mem.pretty_print_default(); format!("ldp {}, {}, {}", rt, rt2, mem) } &Inst::FpuStoreP128 { rt, rt2, ref mem, .. 
} => { let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size128, allocs); let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size128, allocs); let mem = mem.with_allocs(allocs); let mem = mem.pretty_print_default(); format!("stp {}, {}, {}", rt, rt2, mem) } &Inst::FpuToInt { op, rd, rn } => { let (op, sizesrc, sizedest) = match op { FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32), FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32), FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64), FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64), FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32), FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32), FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64), FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64), }; let rd = pretty_print_ireg(rd.to_reg(), sizedest, allocs); let rn = pretty_print_vreg_scalar(rn, sizesrc, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::IntToFpu { op, rd, rn } => { let (op, sizesrc, sizedest) = match op { IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32), IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32), IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32), IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32), IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64), IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64), IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64), IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64), }; let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest, allocs); let rn = pretty_print_ireg(rn, sizesrc, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::FpuCSel32 { rd, rn, rm, cond } => { let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32, allocs); let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32, allocs); let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32, allocs); let cond = cond.pretty_print(0, allocs); format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::FpuCSel64 { rd, rn, rm, cond } => { let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs); let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64, allocs); let cond = cond.pretty_print(0, allocs); format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::FpuRound { op, rd, rn } => { let (inst, size) = match op { FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32), FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64), FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32), FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64), FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32), FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64), FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32), FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64), }; let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); let rn = pretty_print_vreg_scalar(rn, size, allocs); format!("{} {}, {}", inst, rd, rn) } &Inst::MovToFpu { rd, rn, size } => { let operand_size = size.operand_size(); let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); let rn = pretty_print_ireg(rn, operand_size, allocs); format!("fmov {}, {}", rd, rn) } 
&Inst::FpuMoveFPImm { rd, imm, size } => { let imm = imm.pretty_print(0, allocs); let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); format!("fmov {}, {}", rd, imm) } &Inst::MovToVec { rd, ri, rn, idx, size, } => { let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size(), allocs); let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size(), allocs); let rn = pretty_print_ireg(rn, size.operand_size(), allocs); format!("mov {}, {}, {}", rd, ri, rn) } &Inst::MovFromVec { rd, rn, idx, size } => { let op = match size { ScalarSize::Size8 => "umov", ScalarSize::Size16 => "umov", ScalarSize::Size32 => "mov", ScalarSize::Size64 => "mov", _ => unimplemented!(), }; let rd = pretty_print_ireg(rd.to_reg(), size.operand_size(), allocs); let rn = pretty_print_vreg_element(rn, idx as usize, size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::MovFromVecSigned { rd, rn, idx, size, scalar_size, } => { let rd = pretty_print_ireg(rd.to_reg(), scalar_size, allocs); let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size(), allocs); format!("smov {}, {}", rd, rn) } &Inst::VecDup { rd, rn, size } => { let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let rn = pretty_print_ireg(rn, size.operand_size(), allocs); format!("dup {}, {}", rd, rn) } &Inst::VecDupFromFpu { rd, rn, size, lane } => { let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let rn = pretty_print_vreg_element(rn, lane.into(), size.lane_size(), allocs); format!("dup {}, {}", rd, rn) } &Inst::VecDupFPImm { rd, imm, size } => { let imm = imm.pretty_print(0, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); format!("fmov {}, {}", rd, imm) } &Inst::VecDupImm { rd, imm, invert, size, } => { let imm = imm.pretty_print(0, allocs); let op = if invert { "mvni" } else { "movi" }; let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); format!("{} {}, {}", op, rd, imm) } &Inst::VecExtend { t, rd, rn, high_half, lane_size, } => { let vec64 = VectorSize::from_lane_size(lane_size.narrow(), false); let vec128 = VectorSize::from_lane_size(lane_size.narrow(), true); let rd_size = VectorSize::from_lane_size(lane_size, true); let (op, rn_size) = match (t, high_half) { (VecExtendOp::Sxtl, false) => ("sxtl", vec64), (VecExtendOp::Sxtl, true) => ("sxtl2", vec128), (VecExtendOp::Uxtl, false) => ("uxtl", vec64), (VecExtendOp::Uxtl, true) => ("uxtl2", vec128), }; let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs); let rn = pretty_print_vreg_vector(rn, rn_size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::VecMovElement { rd, ri, rn, dest_idx, src_idx, size, } => { let rd = pretty_print_vreg_element( rd.to_reg(), dest_idx as usize, size.lane_size(), allocs, ); let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size(), allocs); let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size(), allocs); format!("mov {}, {}, {}", rd, ri, rn) } &Inst::VecRRLong { op, rd, rn, high_half, } => { let (op, rd_size, size, suffix) = match (op, high_half) { (VecRRLongOp::Fcvtl16, false) => { ("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "") } (VecRRLongOp::Fcvtl16, true) => { ("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "") } (VecRRLongOp::Fcvtl32, false) => { ("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "") } (VecRRLongOp::Fcvtl32, true) => { ("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "") } (VecRRLongOp::Shll8, false) => { ("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8") } 
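// For the shll/shll2 arms, the shift amount is architecturally fixed to the source element
// width, which is why the `, #8`/`, #16`/`, #32` suffix is baked into each arm.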
(VecRRLongOp::Shll8, true) => { ("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8") } (VecRRLongOp::Shll16, false) => { ("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16") } (VecRRLongOp::Shll16, true) => { ("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16") } (VecRRLongOp::Shll32, false) => { ("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32") } (VecRRLongOp::Shll32, true) => { ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32") } }; let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs); let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}{}", op, rd, rn, suffix) } &Inst::VecRRNarrowLow { op, rd, rn, lane_size, .. } | &Inst::VecRRNarrowHigh { op, rd, rn, lane_size, .. } => { let vec64 = VectorSize::from_lane_size(lane_size, false); let vec128 = VectorSize::from_lane_size(lane_size, true); let rn_size = VectorSize::from_lane_size(lane_size.widen(), true); let high_half = match self { &Inst::VecRRNarrowLow { .. } => false, &Inst::VecRRNarrowHigh { .. } => true, _ => unreachable!(), }; let (op, rd_size) = match (op, high_half) { (VecRRNarrowOp::Xtn, false) => ("xtn", vec64), (VecRRNarrowOp::Xtn, true) => ("xtn2", vec128), (VecRRNarrowOp::Sqxtn, false) => ("sqxtn", vec64), (VecRRNarrowOp::Sqxtn, true) => ("sqxtn2", vec128), (VecRRNarrowOp::Sqxtun, false) => ("sqxtun", vec64), (VecRRNarrowOp::Sqxtun, true) => ("sqxtun2", vec128), (VecRRNarrowOp::Uqxtn, false) => ("uqxtn", vec64), (VecRRNarrowOp::Uqxtn, true) => ("uqxtn2", vec128), (VecRRNarrowOp::Fcvtn, false) => ("fcvtn", vec64), (VecRRNarrowOp::Fcvtn, true) => ("fcvtn2", vec128), }; let rn = pretty_print_vreg_vector(rn, rn_size, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs); let ri = match self { &Inst::VecRRNarrowLow { .. } => "".to_string(), &Inst::VecRRNarrowHigh { ri, .. 
} => { format!("{}, ", pretty_print_vreg_vector(ri, rd_size, allocs)) } _ => unreachable!(), }; format!("{} {}, {}{}", op, rd, ri, rn) } &Inst::VecRRPair { op, rd, rn } => { let op = match op { VecPairOp::Addp => "addp", }; let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); let rn = pretty_print_vreg_vector(rn, VectorSize::Size64x2, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::VecRRPairLong { op, rd, rn } => { let (op, dest, src) = match op { VecRRPairLongOp::Saddlp8 => { ("saddlp", VectorSize::Size16x8, VectorSize::Size8x16) } VecRRPairLongOp::Saddlp16 => { ("saddlp", VectorSize::Size32x4, VectorSize::Size16x8) } VecRRPairLongOp::Uaddlp8 => { ("uaddlp", VectorSize::Size16x8, VectorSize::Size8x16) } VecRRPairLongOp::Uaddlp16 => { ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8) } }; let rd = pretty_print_vreg_vector(rd.to_reg(), dest, allocs); let rn = pretty_print_vreg_vector(rn, src, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::VecRRR { rd, rn, rm, alu_op, size, } => { let (op, size) = match alu_op { VecALUOp::Sqadd => ("sqadd", size), VecALUOp::Uqadd => ("uqadd", size), VecALUOp::Sqsub => ("sqsub", size), VecALUOp::Uqsub => ("uqsub", size), VecALUOp::Cmeq => ("cmeq", size), VecALUOp::Cmge => ("cmge", size), VecALUOp::Cmgt => ("cmgt", size), VecALUOp::Cmhs => ("cmhs", size), VecALUOp::Cmhi => ("cmhi", size), VecALUOp::Fcmeq => ("fcmeq", size), VecALUOp::Fcmgt => ("fcmgt", size), VecALUOp::Fcmge => ("fcmge", size), VecALUOp::And => ("and", VectorSize::Size8x16), VecALUOp::Bic => ("bic", VectorSize::Size8x16), VecALUOp::Orr => ("orr", VectorSize::Size8x16), VecALUOp::Eor => ("eor", VectorSize::Size8x16), VecALUOp::Umaxp => ("umaxp", size), VecALUOp::Add => ("add", size), VecALUOp::Sub => ("sub", size), VecALUOp::Mul => ("mul", size), VecALUOp::Sshl => ("sshl", size), VecALUOp::Ushl => ("ushl", size), VecALUOp::Umin => ("umin", size), VecALUOp::Smin => ("smin", size), VecALUOp::Umax => ("umax", size), VecALUOp::Smax => ("smax", size), VecALUOp::Urhadd => ("urhadd", size), VecALUOp::Fadd => ("fadd", size), VecALUOp::Fsub => ("fsub", size), VecALUOp::Fdiv => ("fdiv", size), VecALUOp::Fmax => ("fmax", size), VecALUOp::Fmin => ("fmin", size), VecALUOp::Fmul => ("fmul", size), VecALUOp::Addp => ("addp", size), VecALUOp::Zip1 => ("zip1", size), VecALUOp::Zip2 => ("zip2", size), VecALUOp::Sqrdmulh => ("sqrdmulh", size), VecALUOp::Uzp1 => ("uzp1", size), VecALUOp::Uzp2 => ("uzp2", size), VecALUOp::Trn1 => ("trn1", size), VecALUOp::Trn2 => ("trn2", size), }; let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let rn = pretty_print_vreg_vector(rn, size, allocs); let rm = pretty_print_vreg_vector(rm, size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::VecRRRMod { rd, ri, rn, rm, alu_op, size, } => { let (op, size) = match alu_op { VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16), VecALUModOp::Fmla => ("fmla", size), VecALUModOp::Fmls => ("fmls", size), }; let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let ri = pretty_print_vreg_vector(ri, size, allocs); let rn = pretty_print_vreg_vector(rn, size, allocs); let rm = pretty_print_vreg_vector(rm, size, allocs); format!("{} {}, {}, {}, {}", op, rd, ri, rn, rm) } &Inst::VecFmlaElem { rd, ri, rn, rm, alu_op, size, idx, } => { let (op, size) = match alu_op { VecALUModOp::Fmla => ("fmla", size), VecALUModOp::Fmls => ("fmls", size), _ => unreachable!(), }; let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let ri = pretty_print_vreg_vector(ri, size, allocs); let rn = 
pretty_print_vreg_vector(rn, size, allocs); let rm = pretty_print_vreg_element(rm, idx.into(), size.lane_size(), allocs); format!("{} {}, {}, {}, {}", op, rd, ri, rn, rm) } &Inst::VecRRRLong { rd, rn, rm, alu_op, high_half, } => { let (op, dest_size, src_size) = match (alu_op, high_half) { (VecRRRLongOp::Smull8, false) => { ("smull", VectorSize::Size16x8, VectorSize::Size8x8) } (VecRRRLongOp::Smull8, true) => { ("smull2", VectorSize::Size16x8, VectorSize::Size8x16) } (VecRRRLongOp::Smull16, false) => { ("smull", VectorSize::Size32x4, VectorSize::Size16x4) } (VecRRRLongOp::Smull16, true) => { ("smull2", VectorSize::Size32x4, VectorSize::Size16x8) } (VecRRRLongOp::Smull32, false) => { ("smull", VectorSize::Size64x2, VectorSize::Size32x2) } (VecRRRLongOp::Smull32, true) => { ("smull2", VectorSize::Size64x2, VectorSize::Size32x4) } (VecRRRLongOp::Umull8, false) => { ("umull", VectorSize::Size16x8, VectorSize::Size8x8) } (VecRRRLongOp::Umull8, true) => { ("umull2", VectorSize::Size16x8, VectorSize::Size8x16) } (VecRRRLongOp::Umull16, false) => { ("umull", VectorSize::Size32x4, VectorSize::Size16x4) } (VecRRRLongOp::Umull16, true) => { ("umull2", VectorSize::Size32x4, VectorSize::Size16x8) } (VecRRRLongOp::Umull32, false) => { ("umull", VectorSize::Size64x2, VectorSize::Size32x2) } (VecRRRLongOp::Umull32, true) => { ("umull2", VectorSize::Size64x2, VectorSize::Size32x4) } }; let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs); let rn = pretty_print_vreg_vector(rn, src_size, allocs); let rm = pretty_print_vreg_vector(rm, src_size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::VecRRRLongMod { rd, ri, rn, rm, alu_op, high_half, } => { let (op, dest_size, src_size) = match (alu_op, high_half) { (VecRRRLongModOp::Umlal8, false) => { ("umlal", VectorSize::Size16x8, VectorSize::Size8x8) } (VecRRRLongModOp::Umlal8, true) => { ("umlal2", VectorSize::Size16x8, VectorSize::Size8x16) } (VecRRRLongModOp::Umlal16, false) => { ("umlal", VectorSize::Size32x4, VectorSize::Size16x4) } (VecRRRLongModOp::Umlal16, true) => { ("umlal2", VectorSize::Size32x4, VectorSize::Size16x8) } (VecRRRLongModOp::Umlal32, false) => { ("umlal", VectorSize::Size64x2, VectorSize::Size32x2) } (VecRRRLongModOp::Umlal32, true) => { ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4) } }; let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs); let ri = pretty_print_vreg_vector(ri, dest_size, allocs); let rn = pretty_print_vreg_vector(rn, src_size, allocs); let rm = pretty_print_vreg_vector(rm, src_size, allocs); format!("{} {}, {}, {}, {}", op, rd, ri, rn, rm) } &Inst::VecMisc { op, rd, rn, size } => { let (op, size, suffix) = match op { VecMisc2::Not => ( "mvn", if size.is_128bits() { VectorSize::Size8x16 } else { VectorSize::Size8x8 }, "", ), VecMisc2::Neg => ("neg", size, ""), VecMisc2::Abs => ("abs", size, ""), VecMisc2::Fabs => ("fabs", size, ""), VecMisc2::Fneg => ("fneg", size, ""), VecMisc2::Fsqrt => ("fsqrt", size, ""), VecMisc2::Rev16 => ("rev16", size, ""), VecMisc2::Rev32 => ("rev32", size, ""), VecMisc2::Rev64 => ("rev64", size, ""), VecMisc2::Fcvtzs => ("fcvtzs", size, ""), VecMisc2::Fcvtzu => ("fcvtzu", size, ""), VecMisc2::Scvtf => ("scvtf", size, ""), VecMisc2::Ucvtf => ("ucvtf", size, ""), VecMisc2::Frintn => ("frintn", size, ""), VecMisc2::Frintz => ("frintz", size, ""), VecMisc2::Frintm => ("frintm", size, ""), VecMisc2::Frintp => ("frintp", size, ""), VecMisc2::Cnt => ("cnt", size, ""), VecMisc2::Cmeq0 => ("cmeq", size, ", #0"), VecMisc2::Cmge0 => ("cmge", size, ", #0"), 
VecMisc2::Cmgt0 => ("cmgt", size, ", #0"), VecMisc2::Cmle0 => ("cmle", size, ", #0"), VecMisc2::Cmlt0 => ("cmlt", size, ", #0"), VecMisc2::Fcmeq0 => ("fcmeq", size, ", #0.0"), VecMisc2::Fcmge0 => ("fcmge", size, ", #0.0"), VecMisc2::Fcmgt0 => ("fcmgt", size, ", #0.0"), VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"), VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"), }; let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}{}", op, rd, rn, suffix) } &Inst::VecLanes { op, rd, rn, size } => { let op = match op { VecLanesOp::Uminv => "uminv", VecLanesOp::Addv => "addv", }; let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size(), allocs); let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::VecShiftImm { op, rd, rn, size, imm, } => { let op = match op { VecShiftImmOp::Shl => "shl", VecShiftImmOp::Ushr => "ushr", VecShiftImmOp::Sshr => "sshr", }; let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}, #{}", op, rd, rn, imm) } &Inst::VecShiftImmMod { op, rd, ri, rn, size, imm, } => { let op = match op { VecShiftImmModOp::Sli => "sli", }; let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let ri = pretty_print_vreg_vector(ri, size, allocs); let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}, {}, #{}", op, rd, ri, rn, imm) } &Inst::VecExtract { rd, rn, rm, imm4 } => { let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4) } &Inst::VecTbl { rd, rn, rm } => { let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); format!("tbl {}, {{ {} }}, {}", rd, rn, rm) } &Inst::VecTblExt { rd, ri, rn, rm } => { let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16, allocs); format!("tbx {}, {}, {{ {} }}, {}", rd, ri, rn, rm) } &Inst::VecTbl2 { rd, rn, rn2, rm } => { let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); format!("tbl {}, {{ {}, {} }}, {}", rd, rn, rn2, rm) } &Inst::VecTbl2Ext { rd, ri, rn, rn2, rm, } => { let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16, allocs); format!("tbx {}, {}, {{ {}, {} }}, {}", rd, ri, rn, rn2, rm) } &Inst::VecLoadReplicate { rd, rn, size, .. 
} => { let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); let rn = pretty_print_reg(rn, allocs); format!("ld1r {{ {} }}, [{}]", rd, rn) } &Inst::VecCSel { rd, rn, rm, cond } => { let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); let cond = cond.pretty_print(0, allocs); format!( "vcsel {}, {}, {}, {} (if-then-else diamond)", rd, rn, rm, cond ) } &Inst::MovToNZCV { rn } => { let rn = pretty_print_reg(rn, allocs); format!("msr nzcv, {}", rn) } &Inst::MovFromNZCV { rd } => { let rd = pretty_print_reg(rd.to_reg(), allocs); format!("mrs {}, nzcv", rd) } &Inst::Extend { rd, rn, signed: false, from_bits: 1, .. } => { let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32, allocs); let rn = pretty_print_ireg(rn, OperandSize::Size32, allocs); format!("and {}, {}, #1", rd, rn) } &Inst::Extend { rd, rn, signed: false, from_bits: 32, to_bits: 64, } => { // A zero extension from 32 to 64 bits is implemented // with a "mov" to a 32-bit (W-reg) dest, because writing a W register // zeroes the top 32 bits. let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32, allocs); let rn = pretty_print_ireg(rn, OperandSize::Size32, allocs); format!("mov {}, {}", rd, rn) } &Inst::Extend { rd, rn, signed, from_bits, to_bits, } => { assert!(from_bits <= to_bits); let op = match (signed, from_bits) { (false, 8) => "uxtb", (true, 8) => "sxtb", (false, 16) => "uxth", (true, 16) => "sxth", (true, 32) => "sxtw", (true, _) => "sbfx", (false, _) => "ubfx", }; if op == "sbfx" || op == "ubfx" { let dest_size = OperandSize::from_bits(to_bits); let rd = pretty_print_ireg(rd.to_reg(), dest_size, allocs); let rn = pretty_print_ireg(rn, dest_size, allocs); format!("{} {}, {}, #0, #{}", op, rd, rn, from_bits) } else { let dest_size = if signed { OperandSize::from_bits(to_bits) } else { OperandSize::Size32 }; let rd = pretty_print_ireg(rd.to_reg(), dest_size, allocs); let rn = pretty_print_ireg(rn, OperandSize::from_bits(from_bits), allocs); format!("{} {}, {}", op, rd, rn) } } &Inst::Call { .. } => format!("bl 0"), &Inst::CallInd { ref info, ..
} => { let rn = pretty_print_reg(info.rn, allocs); format!("blr {}", rn) } &Inst::ReturnCall { ref callee, ref info, } => { let mut s = format!( "return_call {callee:?} old_stack_arg_size:{} new_stack_arg_size:{}", info.old_stack_arg_size, info.new_stack_arg_size ); for ret in &info.uses { let preg = pretty_print_reg(ret.preg, &mut empty_allocs); let vreg = pretty_print_reg(ret.vreg, allocs); write!(&mut s, " {vreg}={preg}").unwrap(); } s } &Inst::ReturnCallInd { callee, ref info } => { let callee = pretty_print_reg(callee, allocs); let mut s = format!( "return_call_ind {callee} old_stack_arg_size:{} new_stack_arg_size:{}", info.old_stack_arg_size, info.new_stack_arg_size ); for ret in &info.uses { let preg = pretty_print_reg(ret.preg, &mut empty_allocs); let vreg = pretty_print_reg(ret.vreg, allocs); write!(&mut s, " {vreg}={preg}").unwrap(); } s } &Inst::Args { ref args } => { let mut s = "args".to_string(); for arg in args { let preg = pretty_print_reg(arg.preg, &mut empty_allocs); let def = pretty_print_reg(arg.vreg.to_reg(), allocs); write!(&mut s, " {}={}", def, preg).unwrap(); } s } &Inst::Rets { ref rets } => { let mut s = "rets".to_string(); for ret in rets { let preg = pretty_print_reg(ret.preg, &mut empty_allocs); let vreg = pretty_print_reg(ret.vreg, allocs); write!(&mut s, " {vreg}={preg}").unwrap(); } s } &Inst::Ret {} => "ret".to_string(), &Inst::AuthenticatedRet { key, is_hint } => { let key = match key { APIKey::AZ => "az", APIKey::BZ => "bz", APIKey::ASP => "asp", APIKey::BSP => "bsp", }; match is_hint { false => format!("reta{key}"), true => format!("auti{key} ; ret"), } } &Inst::Jump { ref dest } => { let dest = dest.pretty_print(0, allocs); format!("b {}", dest) } &Inst::CondBr { ref taken, ref not_taken, ref kind, } => { let taken = taken.pretty_print(0, allocs); let not_taken = not_taken.pretty_print(0, allocs); match kind { &CondBrKind::Zero(reg) => { let reg = pretty_print_reg(reg, allocs); format!("cbz {}, {} ; b {}", reg, taken, not_taken) } &CondBrKind::NotZero(reg) => { let reg = pretty_print_reg(reg, allocs); format!("cbnz {}, {} ; b {}", reg, taken, not_taken) } &CondBrKind::Cond(c) => { let c = c.pretty_print(0, allocs); format!("b.{} {} ; b {}", c, taken, not_taken) } } } &Inst::TestBitAndBranch { kind, ref taken, ref not_taken, rn, bit, } => { let cond = match kind { TestBitAndBranchKind::Z => "z", TestBitAndBranchKind::NZ => "nz", }; let taken = taken.pretty_print(0, allocs); let not_taken = not_taken.pretty_print(0, allocs); let rn = pretty_print_reg(rn, allocs); format!("tb{cond} {rn}, #{bit}, {taken} ; b {not_taken}") } &Inst::IndirectBr { rn, .. } => { let rn = pretty_print_reg(rn, allocs); format!("br {}", rn) } &Inst::Brk => "brk #0".to_string(), &Inst::Udf { .. } => "udf #0xc11f".to_string(), &Inst::TrapIf { ref kind, trap_code, } => match kind { &CondBrKind::Zero(reg) => { let reg = pretty_print_reg(reg, allocs); format!("cbz {reg}, #trap={trap_code}") } &CondBrKind::NotZero(reg) => { let reg = pretty_print_reg(reg, allocs); format!("cbnz {reg}, #trap={trap_code}") } &CondBrKind::Cond(c) => { let c = c.pretty_print(0, allocs); format!("b.{c} #trap={trap_code}") } }, &Inst::Adr { rd, off } => { let rd = pretty_print_reg(rd.to_reg(), allocs); format!("adr {}, pc+{}", rd, off) } &Inst::Adrp { rd, off } => { let rd = pretty_print_reg(rd.to_reg(), allocs); // This instruction addresses 4KiB pages, so multiply it by the page size. 
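// For example, an `off` of 1 denotes the 4 KiB page immediately after the one containing this `adrp`, so it is printed as a byte offset of 4096 below.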
let byte_offset = off * 4096; format!("adrp {}, pc+{}", rd, byte_offset) } &Inst::Word4 { data } => format!("data.i32 {}", data), &Inst::Word8 { data } => format!("data.i64 {}", data), &Inst::JTSequence { default, ref targets, ridx, rtmp1, rtmp2, .. } => { let ridx = pretty_print_reg(ridx, allocs); let rtmp1 = pretty_print_reg(rtmp1.to_reg(), allocs); let rtmp2 = pretty_print_reg(rtmp2.to_reg(), allocs); let default_target = BranchTarget::Label(default).pretty_print(0, allocs); format!( concat!( "b.hs {} ; ", "csel {}, xzr, {}, hs ; ", "csdb ; ", "adr {}, pc+16 ; ", "ldrsw {}, [{}, {}, uxtw #2] ; ", "add {}, {}, {} ; ", "br {} ; ", "jt_entries {:?}" ), default_target, rtmp2, ridx, rtmp1, rtmp2, rtmp1, rtmp2, rtmp1, rtmp1, rtmp2, rtmp1, targets ) } &Inst::LoadExtName { rd, ref name, offset, } => { let rd = pretty_print_reg(rd.to_reg(), allocs); format!("load_ext_name {rd}, {name:?}+{offset}") } &Inst::LoadAddr { rd, ref mem } => { // TODO: we really should find a better way to avoid duplication of // this logic between `emit()` and `show_rru()` -- a separate 1-to-N // expansion stage (i.e., legalization, but without the slow edit-in-place // of the existing legalization framework). let rd = allocs.next_writable(rd); let mem = mem.with_allocs(allocs); let (mem_insts, mem) = mem_finalize(None, &mem, state); let mut ret = String::new(); for inst in mem_insts.into_iter() { ret.push_str( &inst.print_with_state(&mut EmitState::default(), &mut empty_allocs), ); } let (reg, index_reg, offset) = match mem { AMode::RegExtended { rn, rm, extendop } => (rn, Some((rm, extendop)), 0), AMode::Unscaled { rn, simm9 } => (rn, None, simm9.value()), AMode::UnsignedOffset { rn, uimm12 } => (rn, None, uimm12.value() as i32), _ => panic!("Unsupported case for LoadAddr: {:?}", mem), }; let abs_offset = if offset < 0 { -offset as u64 } else { offset as u64 }; let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add }; if let Some((idx, extendop)) = index_reg { let add = Inst::AluRRRExtend { alu_op: ALUOp::Add, size: OperandSize::Size64, rd, rn: reg, rm: idx, extendop, }; ret.push_str( &add.print_with_state(&mut EmitState::default(), &mut empty_allocs), ); } else if offset == 0 { let mov = Inst::gen_move(rd, reg, I64); ret.push_str( &mov.print_with_state(&mut EmitState::default(), &mut empty_allocs), ); } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { let add = Inst::AluRRImm12 { alu_op, size: OperandSize::Size64, rd, rn: reg, imm12, }; ret.push_str( &add.print_with_state(&mut EmitState::default(), &mut empty_allocs), ); } else { let tmp = writable_spilltmp_reg(); for inst in Inst::load_constant(tmp, abs_offset, &mut |_| tmp).into_iter() { ret.push_str( &inst.print_with_state(&mut EmitState::default(), &mut empty_allocs), ); } let add = Inst::AluRRR { alu_op, size: OperandSize::Size64, rd, rn: reg, rm: tmp.to_reg(), }; ret.push_str( &add.print_with_state(&mut EmitState::default(), &mut empty_allocs), ); } ret } &Inst::Paci { key } => { let key = match key { APIKey::AZ => "az", APIKey::BZ => "bz", APIKey::ASP => "asp", APIKey::BSP => "bsp", }; "paci".to_string() + key } &Inst::Xpaclri => "xpaclri".to_string(), &Inst::Bti { targets } => { let targets = match targets { BranchTargetType::None => "", BranchTargetType::C => " c", BranchTargetType::J => " j", BranchTargetType::JC => " jc", }; "bti".to_string() + targets } &Inst::VirtualSPOffsetAdj { offset } => { state.virtual_sp_offset += offset; format!("virtual_sp_offset_adjust {}", offset) } &Inst::EmitIsland { needed_space } => format!("emit_island 
{}", needed_space), &Inst::ElfTlsGetAddr { ref symbol, rd, tmp, } => { let rd = pretty_print_reg(rd.to_reg(), allocs); let tmp = pretty_print_reg(tmp.to_reg(), allocs); format!("elf_tls_get_addr {}, {}, {}", rd, tmp, symbol.display(None)) } &Inst::MachOTlsGetAddr { ref symbol, rd } => { let rd = pretty_print_reg(rd.to_reg(), allocs); format!("macho_tls_get_addr {}, {}", rd, symbol.display(None)) } &Inst::Unwind { ref inst } => { format!("unwind {:?}", inst) } &Inst::DummyUse { reg } => { let reg = pretty_print_reg(reg, allocs); format!("dummy_use {}", reg) } &Inst::StackProbeLoop { start, end, step } => { let start = pretty_print_reg(start.to_reg(), allocs); let end = pretty_print_reg(end, allocs); let step = step.pretty_print(0, allocs); format!("stack_probe_loop {start}, {end}, {step}") } } } } //============================================================================= // Label fixups and jump veneers. /// Different forms of label references for different instruction formats. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum LabelUse { /// 14-bit branch offset (conditional branches). PC-rel, offset is imm << /// 2. Immediate is 14 signed bits, in bits 18:5. Used by tbz and tbnz. Branch14, /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19 /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond. Branch19, /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26 /// signed bits, in bits 25:0. Used by b, bl. Branch26, #[allow(dead_code)] /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits, /// in bits 23:5. Ldr19, #[allow(dead_code)] /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29. Adr21, /// 32-bit PC relative constant offset (from address of constant itself), /// signed. Used in jump tables. PCRel32, } impl MachInstLabelUse for LabelUse { /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned. const ALIGN: CodeOffset = 4; /// Maximum PC-relative range (positive), inclusive. fn max_pos_range(self) -> CodeOffset { match self { // N-bit immediate, left-shifted by 2, for (N+2) bits of total // range. Signed, so +2^(N+1) from zero. Likewise for two other // shifted cases below. LabelUse::Branch14 => (1 << 15) - 1, LabelUse::Branch19 => (1 << 20) - 1, LabelUse::Branch26 => (1 << 27) - 1, LabelUse::Ldr19 => (1 << 20) - 1, // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total // range. LabelUse::Adr21 => (1 << 20) - 1, LabelUse::PCRel32 => 0x7fffffff, } } /// Maximum PC-relative range (negative). fn max_neg_range(self) -> CodeOffset { // All forms are twos-complement signed offsets, so negative limit is one more than // positive limit. self.max_pos_range() + 1 } /// Size of window into code needed to do the patch. fn patch_size(self) -> CodeOffset { // Patch is on one instruction only for all of these label reference types. 4 } /// Perform the patch. 
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { let pc_rel = (label_offset as i64) - (use_offset as i64); debug_assert!(pc_rel <= self.max_pos_range() as i64); debug_assert!(pc_rel >= -(self.max_neg_range() as i64)); let pc_rel = pc_rel as u32; let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]); let mask = match self { LabelUse::Branch14 => 0x0007ffe0, // bits 18..5 inclusive LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive LabelUse::Ldr19 => 0x00ffffe0, // bits 23..5 inclusive LabelUse::Adr21 => 0x60ffffe0, // bits 30..29, 23..5 inclusive LabelUse::PCRel32 => 0xffffffff, }; let pc_rel_shifted = match self { LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel, _ => { debug_assert!(pc_rel & 3 == 0); pc_rel >> 2 } }; let pc_rel_inserted = match self { LabelUse::Branch14 => (pc_rel_shifted & 0x3fff) << 5, LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5, LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff, LabelUse::Adr21 => (pc_rel_shifted & 0x7ffff) << 5 | (pc_rel_shifted & 0x180000) << 10, LabelUse::PCRel32 => pc_rel_shifted, }; let is_add = match self { LabelUse::PCRel32 => true, _ => false, }; let insn_word = if is_add { insn_word.wrapping_add(pc_rel_inserted) } else { (insn_word & !mask) | pc_rel_inserted }; buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word)); } /// Is a veneer supported for this label reference type? fn supports_veneer(self) -> bool { match self { LabelUse::Branch14 | LabelUse::Branch19 => true, // veneer is a Branch26 LabelUse::Branch26 => true, // veneer is a PCRel32 _ => false, } } /// How large is the veneer, if supported? fn veneer_size(self) -> CodeOffset { match self { LabelUse::Branch14 | LabelUse::Branch19 => 4, LabelUse::Branch26 => 20, _ => unreachable!(), } } fn worst_case_veneer_size() -> CodeOffset { 20 } /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return /// an offset and label-use for the veneer's use of the original label. fn generate_veneer( self, buffer: &mut [u8], veneer_offset: CodeOffset, ) -> (CodeOffset, LabelUse) { match self { LabelUse::Branch14 | LabelUse::Branch19 => { // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't // bother with constructing an Inst. let insn_word = 0b000101 << 26; buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word)); (veneer_offset, LabelUse::Branch26) } // This promotes a 26-bit call/jump to a 32-bit call/jump to get a // greater range. The veneer branches to a target given by a 32-bit // constant, interpreted relative to the address of the constant itself. // // If this path is taken from a call instruction then caller-saved // registers are available (minus arguments), so x16/x17 are // available. Otherwise for intra-function jumps we also reserve // x16/x17 as spill-style registers. In both cases these are // available for us to use.
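// The veneer emitted below is `ldrsw; adr; add; br` followed by a 4-byte PC-relative literal at offset 16; that layout is what gives the 20-byte `veneer_size` reported for `Branch26` above.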
LabelUse::Branch26 => { let tmp1 = regs::spilltmp_reg(); let tmp1_w = regs::writable_spilltmp_reg(); let tmp2 = regs::tmp2_reg(); let tmp2_w = regs::writable_tmp2_reg(); // ldrsw x16, 16 let ldr = emit::enc_ldst_imm19(0b1001_1000, 16 / 4, tmp1); // adr x17, 12 let adr = emit::enc_adr(12, tmp2_w); // add x16, x16, x17 let add = emit::enc_arith_rrr(0b10001011_000, 0, tmp1_w, tmp1, tmp2); // br x16 let br = emit::enc_br(tmp1); buffer[0..4].clone_from_slice(&u32::to_le_bytes(ldr)); buffer[4..8].clone_from_slice(&u32::to_le_bytes(adr)); buffer[8..12].clone_from_slice(&u32::to_le_bytes(add)); buffer[12..16].clone_from_slice(&u32::to_le_bytes(br)); // The 4-byte signed immediate we'll load is after these // instructions, 16 bytes in. (veneer_offset + 16, LabelUse::PCRel32) } _ => panic!("Unsupported label-reference type for veneer generation!"), } } fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self> { match (reloc, addend) { (Reloc::Arm64Call, 0) => Some(LabelUse::Branch26), _ => None, } } }
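// The range and patching arithmetic implemented above can be exercised
// directly. The following is a minimal sketch, not part of the original
// module: it assumes the crate's usual `#[cfg(test)]` conventions, and the
// module and test names are hypothetical.
#[cfg(test)]
mod label_use_sketch_tests {
    use super::*;

    #[test]
    fn branch_label_ranges() {
        // A 19-bit signed immediate shifted left by 2 covers +/- 2^20 bytes.
        assert_eq!(LabelUse::Branch19.max_pos_range(), (1 << 20) - 1);
        assert_eq!(LabelUse::Branch19.max_neg_range(), 1 << 20);
        // A 26-bit signed immediate shifted left by 2 covers +/- 2^27 bytes.
        assert_eq!(LabelUse::Branch26.max_pos_range(), (1 << 27) - 1);
        assert_eq!(LabelUse::Branch26.max_neg_range(), 1 << 27);
    }

    #[test]
    fn branch26_patch_inserts_word_offset() {
        // An unconditional branch `b` is 0b000101 << 26; patching it against a
        // label 8 bytes ahead should insert a word offset of 2 into bits 25..0.
        let mut buf = u32::to_le_bytes(0b000101 << 26);
        LabelUse::Branch26.patch(&mut buf, 0, 8);
        assert_eq!(u32::from_le_bytes(buf), (0b000101 << 26) | 2);
    }
}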