//! Implementation of a standard S390x ABI.
//!
//! This machine uses the "vanilla" ABI implementation from abi.rs,
//! however a few details are different from the description there:
//!
//! - On s390x, the caller must provide a "register save area" of 160
//!   bytes to any function it calls.  The called function is free to use
//!   this space for any purpose; usually to save callee-saved GPRs.
//!   (Note that while this area is allocated by the caller, it is counted
//!   as part of the callee's stack frame; in particular, the callee's CFA
//!   is the top of the register save area, not the incoming SP value.)
//!
//! - Overflow arguments are passed on the stack starting immediately
//!   above the register save area.  On s390x, this space is allocated
//!   only once directly in the prologue, using a size large enough to
//!   hold overflow arguments for every call in the function.
//!
//! - On s390x we do not use a frame pointer register; instead, every
//!   element of the stack frame is addressed via (constant) offsets
//!   from the stack pointer.  Note that due to the above (and because
//!   there are no variable-sized stack allocations in cranelift), the
//!   value of the stack pointer register never changes after the
//!   initial allocation in the function prologue.
//!
//! - If we are asked to "preserve frame pointers" to enable stack
//!   unwinding, we use the stack backchain feature instead, which
//!   is documented by the s390x ELF ABI, but marked as optional.
//!   This ensures that at all times during execution of a function,
//!   the lowest word on the stack (part of the register save area)
//!   holds a copy of the stack pointer at function entry.
//!
//! Overall, the stack frame layout on s390x is as follows:
//!
//! ```plain
//!   (high address)
//!
//!                                +---------------------------+
//!                                |          ...              |
//! CFA                    -----> | stack args                |
//!                                +---------------------------+
//!                                |          ...              |
//!                                | 160 bytes reg save area   |
//!                                | (used to save GPRs)       |
//! SP at function entry   -----> | (incl. caller's backchain)|
//!                                +---------------------------+
//!                                |          ...              |
//!                                | clobbered callee-saves    |
//!                                | (used to save FPRs)       |
//! unwind-frame base      -----> | (alloc'd by prologue)     |
//!                                +---------------------------+
//!                                |          ...              |
//!                                | spill slots               |
//!                                | (accessed via SP)         |
//!                                |          ...              |
//!                                | stack slots               |
//!                                | (accessed via SP)         |
//!                                | (alloc'd by prologue)     |
//!                                +---------------------------+
//!                                |          ...              |
//!                                | args for call             |
//!                                | outgoing reg save area    |
//!                                | (alloc'd by prologue)     |
//! SP during function     -----> | (incl. callee's backchain)|
//!                                +---------------------------+
//!
//!   (low address)
//! ```
//!
//! The tail-call ABI has the following changes to the system ABI:
//!
//! - %r6 and %r7 are both non-callee-saved argument registers.
//!
//! - The argument save area for outgoing (non-tail) calls to
//!   a tail-call ABI function is placed *below* the caller's
//!   stack frame.  This means the caller temporarily allocates
//!   a part of the callee's frame, including temporary space
//!   for a register save area holding a copy of the backchain.
//!
//! - For tail calls, the caller puts outgoing arguments at the
//!   very top of its stack frame, overlapping the incoming
//!   argument area.  This is extended by the prolog if needed.
//!
//! Overall, the tail-call stack frame layout on s390x is as follows:
//!
//! ```plain
//!   (high address)
//!
//!                                +---------------------------+
//!                                |          ...              |
//! CFA                    -----> | (caller's frame)          |
//!                                +---------------------------+
//!                                |          ...              |
//!                                | 160 bytes reg save area   |
//!                                | (used to save GPRs)       |
//! SP at function return  -----> | (incl. caller's backchain)|
//!                                +---------------------------+
//!                                |          ...              |
//!                                | incoming stack args       |
//! SP at function entry   -----> | (incl. backchain copy)    |
//!                                +---------------------------+
//!                                |          ...              |
//!                                | outgoing tail call args   |
//!                                | (overlaps incoming args)  |
//!                                | (incl. backchain copy)    |
//! SP at tail call        -----> | (alloc'd by prologue)     |
//!                                +---------------------------+
//!                                |          ...              |
//!                                | clobbered callee-saves    |
//!                                | (used to save FPRs)       |
//! unwind-frame base      -----> | (alloc'd by prologue)     |
//!                                +---------------------------+
//!                                |          ...              |
//!                                | spill slots               |
//!                                | (accessed via SP)         |
//!                                |          ...              |
//!                                | stack slots               |
//!                                | (accessed via SP)         |
//!                                | (alloc'd by prologue)     |
//!                                +---------------------------+
//!                                |          ...              |
//!                                | outgoing calls return buf |
//!                                | outgoing reg save area    |
//!                                | (alloc'd by prologue)     |
//! SP during function     -----> | (incl. callee's backchain)|
//!                                +---------------------------+
//!                                |          ...              |
//!                                | outgoing stack args       |
//!                                | (alloc'd by call sequence)|
//! SP at non-tail call    -----> | (incl. backchain copy)    |
//!                                +---------------------------+
//!   (low address)
//! ```

use crate::ir;
use crate::ir::condcodes::IntCC;
use crate::ir::types;
use crate::ir::MemFlags;
use crate::ir::Signature;
use crate::ir::Type;
use crate::isa;
use crate::isa::s390x::{inst::*, settings as s390x_settings};
use crate::isa::unwind::UnwindInst;
use crate::machinst::*;
use crate::settings;
use crate::CodegenResult;
use alloc::vec::Vec;
use regalloc2::{MachineEnv, PRegSet};
use smallvec::{smallvec, SmallVec};
use std::borrow::ToOwned;
use std::sync::OnceLock;

// We use a generic implementation that factors out ABI commonalities.

/// Support for the S390x ABI from the callee side (within a function body).
pub type S390xCallee = Callee<S390xMachineDeps>;

/// ABI Register usage

fn in_int_reg(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        _ => false,
    }
}

fn in_flt_reg(ty: Type) -> bool {
    match ty {
        types::F32 | types::F64 => true,
        _ => false,
    }
}

fn in_vec_reg(ty: Type) -> bool {
    ty.is_vector() && ty.bits() == 128
}

fn get_intreg_for_arg(call_conv: isa::CallConv, idx: usize) -> Option<Reg> {
    match idx {
        0 => Some(regs::gpr(2)),
        1 => Some(regs::gpr(3)),
        2 => Some(regs::gpr(4)),
        3 => Some(regs::gpr(5)),
        4 => Some(regs::gpr(6)),
        5 if call_conv == isa::CallConv::Tail => Some(regs::gpr(7)),
        _ => None,
    }
}

fn get_fltreg_for_arg(idx: usize) -> Option<Reg> {
    match idx {
        0 => Some(regs::vr(0)),
        1 => Some(regs::vr(2)),
        2 => Some(regs::vr(4)),
        3 => Some(regs::vr(6)),
        _ => None,
    }
}

fn get_vecreg_for_arg(idx: usize) -> Option<Reg> {
    match idx {
        0 => Some(regs::vr(24)),
        1 => Some(regs::vr(25)),
        2 => Some(regs::vr(26)),
        3 => Some(regs::vr(27)),
        4 => Some(regs::vr(28)),
        5 => Some(regs::vr(29)),
        6 => Some(regs::vr(30)),
        7 => Some(regs::vr(31)),
        _ => None,
    }
}

fn get_intreg_for_ret(call_conv: isa::CallConv, idx: usize) -> Option<Reg> {
    match idx {
        0 => Some(regs::gpr(2)),
        // ABI extension to support multi-value returns:
        1 => Some(regs::gpr(3)),
        2 => Some(regs::gpr(4)),
        3 => Some(regs::gpr(5)),
        4 if call_conv == isa::CallConv::Tail => Some(regs::gpr(6)),
        5 if call_conv == isa::CallConv::Tail => Some(regs::gpr(7)),
        _ => None,
    }
}

fn get_fltreg_for_ret(idx: usize) -> Option<Reg> {
    match idx {
        0 => Some(regs::vr(0)),
        // ABI extension to support multi-value returns:
        1 => Some(regs::vr(2)),
        2 => Some(regs::vr(4)),
        3 => Some(regs::vr(6)),
        _ => None,
    }
}

fn get_vecreg_for_ret(idx: usize) -> Option<Reg> {
    match idx {
        0 => Some(regs::vr(24)),
        // ABI extension to support multi-value
returns: 1 => Some(regs::vr(25)), 2 => Some(regs::vr(26)), 3 => Some(regs::vr(27)), 4 => Some(regs::vr(28)), 5 => Some(regs::vr(29)), 6 => Some(regs::vr(30)), 7 => Some(regs::vr(31)), _ => None, } } /// The size of the register save area pub static REG_SAVE_AREA_SIZE: u32 = 160; impl Into for StackAMode { fn into(self) -> MemArg { match self { // Argument area always begins at the initial SP. StackAMode::IncomingArg(off, _) => MemArg::InitialSPOffset { off }, StackAMode::Slot(off) => MemArg::SlotOffset { off }, StackAMode::OutgoingArg(off) => MemArg::NominalSPOffset { off }, } } } /// S390x-specific ABI behavior. This struct just serves as an implementation /// point for the trait; it is never actually instantiated. pub struct S390xMachineDeps; impl IsaFlags for s390x_settings::Flags {} impl ABIMachineSpec for S390xMachineDeps { type I = Inst; type F = s390x_settings::Flags; /// This is the limit for the size of argument and return-value areas on the /// stack. We place a reasonable limit here to avoid integer overflow issues /// with 32-bit arithmetic: for now, 128 MB. const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024; fn word_bits() -> u32 { 64 } /// Return required stack alignment in bytes. fn stack_align(_call_conv: isa::CallConv) -> u32 { 8 } fn compute_arg_locs( call_conv: isa::CallConv, flags: &settings::Flags, params: &[ir::AbiParam], args_or_rets: ArgsOrRets, add_ret_area_ptr: bool, mut args: ArgsAccumulator, ) -> CodegenResult<(u32, Option)> { assert_ne!( call_conv, isa::CallConv::Winch, "s390x does not support the 'winch' calling convention yet" ); let mut next_gpr = 0; let mut next_fpr = 0; let mut next_vr = 0; let mut next_stack: u32 = 0; // The bottom of the stack frame holds the register save area. To simplify // offset computation, include this area as part of the argument area; // however, this does not apply to the tail-call convention, which uses the // callee frame instead to pass arguments. if call_conv != isa::CallConv::Tail && args_or_rets == ArgsOrRets::Args { next_stack = REG_SAVE_AREA_SIZE; } let ret_area_ptr = if add_ret_area_ptr { debug_assert_eq!(args_or_rets, ArgsOrRets::Args); next_gpr += 1; Some(ABIArg::reg( get_intreg_for_arg(call_conv, 0) .unwrap() .to_real_reg() .unwrap(), types::I64, ir::ArgumentExtension::None, ir::ArgumentPurpose::Normal, )) } else { None }; for mut param in params.into_iter().copied() { if let ir::ArgumentPurpose::StructArgument(_) = param.purpose { panic!( "StructArgument parameters are not supported on s390x. \ Use regular pointer arguments instead." ); } let intreg = in_int_reg(param.value_type); let fltreg = in_flt_reg(param.value_type); let vecreg = in_vec_reg(param.value_type); debug_assert!(intreg as i32 + fltreg as i32 + vecreg as i32 <= 1); let (next_reg, candidate, implicit_ref) = if intreg { let candidate = match args_or_rets { ArgsOrRets::Args => get_intreg_for_arg(call_conv, next_gpr), ArgsOrRets::Rets => get_intreg_for_ret(call_conv, next_gpr), }; (&mut next_gpr, candidate, None) } else if fltreg { let candidate = match args_or_rets { ArgsOrRets::Args => get_fltreg_for_arg(next_fpr), ArgsOrRets::Rets => get_fltreg_for_ret(next_fpr), }; (&mut next_fpr, candidate, None) } else if vecreg { let candidate = match args_or_rets { ArgsOrRets::Args => get_vecreg_for_arg(next_vr), ArgsOrRets::Rets => get_vecreg_for_ret(next_vr), }; (&mut next_vr, candidate, None) } else { // We must pass this by implicit reference. if args_or_rets == ArgsOrRets::Rets { // For return values, just force them to memory. 
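                    // Leaving the register candidate as `None` here makes the
                    // common slot-assignment code below fall through to the
                    // stack-slot path (or report the multi-return error), so
                    // such return values never consume a register.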
(&mut next_gpr, None, None) } else { // For arguments, implicitly convert to pointer type. let implicit_ref = Some(param.value_type); param = ir::AbiParam::new(types::I64); let candidate = get_intreg_for_arg(call_conv, next_gpr); (&mut next_gpr, candidate, implicit_ref) } }; let slot = if let Some(reg) = candidate { *next_reg += 1; ABIArgSlot::Reg { reg: reg.to_real_reg().unwrap(), ty: param.value_type, extension: param.extension, } } else { if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() { return Err(crate::CodegenError::Unsupported( "Too many return values to fit in registers. \ Use a StructReturn argument instead. (#9510)" .to_owned(), )); } // Compute size. Every argument or return value takes a slot of // at least 8 bytes. let size = (ty_bits(param.value_type) / 8) as u32; let slot_size = std::cmp::max(size, 8); // Align the stack slot. debug_assert!(slot_size.is_power_of_two()); let slot_align = std::cmp::min(slot_size, 8); next_stack = align_to(next_stack, slot_align); // If the type is actually of smaller size (and the argument // was not extended), it is passed right-aligned. let offset = if size < slot_size && param.extension == ir::ArgumentExtension::None { slot_size - size } else { 0 }; let offset = (next_stack + offset) as i64; next_stack += slot_size; ABIArgSlot::Stack { offset, ty: param.value_type, extension: param.extension, } }; if let Some(ty) = implicit_ref { assert!( (ty_bits(ty) / 8) % 8 == 0, "implicit argument size is not properly aligned" ); args.push(ABIArg::ImplicitPtrArg { pointer: slot, offset: 0, // Will be filled in later ty, purpose: param.purpose, }); } else { args.push(ABIArg::Slots { slots: smallvec![slot], purpose: param.purpose, }); } } next_stack = align_to(next_stack, 8); let extra_arg = if let Some(ret_area_ptr) = ret_area_ptr { args.push_non_formal(ret_area_ptr); Some(args.args().len() - 1) } else { None }; // After all arguments are in their well-defined location, // allocate buffers for all ImplicitPtrArg arguments. for arg in args.args_mut() { match arg { ABIArg::StructArg { .. } => unreachable!(), ABIArg::ImplicitPtrArg { offset, ty, .. } => { *offset = next_stack as i64; next_stack += (ty_bits(*ty) / 8) as u32; } _ => {} } } // With the tail-call convention, arguments are passed in the *callee*'s // frame instead of the caller's frame. Update all offsets accordingly // (note that resulting offsets will all be negative). if call_conv == isa::CallConv::Tail && args_or_rets == ArgsOrRets::Args && next_stack != 0 { for arg in args.args_mut() { match arg { ABIArg::Slots { slots, .. } => { for slot in slots { match slot { ABIArgSlot::Reg { .. } => {} ABIArgSlot::Stack { offset, .. } => { *offset -= next_stack as i64; } } } } ABIArg::StructArg { .. } => unreachable!(), ABIArg::ImplicitPtrArg { offset, .. } => { *offset -= next_stack as i64; } } } // If we have any stack arguments, also allow for a temporary copy // of the register save area. This is only used until the callee // has finished setting up its own frame. 
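            // Worked example (hypothetical signature): a seventh integer
            // argument spills to the stack and is first assigned offset 0,
            // making next_stack 8; the rebasing loop above turns that offset
            // into -8, and the register save area copy added below brings
            // next_stack to 8 + 160 = 168 bytes, which the calling sequence
            // allocates below the caller's own frame.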
next_stack += REG_SAVE_AREA_SIZE; } Ok((next_stack, extra_arg)) } fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { Inst::gen_load(into_reg, mem.into(), ty) } fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst { Inst::gen_store(mem.into(), from_reg, ty) } fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { Inst::gen_move(to_reg, from_reg, ty) } fn gen_extend( to_reg: Writable, from_reg: Reg, signed: bool, from_bits: u8, to_bits: u8, ) -> Inst { assert!(from_bits < to_bits); Inst::Extend { rd: to_reg, rn: from_reg, signed, from_bits, to_bits, } } fn gen_args(args: Vec) -> Inst { Inst::Args { args } } fn gen_rets(rets: Vec) -> Inst { Inst::Rets { rets } } fn gen_add_imm( _call_conv: isa::CallConv, into_reg: Writable, from_reg: Reg, imm: u32, ) -> SmallInstVec { let mut insts = SmallVec::new(); if let Some(imm) = UImm12::maybe_from_u64(imm as u64) { insts.push(Inst::LoadAddr { rd: into_reg, mem: MemArg::BXD12 { base: from_reg, index: zero_reg(), disp: imm, flags: MemFlags::trusted(), }, }); } else if let Some(imm) = SImm20::maybe_from_i64(imm as i64) { insts.push(Inst::LoadAddr { rd: into_reg, mem: MemArg::BXD20 { base: from_reg, index: zero_reg(), disp: imm, flags: MemFlags::trusted(), }, }); } else { if from_reg != into_reg.to_reg() { insts.push(Inst::mov64(into_reg, from_reg)); } insts.push(Inst::AluRUImm32 { alu_op: ALUOp::AddLogical64, rd: into_reg, ri: into_reg.to_reg(), imm, }); } insts } fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec { let mut insts = SmallVec::new(); insts.push(Inst::CmpTrapRR { op: CmpOp::CmpL64, rn: stack_reg(), rm: limit_reg, cond: Cond::from_intcc(IntCC::UnsignedLessThanOrEqual), trap_code: ir::TrapCode::STACK_OVERFLOW, }); insts } fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable) -> Inst { let mem = mem.into(); Inst::LoadAddr { rd: into_reg, mem } } fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg { spilltmp_reg() } fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Inst { let mem = MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted()); Inst::gen_load(into_reg, mem, ty) } fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { let mem = MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted()); Inst::gen_store(mem, from_reg, ty) } fn gen_sp_reg_adjust(imm: i32) -> SmallInstVec { if imm == 0 { return SmallVec::new(); } let mut insts = SmallVec::new(); if let Ok(imm) = i16::try_from(imm) { insts.push(Inst::AluRSImm16 { alu_op: ALUOp::Add64, rd: writable_stack_reg(), ri: stack_reg(), imm, }); } else { insts.push(Inst::AluRSImm32 { alu_op: ALUOp::Add64, rd: writable_stack_reg(), ri: stack_reg(), imm, }); } insts } fn gen_prologue_frame_setup( _call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &s390x_settings::Flags, _frame_layout: &FrameLayout, ) -> SmallInstVec { SmallVec::new() } fn gen_epilogue_frame_restore( _call_conv: isa::CallConv, _flags: &settings::Flags, _isa_flags: &s390x_settings::Flags, _frame_layout: &FrameLayout, ) -> SmallInstVec { SmallVec::new() } fn gen_return( _call_conv: isa::CallConv, _isa_flags: &s390x_settings::Flags, _frame_layout: &FrameLayout, ) -> SmallInstVec { smallvec![Inst::Ret { link: gpr(14) }] } fn gen_probestack(_insts: &mut SmallInstVec, _: u32) { // TODO: implement if we ever require stack probes on an s390x host // (unlikely unless Lucet is ported) unimplemented!("Stack probing is unimplemented on S390x"); } fn gen_inline_probestack( insts: &mut 
SmallInstVec, _call_conv: isa::CallConv, frame_size: u32, guard_size: u32, ) { // The stack probe loop currently takes 4 instructions and each unrolled // probe takes 2. Set this to 2 to keep the max size to 4 instructions. const PROBE_MAX_UNROLL: u32 = 2; // Calculate how many probes we need to perform. Round down, as we only // need to probe whole guard_size regions we'd otherwise skip over. let probe_count = frame_size / guard_size; if probe_count == 0 { // No probe necessary } else if probe_count <= PROBE_MAX_UNROLL { // Unrolled probe loop. for _ in 0..probe_count { insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32))); insts.push(Inst::StoreImm8 { imm: 0, mem: MemArg::reg(stack_reg(), MemFlags::trusted()), }); } } else { // Explicit probe loop. // Load the number of probes into a register used as loop counter. // `gen_inline_probestack` is called after regalloc2, so we can // use the nonallocatable spilltmp register for this purpose. let probe_count_reg = writable_spilltmp_reg(); if let Ok(probe_count) = i16::try_from(probe_count) { insts.push(Inst::Mov32SImm16 { rd: probe_count_reg, imm: probe_count, }); } else { insts.push(Inst::Mov32Imm { rd: probe_count_reg, imm: probe_count, }); } // Emit probe loop. The guard size is assumed to fit in 16 bits. insts.push(Inst::StackProbeLoop { probe_count: probe_count_reg, guard_size: i16::try_from(guard_size).unwrap(), }); } // Restore the stack pointer to its original position. insts.extend(Self::gen_sp_reg_adjust((probe_count * guard_size) as i32)); } fn gen_clobber_save( call_conv: isa::CallConv, flags: &settings::Flags, frame_layout: &FrameLayout, ) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); // With the tail call convention, the caller already allocated the // part of our stack frame that contains incoming arguments. let incoming_tail_args_size = if call_conv == isa::CallConv::Tail { frame_layout.incoming_args_size } else { 0 }; // Define unwind stack frame. if flags.unwind_info() { insts.push(Inst::Unwind { inst: UnwindInst::DefineNewFrame { offset_upward_to_caller_sp: REG_SAVE_AREA_SIZE + incoming_tail_args_size, offset_downward_to_clobbers: frame_layout.clobber_size - incoming_tail_args_size, }, }); } // Use STMG to save clobbered GPRs into save area. // Note that we always save SP (%r15) here if anything is saved. if let Some((first_clobbered_gpr, _)) = get_clobbered_gprs(frame_layout) { let mut last_clobbered_gpr = 15; let offset = 8 * first_clobbered_gpr as i64 + incoming_tail_args_size as i64; insts.push(Inst::StoreMultiple64 { rt: gpr(first_clobbered_gpr), rt2: gpr(last_clobbered_gpr), mem: MemArg::reg_plus_off(stack_reg(), offset, MemFlags::trusted()), }); if flags.unwind_info() { // Normally, we instruct the unwinder to restore the stack pointer // from its slot in the save area. However, if we have incoming // tail-call arguments, the value saved in that slot is incorrect. // In that case, we instead instruct the unwinder to compute the // unwound SP relative to the current CFA, as CFA == SP + 160. 
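            // The RegStackOffset record emitted below expresses that
            // CFA-relative rule for %r15; %r15 is then excluded from the
            // regular SaveReg records generated by the loop that follows.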
if incoming_tail_args_size != 0 { insts.push(Inst::Unwind { inst: UnwindInst::RegStackOffset { clobber_offset: frame_layout.clobber_size, reg: gpr(last_clobbered_gpr).to_real_reg().unwrap(), }, }); last_clobbered_gpr = last_clobbered_gpr - 1; } for i in first_clobbered_gpr..(last_clobbered_gpr + 1) { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset: frame_layout.clobber_size + (i * 8) as u32, reg: gpr(i).to_real_reg().unwrap(), }, }); } } } // Save current stack pointer value if we need to write the backchain. if flags.preserve_frame_pointers() { if incoming_tail_args_size == 0 { insts.push(Inst::mov64(writable_gpr(1), stack_reg())); } else { insts.extend(Self::gen_add_imm( call_conv, writable_gpr(1), stack_reg(), incoming_tail_args_size, )); } } // Decrement stack pointer. let stack_size = frame_layout.outgoing_args_size as i32 + frame_layout.clobber_size as i32 + frame_layout.fixed_frame_storage_size as i32 - incoming_tail_args_size as i32; insts.extend(Self::gen_sp_reg_adjust(-stack_size)); if flags.unwind_info() { insts.push(Inst::Unwind { inst: UnwindInst::StackAlloc { size: stack_size as u32, }, }); } // Write the stack backchain if requested, using the value saved above. if flags.preserve_frame_pointers() { insts.push(Inst::Store64 { rd: gpr(1), mem: MemArg::reg_plus_off(stack_reg(), 0, MemFlags::trusted()), }); } // Save FPRs. for (i, reg) in get_clobbered_fprs(frame_layout).iter().enumerate() { insts.push(Inst::VecStoreLane { size: 64, rd: reg.to_reg().into(), mem: MemArg::reg_plus_off( stack_reg(), (i * 8) as i64 + frame_layout.outgoing_args_size as i64 + frame_layout.fixed_frame_storage_size as i64, MemFlags::trusted(), ), lane_imm: 0, }); if flags.unwind_info() { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset: (i * 8) as u32, reg: reg.to_reg(), }, }); } } insts } fn gen_clobber_restore( call_conv: isa::CallConv, _flags: &settings::Flags, frame_layout: &FrameLayout, ) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); // Restore FPRs. insts.extend(gen_restore_fprs(frame_layout)); // Restore GPRs (including SP). insts.extend(gen_restore_gprs(call_conv, frame_layout, 0)); insts } fn gen_call(_dest: &CallDest, _tmp: Writable, _info: CallInfo<()>) -> SmallVec<[Inst; 2]> { unreachable!(); } fn gen_memcpy Writable>( _call_conv: isa::CallConv, _dst: Reg, _src: Reg, _size: usize, _alloc: F, ) -> SmallVec<[Self::I; 8]> { unimplemented!("StructArgs not implemented for S390X yet"); } fn get_number_of_spillslots_for_value( rc: RegClass, _vector_scale: u32, _isa_flags: &Self::F, ) -> u32 { // We allocate in terms of 8-byte slots. 
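        // Note: on this backend, 128-bit vector values also live in the
        // float register class (RegClass::Vector is unused), so a float-class
        // spill slot must be able to hold 16 bytes.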
        match rc {
            RegClass::Int => 1,
            RegClass::Float => 2,
            RegClass::Vector => unreachable!(),
        }
    }

    fn get_machine_env(_flags: &settings::Flags, call_conv: isa::CallConv) -> &MachineEnv {
        match call_conv {
            isa::CallConv::Tail => {
                static TAIL_MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
                TAIL_MACHINE_ENV.get_or_init(tail_create_machine_env)
            }
            _ => {
                static SYSV_MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
                SYSV_MACHINE_ENV.get_or_init(sysv_create_machine_env)
            }
        }
    }

    fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet {
        match call_conv_of_callee {
            isa::CallConv::Tail => TAIL_CLOBBERS,
            _ => SYSV_CLOBBERS,
        }
    }

    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }

    fn compute_frame_layout(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
        _is_leaf: bool,
        incoming_args_size: u32,
        tail_args_size: u32,
        fixed_frame_storage_size: u32,
        mut outgoing_args_size: u32,
    ) -> FrameLayout {
        assert!(
            !flags.enable_pinned_reg(),
            "Pinned register not supported on s390x"
        );

        let mut regs: Vec<Writable<RealReg>> = regs
            .iter()
            .cloned()
            .filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg()))
            .collect();

        // If the front end asks to preserve frame pointers (which we do not
        // really have in the s390x ABI), we use the stack backchain instead.
        // For this to work in all cases, we must allocate a stack frame with
        // at least the outgoing register save area even in leaf functions.
        // Update our caller's outgoing_args_size to reflect this.
        if flags.preserve_frame_pointers() {
            if outgoing_args_size < REG_SAVE_AREA_SIZE {
                outgoing_args_size = REG_SAVE_AREA_SIZE;
            }
        }

        // We need to save/restore the link register in non-leaf functions.
        // This is not included in the clobber list because we have excluded
        // call instructions via the is_included_in_clobbers callback.
        // We also want to enforce saving the link register in leaf functions
        // for stack unwinding, if we're asked to preserve frame pointers.
        if outgoing_args_size > 0 {
            let link_reg = Writable::from_reg(RealReg::from(gpr_preg(14)));
            if !regs.contains(&link_reg) {
                regs.push(link_reg);
            }
        }

        // Sort registers for deterministic code output. We can do an unstable
        // sort because the registers will be unique (there are no dups).
        regs.sort_unstable();

        // Compute clobber size. We only need to count FPR save slots.
        let mut clobber_size = 0;
        for reg in &regs {
            match reg.to_reg().class() {
                RegClass::Int => {}
                RegClass::Float => {
                    clobber_size += 8;
                }
                RegClass::Vector => unreachable!(),
            }
        }

        // Common code assumes that tail-call arguments are part of the caller's
        // frame. This is not correct for our tail-call convention. To ensure
        // common code still gets the total size of this stack frame correct,
        // we add the (incoming and outgoing) tail-call argument size to the
        // "clobber" size.
        if call_conv == isa::CallConv::Tail {
            clobber_size += tail_args_size;
        }

        // Return FrameLayout structure.
        FrameLayout {
            incoming_args_size,
            // We already accounted for tail-call arguments above, so reset
            // this value to its default.
            tail_args_size: incoming_args_size,
            setup_area_size: 0,
            clobber_size,
            fixed_frame_storage_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
        }
    }
}

impl S390xMachineDeps {
    pub fn gen_tail_epilogue(
        frame_layout: &FrameLayout,
        callee_pop_size: u32,
        target_reg: Option<&mut Reg>,
    ) -> SmallVec<[Inst; 16]> {
        let mut insts = SmallVec::new();
        let call_conv = isa::CallConv::Tail;

        // Restore FPRs.
insts.extend(gen_restore_fprs(frame_layout)); // If the tail call target is in a callee-saved GPR, we need to move it // to %r1 (as the only available temp register) before restoring GPRs // (but after restoring FPRs, which might clobber %r1). if let Some(reg) = target_reg { if is_reg_saved_in_prologue(call_conv, reg.to_real_reg().unwrap()) { insts.push(Inst::Mov64 { rd: writable_gpr(1), rm: *reg, }); *reg = gpr(1); } } // Restore GPRs (including SP). insts.extend(gen_restore_gprs(call_conv, frame_layout, callee_pop_size)); insts } } fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool { match (call_conv, r.class()) { (isa::CallConv::Tail, RegClass::Int) => { // r8 - r15 inclusive are callee-saves. r.hw_enc() >= 8 && r.hw_enc() <= 15 } (_, RegClass::Int) => { // r6 - r15 inclusive are callee-saves. r.hw_enc() >= 6 && r.hw_enc() <= 15 } (_, RegClass::Float) => { // f8 - f15 inclusive are callee-saves. r.hw_enc() >= 8 && r.hw_enc() <= 15 } (_, RegClass::Vector) => unreachable!(), } } fn get_clobbered_gprs(frame_layout: &FrameLayout) -> Option<(u8, u8)> { // Collect clobbered GPRs. Note we save/restore GPR always as // a block of registers using LOAD MULTIPLE / STORE MULTIPLE, starting // with the clobbered GPR with the lowest number up to the clobbered GPR // with the highest number. let (clobbered_gpr, _) = frame_layout.clobbered_callee_saves_by_class(); if clobbered_gpr.is_empty() { return None; } let first = clobbered_gpr.first().unwrap().to_reg().hw_enc(); let last = clobbered_gpr.last().unwrap().to_reg().hw_enc(); debug_assert!(clobbered_gpr.iter().all(|r| r.to_reg().hw_enc() >= first)); debug_assert!(clobbered_gpr.iter().all(|r| r.to_reg().hw_enc() <= last)); Some((first, last)) } fn get_clobbered_fprs(frame_layout: &FrameLayout) -> &[Writable] { // Collect clobbered floating-point registers. let (_, clobbered_fpr) = frame_layout.clobbered_callee_saves_by_class(); clobbered_fpr } // Restore GPRs (including SP) from the register save area. // This must not clobber any register, specifically including %r1. fn gen_restore_gprs( call_conv: isa::CallConv, frame_layout: &FrameLayout, callee_pop_size: u32, ) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); // Determine GPRs to be restored. let clobbered_gpr = get_clobbered_gprs(frame_layout); // Increment stack pointer unless it will be restored implicitly. // Note that implicit stack pointer restoration cannot be done in the // presence of either incoming or outgoing tail call arguments. let stack_size = frame_layout.outgoing_args_size as i32 + frame_layout.clobber_size as i32 + frame_layout.fixed_frame_storage_size as i32; let implicit_sp_restore = callee_pop_size == 0 && (call_conv != isa::CallConv::Tail || frame_layout.incoming_args_size == 0) && clobbered_gpr.map_or(false, |(first, _)| { SImm20::maybe_from_i64(8 * first as i64 + stack_size as i64).is_some() }); if !implicit_sp_restore { insts.extend(S390xMachineDeps::gen_sp_reg_adjust( stack_size - callee_pop_size as i32, )); } // Use LMG to restore clobbered GPRs from save area. if let Some((first, mut last)) = clobbered_gpr { // Attempt to restore via SP, taking implicit restoration into account. let mut reg = stack_reg(); let mut offset = callee_pop_size as i64 + 8 * first as i64; if implicit_sp_restore { offset += stack_size as i64 - callee_pop_size as i64; last = 15; } // If the offset still overflows, use the first restored GPR // as temporary holding the address, as we cannot use %r1. 
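        // (%r1 must stay untouched here because gen_tail_epilogue may have
        // just moved the tail-call target into it; see above.)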
if SImm20::maybe_from_i64(offset).is_none() { insts.extend(S390xMachineDeps::gen_add_imm( call_conv, writable_gpr(first), stack_reg(), offset as u32, )); reg = gpr(first); offset = 0; } // Now this LMG will always have an in-range offset. insts.push(Inst::LoadMultiple64 { rt: writable_gpr(first), rt2: writable_gpr(last), mem: MemArg::reg_plus_off(reg, offset, MemFlags::trusted()), }); } insts } // Restore FPRs from the clobber area. fn gen_restore_fprs(frame_layout: &FrameLayout) -> SmallVec<[Inst; 16]> { let mut insts = SmallVec::new(); // Determine FPRs to be restored. let clobbered_fpr = get_clobbered_fprs(frame_layout); // Restore FPRs. for (i, reg) in clobbered_fpr.iter().enumerate() { insts.push(Inst::VecLoadLaneUndef { size: 64, rd: Writable::from_reg(reg.to_reg().into()), mem: MemArg::reg_plus_off( stack_reg(), (i * 8) as i64 + frame_layout.outgoing_args_size as i64 + frame_layout.fixed_frame_storage_size as i64, MemFlags::trusted(), ), lane_imm: 0, }); } insts } const fn sysv_clobbers() -> PRegSet { PRegSet::empty() .with(gpr_preg(0)) .with(gpr_preg(1)) .with(gpr_preg(2)) .with(gpr_preg(3)) .with(gpr_preg(4)) .with(gpr_preg(5)) // v0 - v7 inclusive and v16 - v31 inclusive are // caller-saves. The upper 64 bits of v8 - v15 inclusive are // also caller-saves. However, because we cannot currently // represent partial registers to regalloc2, we indicate here // that every vector register is caller-save. Because this // function is used at *callsites*, approximating in this // direction (save more than necessary) is conservative and // thus safe. // // Note that we exclude clobbers from a call instruction when // a call instruction's callee has the same ABI as the caller // (the current function body); this is safe (anything // clobbered by callee can be clobbered by caller as well) and // avoids unnecessary saves of v8-v15 in the prologue even // though we include them as defs here. .with(vr_preg(0)) .with(vr_preg(1)) .with(vr_preg(2)) .with(vr_preg(3)) .with(vr_preg(4)) .with(vr_preg(5)) .with(vr_preg(6)) .with(vr_preg(7)) .with(vr_preg(8)) .with(vr_preg(9)) .with(vr_preg(10)) .with(vr_preg(11)) .with(vr_preg(12)) .with(vr_preg(13)) .with(vr_preg(14)) .with(vr_preg(15)) .with(vr_preg(16)) .with(vr_preg(17)) .with(vr_preg(18)) .with(vr_preg(19)) .with(vr_preg(20)) .with(vr_preg(21)) .with(vr_preg(22)) .with(vr_preg(23)) .with(vr_preg(24)) .with(vr_preg(25)) .with(vr_preg(26)) .with(vr_preg(27)) .with(vr_preg(28)) .with(vr_preg(29)) .with(vr_preg(30)) .with(vr_preg(31)) } const SYSV_CLOBBERS: PRegSet = sysv_clobbers(); const fn tail_clobbers() -> PRegSet { // Same as the SystemV ABI, except that %r6 and %r7 are clobbered. 
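    // This mirrors get_intreg_for_arg and is_reg_saved_in_prologue above,
    // where %r6 and %r7 are argument registers rather than callee-saves
    // under the tail-call convention.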
PRegSet::empty() .with(gpr_preg(0)) .with(gpr_preg(1)) .with(gpr_preg(2)) .with(gpr_preg(3)) .with(gpr_preg(4)) .with(gpr_preg(5)) .with(gpr_preg(6)) .with(gpr_preg(7)) .with(vr_preg(0)) .with(vr_preg(1)) .with(vr_preg(2)) .with(vr_preg(3)) .with(vr_preg(4)) .with(vr_preg(5)) .with(vr_preg(6)) .with(vr_preg(7)) .with(vr_preg(8)) .with(vr_preg(9)) .with(vr_preg(10)) .with(vr_preg(11)) .with(vr_preg(12)) .with(vr_preg(13)) .with(vr_preg(14)) .with(vr_preg(15)) .with(vr_preg(16)) .with(vr_preg(17)) .with(vr_preg(18)) .with(vr_preg(19)) .with(vr_preg(20)) .with(vr_preg(21)) .with(vr_preg(22)) .with(vr_preg(23)) .with(vr_preg(24)) .with(vr_preg(25)) .with(vr_preg(26)) .with(vr_preg(27)) .with(vr_preg(28)) .with(vr_preg(29)) .with(vr_preg(30)) .with(vr_preg(31)) } const TAIL_CLOBBERS: PRegSet = tail_clobbers(); fn sysv_create_machine_env() -> MachineEnv { MachineEnv { preferred_regs_by_class: [ vec![ // no r0; can't use for addressing? // no r1; it is our spilltmp. gpr_preg(2), gpr_preg(3), gpr_preg(4), gpr_preg(5), ], vec![ vr_preg(0), vr_preg(1), vr_preg(2), vr_preg(3), vr_preg(4), vr_preg(5), vr_preg(6), vr_preg(7), vr_preg(16), vr_preg(17), vr_preg(18), vr_preg(19), vr_preg(20), vr_preg(21), vr_preg(22), vr_preg(23), vr_preg(24), vr_preg(25), vr_preg(26), vr_preg(27), vr_preg(28), vr_preg(29), vr_preg(30), vr_preg(31), ], // Vector Regclass is unused vec![], ], non_preferred_regs_by_class: [ vec![ gpr_preg(6), gpr_preg(7), gpr_preg(8), gpr_preg(9), gpr_preg(10), gpr_preg(11), gpr_preg(12), gpr_preg(13), gpr_preg(14), // no r15; it is the stack pointer. ], vec![ vr_preg(8), vr_preg(9), vr_preg(10), vr_preg(11), vr_preg(12), vr_preg(13), vr_preg(14), vr_preg(15), ], // Vector Regclass is unused vec![], ], fixed_stack_slots: vec![], scratch_by_class: [None, None, None], } } fn tail_create_machine_env() -> MachineEnv { // Same as the SystemV ABI, except that %r6 and %r7 are preferred. MachineEnv { preferred_regs_by_class: [ vec![ // no r0; can't use for addressing? // no r1; it is our spilltmp. gpr_preg(2), gpr_preg(3), gpr_preg(4), gpr_preg(5), gpr_preg(6), gpr_preg(7), ], vec![ vr_preg(0), vr_preg(1), vr_preg(2), vr_preg(3), vr_preg(4), vr_preg(5), vr_preg(6), vr_preg(7), vr_preg(16), vr_preg(17), vr_preg(18), vr_preg(19), vr_preg(20), vr_preg(21), vr_preg(22), vr_preg(23), vr_preg(24), vr_preg(25), vr_preg(26), vr_preg(27), vr_preg(28), vr_preg(29), vr_preg(30), vr_preg(31), ], // Vector Regclass is unused vec![], ], non_preferred_regs_by_class: [ vec![ gpr_preg(8), gpr_preg(9), gpr_preg(10), gpr_preg(11), gpr_preg(12), gpr_preg(13), gpr_preg(14), // no r15; it is the stack pointer. ], vec![ vr_preg(8), vr_preg(9), vr_preg(10), vr_preg(11), vr_preg(12), vr_preg(13), vr_preg(14), vr_preg(15), ], // Vector Regclass is unused vec![], ], fixed_stack_slots: vec![], scratch_by_class: [None, None, None], } }
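
// What follows is an illustrative sketch only (the module name and tests are
// hypothetical, not part of the backend): it restates the stack-slot
// arithmetic used by compute_arg_locs above as plain integer checks, which
// can be handy when reading the layout rules.
#[cfg(test)]
mod layout_sketch {
    use super::REG_SAVE_AREA_SIZE;

    #[test]
    fn small_stack_args_are_right_aligned() {
        // A 4-byte value passed on the stack occupies an 8-byte slot; when it
        // is not sign- or zero-extended, it lives in the high-addressed half
        // of that slot (offset 4 within the slot).
        let size = 4u32;
        let slot_size = size.max(8);
        assert_eq!(slot_size - size, 4);
    }

    #[test]
    fn tail_call_stack_args_live_below_the_callers_frame() {
        // With the tail-call convention, a single 8-byte overflow argument is
        // rebased to a negative offset, and the caller additionally reserves
        // space for a temporary copy of the register save area.
        let mut next_stack = 8u32; // one 8-byte overflow argument
        let rebased_offset = 0i64 - i64::from(next_stack);
        next_stack += REG_SAVE_AREA_SIZE;
        assert_eq!(rebased_offset, -8);
        assert_eq!(next_stack, 168);
    }
}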