| //! Implementation of a standard AArch64 ABI. |
| |
| use crate::ir; |
| use crate::ir::types; |
| use crate::ir::types::*; |
| use crate::ir::MemFlags; |
| use crate::ir::Opcode; |
| use crate::ir::{dynamic_to_fixed, ExternalName, LibCall, Signature}; |
| use crate::isa; |
| use crate::isa::aarch64::{inst::EmitState, inst::*, settings as aarch64_settings}; |
| use crate::isa::unwind::UnwindInst; |
| use crate::machinst::*; |
| use crate::settings; |
| use crate::{CodegenError, CodegenResult}; |
| use alloc::boxed::Box; |
| use alloc::vec::Vec; |
| use regalloc2::{PRegSet, VReg}; |
| use smallvec::{smallvec, SmallVec}; |
| |
| // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because |
| // these ABIs are very similar. |
| |
| /// Support for the AArch64 ABI from the callee side (within a function body). |
| pub(crate) type AArch64Callee = Callee<AArch64MachineDeps>; |
| |
| /// Support for the AArch64 ABI from the caller side (at a callsite). |
| pub(crate) type AArch64CallSite = CallSite<AArch64MachineDeps>; |
| |
| /// This is the limit for the size of argument and return-value areas on the |
| /// stack. We place a reasonable limit here to avoid integer overflow issues |
| /// with 32-bit arithmetic: for now, 128 MB. |
| static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024; |
| |
impl From<StackAMode> for AMode {
    fn from(stack: StackAMode) -> AMode {
        match stack {
            StackAMode::FPOffset(off, ty) => AMode::FPOffset { off, ty },
            StackAMode::NominalSPOffset(off, ty) => AMode::NominalSPOffset { off, ty },
            StackAMode::SPOffset(off, ty) => AMode::SPOffset { off, ty },
        }
    }
}
| |
// Returns the sizes of the stack spaces needed to save the `int_reg` and
// `vec_reg` register sets, as an `(int_bytes, vec_bytes)` pair.
| fn saved_reg_stack_size( |
| int_reg: &[Writable<RealReg>], |
| vec_reg: &[Writable<RealReg>], |
| ) -> (usize, usize) { |
| // Round up to multiple of 2, to keep 16-byte stack alignment. |
| let int_save_bytes = (int_reg.len() + (int_reg.len() & 1)) * 8; |
| // The Procedure Call Standard for the Arm 64-bit Architecture |
| // (AAPCS64, including several related ABIs such as the one used by |
| // Windows) mandates saving only the bottom 8 bytes of the vector |
| // registers, so we round up the number of registers to ensure |
| // proper stack alignment (similarly to the situation with |
| // `int_reg`). |
| let vec_reg_size = 8; |
| let vec_save_padding = vec_reg.len() & 1; |
    // FIXME: SVE: the ABI is different from Neon, so do we treat all vec regs as Z-regs?
| let vec_save_bytes = (vec_reg.len() + vec_save_padding) * vec_reg_size; |
| |
| (int_save_bytes, vec_save_bytes) |
| } |
| |
| /// AArch64-specific ABI behavior. This struct just serves as an implementation |
| /// point for the trait; it is never actually instantiated. |
| pub struct AArch64MachineDeps; |
| |
| impl IsaFlags for aarch64_settings::Flags { |
| fn is_forward_edge_cfi_enabled(&self) -> bool { |
| self.use_bti() |
| } |
| } |
| |
| impl ABIMachineSpec for AArch64MachineDeps { |
| type I = Inst; |
| |
| type F = aarch64_settings::Flags; |
| |
| fn word_bits() -> u32 { |
| 64 |
| } |
| |
| /// Return required stack alignment in bytes. |
| fn stack_align(_call_conv: isa::CallConv) -> u32 { |
| 16 |
| } |
| |
| fn compute_arg_locs<'a, I>( |
| call_conv: isa::CallConv, |
| _flags: &settings::Flags, |
| params: I, |
| args_or_rets: ArgsOrRets, |
| add_ret_area_ptr: bool, |
| mut args: ArgsAccumulator<'_>, |
| ) -> CodegenResult<(u32, Option<usize>)> |
| where |
| I: IntoIterator<Item = &'a ir::AbiParam>, |
| { |
| if call_conv == isa::CallConv::Tail { |
| return compute_arg_locs_tail(params, add_ret_area_ptr, args); |
| } |
| |
| let is_apple_cc = call_conv.extends_apple_aarch64(); |
| |
        // See the AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), section 6.4.
        //
        // macOS aarch64 is slightly different; see also
        // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
        // We diverge from the macOS aarch64 implementation in the
        // following ways:
        // - sign- and zero-extension of data types narrower than 32 bits
        //   is not implemented yet;
        // - we align the argument stack space to a 16-byte boundary, while
        //   macOS allows alignment on only 8 bytes. In practice this means
        //   we slightly overallocate when calling, which is fine and doesn't
        //   break our invariant that the stack is always allocated in
        //   16-byte chunks.
| |
| let mut next_xreg = 0; |
| let mut next_vreg = 0; |
| let mut next_stack: u32 = 0; |
| |
| let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets { |
| ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7 |
| |
            // Note on return values: in the regular ABI, the number of
            // return registers used in one class is independent of the
            // number used in the other class; that is, we can return values
            // in up to 8 integer and up to 8 vector registers at once.
            ArgsOrRets::Rets => {
                (8, 16) // x0-x7 and v0-v7
            }
| }; |
| |
| for param in params { |
| assert!( |
| legal_type_for_machine(param.value_type), |
| "Invalid type for AArch64: {:?}", |
| param.value_type |
| ); |
| |
| let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?; |
| |
| if let ir::ArgumentPurpose::StructArgument(size) = param.purpose { |
| assert_eq!(args_or_rets, ArgsOrRets::Args); |
| let offset = next_stack as i64; |
| assert!(size % 8 == 0, "StructArgument size is not properly aligned"); |
| next_stack += size; |
| args.push(ABIArg::StructArg { |
| pointer: None, |
| offset, |
| size: size as u64, |
| purpose: param.purpose, |
| }); |
| continue; |
| } |
| |
| if let ir::ArgumentPurpose::StructReturn = param.purpose { |
| // FIXME add assert_eq!(args_or_rets, ArgsOrRets::Args); once |
| // ensure_struct_return_ptr_is_returned is gone. |
| assert!( |
| param.value_type == types::I64, |
| "StructReturn must be a pointer sized integer" |
| ); |
| args.push(ABIArg::Slots { |
| slots: smallvec![ABIArgSlot::Reg { |
| reg: xreg(8).to_real_reg().unwrap(), |
| ty: types::I64, |
| extension: param.extension, |
| },], |
| purpose: ir::ArgumentPurpose::StructReturn, |
| }); |
| continue; |
| } |
| |
        // Handle multi-register params.
        //
        // See the AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), section 6.4.2, Stage C.
        //
        // For arguments with an alignment of 16 we round the register
        // number up to the next even value, so we can never allocate, for
        // example, an i128 to X1 and X2; we have to skip one register and
        // use X2 and X3 instead (Stage C.8).
        // Note: the Apple ABI deviates a bit here. It doesn't respect
        // Stage C.8 and will happily allocate an i128 to X1 and X2.
        //
        // For integer types with an alignment of 16 we also have the
        // additional restriction of passing the lower half in Xn and the
        // upper half in Xn+1 (Stage C.9).
        //
        // For examples of how LLVM handles this, see https://godbolt.org/z/bhd3vvEfh.
        //
        // The Apple ABI leaves it unspecified whether we may split a value
        // between registers and the stack, i.e. load the lower half into x7
        // and pass the upper half on the stack. LLVM does not seem to do
        // this, so we replicate that behaviour.
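        // An illustrative trace (hypothetical signature): for fn(i64, i128)
        // under the standard AAPCS64, the i64 takes x0; the i128 must start
        // at an even register, so x1 is skipped and the value lands in
        // x2 (low half) and x3 (high half). Under the Apple ABI the same
        // i128 would take x1 and x2.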
| let is_multi_reg = rcs.len() >= 2; |
| if is_multi_reg { |
| assert!( |
| rcs.len() == 2, |
| "Unable to handle multi reg params with more than 2 regs" |
| ); |
| assert!( |
| rcs == &[RegClass::Int, RegClass::Int], |
| "Unable to handle non i64 regs" |
| ); |
| |
| let reg_class_space = max_per_class_reg_vals - next_xreg; |
| let reg_space = remaining_reg_vals; |
| |
| if reg_space >= 2 && reg_class_space >= 2 { |
| // The aarch64 ABI does not allow us to start a split argument |
| // at an odd numbered register. So we need to skip one register |
| // |
| // TODO: The Fast ABI should probably not skip the register |
| if !is_apple_cc && next_xreg % 2 != 0 { |
| next_xreg += 1; |
| } |
| |
| let lower_reg = xreg(next_xreg); |
| let upper_reg = xreg(next_xreg + 1); |
| |
| args.push(ABIArg::Slots { |
| slots: smallvec![ |
| ABIArgSlot::Reg { |
| reg: lower_reg.to_real_reg().unwrap(), |
| ty: reg_types[0], |
| extension: param.extension, |
| }, |
| ABIArgSlot::Reg { |
| reg: upper_reg.to_real_reg().unwrap(), |
| ty: reg_types[1], |
| extension: param.extension, |
| }, |
| ], |
| purpose: param.purpose, |
| }); |
| |
| next_xreg += 2; |
| remaining_reg_vals -= 2; |
| continue; |
| } |
| } else { |
            // Single-register parameters.
| let rc = rcs[0]; |
| let next_reg = match rc { |
| RegClass::Int => &mut next_xreg, |
| RegClass::Float => &mut next_vreg, |
| RegClass::Vector => unreachable!(), |
| }; |
| |
| if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 { |
| let reg = match rc { |
| RegClass::Int => xreg(*next_reg), |
| RegClass::Float => vreg(*next_reg), |
| RegClass::Vector => unreachable!(), |
| }; |
| // Overlay Z-regs on V-regs for parameter passing. |
| let ty = if param.value_type.is_dynamic_vector() { |
| dynamic_to_fixed(param.value_type) |
| } else { |
| param.value_type |
| }; |
| args.push(ABIArg::reg( |
| reg.to_real_reg().unwrap(), |
| ty, |
| param.extension, |
| param.purpose, |
| )); |
| *next_reg += 1; |
| remaining_reg_vals -= 1; |
| continue; |
| } |
| } |
| |
| // Spill to the stack |
| |
| // Compute the stack slot's size. |
| let size = (ty_bits(param.value_type) / 8) as u32; |
| |
| let size = if is_apple_cc { |
            // macOS aarch64 allows stack slots with
| // sizes less than 8 bytes. They still need to be |
| // properly aligned on their natural data alignment, |
| // though. |
| size |
| } else { |
| // Every arg takes a minimum slot of 8 bytes. (16-byte stack |
| // alignment happens separately after all args.) |
| std::cmp::max(size, 8) |
| }; |
| |
| // Align the stack slot. |
| debug_assert!(size.is_power_of_two()); |
| next_stack = align_to(next_stack, size); |
| |
| let slots = reg_types |
| .iter() |
| .copied() |
| // Build the stack locations from each slot |
| .scan(next_stack, |next_stack, ty| { |
| let slot_offset = *next_stack as i64; |
| *next_stack += (ty_bits(ty) / 8) as u32; |
| |
| Some((ty, slot_offset)) |
| }) |
| .map(|(ty, offset)| ABIArgSlot::Stack { |
| offset, |
| ty, |
| extension: param.extension, |
| }) |
| .collect(); |
| |
| args.push(ABIArg::Slots { |
| slots, |
| purpose: param.purpose, |
| }); |
| |
| next_stack += size; |
| } |
| |
| let extra_arg = if add_ret_area_ptr { |
| debug_assert!(args_or_rets == ArgsOrRets::Args); |
| if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 { |
| args.push_non_formal(ABIArg::reg( |
| xreg(next_xreg).to_real_reg().unwrap(), |
| I64, |
| ir::ArgumentExtension::None, |
| ir::ArgumentPurpose::Normal, |
| )); |
| } else { |
| args.push_non_formal(ABIArg::stack( |
| next_stack as i64, |
| I64, |
| ir::ArgumentExtension::None, |
| ir::ArgumentPurpose::Normal, |
| )); |
| next_stack += 8; |
| } |
| Some(args.args().len() - 1) |
| } else { |
| None |
| }; |
| |
| next_stack = align_to(next_stack, 16); |
| |
| // To avoid overflow issues, limit the arg/return size to something |
| // reasonable -- here, 128 MB. |
| if next_stack > STACK_ARG_RET_SIZE_LIMIT { |
| return Err(CodegenError::ImplLimitExceeded); |
| } |
| |
| Ok((next_stack, extra_arg)) |
| } |
| |
| fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 { |
| 16 // frame pointer + return address. |
| } |
| |
| fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst { |
| Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()) |
| } |
| |
| fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst { |
| Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted()) |
| } |
| |
| fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst { |
| Inst::gen_move(to_reg, from_reg, ty) |
| } |
| |
| fn gen_extend( |
| to_reg: Writable<Reg>, |
| from_reg: Reg, |
| signed: bool, |
| from_bits: u8, |
| to_bits: u8, |
| ) -> Inst { |
| assert!(from_bits < to_bits); |
| Inst::Extend { |
| rd: to_reg, |
| rn: from_reg, |
| signed, |
| from_bits, |
| to_bits, |
| } |
| } |
| |
| fn gen_args(_isa_flags: &aarch64_settings::Flags, args: Vec<ArgPair>) -> Inst { |
| Inst::Args { args } |
| } |
| |
| fn gen_ret( |
| setup_frame: bool, |
| isa_flags: &aarch64_settings::Flags, |
| rets: Vec<RetPair>, |
| stack_bytes_to_pop: u32, |
| ) -> Inst { |
| if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) { |
| let key = if isa_flags.sign_return_address_with_bkey() { |
| APIKey::B |
| } else { |
| APIKey::A |
| }; |
| |
| Inst::AuthenticatedRet { |
| key, |
| is_hint: !isa_flags.has_pauth(), |
| rets, |
| stack_bytes_to_pop, |
| } |
| } else { |
| Inst::Ret { |
| rets, |
| stack_bytes_to_pop, |
| } |
| } |
| } |
| |
| fn gen_add_imm( |
| _call_conv: isa::CallConv, |
| into_reg: Writable<Reg>, |
| from_reg: Reg, |
| imm: u32, |
| ) -> SmallInstVec<Inst> { |
| let imm = imm as u64; |
| let mut insts = SmallVec::new(); |
| if let Some(imm12) = Imm12::maybe_from_u64(imm) { |
| insts.push(Inst::AluRRImm12 { |
| alu_op: ALUOp::Add, |
| size: OperandSize::Size64, |
| rd: into_reg, |
| rn: from_reg, |
| imm12, |
| }); |
| } else { |
| let scratch2 = writable_tmp2_reg(); |
| assert_ne!(scratch2.to_reg(), from_reg); |
| // `gen_add_imm` is only ever called after register allocation has taken place, and as a |
| // result it's ok to reuse the scratch2 register here. If that changes, we'll need to |
| // plumb through a way to allocate temporary virtual registers |
| insts.extend(Inst::load_constant(scratch2, imm.into(), &mut |_| scratch2)); |
| insts.push(Inst::AluRRRExtend { |
| alu_op: ALUOp::Add, |
| size: OperandSize::Size64, |
| rd: into_reg, |
| rn: from_reg, |
| rm: scratch2.to_reg(), |
| extendop: ExtendOp::UXTX, |
| }); |
| } |
| insts |
| } |
| |
| fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> { |
| let mut insts = SmallVec::new(); |
| insts.push(Inst::AluRRRExtend { |
| alu_op: ALUOp::SubS, |
| size: OperandSize::Size64, |
| rd: writable_zero_reg(), |
| rn: stack_reg(), |
| rm: limit_reg, |
| extendop: ExtendOp::UXTX, |
| }); |
| insts.push(Inst::TrapIf { |
| trap_code: ir::TrapCode::StackOverflow, |
| // Here `Lo` == "less than" when interpreting the two |
| // operands as unsigned integers. |
| kind: CondBrKind::Cond(Cond::Lo), |
| }); |
| insts |
| } |
| |
| fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Inst { |
| // FIXME: Do something different for dynamic types? |
| let mem = mem.into(); |
| Inst::LoadAddr { rd: into_reg, mem } |
| } |
| |
| fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg { |
| spilltmp_reg() |
| } |
| |
| fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst { |
| let mem = AMode::RegOffset { |
| rn: base, |
| off: offset as i64, |
| ty, |
| }; |
| Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()) |
| } |
| |
| fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst { |
| let mem = AMode::RegOffset { |
| rn: base, |
| off: offset as i64, |
| ty, |
| }; |
| Inst::gen_store(mem, from_reg, ty, MemFlags::trusted()) |
| } |
| |
| fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> { |
| if amount == 0 { |
| return SmallVec::new(); |
| } |
| |
| let (amount, is_sub) = if amount > 0 { |
| (amount as u64, false) |
| } else { |
| (-amount as u64, true) |
| }; |
| |
| let alu_op = if is_sub { ALUOp::Sub } else { ALUOp::Add }; |
| |
| let mut ret = SmallVec::new(); |
| if let Some(imm12) = Imm12::maybe_from_u64(amount) { |
| let adj_inst = Inst::AluRRImm12 { |
| alu_op, |
| size: OperandSize::Size64, |
| rd: writable_stack_reg(), |
| rn: stack_reg(), |
| imm12, |
| }; |
| ret.push(adj_inst); |
| } else { |
| let tmp = writable_spilltmp_reg(); |
| // `gen_sp_reg_adjust` is called after regalloc2, so it's acceptable to reuse `tmp` for |
| // intermediates in `load_constant`. |
| let const_inst = Inst::load_constant(tmp, amount, &mut |_| tmp); |
| let adj_inst = Inst::AluRRRExtend { |
| alu_op, |
| size: OperandSize::Size64, |
| rd: writable_stack_reg(), |
| rn: stack_reg(), |
| rm: tmp.to_reg(), |
| extendop: ExtendOp::UXTX, |
| }; |
| ret.extend(const_inst); |
| ret.push(adj_inst); |
| } |
| ret |
| } |
| |
| fn gen_nominal_sp_adj(offset: i32) -> Inst { |
| Inst::VirtualSPOffsetAdj { |
| offset: offset as i64, |
| } |
| } |
| |
| fn gen_prologue_start( |
| setup_frame: bool, |
| call_conv: isa::CallConv, |
| flags: &settings::Flags, |
| isa_flags: &aarch64_settings::Flags, |
| ) -> SmallInstVec<Inst> { |
| let mut insts = SmallVec::new(); |
| |
| if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) { |
| let key = if isa_flags.sign_return_address_with_bkey() { |
| APIKey::B |
| } else { |
| APIKey::A |
| }; |
| |
| insts.push(Inst::Pacisp { key }); |
| |
| if flags.unwind_info() { |
| insts.push(Inst::Unwind { |
| inst: UnwindInst::Aarch64SetPointerAuth { |
| return_addresses: true, |
| }, |
| }); |
| } |
| } else { |
| if isa_flags.use_bti() { |
| insts.push(Inst::Bti { |
| targets: BranchTargetType::C, |
| }); |
| } |
| |
| if flags.unwind_info() && call_conv.extends_apple_aarch64() { |
| // The macOS unwinder seems to require this. |
| insts.push(Inst::Unwind { |
| inst: UnwindInst::Aarch64SetPointerAuth { |
| return_addresses: false, |
| }, |
| }); |
| } |
| } |
| |
| insts |
| } |
| |
| fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Inst> { |
| let mut insts = SmallVec::new(); |
| |
| // stp fp (x29), lr (x30), [sp, #-16]! |
| insts.push(Inst::StoreP64 { |
| rt: fp_reg(), |
| rt2: link_reg(), |
| mem: PairAMode::SPPreIndexed(SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap()), |
| flags: MemFlags::trusted(), |
| }); |
| |
| if flags.unwind_info() { |
| insts.push(Inst::Unwind { |
| inst: UnwindInst::PushFrameRegs { |
| offset_upward_to_caller_sp: 16, // FP, LR |
| }, |
| }); |
| } |
| |
        // mov fp (x29), sp. This uses the `ADD rd, rn, #0` form of `MOV`
        // because the usual encoding (`ORR`) does not work with SP.
| insts.push(Inst::AluRRImm12 { |
| alu_op: ALUOp::Add, |
| size: OperandSize::Size64, |
| rd: writable_fp_reg(), |
| rn: stack_reg(), |
| imm12: Imm12 { |
| bits: 0, |
| shift12: false, |
| }, |
| }); |
| insts |
| } |
| |
| fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Inst> { |
| let mut insts = SmallVec::new(); |
| |
| // N.B.: sp is already adjusted to the appropriate place by the |
| // clobber-restore code (which also frees the fixed frame). Hence, there |
| // is no need for the usual `mov sp, fp` here. |
| |
| // `ldp fp, lr, [sp], #16` |
| insts.push(Inst::LoadP64 { |
| rt: writable_fp_reg(), |
| rt2: writable_link_reg(), |
| mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, types::I64).unwrap()), |
| flags: MemFlags::trusted(), |
| }); |
| insts |
| } |
| |
| fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _: u32) { |
| // TODO: implement if we ever require stack probes on an AArch64 host |
| // (unlikely unless Lucet is ported) |
| unimplemented!("Stack probing is unimplemented on AArch64"); |
| } |
| |
| fn gen_inline_probestack( |
| insts: &mut SmallInstVec<Self::I>, |
| _call_conv: isa::CallConv, |
| frame_size: u32, |
| guard_size: u32, |
| ) { |
        // The stack probe loop currently takes 6 instructions and each inline
        // probe takes 2 (roughly; these numbers depend on the exact
        // constants). Set this to 3 to keep the maximum unrolled probe
        // sequence at 6 instructions.
| const PROBE_MAX_UNROLL: u32 = 3; |
| |
| let probe_count = align_to(frame_size, guard_size) / guard_size; |
| if probe_count <= PROBE_MAX_UNROLL { |
            // When manually unrolling, emit, for each guard page, an
            // instruction that stores 0 at a constant offset relative to the
            // stack pointer. This will turn into something like
            // `movn tmp, #n ; stur xzr [sp, tmp]`.
            //
            // Note that the last probe may actually store beyond the stack
            // size, but that's ok since it's unused stack space, and if it
            // accidentally faults we were about to fault anyway, so faulting
            // slightly early makes little difference.
| insts.reserve(probe_count as usize); |
| for i in 0..probe_count { |
| let offset = (guard_size * (i + 1)) as i64; |
| insts.push(Self::gen_store_stack( |
| StackAMode::SPOffset(-offset, I8), |
| zero_reg(), |
| I32, |
| )); |
| } |
| } else { |
| // The non-unrolled version uses two temporary registers. The |
| // `start` contains the current offset from sp and counts downwards |
| // during the loop by increments of `guard_size`. The `end` is |
| // the size of the frame and where we stop. |
| // |
| // Note that this emission is all post-regalloc so it should be ok |
| // to use the temporary registers here as input/output as the loop |
| // itself is not allowed to use the registers. |
| let start = writable_spilltmp_reg(); |
| let end = writable_tmp2_reg(); |
| // `gen_inline_probestack` is called after regalloc2, so it's acceptable to reuse |
| // `start` and `end` as temporaries in load_constant. |
| insts.extend(Inst::load_constant(start, 0, &mut |_| start)); |
| insts.extend(Inst::load_constant(end, frame_size.into(), &mut |_| end)); |
| insts.push(Inst::StackProbeLoop { |
| start, |
| end: end.to_reg(), |
| step: Imm12::maybe_from_u64(guard_size.into()).unwrap(), |
| }); |
| } |
| } |
| |
    // Returns stack bytes used as well as instructions. Does not adjust
    // the nominal SP offset; the generic ABI code will do that.
| fn gen_clobber_save( |
| _call_conv: isa::CallConv, |
| setup_frame: bool, |
| flags: &settings::Flags, |
| clobbered_callee_saves: &[Writable<RealReg>], |
| fixed_frame_storage_size: u32, |
| _outgoing_args_size: u32, |
| ) -> (u64, SmallVec<[Inst; 16]>) { |
| let mut clobbered_int = vec![]; |
| let mut clobbered_vec = vec![]; |
| |
        for &reg in clobbered_callee_saves.iter() {
| match reg.to_reg().class() { |
| RegClass::Int => clobbered_int.push(reg), |
| RegClass::Float => clobbered_vec.push(reg), |
| RegClass::Vector => unreachable!(), |
| } |
| } |
| |
| let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec); |
| let total_save_bytes = int_save_bytes + vec_save_bytes; |
| let clobber_size = total_save_bytes as i32; |
| let mut insts = SmallVec::new(); |
| |
| if flags.unwind_info() && setup_frame { |
| // The *unwind* frame (but not the actual frame) starts at the |
| // clobbers, just below the saved FP/LR pair. |
| insts.push(Inst::Unwind { |
| inst: UnwindInst::DefineNewFrame { |
| offset_downward_to_clobbers: clobber_size as u32, |
| offset_upward_to_caller_sp: 16, // FP, LR |
| }, |
| }); |
| } |
| |
        // We use pre-indexed addressing modes here, rather than the possibly
        // more efficient "subtract sp once then use fixed offsets" scheme,
        // because (i) we cannot necessarily guarantee that the offset of a
        // clobber-save slot will be within a SImm7Scaled (+504-byte) offset
        // range of the whole frame including other slots; (ii) it is more
        // complex to conditionally generate a two-stage SP adjustment
        // (clobbers then fixed frame) otherwise; and (iii) we generally just
        // want to maintain simplicity here for maintainability. Because
        // clobbers are at the top of the frame, just below FP, all that is
        // necessary is to use the pre-indexed "push" `[sp, #-16]!`
        // addressing mode.
        //
        // `clobber_offset` tracks the offset above the start of clobbers
        // for unwind-info purposes.
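        //
        // An illustrative layout (hypothetical clobber set {x19, x20, v8};
        // addresses grow downward from FP):
        //
        //   [FP + 8]   saved LR
        //   [FP + 0]   saved FP
        //   [FP - 16]  x19, x20   (one `stp ..., [sp, #-16]!`)
        //   [FP - 32]  v8 + pad   (one `str ..., [sp, #-16]!`)
        //   [below]    fixed frame storage, if any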
| let mut clobber_offset = clobber_size as u32; |
| let clobber_offset_change = 16; |
| let iter = clobbered_int.chunks_exact(2); |
| |
| if let [rd] = iter.remainder() { |
| let rd: Reg = rd.to_reg().into(); |
| |
| debug_assert_eq!(rd.class(), RegClass::Int); |
| // str rd, [sp, #-16]! |
| insts.push(Inst::Store64 { |
| rd, |
| mem: AMode::SPPreIndexed { |
| simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(), |
| }, |
| flags: MemFlags::trusted(), |
| }); |
| |
| if flags.unwind_info() { |
| clobber_offset -= clobber_offset_change as u32; |
| insts.push(Inst::Unwind { |
| inst: UnwindInst::SaveReg { |
| clobber_offset, |
| reg: rd.to_real_reg().unwrap(), |
| }, |
| }); |
| } |
| } |
| |
| let mut iter = iter.rev(); |
| |
| while let Some([rt, rt2]) = iter.next() { |
| // .to_reg().into(): Writable<RealReg> --> RealReg --> Reg |
| let rt: Reg = rt.to_reg().into(); |
| let rt2: Reg = rt2.to_reg().into(); |
| |
| debug_assert!(rt.class() == RegClass::Int); |
| debug_assert!(rt2.class() == RegClass::Int); |
| |
| // stp rt, rt2, [sp, #-16]! |
| insts.push(Inst::StoreP64 { |
| rt, |
| rt2, |
| mem: PairAMode::SPPreIndexed( |
| SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(), |
| ), |
| flags: MemFlags::trusted(), |
| }); |
| |
| if flags.unwind_info() { |
| clobber_offset -= clobber_offset_change as u32; |
| insts.push(Inst::Unwind { |
| inst: UnwindInst::SaveReg { |
| clobber_offset, |
| reg: rt.to_real_reg().unwrap(), |
| }, |
| }); |
| insts.push(Inst::Unwind { |
| inst: UnwindInst::SaveReg { |
| clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32, |
| reg: rt2.to_real_reg().unwrap(), |
| }, |
| }); |
| } |
| } |
| |
| let store_vec_reg = |rd| Inst::FpuStore64 { |
| rd, |
| mem: AMode::SPPreIndexed { |
| simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(), |
| }, |
| flags: MemFlags::trusted(), |
| }; |
| let iter = clobbered_vec.chunks_exact(2); |
| |
| if let [rd] = iter.remainder() { |
| let rd: Reg = rd.to_reg().into(); |
| |
| debug_assert_eq!(rd.class(), RegClass::Float); |
| insts.push(store_vec_reg(rd)); |
| |
| if flags.unwind_info() { |
| clobber_offset -= clobber_offset_change as u32; |
| insts.push(Inst::Unwind { |
| inst: UnwindInst::SaveReg { |
| clobber_offset, |
| reg: rd.to_real_reg().unwrap(), |
| }, |
| }); |
| } |
| } |
| |
| let store_vec_reg_pair = |rt, rt2| { |
| let clobber_offset_change = 16; |
| |
| ( |
| Inst::FpuStoreP64 { |
| rt, |
| rt2, |
| mem: PairAMode::SPPreIndexed( |
| SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(), |
| ), |
| flags: MemFlags::trusted(), |
| }, |
| clobber_offset_change as u32, |
| ) |
| }; |
| let mut iter = iter.rev(); |
| |
| while let Some([rt, rt2]) = iter.next() { |
| let rt: Reg = rt.to_reg().into(); |
| let rt2: Reg = rt2.to_reg().into(); |
| |
| debug_assert_eq!(rt.class(), RegClass::Float); |
| debug_assert_eq!(rt2.class(), RegClass::Float); |
| |
| let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2); |
| |
| insts.push(inst); |
| |
| if flags.unwind_info() { |
| clobber_offset -= clobber_offset_change; |
| insts.push(Inst::Unwind { |
| inst: UnwindInst::SaveReg { |
| clobber_offset, |
| reg: rt.to_real_reg().unwrap(), |
| }, |
| }); |
| insts.push(Inst::Unwind { |
| inst: UnwindInst::SaveReg { |
| clobber_offset: clobber_offset + clobber_offset_change / 2, |
| reg: rt2.to_real_reg().unwrap(), |
| }, |
| }); |
| } |
| } |
| |
| // Allocate the fixed frame below the clobbers if necessary. |
| if fixed_frame_storage_size > 0 { |
| insts.extend(Self::gen_sp_reg_adjust(-(fixed_frame_storage_size as i32))); |
| } |
| |
| (total_save_bytes as u64, insts) |
| } |
| |
| fn gen_clobber_restore( |
| call_conv: isa::CallConv, |
| sig: &Signature, |
| flags: &settings::Flags, |
| clobbers: &[Writable<RealReg>], |
| fixed_frame_storage_size: u32, |
| _outgoing_args_size: u32, |
| ) -> SmallVec<[Inst; 16]> { |
| let mut insts = SmallVec::new(); |
| let (clobbered_int, clobbered_vec) = |
| get_regs_restored_in_epilogue(call_conv, flags, sig, clobbers); |
| |
| // Free the fixed frame if necessary. |
| if fixed_frame_storage_size > 0 { |
| insts.extend(Self::gen_sp_reg_adjust(fixed_frame_storage_size as i32)); |
| } |
| |
| let load_vec_reg = |rd| Inst::FpuLoad64 { |
| rd, |
| mem: AMode::SPPostIndexed { |
| simm9: SImm9::maybe_from_i64(16).unwrap(), |
| }, |
| flags: MemFlags::trusted(), |
| }; |
| let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 { |
| rt, |
| rt2, |
| mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, F64).unwrap()), |
| flags: MemFlags::trusted(), |
| }; |
| |
| let mut iter = clobbered_vec.chunks_exact(2); |
| |
| while let Some([rt, rt2]) = iter.next() { |
| let rt: Writable<Reg> = rt.map(|r| r.into()); |
| let rt2: Writable<Reg> = rt2.map(|r| r.into()); |
| |
| debug_assert_eq!(rt.to_reg().class(), RegClass::Float); |
| debug_assert_eq!(rt2.to_reg().class(), RegClass::Float); |
| insts.push(load_vec_reg_pair(rt, rt2)); |
| } |
| |
| debug_assert!(iter.remainder().len() <= 1); |
| |
| if let [rd] = iter.remainder() { |
| let rd: Writable<Reg> = rd.map(|r| r.into()); |
| |
| debug_assert_eq!(rd.to_reg().class(), RegClass::Float); |
| insts.push(load_vec_reg(rd)); |
| } |
| |
| let mut iter = clobbered_int.chunks_exact(2); |
| |
| while let Some([rt, rt2]) = iter.next() { |
| let rt: Writable<Reg> = rt.map(|r| r.into()); |
| let rt2: Writable<Reg> = rt2.map(|r| r.into()); |
| |
| debug_assert_eq!(rt.to_reg().class(), RegClass::Int); |
| debug_assert_eq!(rt2.to_reg().class(), RegClass::Int); |
| // ldp rt, rt2, [sp], #16 |
| insts.push(Inst::LoadP64 { |
| rt, |
| rt2, |
| mem: PairAMode::SPPostIndexed(SImm7Scaled::maybe_from_i64(16, I64).unwrap()), |
| flags: MemFlags::trusted(), |
| }); |
| } |
| |
| debug_assert!(iter.remainder().len() <= 1); |
| |
| if let [rd] = iter.remainder() { |
| let rd: Writable<Reg> = rd.map(|r| r.into()); |
| |
| debug_assert_eq!(rd.to_reg().class(), RegClass::Int); |
| // ldr rd, [sp], #16 |
| insts.push(Inst::ULoad64 { |
| rd, |
| mem: AMode::SPPostIndexed { |
| simm9: SImm9::maybe_from_i64(16).unwrap(), |
| }, |
| flags: MemFlags::trusted(), |
| }); |
| } |
| |
| insts |
| } |
| |
| fn gen_call( |
| dest: &CallDest, |
| uses: CallArgList, |
| defs: CallRetList, |
| clobbers: PRegSet, |
| opcode: ir::Opcode, |
| tmp: Writable<Reg>, |
| callee_conv: isa::CallConv, |
| caller_conv: isa::CallConv, |
| callee_pop_size: u32, |
| ) -> SmallVec<[Inst; 2]> { |
| let mut insts = SmallVec::new(); |
| match &dest { |
| &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(Inst::Call { |
| info: Box::new(CallInfo { |
| dest: name.clone(), |
| uses, |
| defs, |
| clobbers, |
| opcode, |
| caller_callconv: caller_conv, |
| callee_callconv: callee_conv, |
| callee_pop_size, |
| }), |
| }), |
| &CallDest::ExtName(ref name, RelocDistance::Far) => { |
| insts.push(Inst::LoadExtName { |
| rd: tmp, |
| name: Box::new(name.clone()), |
| offset: 0, |
| }); |
| insts.push(Inst::CallInd { |
| info: Box::new(CallIndInfo { |
| rn: tmp.to_reg(), |
| uses, |
| defs, |
| clobbers, |
| opcode, |
| caller_callconv: caller_conv, |
| callee_callconv: callee_conv, |
| callee_pop_size, |
| }), |
| }); |
| } |
| &CallDest::Reg(reg) => insts.push(Inst::CallInd { |
| info: Box::new(CallIndInfo { |
| rn: *reg, |
| uses, |
| defs, |
| clobbers, |
| opcode, |
| caller_callconv: caller_conv, |
| callee_callconv: callee_conv, |
| callee_pop_size, |
| }), |
| }), |
| } |
| |
| insts |
| } |
| |
| fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>( |
| call_conv: isa::CallConv, |
| dst: Reg, |
| src: Reg, |
| size: usize, |
| mut alloc_tmp: F, |
| ) -> SmallVec<[Self::I; 8]> { |
| let mut insts = SmallVec::new(); |
| let arg0 = writable_xreg(0); |
| let arg1 = writable_xreg(1); |
| let arg2 = writable_xreg(2); |
| let tmp = alloc_tmp(Self::word_type()); |
| insts.extend(Inst::load_constant(tmp, size as u64, &mut alloc_tmp)); |
| insts.push(Inst::Call { |
| info: Box::new(CallInfo { |
| dest: ExternalName::LibCall(LibCall::Memcpy), |
| uses: smallvec![ |
| CallArgPair { |
| vreg: dst, |
| preg: arg0.to_reg() |
| }, |
| CallArgPair { |
| vreg: src, |
| preg: arg1.to_reg() |
| }, |
| CallArgPair { |
| vreg: tmp.to_reg(), |
| preg: arg2.to_reg() |
| } |
| ], |
| defs: smallvec![], |
| clobbers: Self::get_regs_clobbered_by_call(call_conv), |
| opcode: Opcode::Call, |
| caller_callconv: call_conv, |
| callee_callconv: call_conv, |
| callee_pop_size: 0, |
| }), |
| }); |
| insts |
| } |
| |
| fn get_number_of_spillslots_for_value( |
| rc: RegClass, |
| vector_size: u32, |
| _isa_flags: &Self::F, |
| ) -> u32 { |
| assert_eq!(vector_size % 8, 0); |
| // We allocate in terms of 8-byte slots. |
| match rc { |
| RegClass::Int => 1, |
| RegClass::Float => vector_size / 8, |
| RegClass::Vector => unreachable!(), |
| } |
| } |
| |
| /// Get the current virtual-SP offset from an instruction-emission state. |
| fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 { |
| s.virtual_sp_offset |
| } |
| |
| /// Get the nominal-SP-to-FP offset from an instruction-emission state. |
| fn get_nominal_sp_to_fp(s: &EmitState) -> i64 { |
| s.nominal_sp_to_fp |
| } |
| |
| fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet { |
| if call_conv_of_callee == isa::CallConv::Tail { |
| TAIL_CLOBBERS |
| } else { |
| DEFAULT_AAPCS_CLOBBERS |
| } |
| } |
| |
| fn get_ext_mode( |
| _call_conv: isa::CallConv, |
| _specified: ir::ArgumentExtension, |
| ) -> ir::ArgumentExtension { |
| ir::ArgumentExtension::None |
| } |
| |
| fn get_clobbered_callee_saves( |
| call_conv: isa::CallConv, |
| flags: &settings::Flags, |
| sig: &Signature, |
| regs: &[Writable<RealReg>], |
| ) -> Vec<Writable<RealReg>> { |
| let mut regs: Vec<Writable<RealReg>> = regs |
| .iter() |
| .cloned() |
| .filter(|r| { |
| is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg()) |
| }) |
| .collect(); |
| |
| // Sort registers for deterministic code output. We can do an unstable |
| // sort because the registers will be unique (there are no dups). |
| regs.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); |
| regs |
| } |
| |
| fn is_frame_setup_needed( |
| is_leaf: bool, |
| stack_args_size: u32, |
| num_clobbered_callee_saves: usize, |
| fixed_frame_storage_size: u32, |
| ) -> bool { |
| !is_leaf |
| // The function arguments that are passed on the stack are addressed |
| // relative to the Frame Pointer. |
| || stack_args_size > 0 |
| || num_clobbered_callee_saves > 0 |
| || fixed_frame_storage_size > 0 |
| } |
| } |
| |
| fn compute_arg_locs_tail<'a, I>( |
| params: I, |
| add_ret_area_ptr: bool, |
| mut args: ArgsAccumulator<'_>, |
| ) -> CodegenResult<(u32, Option<usize>)> |
| where |
| I: IntoIterator<Item = &'a ir::AbiParam>, |
| { |
| let mut xregs = TAIL_CLOBBERS |
| .into_iter() |
| .filter(|r| r.class() == RegClass::Int) |
        // We reserve `x0` for the return area pointer. For simplicity, we
        // reserve it even when no return area pointer is needed. This also
        // means that identity functions don't have to shuffle arguments to
        // different return registers, because we shifted all argument
        // register numbers down by one to make space for the return area
        // pointer.
        //
        // Also, we cannot use all allocatable GPRs as arguments because we
        // need at least one allocatable register for holding the callee
        // address in indirect calls. So skip `x1` as well, reserving it for
        // that role.
| .skip(2); |
| |
| let mut vregs = TAIL_CLOBBERS |
| .into_iter() |
| .filter(|r| r.class() == RegClass::Float); |
| |
| let mut next_stack: u32 = 0; |
| |
| // Get the next stack slot for the given type. |
| let stack = |next_stack: &mut u32, ty: ir::Type| { |
| *next_stack = align_to(*next_stack, ty.bytes()); |
| let offset = i64::from(*next_stack); |
| *next_stack += ty.bytes(); |
| ABIArgSlot::Stack { |
| offset, |
| ty, |
| extension: ir::ArgumentExtension::None, |
| } |
| }; |
| |
| // Get the next `x` register available, or a stack slot if all are in use. |
| let mut xreg = |next_stack: &mut u32, ty| { |
| xregs |
| .next() |
| .map(|reg| ABIArgSlot::Reg { |
| reg: reg.into(), |
| ty, |
| extension: ir::ArgumentExtension::None, |
| }) |
| .unwrap_or_else(|| stack(next_stack, ty)) |
| }; |
| |
| // Get the next `v` register available, or a stack slot if all are in use. |
| let mut vreg = |next_stack: &mut u32, ty| { |
| vregs |
| .next() |
| .map(|reg| ABIArgSlot::Reg { |
| reg: reg.into(), |
| ty, |
| extension: ir::ArgumentExtension::None, |
| }) |
| .unwrap_or_else(|| stack(next_stack, ty)) |
| }; |
| |
| for param in params { |
| assert!( |
| legal_type_for_machine(param.value_type), |
| "Invalid type for AArch64: {:?}", |
| param.value_type |
| ); |
| |
| match param.purpose { |
| ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => {} |
| ir::ArgumentPurpose::StructArgument(_) |
| | ir::ArgumentPurpose::StructReturn |
| | ir::ArgumentPurpose::StackLimit => unimplemented!( |
| "support for {:?} parameters is not implemented for the `tail` \ |
| calling convention yet", |
| param.purpose, |
| ), |
| } |
| |
| let (reg_classes, reg_types) = Inst::rc_for_type(param.value_type)?; |
| args.push(ABIArg::Slots { |
| slots: reg_classes |
| .iter() |
| .zip(reg_types) |
| .map(|(cls, ty)| match cls { |
| RegClass::Int => xreg(&mut next_stack, *ty), |
| RegClass::Float => vreg(&mut next_stack, *ty), |
| RegClass::Vector => unreachable!(), |
| }) |
| .collect(), |
| purpose: param.purpose, |
| }); |
| } |
| |
| let ret_ptr = if add_ret_area_ptr { |
| let idx = args.args().len(); |
| args.push(ABIArg::reg( |
| xreg_preg(0).into(), |
| types::I64, |
| ir::ArgumentExtension::None, |
| ir::ArgumentPurpose::Normal, |
| )); |
| Some(idx) |
| } else { |
| None |
| }; |
| |
| next_stack = align_to(next_stack, 16); |
| |
| // To avoid overflow issues, limit the arg/return size to something |
| // reasonable -- here, 128 MB. |
| if next_stack > STACK_ARG_RET_SIZE_LIMIT { |
| return Err(CodegenError::ImplLimitExceeded); |
| } |
| |
| Ok((next_stack, ret_ptr)) |
| } |
| |
| /// Is this type supposed to be seen on this machine? E.g. references of the |
| /// wrong width are invalid. |
| fn legal_type_for_machine(ty: Type) -> bool { |
| match ty { |
| R32 => false, |
| _ => true, |
| } |
| } |
| |
| /// Is the given register saved in the prologue if clobbered, i.e., is it a |
| /// callee-save? |
| fn is_reg_saved_in_prologue( |
| call_conv: isa::CallConv, |
| enable_pinned_reg: bool, |
| sig: &Signature, |
| r: RealReg, |
| ) -> bool { |
| if call_conv == isa::CallConv::Tail { |
| return false; |
| } |
| |
| // FIXME: We need to inspect whether a function is returning Z or P regs too. |
| let save_z_regs = sig |
| .params |
| .iter() |
| .filter(|p| p.value_type.is_dynamic_vector()) |
| .count() |
| != 0; |
| |
| match r.class() { |
| RegClass::Int => { |
| // x19 - x28 inclusive are callee-saves. |
| // However, x21 is the pinned reg if `enable_pinned_reg` |
| // is set, and is implicitly globally-allocated, hence not |
| // callee-saved in prologues. |
| if enable_pinned_reg && r.hw_enc() == PINNED_REG { |
| false |
| } else { |
| r.hw_enc() >= 19 && r.hw_enc() <= 28 |
| } |
| } |
| RegClass::Float => { |
| // If a subroutine takes at least one argument in scalable vector registers |
| // or scalable predicate registers, or if it is a function that returns |
| // results in such registers, it must ensure that the entire contents of |
| // z8-z23 are preserved across the call. In other cases it need only |
| // preserve the low 64 bits of z8-z15. |
| if save_z_regs { |
| r.hw_enc() >= 8 && r.hw_enc() <= 23 |
| } else { |
| // v8 - v15 inclusive are callee-saves. |
| r.hw_enc() >= 8 && r.hw_enc() <= 15 |
| } |
| } |
| RegClass::Vector => unreachable!(), |
| } |
| } |
| |
/// Return the sets of integer and vector registers (as a pair) that must be
/// saved in the prologue and restored in the epilogue, given the set of all
/// registers written by the function's body.
| fn get_regs_restored_in_epilogue( |
| call_conv: isa::CallConv, |
| flags: &settings::Flags, |
| sig: &Signature, |
| regs: &[Writable<RealReg>], |
| ) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) { |
| let mut int_saves = vec![]; |
| let mut vec_saves = vec![]; |
    for &reg in regs {
| if is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, reg.to_reg()) { |
| match reg.to_reg().class() { |
| RegClass::Int => int_saves.push(reg), |
| RegClass::Float => vec_saves.push(reg), |
| RegClass::Vector => unreachable!(), |
| } |
| } |
| } |
| // Sort registers for deterministic code output. We can do an unstable sort because the |
| // registers will be unique (there are no dups). |
| int_saves.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); |
| vec_saves.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); |
| (int_saves, vec_saves) |
| } |
| |
| const fn default_aapcs_clobbers() -> PRegSet { |
| PRegSet::empty() |
| // x0 - x17 inclusive are caller-saves. |
| .with(xreg_preg(0)) |
| .with(xreg_preg(1)) |
| .with(xreg_preg(2)) |
| .with(xreg_preg(3)) |
| .with(xreg_preg(4)) |
| .with(xreg_preg(5)) |
| .with(xreg_preg(6)) |
| .with(xreg_preg(7)) |
| .with(xreg_preg(8)) |
| .with(xreg_preg(9)) |
| .with(xreg_preg(10)) |
| .with(xreg_preg(11)) |
| .with(xreg_preg(12)) |
| .with(xreg_preg(13)) |
| .with(xreg_preg(14)) |
| .with(xreg_preg(15)) |
| .with(xreg_preg(16)) |
| .with(xreg_preg(17)) |
| // v0 - v7 inclusive and v16 - v31 inclusive are |
| // caller-saves. The upper 64 bits of v8 - v15 inclusive are |
| // also caller-saves. However, because we cannot currently |
| // represent partial registers to regalloc2, we indicate here |
| // that every vector register is caller-save. Because this |
| // function is used at *callsites*, approximating in this |
| // direction (save more than necessary) is conservative and |
| // thus safe. |
| // |
| // Note that we exclude clobbers from a call instruction when |
| // a call instruction's callee has the same ABI as the caller |
| // (the current function body); this is safe (anything |
| // clobbered by callee can be clobbered by caller as well) and |
| // avoids unnecessary saves of v8-v15 in the prologue even |
| // though we include them as defs here. |
| .with(vreg_preg(0)) |
| .with(vreg_preg(1)) |
| .with(vreg_preg(2)) |
| .with(vreg_preg(3)) |
| .with(vreg_preg(4)) |
| .with(vreg_preg(5)) |
| .with(vreg_preg(6)) |
| .with(vreg_preg(7)) |
| .with(vreg_preg(8)) |
| .with(vreg_preg(9)) |
| .with(vreg_preg(10)) |
| .with(vreg_preg(11)) |
| .with(vreg_preg(12)) |
| .with(vreg_preg(13)) |
| .with(vreg_preg(14)) |
| .with(vreg_preg(15)) |
| .with(vreg_preg(16)) |
| .with(vreg_preg(17)) |
| .with(vreg_preg(18)) |
| .with(vreg_preg(19)) |
| .with(vreg_preg(20)) |
| .with(vreg_preg(21)) |
| .with(vreg_preg(22)) |
| .with(vreg_preg(23)) |
| .with(vreg_preg(24)) |
| .with(vreg_preg(25)) |
| .with(vreg_preg(26)) |
| .with(vreg_preg(27)) |
| .with(vreg_preg(28)) |
| .with(vreg_preg(29)) |
| .with(vreg_preg(30)) |
| .with(vreg_preg(31)) |
| } |
| |
| const DEFAULT_AAPCS_CLOBBERS: PRegSet = default_aapcs_clobbers(); |
| |
| // NB: The `tail` calling convention clobbers all allocatable registers. |
| const TAIL_CLOBBERS: PRegSet = PRegSet::empty() |
| .with(xreg_preg(0)) |
| .with(xreg_preg(1)) |
| .with(xreg_preg(2)) |
| .with(xreg_preg(3)) |
| .with(xreg_preg(4)) |
| .with(xreg_preg(5)) |
| .with(xreg_preg(6)) |
| .with(xreg_preg(7)) |
| .with(xreg_preg(8)) |
| .with(xreg_preg(9)) |
| .with(xreg_preg(10)) |
| .with(xreg_preg(11)) |
| .with(xreg_preg(12)) |
| .with(xreg_preg(13)) |
| .with(xreg_preg(14)) |
| .with(xreg_preg(15)) |
| // Cranelift reserves x16 and x17 as unallocatable scratch registers. |
| // |
| // x18 can be used by the platform and therefore is not allocatable. |
| .with(xreg_preg(19)) |
| .with(xreg_preg(20)) |
| .with(xreg_preg(21)) |
| .with(xreg_preg(22)) |
| .with(xreg_preg(23)) |
| .with(xreg_preg(24)) |
| .with(xreg_preg(25)) |
| .with(xreg_preg(26)) |
| .with(xreg_preg(27)) |
| .with(xreg_preg(28)) |
| // NB: x29 is the FP, x30 is the link register, and x31 is the SP. None of |
| // these are allocatable. |
| .with(vreg_preg(0)) |
| .with(vreg_preg(1)) |
| .with(vreg_preg(2)) |
| .with(vreg_preg(3)) |
| .with(vreg_preg(4)) |
| .with(vreg_preg(5)) |
| .with(vreg_preg(6)) |
| .with(vreg_preg(7)) |
| .with(vreg_preg(8)) |
| .with(vreg_preg(9)) |
| .with(vreg_preg(10)) |
| .with(vreg_preg(11)) |
| .with(vreg_preg(12)) |
| .with(vreg_preg(13)) |
| .with(vreg_preg(14)) |
| .with(vreg_preg(15)) |
| .with(vreg_preg(16)) |
| .with(vreg_preg(17)) |
| .with(vreg_preg(18)) |
| .with(vreg_preg(19)) |
| .with(vreg_preg(20)) |
| .with(vreg_preg(21)) |
| .with(vreg_preg(22)) |
| .with(vreg_preg(23)) |
| .with(vreg_preg(24)) |
| .with(vreg_preg(25)) |
| .with(vreg_preg(26)) |
| .with(vreg_preg(27)) |
| .with(vreg_preg(28)) |
| .with(vreg_preg(29)) |
| .with(vreg_preg(30)) |
| .with(vreg_preg(31)); |