//! Implementation of the standard x64 ABI.
use crate::ir::{self, types, LibCall, MemFlags, Opcode, Signature, TrapCode, Type};
use crate::ir::{types::*, ExternalName};
use crate::isa;
use crate::isa::{unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings, CallConv};
use crate::machinst::abi::*;
use crate::machinst::*;
use crate::settings;
use crate::{CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use args::*;
use regalloc2::{PReg, PRegSet, VReg};
use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom;
/// This is the limit for the size of argument and return-value areas on the
/// stack. We place a reasonable limit here to avoid integer overflow issues
/// with 32-bit arithmetic: for now, 128 MB.
static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
/// Support for the x64 ABI from the callee side (within a function body).
pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;
/// Support for the x64 ABI from the caller side (at a callsite).
pub(crate) type X64CallSite = CallSite<X64ABIMachineSpec>;
/// Implementation of ABI primitives for x64.
pub struct X64ABIMachineSpec;
impl X64ABIMachineSpec {
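/// Emit an unrolled sequence of stack probes: one store per guard page, at
/// offsets `-guard_size`, `-2 * guard_size`, ..., `-probe_count * guard_size`
/// relative to the stack pointer.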
fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
insts.reserve(probe_count as usize);
for i in 0..probe_count {
let offset = (guard_size * (i + 1)) as i64;
// TODO: It would be nice if we could store the immediate 0, but we don't have
// an instruction for that, so store the stack pointer instead. Any register
// will do, since the stack contents are undefined at this point.
insts.push(Self::gen_store_stack(
StackAMode::SPOffset(-offset, I8),
regs::rsp(),
I32,
));
}
}
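/// Emit a `StackProbeLoop` pseudo-instruction that touches the stack one
/// guard page at a time until `frame_size` bytes have been probed, using a
/// caller-saved register as the loop temporary.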
fn gen_probestack_loop(
insts: &mut SmallInstVec<Inst>,
call_conv: isa::CallConv,
frame_size: u32,
guard_size: u32,
) {
// We have to use a caller-saved register here, since callee-saved
// registers are not spilled until after stack probing has run.
let tmp = match call_conv {
// All registers are caller-saved on the `tail` calling convention,
// and `r15` is not used to pass arguments.
isa::CallConv::Tail => regs::r15(),
// `r11` is caller saved on both Fastcall and SystemV, and not used
// for argument passing, so it's pretty much free. It is also not
// used by the stacklimit mechanism.
_ => {
let tmp = regs::r11();
debug_assert!({
let real_reg = tmp.to_real_reg().unwrap();
!is_callee_save_systemv(real_reg, false)
&& !is_callee_save_fastcall(real_reg, false)
});
tmp
}
};
insts.push(Inst::StackProbeLoop {
tmp: Writable::from_reg(tmp),
frame_size,
guard_size,
});
}
}
impl IsaFlags for x64_settings::Flags {}
impl ABIMachineSpec for X64ABIMachineSpec {
type I = Inst;
type F = x64_settings::Flags;
fn word_bits() -> u32 {
64
}
/// Return required stack alignment in bytes.
fn stack_align(_call_conv: isa::CallConv) -> u32 {
16
}
fn compute_arg_locs<'a, I>(
call_conv: isa::CallConv,
flags: &settings::Flags,
params: I,
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
mut args: ArgsAccumulator<'_>,
) -> CodegenResult<(u32, Option<usize>)>
where
I: IntoIterator<Item = &'a ir::AbiParam>,
{
let is_fastcall = call_conv.extends_windows_fastcall();
let mut next_gpr = 0;
let mut next_vreg = 0;
let mut next_stack: u32 = 0;
let mut next_param_idx = 0; // Fastcall cares about overall param index
if args_or_rets == ArgsOrRets::Args && is_fastcall {
// Fastcall always reserves 32 bytes of shadow space corresponding to
// the four initial in-arg parameters.
//
// (See:
// https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-160)
next_stack = 32;
}
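// For example, even a call that passes every argument in registers still
// allocates this 32-byte shadow area, so the first stack-passed argument
// (the fifth one overall) ends up at offset 32 from the stack pointer at
// the call site.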
for param in params {
if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
let offset = next_stack as i64;
let size = size;
assert!(size % 8 == 0, "StructArgument size is not properly aligned");
next_stack += size;
args.push(ABIArg::StructArg {
pointer: None,
offset,
size: size as u64,
purpose: param.purpose,
});
continue;
}
// Find regclass(es) of the register(s) used to store a value of this type.
let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;
// Now assign ABIArgSlots for each register-sized part.
//
// Note that the handling of `i128` values is unique here:
//
// - If `enable_llvm_abi_extensions` is set in the flags, each
// `i128` is split into two `i64`s and assigned exactly as if it
// were two consecutive 64-bit args. This is consistent with LLVM's
// behavior, and is needed for some uses of Cranelift (e.g., the
// rustc backend).
//
// - Otherwise, both SysV and Fastcall specify behavior (use of
// vector register, a register pair, or passing by reference
// depending on the case), but for simplicity, we will just panic if
// an i128 type appears in a signature and the LLVM extensions flag
// is not set.
//
// For examples of how rustc compiles i128 args and return values on
// both SysV and Fastcall platforms, see:
// https://godbolt.org/z/PhG3ob
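// For example, with the LLVM extensions enabled under SysV, the first `i128`
// argument is split into two `i64` slots that land in `rdi` and `rsi`,
// matching the rustc/LLVM lowering.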
if param.value_type.bits() > 64
&& !param.value_type.is_vector()
&& !flags.enable_llvm_abi_extensions()
{
panic!(
"i128 args/return values not supported unless LLVM ABI extensions are enabled"
);
}
// Windows fastcall dictates that `__m128i` parameters to a function
// are passed indirectly as pointers, so handle that as a special
// case before the loop below.
if param.value_type.is_vector()
&& param.value_type.bits() >= 128
&& args_or_rets == ArgsOrRets::Args
&& is_fastcall
{
let pointer = match get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
Some(reg) => {
next_gpr += 1;
ABIArgSlot::Reg {
reg: reg.to_real_reg().unwrap(),
ty: ir::types::I64,
extension: ir::ArgumentExtension::None,
}
}
None => {
next_stack = align_to(next_stack, 8) + 8;
ABIArgSlot::Stack {
offset: (next_stack - 8) as i64,
ty: ir::types::I64,
extension: param.extension,
}
}
};
next_param_idx += 1;
args.push(ABIArg::ImplicitPtrArg {
// NB: this is filled in after this loop
offset: 0,
pointer,
ty: param.value_type,
purpose: param.purpose,
});
continue;
}
let mut slots = ABIArgSlotVec::new();
for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) {
let intreg = *rc == RegClass::Int;
let nextreg = if intreg {
match args_or_rets {
ArgsOrRets::Args => {
get_intreg_for_arg(&call_conv, next_gpr, next_param_idx)
}
ArgsOrRets::Rets => get_intreg_for_retval(&call_conv, next_gpr),
}
} else {
match args_or_rets {
ArgsOrRets::Args => {
get_fltreg_for_arg(&call_conv, next_vreg, next_param_idx)
}
ArgsOrRets::Rets => get_fltreg_for_retval(&call_conv, next_vreg),
}
};
next_param_idx += 1;
if let Some(reg) = nextreg {
if intreg {
next_gpr += 1;
} else {
next_vreg += 1;
}
slots.push(ABIArgSlot::Reg {
reg: reg.to_real_reg().unwrap(),
ty: *reg_ty,
extension: param.extension,
});
} else {
let size = reg_ty.bits() / 8;
let size = std::cmp::max(size, 8);
// Align.
debug_assert!(size.is_power_of_two());
next_stack = align_to(next_stack, size);
slots.push(ABIArgSlot::Stack {
offset: next_stack as i64,
ty: *reg_ty,
extension: param.extension,
});
next_stack += size;
}
}
args.push(ABIArg::Slots {
slots,
purpose: param.purpose,
});
}
// Fastcall's indirect 128+ bit vector arguments are all located on the
// stack, and stack space is reserved after all parameters are passed,
// so allocate from the space now.
if args_or_rets == ArgsOrRets::Args && is_fastcall {
for arg in args.args_mut() {
if let ABIArg::ImplicitPtrArg { offset, .. } = arg {
assert_eq!(*offset, 0);
next_stack = align_to(next_stack, 16);
*offset = next_stack as i64;
next_stack += 16;
}
}
}
let extra_arg = if add_ret_area_ptr {
debug_assert!(args_or_rets == ArgsOrRets::Args);
if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) {
args.push_non_formal(ABIArg::reg(
reg.to_real_reg().unwrap(),
types::I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
} else {
args.push_non_formal(ABIArg::stack(
next_stack as i64,
types::I64,
ir::ArgumentExtension::None,
ir::ArgumentPurpose::Normal,
));
next_stack += 8;
}
Some(args.args().len() - 1)
} else {
None
};
next_stack = align_to(next_stack, 16);
// To avoid overflow issues, limit the arg/return size to something reasonable.
if next_stack > STACK_ARG_RET_SIZE_LIMIT {
return Err(CodegenError::ImplLimitExceeded);
}
Ok((next_stack, extra_arg))
}
fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
16 // frame pointer + return address.
}
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
// For integer-typed values, we always load a full 64 bits (and we always spill a full 64
// bits as well -- see `Inst::store()`).
let ty = match ty {
types::I8 | types::I16 | types::I32 => types::I64,
_ => ty,
};
Inst::load(ty, mem, into_reg, ExtKind::None)
}
fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
Inst::store(ty, from_reg, mem)
}
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
Inst::gen_move(to_reg, from_reg, ty)
}
/// Generate an integer-extend operation.
fn gen_extend(
to_reg: Writable<Reg>,
from_reg: Reg,
is_signed: bool,
from_bits: u8,
to_bits: u8,
) -> Self::I {
let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
.unwrap_or_else(|| panic!("invalid extension: {} -> {}", from_bits, to_bits));
if is_signed {
Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
} else {
Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
}
}
fn gen_args(_isa_flags: &x64_settings::Flags, args: Vec<ArgPair>) -> Inst {
Inst::Args { args }
}
fn gen_ret(
_setup_frame: bool,
_isa_flags: &x64_settings::Flags,
rets: Vec<RetPair>,
stack_bytes_to_pop: u32,
) -> Self::I {
Inst::ret(rets, stack_bytes_to_pop)
}
fn gen_add_imm(
_call_conv: isa::CallConv,
into_reg: Writable<Reg>,
from_reg: Reg,
imm: u32,
) -> SmallInstVec<Self::I> {
let mut ret = SmallVec::new();
if from_reg != into_reg.to_reg() {
ret.push(Inst::gen_move(into_reg, from_reg, I64));
}
ret.push(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::imm(imm),
into_reg,
));
ret
}
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
smallvec![
Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::reg(regs::rsp()), limit_reg),
Inst::TrapIf {
// NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
cc: CC::NBE,
trap_code: TrapCode::StackOverflow,
},
]
}
fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Self::I {
let mem: SyntheticAmode = mem.into();
Inst::lea(mem, into_reg)
}
fn get_stacklimit_reg(call_conv: isa::CallConv) -> Reg {
// As per comment on trait definition, we must return a caller-save
// register that is not used as an argument here.
match call_conv {
isa::CallConv::Tail => regs::r14(),
_ => {
debug_assert!(!is_callee_save_systemv(
regs::r10().to_real_reg().unwrap(),
false
));
regs::r10()
}
}
}
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
// Only ever used for I64s and vectors; if that changes, see if the
// ExtKind below needs to be changed.
assert!(ty == I64 || ty.is_vector());
let simm32 = offset as u32;
let mem = Amode::imm_reg(simm32, base);
Inst::load(ty, mem, into_reg, ExtKind::None)
}
fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
let simm32 = offset as u32;
let mem = Amode::imm_reg(simm32, base);
Inst::store(ty, from_reg, mem)
}
fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
let (alu_op, amount) = if amount >= 0 {
(AluRmiROpcode::Add, amount)
} else {
(AluRmiROpcode::Sub, -amount)
};
let amount = amount as u32;
smallvec![Inst::alu_rmi_r(
OperandSize::Size64,
alu_op,
RegMemImm::imm(amount),
Writable::from_reg(regs::rsp()),
)]
}
fn gen_nominal_sp_adj(offset: i32) -> Self::I {
Inst::VirtualSPOffsetAdj {
offset: offset as i64,
}
}
fn gen_prologue_frame_setup(flags: &settings::Flags) -> SmallInstVec<Self::I> {
let r_rsp = regs::rsp();
let r_rbp = regs::rbp();
let w_rbp = Writable::from_reg(r_rbp);
let mut insts = SmallVec::new();
// `push %rbp`
// RSP before the call will be 0 % 16. So here, it is 8 % 16.
insts.push(Inst::push64(RegMemImm::reg(r_rbp)));
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::PushFrameRegs {
offset_upward_to_caller_sp: 16, // RBP, return address
},
});
}
// `mov %rsp, %rbp`
// RSP is now 0 % 16
insts.push(Inst::mov_r_r(OperandSize::Size64, r_rsp, w_rbp));
insts
}
fn gen_epilogue_frame_restore(_: &settings::Flags) -> SmallInstVec<Self::I> {
let mut insts = SmallVec::new();
// `mov %rbp, %rsp`
insts.push(Inst::mov_r_r(
OperandSize::Size64,
regs::rbp(),
Writable::from_reg(regs::rsp()),
));
// `pop %rbp`
insts.push(Inst::pop64(Writable::from_reg(regs::rbp())));
insts
}
fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
insts.push(Inst::imm(
OperandSize::Size32,
frame_size as u64,
Writable::from_reg(regs::rax()),
));
insts.push(Inst::CallKnown {
dest: ExternalName::LibCall(LibCall::Probestack),
info: Box::new(CallInfo {
// No need to include the arg here: we are post-regalloc,
// so no constraints will be seen anyway.
uses: smallvec![],
defs: smallvec![],
clobbers: PRegSet::empty(),
opcode: Opcode::Call,
callee_pop_size: 0,
callee_conv: CallConv::Probestack,
}),
});
}
fn gen_inline_probestack(
insts: &mut SmallInstVec<Self::I>,
call_conv: isa::CallConv,
frame_size: u32,
guard_size: u32,
) {
// Unroll at most n consecutive probes before falling back to a loop.
//
// This number was picked because the loop version is 38 bytes long. We can fit
// 5 inline probes in that space, so unroll if it's beneficial in terms of code size.
const PROBE_MAX_UNROLL: u32 = 5;
// Number of probes that we need to perform
let probe_count = align_to(frame_size, guard_size) / guard_size;
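// For example, with a 4 KiB guard page, frames of up to 20 KiB need at most
// 5 probes and are probed inline; larger frames fall back to the loop.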
if probe_count <= PROBE_MAX_UNROLL {
Self::gen_probestack_unroll(insts, guard_size, probe_count)
} else {
Self::gen_probestack_loop(insts, call_conv, frame_size, guard_size)
}
}
fn gen_clobber_save(
call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbered_callee_saves: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>) {
if call_conv == isa::CallConv::Tail {
assert!(clobbered_callee_saves.is_empty());
}
let mut insts = SmallVec::new();
let clobbered_size = compute_clobber_size(&clobbered_callee_saves);
if flags.unwind_info() && setup_frame {
// Emit unwind info: start the frame. The frame (from unwind
// consumers' point of view) starts at the clobbers, just below
// the FP and return address. Spill slots and stack slots are
// part of our actual frame but do not concern the unwinder.
insts.push(Inst::Unwind {
inst: UnwindInst::DefineNewFrame {
offset_downward_to_clobbers: clobbered_size,
offset_upward_to_caller_sp: 16, // RBP, return address
},
});
}
// Adjust the stack pointer downward for clobbers and the function fixed
// frame (spillslots and storage slots).
let stack_size = fixed_frame_storage_size + clobbered_size;
if stack_size > 0 {
insts.push(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Sub,
RegMemImm::imm(stack_size),
Writable::from_reg(regs::rsp()),
));
}
// Store each clobbered register in order at offsets from RSP,
// placing them above the fixed frame slots.
let mut cur_offset = fixed_frame_storage_size;
for reg in clobbered_callee_saves {
let r_reg = reg.to_reg();
let off = cur_offset;
match r_reg.class() {
RegClass::Int => {
insts.push(Inst::store(
types::I64,
r_reg.into(),
Amode::imm_reg(cur_offset, regs::rsp()),
));
cur_offset += 8;
}
RegClass::Float => {
cur_offset = align_to(cur_offset, 16);
insts.push(Inst::store(
types::I8X16,
r_reg.into(),
Amode::imm_reg(cur_offset, regs::rsp()),
));
cur_offset += 16;
}
RegClass::Vector => unreachable!(),
};
if flags.unwind_info() {
insts.push(Inst::Unwind {
inst: UnwindInst::SaveReg {
clobber_offset: off - fixed_frame_storage_size,
reg: r_reg,
},
});
}
}
(clobbered_size as u64, insts)
}
fn gen_clobber_restore(
call_conv: isa::CallConv,
sig: &Signature,
flags: &settings::Flags,
clobbers: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
_outgoing_args_size: u32,
) -> SmallVec<[Self::I; 16]> {
let mut insts = SmallVec::new();
let clobbered_callee_saves =
Self::get_clobbered_callee_saves(call_conv, flags, sig, clobbers);
let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves);
// Restore regs by loading from offsets of RSP. RSP will be
// returned to nominal-RSP at this point, so we can use the
// same offsets that we used when saving clobbers above.
let mut cur_offset = fixed_frame_storage_size;
for reg in &clobbered_callee_saves {
let rreg = reg.to_reg();
match rreg.class() {
RegClass::Int => {
insts.push(Inst::mov64_m_r(
Amode::imm_reg(cur_offset, regs::rsp()),
Writable::from_reg(rreg.into()),
));
cur_offset += 8;
}
RegClass::Float => {
cur_offset = align_to(cur_offset, 16);
insts.push(Inst::load(
types::I8X16,
Amode::imm_reg(cur_offset, regs::rsp()),
Writable::from_reg(rreg.into()),
ExtKind::None,
));
cur_offset += 16;
}
RegClass::Vector => unreachable!(),
}
}
// Adjust RSP back upward.
if stack_size > 0 {
insts.push(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::imm(stack_size),
Writable::from_reg(regs::rsp()),
));
}
insts
}
/// Generate a call instruction/sequence.
fn gen_call(
dest: &CallDest,
uses: CallArgList,
defs: CallRetList,
clobbers: PRegSet,
opcode: ir::Opcode,
tmp: Writable<Reg>,
callee_conv: isa::CallConv,
_caller_conv: isa::CallConv,
callee_pop_size: u32,
) -> SmallVec<[Self::I; 2]> {
let mut insts = SmallVec::new();
match dest {
&CallDest::ExtName(ref name, RelocDistance::Near) => {
insts.push(Inst::call_known(
name.clone(),
uses,
defs,
clobbers,
opcode,
callee_pop_size,
callee_conv,
));
}
&CallDest::ExtName(ref name, RelocDistance::Far) => {
insts.push(Inst::LoadExtName {
dst: tmp,
name: Box::new(name.clone()),
offset: 0,
distance: RelocDistance::Far,
});
insts.push(Inst::call_unknown(
RegMem::reg(tmp.to_reg()),
uses,
defs,
clobbers,
opcode,
callee_pop_size,
callee_conv,
));
}
&CallDest::Reg(reg) => {
insts.push(Inst::call_unknown(
RegMem::reg(reg),
uses,
defs,
clobbers,
opcode,
callee_pop_size,
callee_conv,
));
}
}
insts
}
fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
call_conv: isa::CallConv,
dst: Reg,
src: Reg,
size: usize,
mut alloc_tmp: F,
) -> SmallVec<[Self::I; 8]> {
let mut insts = SmallVec::new();
let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap();
let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap();
let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
let temp = alloc_tmp(Self::word_type());
let temp2 = alloc_tmp(Self::word_type());
insts.push(Inst::imm(OperandSize::Size64, size as u64, temp));
// We use an indirect call and a full LoadExtName because we do not have
// information about the libcall `RelocDistance` here, so we
// conservatively use the more flexible calling sequence.
insts.push(Inst::LoadExtName {
dst: temp2,
name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
offset: 0,
distance: RelocDistance::Far,
});
let callee_pop_size = 0;
insts.push(Inst::call_unknown(
RegMem::reg(temp2.to_reg()),
/* uses = */
smallvec![
CallArgPair {
vreg: dst,
preg: arg0
},
CallArgPair {
vreg: src,
preg: arg1
},
CallArgPair {
vreg: temp.to_reg(),
preg: arg2
},
],
/* defs = */ smallvec![],
/* clobbers = */ Self::get_regs_clobbered_by_call(call_conv),
Opcode::Call,
callee_pop_size,
call_conv,
));
insts
}
fn get_number_of_spillslots_for_value(
rc: RegClass,
vector_scale: u32,
_isa_flags: &Self::F,
) -> u32 {
// We allocate in terms of 8-byte slots.
match rc {
RegClass::Int => 1,
RegClass::Float => vector_scale / 8,
RegClass::Vector => unreachable!(),
}
}
fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64 {
s.virtual_sp_offset()
}
fn get_nominal_sp_to_fp(s: &<Self::I as MachInstEmit>::State) -> i64 {
s.nominal_sp_to_fp()
}
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet {
if call_conv_of_callee == isa::CallConv::Tail {
TAIL_CLOBBERS
} else if call_conv_of_callee.extends_windows_fastcall() {
WINDOWS_CLOBBERS
} else {
SYSV_CLOBBERS
}
}
fn get_ext_mode(
_call_conv: isa::CallConv,
_specified: ir::ArgumentExtension,
) -> ir::ArgumentExtension {
ir::ArgumentExtension::None
}
fn get_clobbered_callee_saves(
call_conv: CallConv,
flags: &settings::Flags,
_sig: &Signature,
regs: &[Writable<RealReg>],
) -> Vec<Writable<RealReg>> {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
// The `tail` calling convention doesn't have any callee-save
// registers.
CallConv::Tail => vec![],
CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
.iter()
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
.collect(),
CallConv::WindowsFastcall => regs
.iter()
.cloned()
.filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg()))
.collect(),
CallConv::Probestack => todo!("probestack?"),
CallConv::WasmtimeSystemV | CallConv::AppleAarch64 => unreachable!(),
};
// Sort registers for deterministic code output. We can do an unstable sort because the
// registers will be unique (there are no dups).
regs.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg());
regs
}
fn is_frame_setup_needed(
_is_leaf: bool,
_stack_args_size: u32,
_num_clobbered_callee_saves: usize,
_frame_storage_size: u32,
) -> bool {
true
}
}
impl X64CallSite {
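/// Emit a tail call: allocate the callee's stack-argument area, move the
/// arguments (and, if the stack-argument size changes, the return address)
/// into place, and emit the return-call pseudo-instruction that replaces the
/// current frame with the callee's.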
pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
// Allocate additional stack space for the new stack frame. We will
// build it in the newly allocated space, but then copy it over our
// current frame at the last moment.
let new_stack_arg_size = self.emit_allocate_tail_call_frame(ctx);
let old_stack_arg_size = ctx.abi().stack_args_size(ctx.sigs());
// Make a copy of the frame pointer, since we use it when copying down
// the new stack frame.
let fp = ctx.temp_writable_gpr();
let rbp = PReg::from(regs::rbp().to_real_reg().unwrap());
ctx.emit(Inst::MovFromPReg { src: rbp, dst: fp });
// Load the return address, because copying our new stack frame
// over our current stack frame might overwrite it, and we'll need to
// place it in the correct location after we do that copy.
//
// But we only need to actually move the return address if the size of
// stack arguments changes.
let ret_addr = if new_stack_arg_size != old_stack_arg_size {
let ret_addr = ctx.temp_writable_gpr();
ctx.emit(Inst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: 8,
base: *fp.to_reg(),
flags: MemFlags::trusted(),
}),
dst: ret_addr,
});
Some(ret_addr.to_reg())
} else {
None
};
// Put all arguments in registers and stack slots (within that newly
// allocated stack space).
self.emit_args(ctx, args);
if let Some(i) = ctx.sigs()[self.sig()].stack_ret_arg() {
let ret_area_ptr = ctx.abi().ret_area_ptr().expect(
"if the tail callee has a return pointer, then the tail caller \
must as well",
);
for inst in self.gen_arg(ctx, i.into(), ValueRegs::one(ret_area_ptr.to_reg())) {
ctx.emit(inst);
}
}
// Finally, emit the macro instruction to copy the new stack frame over
// our current one and do the actual tail call!
let dest = self.dest().clone();
let info = Box::new(ReturnCallInfo {
new_stack_arg_size,
old_stack_arg_size,
ret_addr,
fp: fp.to_reg(),
tmp: ctx.temp_writable_gpr(),
uses: self.take_uses(),
});
match dest {
CallDest::ExtName(callee, RelocDistance::Near) => {
ctx.emit(Inst::ReturnCallKnown { callee, info });
}
CallDest::ExtName(callee, RelocDistance::Far) => {
let tmp2 = ctx.temp_writable_gpr();
ctx.emit(Inst::LoadExtName {
dst: tmp2.to_writable_reg(),
name: Box::new(callee),
offset: 0,
distance: RelocDistance::Far,
});
ctx.emit(Inst::ReturnCallUnknown {
callee: tmp2.to_writable_reg().into(),
info,
});
}
CallDest::Reg(callee) => ctx.emit(Inst::ReturnCallUnknown {
callee: callee.into(),
info,
}),
}
}
}
impl From<StackAMode> for SyntheticAmode {
fn from(amode: StackAMode) -> Self {
// We enforce a 128 MB stack-frame size limit above, so these
// `expect()`s should never fail.
match amode {
StackAMode::FPOffset(off, _ty) => {
let off = i32::try_from(off)
.expect("Offset in FPOffset is greater than 2GB; should hit impl limit first");
let simm32 = off as u32;
SyntheticAmode::Real(Amode::ImmReg {
simm32,
base: regs::rbp(),
flags: MemFlags::trusted(),
})
}
StackAMode::NominalSPOffset(off, _ty) => {
let off = i32::try_from(off).expect(
"Offset in NominalSPOffset is greater than 2GB; should hit impl limit first",
);
let simm32 = off as u32;
SyntheticAmode::nominal_sp_offset(simm32)
}
StackAMode::SPOffset(off, _ty) => {
let off = i32::try_from(off)
.expect("Offset in SPOffset is greater than 2GB; should hit impl limit first");
let simm32 = off as u32;
SyntheticAmode::Real(Amode::ImmReg {
simm32,
base: regs::rsp(),
flags: MemFlags::trusted(),
})
}
}
}
}
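/// Integer register assigned to the `idx`-th integer argument (SysV and
/// `tail`) or to the `arg_idx`-th overall argument (fastcall), if any.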
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = call_conv.extends_windows_fastcall();
if *call_conv == isa::CallConv::Tail {
return match idx {
0 => Some(regs::rax()),
1 => Some(regs::rcx()),
2 => Some(regs::rdx()),
3 => Some(regs::rbx()),
4 => Some(regs::rsi()),
5 => Some(regs::rdi()),
6 => Some(regs::r8()),
7 => Some(regs::r9()),
8 => Some(regs::r10()),
9 => Some(regs::r11()),
// NB: `r12`, `r13`, `r14` and `r15` are reserved for indirect
// callee addresses and temporaries required for our tail call
// sequence (fp, ret_addr, tmp).
_ => None,
};
}
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (integer) class.
let i = if is_fastcall { arg_idx } else { idx };
match (i, is_fastcall) {
(0, false) => Some(regs::rdi()),
(1, false) => Some(regs::rsi()),
(2, false) => Some(regs::rdx()),
(3, false) => Some(regs::rcx()),
(4, false) => Some(regs::r8()),
(5, false) => Some(regs::r9()),
(0, true) => Some(regs::rcx()),
(1, true) => Some(regs::rdx()),
(2, true) => Some(regs::r8()),
(3, true) => Some(regs::r9()),
_ => None,
}
}
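/// Floating-point register assigned to the `idx`-th FP argument (SysV and
/// `tail`) or to the `arg_idx`-th overall argument (fastcall), if any.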
fn get_fltreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = call_conv.extends_windows_fastcall();
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (floating-point) class.
let i = if is_fastcall { arg_idx } else { idx };
match (i, is_fastcall) {
(0, false) => Some(regs::xmm0()),
(1, false) => Some(regs::xmm1()),
(2, false) => Some(regs::xmm2()),
(3, false) => Some(regs::xmm3()),
(4, false) => Some(regs::xmm4()),
(5, false) => Some(regs::xmm5()),
(6, false) => Some(regs::xmm6()),
(7, false) => Some(regs::xmm7()),
(0, true) => Some(regs::xmm0()),
(1, true) => Some(regs::xmm1()),
(2, true) => Some(regs::xmm2()),
(3, true) => Some(regs::xmm3()),
_ => None,
}
}
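/// Integer register used for the `intreg_idx`-th integer return value under
/// the given calling convention, if any.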
fn get_intreg_for_retval(call_conv: &CallConv, intreg_idx: usize) -> Option<Reg> {
match call_conv {
CallConv::Tail => match intreg_idx {
0 => Some(regs::rax()),
1 => Some(regs::rcx()),
2 => Some(regs::rdx()),
3 => Some(regs::rbx()),
4 => Some(regs::rsi()),
5 => Some(regs::rdi()),
6 => Some(regs::r8()),
7 => Some(regs::r9()),
8 => Some(regs::r10()),
9 => Some(regs::r11()),
10 => Some(regs::r12()),
11 => Some(regs::r13()),
12 => Some(regs::r14()),
// NB: `r15` is reserved as a scratch register.
_ => None,
},
CallConv::Fast | CallConv::Cold | CallConv::SystemV => match intreg_idx {
0 => Some(regs::rax()),
1 => Some(regs::rdx()),
_ => None,
},
CallConv::WindowsFastcall => match intreg_idx {
0 => Some(regs::rax()),
1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
_ => None,
},
CallConv::Probestack => todo!(),
CallConv::WasmtimeSystemV | CallConv::AppleAarch64 => unreachable!(),
}
}
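/// Floating-point register used for the `fltreg_idx`-th FP return value
/// under the given calling convention, if any.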
fn get_fltreg_for_retval(call_conv: &CallConv, fltreg_idx: usize) -> Option<Reg> {
match call_conv {
CallConv::Tail => match fltreg_idx {
0 => Some(regs::xmm0()),
1 => Some(regs::xmm1()),
2 => Some(regs::xmm2()),
3 => Some(regs::xmm3()),
4 => Some(regs::xmm4()),
5 => Some(regs::xmm5()),
6 => Some(regs::xmm6()),
7 => Some(regs::xmm7()),
_ => None,
},
CallConv::Fast | CallConv::Cold | CallConv::SystemV => match fltreg_idx {
0 => Some(regs::xmm0()),
1 => Some(regs::xmm1()),
_ => None,
},
CallConv::WindowsFastcall => match fltreg_idx {
0 => Some(regs::xmm0()),
_ => None,
},
CallConv::Probestack => todo!(),
CallConv::WasmtimeSystemV | CallConv::AppleAarch64 => unreachable!(),
}
}
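/// Whether `r` is callee-saved under the SysV calling convention. R15 is not
/// treated as callee-saved when it is used as the pinned register.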
fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool {
use regs::*;
match r.class() {
RegClass::Int => match r.hw_enc() {
ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 => true,
// R15 is the pinned register; if we're using it that way,
// it is effectively globally-allocated, and is not
// callee-saved.
ENC_R15 => !enable_pinned_reg,
_ => false,
},
RegClass::Float => false,
RegClass::Vector => unreachable!(),
}
}
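/// Whether `r` is callee-saved under Windows fastcall (RBX, RBP, RSI, RDI,
/// R12-R15, and XMM6-XMM15), with the same pinned-register caveat for R15 as
/// under SysV.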
fn is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool {
use regs::*;
match r.class() {
RegClass::Int => match r.hw_enc() {
ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 => true,
// See above for SysV: we must treat the pinned reg specially.
ENC_R15 => !enable_pinned_reg,
_ => false,
},
RegClass::Float => match r.hw_enc() {
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true,
_ => false,
},
RegClass::Vector => unreachable!(),
}
}
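/// Bytes of stack needed to save the given clobbered registers: 8 bytes per
/// integer register, 16 bytes (16-byte aligned) per floating-point register,
/// with the total rounded up to a multiple of 16.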
fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
let mut clobbered_size = 0;
for reg in clobbers {
match reg.to_reg().class() {
RegClass::Int => {
clobbered_size += 8;
}
RegClass::Float => {
clobbered_size = align_to(clobbered_size, 16);
clobbered_size += 16;
}
RegClass::Vector => unreachable!(),
}
}
align_to(clobbered_size, 16)
}
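// Call-clobbered (caller-saved) register sets for each supported calling
// convention, precomputed as `PRegSet` constants.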
const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();
const SYSV_CLOBBERS: PRegSet = sysv_clobbers();
const TAIL_CLOBBERS: PRegSet = tail_clobbers();
const fn windows_clobbers() -> PRegSet {
PRegSet::empty()
.with(regs::gpr_preg(regs::ENC_RAX))
.with(regs::gpr_preg(regs::ENC_RCX))
.with(regs::gpr_preg(regs::ENC_RDX))
.with(regs::gpr_preg(regs::ENC_R8))
.with(regs::gpr_preg(regs::ENC_R9))
.with(regs::gpr_preg(regs::ENC_R10))
.with(regs::gpr_preg(regs::ENC_R11))
.with(regs::fpr_preg(0))
.with(regs::fpr_preg(1))
.with(regs::fpr_preg(2))
.with(regs::fpr_preg(3))
.with(regs::fpr_preg(4))
.with(regs::fpr_preg(5))
}
const fn sysv_clobbers() -> PRegSet {
PRegSet::empty()
.with(regs::gpr_preg(regs::ENC_RAX))
.with(regs::gpr_preg(regs::ENC_RCX))
.with(regs::gpr_preg(regs::ENC_RDX))
.with(regs::gpr_preg(regs::ENC_RSI))
.with(regs::gpr_preg(regs::ENC_RDI))
.with(regs::gpr_preg(regs::ENC_R8))
.with(regs::gpr_preg(regs::ENC_R9))
.with(regs::gpr_preg(regs::ENC_R10))
.with(regs::gpr_preg(regs::ENC_R11))
.with(regs::fpr_preg(0))
.with(regs::fpr_preg(1))
.with(regs::fpr_preg(2))
.with(regs::fpr_preg(3))
.with(regs::fpr_preg(4))
.with(regs::fpr_preg(5))
.with(regs::fpr_preg(6))
.with(regs::fpr_preg(7))
.with(regs::fpr_preg(8))
.with(regs::fpr_preg(9))
.with(regs::fpr_preg(10))
.with(regs::fpr_preg(11))
.with(regs::fpr_preg(12))
.with(regs::fpr_preg(13))
.with(regs::fpr_preg(14))
.with(regs::fpr_preg(15))
}
const fn tail_clobbers() -> PRegSet {
PRegSet::empty()
.with(regs::gpr_preg(regs::ENC_RAX))
.with(regs::gpr_preg(regs::ENC_RCX))
.with(regs::gpr_preg(regs::ENC_RDX))
.with(regs::gpr_preg(regs::ENC_RBX))
.with(regs::gpr_preg(regs::ENC_RSI))
.with(regs::gpr_preg(regs::ENC_RDI))
.with(regs::gpr_preg(regs::ENC_R8))
.with(regs::gpr_preg(regs::ENC_R9))
.with(regs::gpr_preg(regs::ENC_R10))
.with(regs::gpr_preg(regs::ENC_R11))
.with(regs::gpr_preg(regs::ENC_R12))
.with(regs::gpr_preg(regs::ENC_R13))
.with(regs::gpr_preg(regs::ENC_R14))
.with(regs::gpr_preg(regs::ENC_R15))
.with(regs::fpr_preg(0))
.with(regs::fpr_preg(1))
.with(regs::fpr_preg(2))
.with(regs::fpr_preg(3))
.with(regs::fpr_preg(4))
.with(regs::fpr_preg(5))
.with(regs::fpr_preg(6))
.with(regs::fpr_preg(7))
.with(regs::fpr_preg(8))
.with(regs::fpr_preg(9))
.with(regs::fpr_preg(10))
.with(regs::fpr_preg(11))
.with(regs::fpr_preg(12))
.with(regs::fpr_preg(13))
.with(regs::fpr_preg(14))
.with(regs::fpr_preg(15))
}