//! Riscv64 ISA: binary code emission.
use crate::binemit::StackMap;
use crate::ir::{self, LibCall, RelSourceLoc, TrapCode};
use crate::isa::riscv64::inst::*;
use crate::isa::riscv64::lower::isle::generated_code::{
CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, ZcbMemOp,
};
use crate::machinst::{AllocationConsumer, Reg, Writable};
use crate::trace;
use cranelift_control::ControlPlane;
use regalloc2::Allocation;
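/// Constant state (shared and ISA-specific flags) consulted while emitting instructions.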
pub struct EmitInfo {
shared_flag: settings::Flags,
isa_flags: super::super::riscv_settings::Flags,
}
impl EmitInfo {
pub(crate) fn new(
shared_flag: settings::Flags,
isa_flags: super::super::riscv_settings::Flags,
) -> Self {
Self {
shared_flag,
isa_flags,
}
}
}
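/// Returns the 5-bit hardware encoding of the given physical register.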
pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap()
}
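/// Returns the 3-bit register encoding used by compressed instructions; only
/// registers with hardware encodings 8..15 are representable and map to 0..7.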
pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {
let real_reg = m.to_real_reg().unwrap().hw_enc();
debug_assert!(real_reg >= 8 && real_reg < 16);
let compressed_reg = real_reg - 8;
u32::try_from(compressed_reg).unwrap()
}
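/// What we know about the state of the vector unit at a given point during emission.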
#[derive(Clone, Debug, PartialEq, Default)]
pub enum EmitVState {
#[default]
Unknown,
Known(VState),
}
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
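/// Running adjustment applied to nominal-SP-relative addressing; updated by
/// `VirtualSPOffsetAdj` and by the callee-pop size at call sites.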
pub(crate) virtual_sp_offset: i64,
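/// Distance from the nominal SP to the frame pointer; initialized to the callee's frame size.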
pub(crate) nominal_sp_to_fp: i64,
/// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
stack_map: Option<StackMap>,
/// Current source-code location corresponding to instruction to be emitted.
cur_srcloc: RelSourceLoc,
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
/// optimized away at compile time. See [cranelift_control].
ctrl_plane: ControlPlane,
/// Vector State
/// Controls the current state of the vector unit at the emission point.
vstate: EmitVState,
}
impl EmitState {
fn take_stack_map(&mut self) -> Option<StackMap> {
self.stack_map.take()
}
fn clear_post_insn(&mut self) {
self.stack_map = None;
}
fn cur_srcloc(&self) -> RelSourceLoc {
self.cur_srcloc
}
}
impl MachInstEmitState<Inst> for EmitState {
fn new(
abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
ctrl_plane: ControlPlane,
) -> Self {
EmitState {
virtual_sp_offset: 0,
nominal_sp_to_fp: abi.frame_size() as i64,
stack_map: None,
cur_srcloc: RelSourceLoc::default(),
ctrl_plane,
vstate: EmitVState::Unknown,
}
}
fn pre_safepoint(&mut self, stack_map: StackMap) {
self.stack_map = Some(stack_map);
}
fn pre_sourceloc(&mut self, srcloc: RelSourceLoc) {
self.cur_srcloc = srcloc;
}
fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
&mut self.ctrl_plane
}
fn take_ctrl_plane(self) -> ControlPlane {
self.ctrl_plane
}
fn on_new_block(&mut self) {
// Reset the vector state.
self.vstate = EmitVState::Unknown;
}
}
impl Inst {
/// Load an integer mask into `rd`: the low `ty.bits()` bits are set and any
/// higher bits are cleared.
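/// For example, for `I16` the resulting value in `rd` is `0x0000_0000_0000_ffff`.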
pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
let mut insts = SmallInstVec::new();
assert!(ty.is_int() && ty.bits() <= 64);
match ty {
I64 => {
insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
}
I32 | I16 => {
insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
insts.push(Inst::Extend {
rd: rd,
rn: rd.to_reg(),
signed: false,
from_bits: ty.bits() as u8,
to_bits: 64,
});
}
I8 => {
insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));
}
_ => unreachable!("ty:{:?}", ty),
}
insts
}
/// Emit a bitwise NOT of `rs` into `rd` (implemented as `xori rd, rs, -1`).
pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
Inst::AluRRImm12 {
alu_op: AluOPRRI::Xori,
rd,
rs,
imm12: Imm12::from_i16(-1),
}
}
/// Emit a not-NaN check: `feq rd, rs, rs` sets `rd` to 1 if `rs` is not a NaN, 0 otherwise.
pub(crate) fn emit_not_nan(rd: Writable<Reg>, rs: Reg, ty: Type) -> Inst {
Inst::FpuRRR {
alu_op: if ty == F32 {
FpuOPRRR::FeqS
} else {
FpuOPRRR::FeqD
},
frm: None,
rd: rd,
rs1: rs,
rs2: rs,
}
}
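/// Emit a floating-point absolute value: `fsgnjx rd, rs, rs` clears the sign bit.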
pub(crate) fn emit_fabs(rd: Writable<Reg>, rs: Reg, ty: Type) -> Inst {
Inst::FpuRRR {
alu_op: if ty == F32 {
FpuOPRRR::FsgnjxS
} else {
FpuOPRRR::FsgnjxD
},
frm: None,
rd: rd,
rs1: rs,
rs2: rs,
}
}
/// Branch to `taken` if the float in `rs` is not zero, otherwise to `not_taken`,
/// using `fclass` to test for positive/negative zero.
pub(crate) fn emit_if_float_not_zero(
tmp: Writable<Reg>,
rs: Reg,
ty: Type,
taken: CondBrTarget,
not_taken: CondBrTarget,
) -> SmallInstVec<Inst> {
let mut insts = SmallInstVec::new();
let class_op = if ty == F32 {
FpuOPRR::FclassS
} else {
FpuOPRR::FclassD
};
insts.push(Inst::FpuRR {
alu_op: class_op,
frm: None,
rd: tmp,
rs: rs,
});
insts.push(Inst::AluRRImm12 {
alu_op: AluOPRRI::Andi,
rd: tmp,
rs: tmp.to_reg(),
imm12: Imm12::from_i16(FClassResult::is_zero_bits() as i16),
});
insts.push(Inst::CondBr {
taken,
not_taken,
kind: IntegerCompare {
kind: IntCC::Equal,
rs1: tmp.to_reg(),
rs2: zero_reg(),
},
});
insts
}
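/// Lower an integer compare-and-branch: a single conditional branch for types up
/// to 64 bits, or a short branch sequence over the high and low halves for `I128`.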
pub(crate) fn lower_br_icmp(
cc: IntCC,
a: ValueRegs<Reg>,
b: ValueRegs<Reg>,
taken: CondBrTarget,
not_taken: CondBrTarget,
ty: Type,
) -> SmallInstVec<Inst> {
let mut insts = SmallInstVec::new();
if ty.bits() <= 64 {
let rs1 = a.only_reg().unwrap();
let rs2 = b.only_reg().unwrap();
let inst = Inst::CondBr {
taken,
not_taken,
kind: IntegerCompare { kind: cc, rs1, rs2 },
};
insts.push(inst);
return insts;
}
// compare i128
let low = |cc: IntCC| -> IntegerCompare {
IntegerCompare {
rs1: a.regs()[0],
rs2: b.regs()[0],
kind: cc,
}
};
let high = |cc: IntCC| -> IntegerCompare {
IntegerCompare {
rs1: a.regs()[1],
rs2: b.regs()[1],
kind: cc,
}
};
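// For the ordered comparisons handled below (e.g. a signed `a < b` on i128), the
// emitted shape is roughly (registers are illustrative only):
//   blt  a_hi, b_hi, taken       ; high halves strictly ordered: decided
//   bne  a_hi, b_hi, not_taken   ; high halves ordered the other way
//   bltu a_lo, b_lo, taken       ; high halves equal: low halves decide (unsigned)
//   j    not_taken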
match cc {
IntCC::Equal => {
// If the high halves are not equal, the values cannot be equal, so branch to
// `not_taken`; otherwise fall through and compare the low halves.
insts.push(Inst::CondBr {
taken: not_taken,
not_taken: CondBrTarget::Fallthrough,
kind: high(IntCC::NotEqual),
});
// The high halves are equal; the low halves decide the result.
insts.push(Inst::CondBr {
taken,
not_taken,
kind: low(IntCC::Equal),
});
}
IntCC::NotEqual => {
// if the high part not equal ,
// we know the whole must be not equal,
// we can goto the taken part , otherwise fallthrought.
insts.push(Inst::CondBr {
taken,
not_taken: CondBrTarget::Fallthrough, // no branch
kind: high(IntCC::NotEqual),
});
insts.push(Inst::CondBr {
taken,
not_taken,
kind: low(IntCC::NotEqual),
});
}
IntCC::SignedGreaterThanOrEqual
| IntCC::SignedLessThanOrEqual
| IntCC::UnsignedGreaterThanOrEqual
| IntCC::UnsignedLessThanOrEqual
| IntCC::SignedGreaterThan
| IntCC::SignedLessThan
| IntCC::UnsignedLessThan
| IntCC::UnsignedGreaterThan => {
// If the high halves are strictly ordered in the direction we are testing,
// the whole comparison is decided: branch to `taken`.
insts.push(Inst::CondBr {
taken,
not_taken: CondBrTarget::Fallthrough,
kind: high(cc.without_equal()),
});
// Otherwise, if the high halves differ at all, they are ordered the other way,
// so branch to `not_taken`. If they are equal, the low halves, compared as
// unsigned, decide.
insts.push(Inst::CondBr {
taken: not_taken,
not_taken: CondBrTarget::Fallthrough,
kind: high(IntCC::NotEqual),
});
insts.push(Inst::CondBr {
taken,
not_taken,
kind: low(cc.unsigned()),
});
}
}
insts
}
/// Returns Some(VState) if this instruction is expecting a specific vector state
/// before emission.
fn expected_vstate(&self) -> Option<&VState> {
match self {
Inst::Nop0
| Inst::Nop4
| Inst::BrTable { .. }
| Inst::Auipc { .. }
| Inst::Lui { .. }
| Inst::LoadInlineConst { .. }
| Inst::AluRRR { .. }
| Inst::FpuRRR { .. }
| Inst::AluRRImm12 { .. }
| Inst::CsrReg { .. }
| Inst::CsrImm { .. }
| Inst::Load { .. }
| Inst::Store { .. }
| Inst::Args { .. }
| Inst::Rets { .. }
| Inst::Ret { .. }
| Inst::Extend { .. }
| Inst::AdjustSp { .. }
| Inst::Call { .. }
| Inst::CallInd { .. }
| Inst::ReturnCall { .. }
| Inst::ReturnCallInd { .. }
| Inst::Jal { .. }
| Inst::CondBr { .. }
| Inst::LoadExtName { .. }
| Inst::ElfTlsGetAddr { .. }
| Inst::LoadAddr { .. }
| Inst::VirtualSPOffsetAdj { .. }
| Inst::Mov { .. }
| Inst::MovFromPReg { .. }
| Inst::Fence { .. }
| Inst::EBreak
| Inst::Udf { .. }
| Inst::FpuRR { .. }
| Inst::FpuRRRR { .. }
| Inst::Jalr { .. }
| Inst::Atomic { .. }
| Inst::Select { .. }
| Inst::AtomicCas { .. }
| Inst::Icmp { .. }
| Inst::FcvtToInt { .. }
| Inst::RawData { .. }
| Inst::AtomicStore { .. }
| Inst::AtomicLoad { .. }
| Inst::AtomicRmwLoop { .. }
| Inst::TrapIf { .. }
| Inst::Unwind { .. }
| Inst::DummyUse { .. }
| Inst::FloatRound { .. }
| Inst::FloatSelect { .. }
| Inst::Popcnt { .. }
| Inst::Rev8 { .. }
| Inst::Cltz { .. }
| Inst::Brev8 { .. }
| Inst::StackProbeLoop { .. } => None,
// VecSetState does not expect any vstate, rather it updates it.
Inst::VecSetState { .. } => None,
// `vmv` instructions copy a set of registers and ignore vstate.
Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,
Inst::VecAluRR { vstate, .. } |
Inst::VecAluRRR { vstate, .. } |
Inst::VecAluRRRR { vstate, .. } |
Inst::VecAluRImm5 { vstate, .. } |
Inst::VecAluRRImm5 { vstate, .. } |
Inst::VecAluRRRImm5 { vstate, .. } |
// TODO: Unit-stride loads and stores only need the AVL to be correct, not
// the full vtype. A future optimization could be to decouple these two when
// updating vstate. This would allow us to avoid emitting a VecSetState in
// some cases.
Inst::VecLoad { vstate, .. }
| Inst::VecStore { vstate, .. } => Some(vstate),
}
}
}
impl MachInstEmit for Inst {
type State = EmitState;
type Info = EmitInfo;
fn emit(
&self,
allocs: &[Allocation],
sink: &mut MachBuffer<Inst>,
emit_info: &Self::Info,
state: &mut EmitState,
) {
// Transform this into an instruction with all physical registers filled in.
let mut allocs = AllocationConsumer::new(allocs);
let inst = self.clone().allocate(&mut allocs);
// Check if we need to update the vector state before emitting this instruction
if let Some(expected) = inst.expected_vstate() {
if state.vstate != EmitVState::Known(expected.clone()) {
// Update the vector state.
Inst::VecSetState {
rd: writable_zero_reg(),
vstate: expected.clone(),
}
.emit(&[], sink, emit_info, state);
}
}
// N.B.: we *must* not exceed the "worst-case size" used to compute
// where to insert islands, except when islands are explicitly triggered
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
// to allow disabling the check for `JTSequence`, which is always
// emitted following an `EmitIsland`.
let mut start_off = sink.cur_offset();
// First try to emit this as a compressed instruction
let res = inst.try_emit_compressed(sink, emit_info, state, &mut start_off);
if res.is_none() {
// If we can't, emit it as a normal (uncompressed) instruction.
inst.emit_uncompressed(sink, emit_info, state, &mut start_off);
}
let end_off = sink.cur_offset();
assert!(
(end_off - start_off) <= Inst::worst_case_size(),
"Inst:{:?} length:{} worst_case_size:{}",
self,
end_off - start_off,
Inst::worst_case_size()
);
}
fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String {
let mut allocs = AllocationConsumer::new(allocs);
self.print_with_state(state, &mut allocs)
}
}
impl Inst {
/// Tries to emit this instruction in a compressed form, returning `None` if no
/// compressed encoding applies (or the required extensions are not enabled).
fn try_emit_compressed(
&self,
sink: &mut MachBuffer<Inst>,
emit_info: &EmitInfo,
state: &mut EmitState,
start_off: &mut u32,
) -> Option<()> {
let has_m = emit_info.isa_flags.has_m();
let has_zba = emit_info.isa_flags.has_zba();
let has_zbb = emit_info.isa_flags.has_zbb();
let has_zca = emit_info.isa_flags.has_zca();
let has_zcb = emit_info.isa_flags.has_zcb();
let has_zcd = emit_info.isa_flags.has_zcd();
// Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc.) require Zca
// to be enabled, so check it early.
if !has_zca {
return None;
}
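// Only registers with hardware encodings 8..15 (x8..x15 / f8..f15) can be named
// by the 3-bit register fields used by most compressed instructions.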
fn reg_is_compressible(r: Reg) -> bool {
r.to_real_reg()
.map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
.unwrap_or(false)
}
match *self {
// C.ADD
Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd,
rs1,
rs2,
} if rd.to_reg() == rs1 && rs1 != zero_reg() && rs2 != zero_reg() => {
sink.put2(encode_cr_type(CrOp::CAdd, rd, rs2));
}
// C.MV
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
rd,
rs,
imm12,
} if rd.to_reg() != rs
&& rd.to_reg() != zero_reg()
&& rs != zero_reg()
&& imm12.as_i16() == 0 =>
{
sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
}
// CA Ops
Inst::AluRRR {
alu_op:
alu_op @ (AluOPRRR::And
| AluOPRRR::Or
| AluOPRRR::Xor
| AluOPRRR::Sub
| AluOPRRR::Addw
| AluOPRRR::Subw
| AluOPRRR::Mul),
rd,
rs1,
rs2,
} if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
let op = match alu_op {
AluOPRRR::And => CaOp::CAnd,
AluOPRRR::Or => CaOp::COr,
AluOPRRR::Xor => CaOp::CXor,
AluOPRRR::Sub => CaOp::CSub,
AluOPRRR::Addw => CaOp::CAddw,
AluOPRRR::Subw => CaOp::CSubw,
AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
_ => return None,
};
sink.put2(encode_ca_type(op, rd, rs2));
}
// c.j
//
// We don't emit a separate c.jal here as it is only available in RV32C
Inst::Jal { label } => {
sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
sink.add_uncond_branch(*start_off, *start_off + 2, label);
sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
}
// c.jr
Inst::Jalr { rd, base, offset }
if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
{
sink.put2(encode_cr2_type(CrOp::CJr, base));
}
// c.jalr
Inst::Jalr { rd, base, offset }
if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
{
sink.put2(encode_cr2_type(CrOp::CJalr, base));
}
// c.ebreak
Inst::EBreak => {
sink.put2(encode_cr_type(
CrOp::CEbreak,
writable_zero_reg(),
zero_reg(),
));
}
// c.unimp
Inst::Udf { trap_code } => {
sink.add_trap(trap_code);
if let Some(s) = state.take_stack_map() {
sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s);
}
sink.put2(0x0000);
}
// c.addi16sp
//
// c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
// c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),
// where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used
// to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp
// is only valid when nzimm≠0; the code point with nzimm=0 is reserved.
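// For example, `addi sp, sp, -64` can be compressed to c.addi16sp; the `Imm6`
// passed to the encoder is -64 / 16 = -4.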
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd,
rs,
imm12,
} if rd.to_reg() == rs
&& rs == stack_reg()
&& imm12.as_i16() != 0
&& (imm12.as_i16() % 16) == 0
&& Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
{
let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
sink.put2(encode_c_addi16sp(imm6));
}
// c.addi4spn
//
// c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
// immediate, scaled by 4, to the stack pointer, x2, and writes the result to
// rd. This instruction is used to generate pointers to stack-allocated variables
// and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
// the code points with nzuimm=0 are reserved.
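// For example, `addi a0, sp, 16` can be compressed to c.addi4spn: a0 is a
// compressible register and the immediate passed to the encoder is 16 / 4 = 4.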
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd,
rs,
imm12,
} if reg_is_compressible(rd.to_reg())
&& rs == stack_reg()
&& imm12.as_i16() != 0
&& (imm12.as_i16() % 4) == 0
&& u8::try_from(imm12.as_i16() / 4).is_ok() =>
{
let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
}
// c.li
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd,
rs,
imm12,
} if rd.to_reg() != zero_reg() && rs == zero_reg() && imm12.as_i16() != 0 => {
let imm6 = Imm6::maybe_from_imm12(imm12)?;
sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
}
// c.addi
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd,
rs,
imm12,
} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
let imm6 = Imm6::maybe_from_imm12(imm12)?;
sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
}
// c.addiw
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addiw,
rd,
rs,
imm12,
} if rd.to_reg() == rs && rs != zero_reg() => {
let imm6 = Imm6::maybe_from_imm12(imm12)?;
sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
}
// c.lui
//
// c.lui loads the non-zero 6-bit immediate field into bits 17–12
// of the destination register, clears the bottom 12 bits, and
// sign-extends bit 17 into all higher bits of the destination.
Inst::Lui { rd, imm: imm20 }
if rd.to_reg() != zero_reg()
&& rd.to_reg() != stack_reg()
&& imm20.as_i32() != 0 =>
{
// Check that the top bits are sign extended
let imm = imm20.as_i32() << 14 >> 14;
if imm != imm20.as_i32() {
return None;
}
let imm6 = Imm6::maybe_from_i32(imm)?;
sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
}
// c.slli
Inst::AluRRImm12 {
alu_op: AluOPRRI::Slli,
rd,
rs,
imm12,
} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
// The shift amount is unsigned, but we encode it as signed.
let shift = imm12.as_i16() & 0x3f;
let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
}
// c.srli / c.srai
Inst::AluRRImm12 {
alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
rd,
rs,
imm12,
} if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
let op = match op {
AluOPRRI::Srli => CbOp::CSrli,
AluOPRRI::Srai => CbOp::CSrai,
_ => unreachable!(),
};
// The shift amount is unsigned, but we encode it as signed.
let shift = imm12.as_i16() & 0x3f;
let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
sink.put2(encode_cb_type(op, rd, imm6));
}
// c.zextb
//
// This is an alias for `andi rd, rd, 0xff`
Inst::AluRRImm12 {
alu_op: AluOPRRI::Andi,
rd,
rs,
imm12,
} if has_zcb
&& rd.to_reg() == rs
&& reg_is_compressible(rs)
&& imm12.as_i16() == 0xff =>
{
sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
}
// c.andi
Inst::AluRRImm12 {
alu_op: AluOPRRI::Andi,
rd,
rs,
imm12,
} if rd.to_reg() == rs && reg_is_compressible(rs) => {
let imm6 = Imm6::maybe_from_imm12(imm12)?;
sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
}
// Stack Based Loads
Inst::Load {
rd,
op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
from,
flags,
} if from.get_base_register() == Some(stack_reg())
&& (from.get_offset_with_state(state) % op.size()) == 0 =>
{
// We encode the offset in multiples of the load size.
let offset = from.get_offset_with_state(state);
let imm6 = u8::try_from(offset / op.size())
.ok()
.and_then(Uimm6::maybe_from_u8)?;
// Some additional constraints on these instructions.
//
// Integer loads are not allowed to target x0, but floating point loads
// are, since f0 is not a special register.
//
// Floating point loads are not included in the base Zca extension
// but in a separate Zcd extension. Both of these are part of the C Extension.
let rd_is_zero = rd.to_reg() == zero_reg();
let op = match op {
LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
LoadOP::Fld if has_zcd => CiOp::CFldsp,
_ => return None,
};
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put2(encode_ci_sp_load(op, rd, imm6));
}
// Regular Loads
Inst::Load {
rd,
op:
op
@ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
from,
flags,
} if reg_is_compressible(rd.to_reg())
&& from
.get_base_register()
.map(reg_is_compressible)
.unwrap_or(false)
&& (from.get_offset_with_state(state) % op.size()) == 0 =>
{
let base = from.get_base_register().unwrap();
// We encode the offset in multiples of the load size.
let offset = from.get_offset_with_state(state);
let offset = u8::try_from(offset / op.size()).ok()?;
// We mix two different formats here.
//
// c.lw / c.ld / c.fld instructions are available in the standard Zca
// extension using the CL format.
//
// c.lbu / c.lhu / c.lh are only available in the Zcb extension and
// are also encoded differently. Technically they each have a different
// format, but they are similar enough that we can group them.
let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
let encoded = if is_zcb_load {
if !has_zcb {
return None;
}
let op = match op {
LoadOP::Lbu => ZcbMemOp::CLbu,
LoadOP::Lhu => ZcbMemOp::CLhu,
LoadOP::Lh => ZcbMemOp::CLh,
_ => unreachable!(),
};
// Byte stores & loads have 2 bits of immediate offset. Halfword stores
// and loads only have 1 bit.
let imm2 = Uimm2::maybe_from_u8(offset)?;
if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
return None;
}
encode_zcbmem_load(op, rd, base, imm2)
} else {
// Floating point loads are not included in the base Zca extension
// but in a separate Zcd extension. Both of these are part of the C Extension.
let op = match op {
LoadOP::Lw => ClOp::CLw,
LoadOP::Ld => ClOp::CLd,
LoadOP::Fld if has_zcd => ClOp::CFld,
_ => return None,
};
let imm5 = Uimm5::maybe_from_u8(offset)?;
encode_cl_type(op, rd, base, imm5)
};
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put2(encoded);
}
// Stack Based Stores
Inst::Store {
src,
op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
to,
flags,
} if to.get_base_register() == Some(stack_reg())
&& (to.get_offset_with_state(state) % op.size()) == 0 =>
{
// We encode the offset in multiples of the store size.
let offset = to.get_offset_with_state(state);
let imm6 = u8::try_from(offset / op.size())
.ok()
.and_then(Uimm6::maybe_from_u8)?;
// Floating point stores are not included in the base Zca extension
// but in a separate Zcd extension. Both of these are part of the C Extension.
let op = match op {
StoreOP::Sw => CssOp::CSwsp,
StoreOP::Sd => CssOp::CSdsp,
StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
_ => return None,
};
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
// Register the offset at which the actual store instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put2(encode_css_type(op, src, imm6));
}
// Regular Stores
Inst::Store {
src,
op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
to,
flags,
} if reg_is_compressible(src)
&& to
.get_base_register()
.map(reg_is_compressible)
.unwrap_or(false)
&& (to.get_offset_with_state(state) % op.size()) == 0 =>
{
let base = to.get_base_register().unwrap();
// We encode the offset in multiples of the store size.
let offset = to.get_offset_with_state(state);
let offset = u8::try_from(offset / op.size()).ok()?;
// We mix two different formats here.
//
// c.sw / c.sd / c.fsd instructions are available in the standard Zca
// extension using the CS format.
//
// c.sb / c.sh are only available in the Zcb extension and are also
// encoded differently.
let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
let encoded = if is_zcb_store {
if !has_zcb {
return None;
}
let op = match op {
StoreOP::Sh => ZcbMemOp::CSh,
StoreOP::Sb => ZcbMemOp::CSb,
_ => unreachable!(),
};
// Byte stores & loads have 2 bits of immediate offset. Halfword stores
// and loads only have 1 bit.
let imm2 = Uimm2::maybe_from_u8(offset)?;
if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
return None;
}
encode_zcbmem_store(op, src, base, imm2)
} else {
// Floating point stores are not included in the base Zca extension
// but in a separate Zcd extension. Both of these are part of the C Extension.
let op = match op {
StoreOP::Sw => CsOp::CSw,
StoreOP::Sd => CsOp::CSd,
StoreOP::Fsd if has_zcd => CsOp::CFsd,
_ => return None,
};
let imm5 = Uimm5::maybe_from_u8(offset)?;
encode_cs_type(op, src, base, imm5)
};
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
// Register the offset at which the actual store instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put2(encoded);
}
// c.not
//
// This is an alias for `xori rd, rd, -1`
Inst::AluRRImm12 {
alu_op: AluOPRRI::Xori,
rd,
rs,
imm12,
} if has_zcb
&& rd.to_reg() == rs
&& reg_is_compressible(rs)
&& imm12.as_i16() == -1 =>
{
sink.put2(encode_cszn_type(CsznOp::CNot, rd));
}
// c.sext.b / c.sext.h / c.zext.h
//
// These are all of the extend instructions present in `Zcb`; they also
// require `Zbb`, since the underlying instructions aren't available in the base ISA.
Inst::AluRRImm12 {
alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
rd,
rs,
imm12,
} if has_zcb
&& has_zbb
&& rd.to_reg() == rs
&& reg_is_compressible(rs)
&& imm12.as_i16() == 0 =>
{
let op = match alu_op {
AluOPRRI::Sextb => CsznOp::CSextb,
AluOPRRI::Sexth => CsznOp::CSexth,
AluOPRRI::Zexth => CsznOp::CZexth,
_ => unreachable!(),
};
sink.put2(encode_cszn_type(op, rd));
}
// c.zext.w
//
// This is an alias for `add.uw rd, rd, zero`
Inst::AluRRR {
alu_op: AluOPRRR::Adduw,
rd,
rs1,
rs2,
} if has_zcb
&& has_zba
&& rd.to_reg() == rs1
&& reg_is_compressible(rs1)
&& rs2 == zero_reg() =>
{
sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
}
_ => return None,
}
return Some(());
}
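/// Emit this instruction using only full-size (4-byte) encodings; pseudo-instructions
/// may expand into several machine instructions.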
fn emit_uncompressed(
&self,
sink: &mut MachBuffer<Inst>,
emit_info: &EmitInfo,
state: &mut EmitState,
start_off: &mut u32,
) {
match self {
&Inst::Nop0 => {
// do nothing
}
// Addi x0, x0, 0
&Inst::Nop4 => {
let x = Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: Writable::from_reg(zero_reg()),
rs: zero_reg(),
imm12: Imm12::ZERO,
};
x.emit(&[], sink, emit_info, state)
}
&Inst::RawData { ref data } => {
// Right now we only put a u32 or u64 in this instruction.
// It is not very long, so there is no need to check whether we need to `emit_island`.
// If the data were very long that would be a bug, because RawData is typically
// used to load some data that relies on a fixed position in the code stream,
// and we might exceed `Inst::worst_case_size`.
// For more information see https://github.com/bytecodealliance/wasmtime/pull/5612.
sink.put_data(&data[..]);
}
&Inst::Lui { rd, ref imm } => {
let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
sink.put4(x);
}
&Inst::LoadInlineConst { rd, ty, imm } => {
let data = &imm.to_le_bytes()[..ty.bytes() as usize];
let label_data: MachLabel = sink.get_label();
let label_end: MachLabel = sink.get_label();
// Load into rd
Inst::Load {
rd,
op: LoadOP::from_type(ty),
flags: MemFlags::new(),
from: AMode::Label(label_data),
}
.emit(&[], sink, emit_info, state);
// Jump over the inline pool
Inst::gen_jump(label_end).emit(&[], sink, emit_info, state);
// Emit the inline data
sink.bind_label(label_data, &mut state.ctrl_plane);
Inst::RawData { data: data.into() }.emit(&[], sink, emit_info, state);
sink.bind_label(label_end, &mut state.ctrl_plane);
}
&Inst::FpuRR {
frm,
alu_op,
rd,
rs,
} => {
let x = alu_op.op_code()
| reg_to_gpr_num(rd.to_reg()) << 7
| alu_op.funct3(frm) << 12
| reg_to_gpr_num(rs) << 15
| alu_op.rs2_funct5() << 20
| alu_op.funct7() << 25;
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && alu_op.is_convert_to_int() {
sink.add_trap(TrapCode::BadConversionToInteger);
}
sink.put4(x);
}
&Inst::FpuRRRR {
alu_op,
rd,
rs1,
rs2,
rs3,
frm,
} => {
let x = alu_op.op_code()
| reg_to_gpr_num(rd.to_reg()) << 7
| alu_op.funct3(frm) << 12
| reg_to_gpr_num(rs1) << 15
| reg_to_gpr_num(rs2) << 20
| alu_op.funct2() << 25
| reg_to_gpr_num(rs3) << 27;
sink.put4(x);
}
&Inst::FpuRRR {
alu_op,
frm,
rd,
rs1,
rs2,
} => {
let x: u32 = alu_op.op_code()
| reg_to_gpr_num(rd.to_reg()) << 7
| (alu_op.funct3(frm)) << 12
| reg_to_gpr_num(rs1) << 15
| reg_to_gpr_num(rs2) << 20
| alu_op.funct7() << 25;
sink.put4(x);
}
&Inst::Unwind { ref inst } => {
sink.add_unwind(inst.clone());
}
&Inst::DummyUse { .. } => {
// This has already been handled by Inst::allocate.
}
&Inst::AluRRR {
alu_op,
rd,
rs1,
rs2,
} => {
let (rs1, rs2) = if alu_op.reverse_rs() {
(rs2, rs1)
} else {
(rs1, rs2)
};
sink.put4(encode_r_type(
alu_op.op_code(),
rd,
alu_op.funct3(),
rs1,
rs2,
alu_op.funct7(),
));
}
&Inst::AluRRImm12 {
alu_op,
rd,
rs,
imm12,
} => {
let x = alu_op.op_code()
| reg_to_gpr_num(rd.to_reg()) << 7
| alu_op.funct3() << 12
| reg_to_gpr_num(rs) << 15
| alu_op.imm12(imm12) << 20;
sink.put4(x);
}
&Inst::CsrReg { op, rd, rs, csr } => {
sink.put4(encode_csr_reg(op, rd, rs, csr));
}
&Inst::CsrImm { op, rd, csr, imm } => {
sink.put4(encode_csr_imm(op, rd, csr, imm));
}
&Inst::Load {
rd,
op,
from,
flags,
} => {
let base = from.get_base_register();
let offset = from.get_offset_with_state(state);
let offset_imm12 = Imm12::maybe_from_i64(offset);
let label = from.get_label_with_sink(sink);
let (addr, imm12) = match (base, offset_imm12, label) {
// When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
(Some(base), Some(imm12), None) => (base, imm12),
// Otherwise, if the offset does not fit into an imm12, we need to materialize it into a
// register and load from that.
(Some(_), None, None) => {
let tmp = writable_spilltmp_reg();
Inst::LoadAddr { rd: tmp, mem: from }.emit(&[], sink, emit_info, state);
(tmp.to_reg(), Imm12::ZERO)
}
// If the AMode contains a label we can emit an internal relocation that gets
// resolved with the correct address later.
(None, Some(imm), Some(label)) => {
debug_assert_eq!(imm.as_i16(), 0);
// Get the current PC.
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
Inst::Auipc {
rd,
imm: Imm20::ZERO,
}
.emit_uncompressed(sink, emit_info, state, start_off);
// Emit a relocation for the load. This patches the offset into the instruction.
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
// Imm12 here is meaningless since it's going to get replaced.
(rd.to_reg(), Imm12::ZERO)
}
// These cases are impossible with the current AModes that we have. We either
// always have a register, or always have a label. Never both, and never neither.
(None, None, None)
| (None, Some(_), None)
| (Some(_), None, Some(_))
| (Some(_), Some(_), Some(_))
| (None, None, Some(_)) => {
unreachable!("Invalid load address")
}
};
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
}
&Inst::Store { op, src, flags, to } => {
let base = to.get_base_register();
let offset = to.get_offset_with_state(state);
let offset_imm12 = Imm12::maybe_from_i64(offset);
let (addr, imm12) = match (base, offset_imm12) {
// If the offset fits into an imm12 we can directly encode it.
(Some(base), Some(imm12)) => (base, imm12),
// Otherwise, materialize the address into a register and store through it.
_ => {
let tmp = writable_spilltmp_reg();
Inst::LoadAddr { rd: tmp, mem: to }.emit(&[], sink, emit_info, state);
(tmp.to_reg(), Imm12::ZERO)
}
};
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
// Register the offset at which the actual store instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
}
&Inst::Args { .. } | &Inst::Rets { .. } => {
// Nothing: this is a pseudoinstruction that serves
// only to constrain registers at a certain point.
}
&Inst::Ret {} => {
// RISC-V does not have a dedicated ret instruction; instead we emit the equivalent
// `jalr x0, x1, 0`, which jumps to the return address.
Inst::Jalr {
rd: writable_zero_reg(),
base: link_reg(),
offset: Imm12::ZERO,
}
.emit(&[], sink, emit_info, state);
}
&Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits: _to_bits,
} => {
let mut insts = SmallInstVec::new();
let shift_bits = (64 - from_bits) as i16;
let is_u8 = || from_bits == 8 && !signed;
if is_u8() {
// Special case for u8: a single `andi` with 0xff zero-extends it.
insts.push(Inst::AluRRImm12 {
alu_op: AluOPRRI::Andi,
rd,
rs: rn,
imm12: Imm12::from_i16(255),
});
} else {
insts.push(Inst::AluRRImm12 {
alu_op: AluOPRRI::Slli,
rd,
rs: rn,
imm12: Imm12::from_i16(shift_bits),
});
insts.push(Inst::AluRRImm12 {
alu_op: if signed {
AluOPRRI::Srai
} else {
AluOPRRI::Srli
},
rd,
rs: rd.to_reg(),
imm12: Imm12::from_i16(shift_bits),
});
}
insts
.into_iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
}
&Inst::AdjustSp { amount } => {
if let Some(imm) = Imm12::maybe_from_i64(amount) {
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: writable_stack_reg(),
rs: stack_reg(),
imm12: imm,
}
.emit(&[], sink, emit_info, state);
} else {
let tmp = writable_spilltmp_reg();
let mut insts = Inst::load_constant_u64(tmp, amount as u64);
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd: writable_stack_reg(),
rs1: tmp.to_reg(),
rs2: stack_reg(),
});
insts
.into_iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
}
}
&Inst::Call { ref info } => {
// call
match info.dest {
ExternalName::User { .. } => {
if info.opcode.is_call() {
sink.add_call_site(info.opcode);
}
sink.add_reloc(Reloc::RiscvCall, &info.dest, 0);
if let Some(s) = state.take_stack_map() {
sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s);
}
Inst::construct_auipc_and_jalr(
Some(writable_link_reg()),
writable_link_reg(),
0,
)
.into_iter()
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
}
ExternalName::LibCall(..)
| ExternalName::TestCase { .. }
| ExternalName::KnownSymbol(..) => {
// Use an indirect call; it is simpler.
// Load the external name into a register.
Inst::LoadExtName {
rd: writable_spilltmp_reg2(),
name: Box::new(info.dest.clone()),
offset: 0,
}
.emit(&[], sink, emit_info, state);
// call
Inst::CallInd {
info: Box::new(CallIndInfo {
rn: spilltmp_reg2(),
// This doesn't really matter but we might as well send
// the correct info.
uses: info.uses.clone(),
defs: info.defs.clone(),
clobbers: info.clobbers,
opcode: Opcode::CallIndirect,
caller_callconv: info.caller_callconv,
callee_callconv: info.callee_callconv,
// Send this as 0 to avoid updating the pop size twice.
callee_pop_size: 0,
}),
}
.emit(&[], sink, emit_info, state);
}
}
let callee_pop_size = i64::from(info.callee_pop_size);
state.virtual_sp_offset -= callee_pop_size;
trace!(
"call adjusts virtual sp offset by {callee_pop_size} -> {}",
state.virtual_sp_offset
);
}
&Inst::CallInd { ref info } => {
let start_offset = sink.cur_offset();
Inst::Jalr {
rd: writable_link_reg(),
base: info.rn,
offset: Imm12::ZERO,
}
.emit(&[], sink, emit_info, state);
if let Some(s) = state.take_stack_map() {
sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s);
}
if info.opcode.is_call() {
sink.add_call_site(info.opcode);
}
let callee_pop_size = i64::from(info.callee_pop_size);
state.virtual_sp_offset -= callee_pop_size;
trace!(
"call adjusts virtual sp offset by {callee_pop_size} -> {}",
state.virtual_sp_offset
);
}
&Inst::ReturnCall {
ref callee,
ref info,
} => {
emit_return_call_common_sequence(
sink,
emit_info,
state,
info.new_stack_arg_size,
info.old_stack_arg_size,
);
sink.add_call_site(ir::Opcode::ReturnCall);
sink.add_reloc(Reloc::RiscvCall, &**callee, 0);
Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
.into_iter()
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
// `emit_return_call_common_sequence` emits an island if
// necessary, so we can safely disable the worst-case-size check
// in this case.
*start_off = sink.cur_offset();
}
&Inst::ReturnCallInd { callee, ref info } => {
emit_return_call_common_sequence(
sink,
emit_info,
state,
info.new_stack_arg_size,
info.old_stack_arg_size,
);
Inst::Jalr {
rd: writable_zero_reg(),
base: callee,
offset: Imm12::ZERO,
}
.emit(&[], sink, emit_info, state);
// `emit_return_call_common_sequence` emits an island if
// necessary, so we can safely disable the worst-case-size check
// in this case.
*start_off = sink.cur_offset();
}
&Inst::Jal { label } => {
sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
sink.add_uncond_branch(*start_off, *start_off + 4, label);
sink.put4(0b1101111);
}
&Inst::CondBr {
taken,
not_taken,
kind,
} => {
match taken {
CondBrTarget::Label(label) => {
let code = kind.emit();
let code_inverse = kind.inverse().emit().to_le_bytes();
sink.use_label_at_offset(*start_off, label, LabelUse::B12);
sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
sink.put4(code);
}
CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
}
match not_taken {
CondBrTarget::Label(label) => {
Inst::gen_jump(label).emit(&[], sink, emit_info, state)
}
CondBrTarget::Fallthrough => {}
};
}
&Inst::Mov { rd, rm, ty } => {
debug_assert_eq!(rd.to_reg().class(), rm.class());
if rd.to_reg() == rm {
return;
}
match rm.class() {
RegClass::Int => Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: rd,
rs: rm,
imm12: Imm12::ZERO,
},
RegClass::Float => Inst::FpuRRR {
alu_op: if ty == F32 {
FpuOPRRR::FsgnjS
} else {
FpuOPRRR::FsgnjD
},
frm: None,
rd: rd,
rs1: rm,
rs2: rm,
},
RegClass::Vector => Inst::VecAluRRImm5 {
op: VecAluOpRRImm5::VmvrV,
vd: rd,
vs2: rm,
// Imm 0 means copy 1 register.
imm: Imm5::maybe_from_i8(0).unwrap(),
mask: VecOpMasking::Disabled,
// Vstate for this instruction is ignored.
vstate: VState::from_type(ty),
},
}
.emit(&[], sink, emit_info, state);
}
&Inst::MovFromPReg { rd, rm } => {
Inst::gen_move(rd, Reg::from(rm), I64).emit(&[], sink, emit_info, state);
}
&Inst::BrTable {
index,
tmp1,
tmp2,
ref targets,
} => {
let ext_index = writable_spilltmp_reg();
let label_compute_target = sink.get_label();
// The default target is passed in as the 0th element of `targets`;
// separate it here for clarity.
let default_target = targets[0];
let targets = &targets[1..];
// We are going to potentially emit a large amount of instructions, so ensure that we emit an island
// now if we need one.
//
// The worst-case PC calculation takes 12 instructions, and each entry in the jump table is 2 instructions.
// Check whether we need to emit an island before emitting this sequence.
let inst_count = 12 + (targets.len() * 2);
let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
if sink.island_needed(distance) {
let jump_around_label = sink.get_label();
Inst::gen_jump(jump_around_label).emit(&[], sink, emit_info, state);
sink.emit_island(distance + 4, &mut state.ctrl_plane);
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
}
// We emit a bounds check on the index, if the index is larger than the number of
// jump table entries, we jump to the default block. Otherwise we compute a jump
// offset by multiplying the index by 8 (the size of each entry) and then jump to
// that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially.
//
// Build the following sequence:
//
// extend_index:
// zext.w ext_index, index
// bounds_check:
// li tmp, n_labels
// bltu ext_index, tmp, compute_target
// jump_to_default_block:
// auipc pc, 0
// jalr zero, pc, default_block
// compute_target:
// auipc pc, 0
// slli tmp, ext_index, 3
// add pc, pc, tmp
// jalr zero, pc, 0x10
// jump_table:
// ; This repeats for each entry in the jumptable
// auipc pc, 0
// jalr zero, pc, block_target
// Extend the index to 64 bits.
//
// This prevents us branching on the top 32 bits of the index, which
// are undefined.
Inst::Extend {
rd: ext_index,
rn: index,
signed: false,
from_bits: 32,
to_bits: 64,
}
.emit(&[], sink, emit_info, state);
// Bounds check.
//
// Check if the index passed in is larger than the number of jumptable
// entries that we have. If it is, we fallthrough to a jump into the
// default block.
Inst::load_constant_u32(tmp2, targets.len() as u64)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
Inst::CondBr {
taken: CondBrTarget::Label(label_compute_target),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::UnsignedLessThan,
rs1: ext_index.to_reg(),
rs2: tmp2.to_reg(),
},
}
.emit(&[], sink, emit_info, state);
sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
Inst::construct_auipc_and_jalr(None, tmp2, 0)
.iter()
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
// Compute the jump table offset.
// We need a PC-relative offset into the jump table.
sink.bind_label(label_compute_target, &mut state.ctrl_plane);
// Get the current PC.
Inst::Auipc {
rd: tmp1,
imm: Imm20::ZERO,
}
.emit_uncompressed(sink, emit_info, state, start_off);
// These instructions must be emitted as uncompressed since we
// are manually computing the offset from the PC.
// Multiply the index by 8, since that is the size in
// bytes of each jump table entry
Inst::AluRRImm12 {
alu_op: AluOPRRI::Slli,
rd: tmp2,
rs: ext_index.to_reg(),
imm12: Imm12::from_i16(3),
}
.emit_uncompressed(sink, emit_info, state, start_off);
// Calculate the base of the jump, PC + the offset from above.
Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd: tmp1,
rs1: tmp1.to_reg(),
rs2: tmp2.to_reg(),
}
.emit_uncompressed(sink, emit_info, state, start_off);
// Jump to the middle of the jump table.
// We add a 16-byte offset here because the jump table starts 4 instructions
// (16 bytes) after the AUIPC that captured the PC.
Inst::Jalr {
rd: writable_zero_reg(),
base: tmp1.to_reg(),
offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
}
.emit_uncompressed(sink, emit_info, state, start_off);
// Emit the jump table.
//
// Each entry is an auipc + jalr to the target block. Any island that was
// needed has already been emitted above.
// Emit the jumps back to back
for target in targets.iter() {
sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);
Inst::construct_auipc_and_jalr(None, tmp2, 0)
.iter()
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
}
// We've just emitted an island that is safe up to *here*.
// Mark it as such so that we don't needlessly emit additional islands.
*start_off = sink.cur_offset();
}
&Inst::VirtualSPOffsetAdj { amount } => {
crate::trace!(
"virtual sp offset adjusted by {} -> {}",
amount,
state.virtual_sp_offset + amount
);
state.virtual_sp_offset += amount;
}
&Inst::Atomic {
op,
rd,
addr,
src,
amo,
} => {
let srcloc = state.cur_srcloc();
if !srcloc.is_default() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
let x = op.op_code()
| reg_to_gpr_num(rd.to_reg()) << 7
| op.funct3() << 12
| reg_to_gpr_num(addr) << 15
| reg_to_gpr_num(src) << 20
| op.funct7(amo) << 25;
sink.put4(x);
}
&Inst::Fence { pred, succ } => {
let x = 0b0001111
| 0b00000 << 7
| 0b000 << 12
| 0b00000 << 15
| (succ as u32) << 20
| (pred as u32) << 24;
sink.put4(x);
}
&Inst::Auipc { rd, imm } => {
sink.put4(enc_auipc(rd, imm));
}
&Inst::LoadAddr { rd, mem } => {
let base = mem.get_base_register();
let offset = mem.get_offset_with_state(state);
let offset_imm12 = Imm12::maybe_from_i64(offset);
match (mem, base, offset_imm12) {
(_, Some(rs), Some(imm12)) => {
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd,
rs,
imm12,
}
.emit(&[], sink, emit_info, state);
}
(_, Some(rs), None) => {
let mut insts = Inst::load_constant_u64(rd, offset as u64);
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd,
rs1: rd.to_reg(),
rs2: rs,
});
insts
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
}
(AMode::Const(addr), None, _) => {
// Get an address label for the constant and recurse.
let label = sink.get_label_for_constant(addr);
Inst::LoadAddr {
rd,
mem: AMode::Label(label),
}
.emit(&[], sink, emit_info, state);
}
(AMode::Label(label), None, _) => {
// Get the current PC.
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
let inst = Inst::Auipc {
rd,
imm: Imm20::ZERO,
};
inst.emit_uncompressed(sink, emit_info, state, start_off);
// Emit an add to the address with a relocation.
// This later gets patched up with the correct offset.
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd,
rs: rd.to_reg(),
imm12: Imm12::ZERO,
}
.emit_uncompressed(sink, emit_info, state, start_off);
}
(amode, _, _) => {
unimplemented!("LoadAddr: {:?}", amode);
}
}
}
&Inst::Select {
ref dst,
condition,
ref x,
ref y,
} => {
let label_true = sink.get_label();
let label_false = sink.get_label();
let label_end = sink.get_label();
Inst::CondBr {
taken: CondBrTarget::Label(label_true),
not_taken: CondBrTarget::Label(label_false),
kind: condition,
}
.emit(&[], sink, emit_info, state);
sink.bind_label(label_true, &mut state.ctrl_plane);
// The condition was true: select the first value.
for i in gen_moves(dst.regs(), x.regs()) {
i.emit(&[], sink, emit_info, state);
}
Inst::gen_jump(label_end).emit(&[], sink, emit_info, state);
sink.bind_label(label_false, &mut state.ctrl_plane);
for i in gen_moves(dst.regs(), y.regs()) {
i.emit(&[], sink, emit_info, state);
}
sink.bind_label(label_end, &mut state.ctrl_plane);
}
&Inst::Jalr { rd, base, offset } => {
sink.put4(enc_jalr(rd, base, offset));
}
&Inst::EBreak => {
sink.put4(0x00100073);
}
&Inst::Icmp { cc, rd, a, b, ty } => {
let label_true = sink.get_label();
let label_false = sink.get_label();
let label_end = sink.get_label();
Inst::lower_br_icmp(
cc,
a,
b,
CondBrTarget::Label(label_true),
CondBrTarget::Label(label_false),
ty,
)
.into_iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
sink.bind_label(label_true, &mut state.ctrl_plane);
Inst::load_imm12(rd, Imm12::ONE).emit(&[], sink, emit_info, state);
Inst::gen_jump(label_end).emit(&[], sink, emit_info, state);
sink.bind_label(label_false, &mut state.ctrl_plane);
Inst::load_imm12(rd, Imm12::ZERO).emit(&[], sink, emit_info, state);
sink.bind_label(label_end, &mut state.ctrl_plane);
}
&Inst::AtomicCas {
offset,
t0,
dst,
e,
addr,
v,
ty,
} => {
// # addr holds address of memory location
// # e holds expected value
// # v holds desired value
// # dst holds return value
// cas:
// lr.w dst, (addr) # Load original value.
// bne dst, e, fail # Doesn’t match, so fail.
// sc.w t0, v, (addr) # Try to update.
// bnez t0, cas # If the store failed, retry.
// fail:
let fail_label = sink.get_label();
let cas_label = sink.get_label();
sink.bind_label(cas_label, &mut state.ctrl_plane);
Inst::Atomic {
op: AtomicOP::load_op(ty),
rd: dst,
addr,
src: zero_reg(),
amo: AMO::SeqCst,
}
.emit(&[], sink, emit_info, state);
if ty.bits() < 32 {
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
} else if ty.bits() == 32 {
Inst::Extend {
rd: dst,
rn: dst.to_reg(),
signed: false,
from_bits: 32,
to_bits: 64,
}
.emit(&[], sink, emit_info, state);
}
Inst::CondBr {
taken: CondBrTarget::Label(fail_label),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::NotEqual,
rs1: e,
rs2: dst.to_reg(),
},
}
.emit(&[], sink, emit_info, state);
let store_value = if ty.bits() < 32 {
// reload value to t0.
Inst::Atomic {
op: AtomicOP::load_op(ty),
rd: t0,
addr,
src: zero_reg(),
amo: AMO::SeqCst,
}
.emit(&[], sink, emit_info, state);
// Merge the new value into the reloaded word, preserving the bits outside the target lane.
AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
t0.to_reg()
} else {
v
};
Inst::Atomic {
op: AtomicOP::store_op(ty),
rd: t0,
addr,
src: store_value,
amo: AMO::SeqCst,
}
.emit(&[], sink, emit_info, state);
// Check whether the store-conditional succeeded.
Inst::CondBr {
taken: CondBrTarget::Label(cas_label),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::NotEqual,
rs1: t0.to_reg(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
sink.bind_label(fail_label, &mut state.ctrl_plane);
}
&Inst::AtomicRmwLoop {
offset,
op,
dst,
ty,
p,
x,
t0,
} => {
let retry = sink.get_label();
sink.bind_label(retry, &mut state.ctrl_plane);
// load old value.
Inst::Atomic {
op: AtomicOP::load_op(ty),
rd: dst,
addr: p,
src: zero_reg(),
amo: AMO::SeqCst,
}
.emit(&[], sink, emit_info, state);
// Compute the value to store back.
let store_value: Reg = match op {
crate::ir::AtomicRmwOp::Add
| crate::ir::AtomicRmwOp::Sub
| crate::ir::AtomicRmwOp::And
| crate::ir::AtomicRmwOp::Or
| crate::ir::AtomicRmwOp::Xor => {
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
Inst::AluRRR {
alu_op: match op {
crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
crate::ir::AtomicRmwOp::And => AluOPRRR::And,
crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
_ => unreachable!(),
},
rd: t0,
rs1: dst.to_reg(),
rs2: x,
}
.emit(&[], sink, emit_info, state);
Inst::Atomic {
op: AtomicOP::load_op(ty),
rd: writable_spilltmp_reg2(),
addr: p,
src: zero_reg(),
amo: AMO::SeqCst,
}
.emit(&[], sink, emit_info, state);
AtomicOP::merge(
writable_spilltmp_reg2(),
writable_spilltmp_reg(),
offset,
t0.to_reg(),
ty,
)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
spilltmp_reg2()
}
crate::ir::AtomicRmwOp::Nand => {
if ty.bits() < 32 {
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
}
Inst::AluRRR {
alu_op: AluOPRRR::And,
rd: t0,
rs1: x,
rs2: dst.to_reg(),
}
.emit(&[], sink, emit_info, state);
Inst::construct_bit_not(t0, t0.to_reg()).emit(&[], sink, emit_info, state);
if ty.bits() < 32 {
Inst::Atomic {
op: AtomicOP::load_op(ty),
rd: writable_spilltmp_reg2(),
addr: p,
src: zero_reg(),
amo: AMO::SeqCst,
}
.emit(&[], sink, emit_info, state);
AtomicOP::merge(
writable_spilltmp_reg2(),
writable_spilltmp_reg(),
offset,
t0.to_reg(),
ty,
)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
spilltmp_reg2()
} else {
t0.to_reg()
}
}
crate::ir::AtomicRmwOp::Umin
| crate::ir::AtomicRmwOp::Umax
| crate::ir::AtomicRmwOp::Smin
| crate::ir::AtomicRmwOp::Smax => {
let label_select_dst = sink.get_label();
let label_select_done = sink.get_label();
if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
{
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
} else {
AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
}
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
Inst::lower_br_icmp(
match op {
crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
_ => unreachable!(),
},
ValueRegs::one(dst.to_reg()),
ValueRegs::one(x),
CondBrTarget::Label(label_select_dst),
CondBrTarget::Fallthrough,
ty,
)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
// here we select x.
Inst::gen_move(t0, x, I64).emit(&[], sink, emit_info, state);
Inst::gen_jump(label_select_done).emit(&[], sink, emit_info, state);
sink.bind_label(label_select_dst, &mut state.ctrl_plane);
Inst::gen_move(t0, dst.to_reg(), I64).emit(&[], sink, emit_info, state);
sink.bind_label(label_select_done, &mut state.ctrl_plane);
Inst::Atomic {
op: AtomicOP::load_op(ty),
rd: writable_spilltmp_reg2(),
addr: p,
src: zero_reg(),
amo: AMO::SeqCst,
}
.emit(&[], sink, emit_info, state);
AtomicOP::merge(
writable_spilltmp_reg2(),
writable_spilltmp_reg(),
offset,
t0.to_reg(),
ty,
)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
spilltmp_reg2()
}
crate::ir::AtomicRmwOp::Xchg => {
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
Inst::Atomic {
op: AtomicOP::load_op(ty),
rd: writable_spilltmp_reg2(),
addr: p,
src: zero_reg(),
amo: AMO::SeqCst,
}
.emit(&[], sink, emit_info, state);
AtomicOP::merge(
writable_spilltmp_reg2(),
writable_spilltmp_reg(),
offset,
x,
ty,
)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
spilltmp_reg2()
}
};
Inst::Atomic {
op: AtomicOP::store_op(ty),
rd: t0,
addr: p,
src: store_value,
amo: AMO::SeqCst,
}
.emit(&[], sink, emit_info, state);
// If the store-conditional failed, retry.
Inst::CondBr {
taken: CondBrTarget::Label(retry),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::NotEqual,
rs1: t0.to_reg(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
}
&Inst::FcvtToInt {
is_sat,
rd,
rs,
is_signed,
in_type,
out_type,
tmp,
} => {
let label_nan = sink.get_label();
let label_jump_over = sink.get_label();
// Check whether the input is NaN.
Inst::emit_not_nan(rd, rs, in_type).emit(&[], sink, emit_info, state);
// If it is NaN, branch to the NaN handler.
Inst::CondBr {
taken: CondBrTarget::Label(label_nan),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::Equal,
rs2: zero_reg(),
rs1: rd.to_reg(),
},
}
.emit(&[], sink, emit_info, state);
if !is_sat {
let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8);
let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8);
if in_type == F32 {
Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.0), |_| {
writable_spilltmp_reg()
})
} else {
Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.0), |_| {
writable_spilltmp_reg()
})
}
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
let le_op = if in_type == F32 {
FpuOPRRR::FleS
} else {
FpuOPRRR::FleD
};
// rd := rs <= tmp
Inst::FpuRRR {
alu_op: le_op,
frm: None,
rd,
rs1: rs,
rs2: tmp.to_reg(),
}
.emit(&[], sink, emit_info, state);
Inst::TrapIf {
cc: IntCC::NotEqual,
rs1: rd.to_reg(),
rs2: zero_reg(),
trap_code: TrapCode::IntegerOverflow,
}
.emit(&[], sink, emit_info, state);
if in_type == F32 {
Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.1), |_| {
writable_spilltmp_reg()
})
} else {
Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.1), |_| {
writable_spilltmp_reg()
})
}
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
// rd := rs >= tmp
Inst::FpuRRR {
alu_op: le_op,
frm: None,
rd,
rs1: tmp.to_reg(),
rs2: rs,
}
.emit(&[], sink, emit_info, state);
Inst::TrapIf {
cc: IntCC::NotEqual,
rs1: rd.to_reg(),
rs2: zero_reg(),
trap_code: TrapCode::IntegerOverflow,
}
.emit(&[], sink, emit_info, state);
}
// convert to int normally.
Inst::FpuRR {
frm: Some(FRM::RTZ),
alu_op: FpuOPRR::float_convert_2_int_op(in_type, is_signed, out_type),
rd,
rs,
}
.emit(&[], sink, emit_info, state);
if out_type.bits() < 32 && is_signed {
// Load a mask for the value bits (everything except the sign bit).
Inst::load_constant_u32(
writable_spilltmp_reg(),
if 16 == out_type.bits() {
(u16::MAX >> 1) as u64
} else {
// I8
(u8::MAX >> 1) as u64
},
)
.into_iter()
.for_each(|x| x.emit(&[], sink, emit_info, state));
// Keep only the value bits.
Inst::AluRRR {
alu_op: AluOPRRR::And,
rd: writable_spilltmp_reg(),
rs1: rd.to_reg(),
rs2: spilltmp_reg(),
}
.emit(&[], sink, emit_info, state);
// Extract the sign bit.
Inst::AluRRImm12 {
alu_op: AluOPRRI::Srli,
rd: rd,
rs: rd.to_reg(),
imm12: Imm12::from_i16(31),
}
.emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Slli,
rd: rd,
rs: rd.to_reg(),
imm12: Imm12::from_i16(if 16 == out_type.bits() {
15
} else {
// I8
7
}),
}
.emit(&[], sink, emit_info, state);
// Combine the sign bit and the value bits into the result.
Inst::AluRRR {
alu_op: AluOPRRR::Or,
rd: rd,
rs1: rd.to_reg(),
rs2: spilltmp_reg(),
}
.emit(&[], sink, emit_info, state);
}
// We already have the result; jump over the NaN handler.
Inst::gen_jump(label_jump_over).emit(&[], sink, emit_info, state);
// The input was NaN: saturating conversions produce 0, otherwise we trap.
sink.bind_label(label_nan, &mut state.ctrl_plane);
if is_sat {
Inst::load_imm12(rd, Imm12::ZERO).emit(&[], sink, emit_info, state);
} else {
// Non-saturating conversion: trap with `BadConversionToInteger`.
Inst::Udf {
trap_code: TrapCode::BadConversionToInteger,
}
.emit(&[], sink, emit_info, state);
}
// bind jump_over
sink.bind_label(label_jump_over, &mut state.ctrl_plane);
}
&Inst::LoadExtName {
rd,
ref name,
offset,
} => {
let label_data = sink.get_label();
let label_end = sink.get_label();
// Load the value from a label
Inst::Load {
rd,
op: LoadOP::Ld,
flags: MemFlags::trusted(),
from: AMode::Label(label_data),
}
.emit(&[], sink, emit_info, state);
// Jump over the data
Inst::gen_jump(label_end).emit(&[], sink, emit_info, state);
sink.bind_label(label_data, &mut state.ctrl_plane);
sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
sink.put8(0);
sink.bind_label(label_end, &mut state.ctrl_plane);
}
&Inst::ElfTlsGetAddr { rd, ref name } => {
// RISC-V's TLS GD model is slightly different from other arches.
//
// We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
// of the address relative to the GOT entry. This relocation points to
// the symbol as usual.
//
// However, when loading the bottom 12 bits of the address, we need to
// use a label that points to the previous AUIPC instruction.
//
// label:
// auipc a0,0 # R_RISCV_TLS_GD_HI20 (symbol)
// addi a0,a0,0 # R_RISCV_PCREL_LO12_I (label)
//
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic
// Create the label that is going to be published to the final binary object.
let auipc_label = sink.get_label();
sink.bind_label(auipc_label, &mut state.ctrl_plane);
// Get the current PC.
sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
Inst::Auipc {
rd: rd,
imm: Imm20::from_i32(0),
}
.emit_uncompressed(sink, emit_info, state, start_off);
// The `addi` here points to the `auipc` label instead of directly to the symbol.
sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: rd,
rs: rd.to_reg(),
imm12: Imm12::from_i16(0),
}
.emit_uncompressed(sink, emit_info, state, start_off);
Inst::Call {
info: Box::new(CallInfo {
dest: ExternalName::LibCall(LibCall::ElfTlsGetAddr),
uses: smallvec![],
defs: smallvec![],
opcode: crate::ir::Opcode::TlsValue,
caller_callconv: CallConv::SystemV,
callee_callconv: CallConv::SystemV,
callee_pop_size: 0,
clobbers: PRegSet::empty(),
}),
}
.emit_uncompressed(sink, emit_info, state, start_off);
}
&Inst::TrapIf {
rs1,
rs2,
cc,
trap_code,
} => {
let label_end = sink.get_label();
let cond = IntegerCompare { kind: cc, rs1, rs2 };
// Jump over the trap if the condition is false.
Inst::CondBr {
taken: CondBrTarget::Label(label_end),
not_taken: CondBrTarget::Fallthrough,
kind: cond.inverse(),
}
.emit(&[], sink, emit_info, state);
Inst::Udf { trap_code }.emit(&[], sink, emit_info, state);
sink.bind_label(label_end, &mut state.ctrl_plane);
}
&Inst::Udf { trap_code } => {
sink.add_trap(trap_code);
if let Some(s) = state.take_stack_map() {
sink.add_stack_map(
StackMapExtent::UpcomingBytes(Inst::TRAP_OPCODE.len() as u32),
s,
);
}
sink.put_data(Inst::TRAP_OPCODE);
}
&Inst::AtomicLoad { rd, ty, p } => {
// emit the fence.
Inst::Fence {
pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
}
.emit(&[], sink, emit_info, state);
// Emit the load.
Inst::Load {
rd: rd,
op: LoadOP::from_type(ty),
flags: MemFlags::new(),
from: AMode::RegOffset(p, 0, ty),
}
.emit(&[], sink, emit_info, state);
Inst::Fence {
pred: Inst::FENCE_REQ_R,
succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
}
.emit(&[], sink, emit_info, state);
}
&Inst::AtomicStore { src, ty, p } => {
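// An atomic store is a release fence followed by a plain store, roughly:
//
// fence rw, w
// s{b,h,w,d} src, 0(p)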
Inst::Fence {
pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
succ: Inst::FENCE_REQ_W,
}
.emit(&[], sink, emit_info, state);
Inst::Store {
to: AMode::RegOffset(p, 0, ty),
op: StoreOP::from_type(ty),
flags: MemFlags::new(),
src,
}
.emit(&[], sink, emit_info, state);
}
&Inst::FloatRound {
op,
rd,
int_tmp,
f_tmp,
rs,
ty,
} => {
// This code is ported from the glibc ceil/floor/... implementations.
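// In pseudocode, the emitted sequence computes roughly:
//
// if x is NaN:
//     rd = x + x                     // quiet any signaling NaN
// else if |x| > 2^MANTISSA_DIGITS:
//     rd = x                         // already an integer
// else:
//     rd = copysign(int_to_float(float_to_int_rm(x)), x)
//
// where `rm` is the rounding mode selected by `op`.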
let label_nan = sink.get_label();
let label_x = sink.get_label();
let label_jump_over = sink.get_label();
// Check whether the input is NaN.
Inst::emit_not_nan(int_tmp, rs, ty).emit(&[], sink, emit_info, state);
Inst::CondBr {
taken: CondBrTarget::Label(label_nan),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::Equal,
rs1: int_tmp.to_reg(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
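// Returns the bit pattern of 2.0^MANTISSA_DIGITS in the given float type;
// any float whose magnitude is at least this large has no fractional part
// and therefore needs no rounding.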
fn max_value_need_round(ty: Type) -> u64 {
match ty {
F32 => {
let x: u64 = 1 << f32::MANTISSA_DIGITS;
let x = x as f32;
let x = u32::from_le_bytes(x.to_le_bytes());
x as u64
}
F64 => {
let x: u64 = 1 << f64::MANTISSA_DIGITS;
let x = x as f64;
u64::from_le_bytes(x.to_le_bytes())
}
_ => unreachable!(),
}
}
// Load the threshold above which values are already integral and need no rounding.
if ty == F32 {
Inst::load_fp_constant32(f_tmp, max_value_need_round(ty) as u32, &mut |_| {
writable_spilltmp_reg()
})
} else {
Inst::load_fp_constant64(f_tmp, max_value_need_round(ty), &mut |_| {
writable_spilltmp_reg()
})
}
.into_iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
// Get the absolute value.
Inst::emit_fabs(rd, rs, ty).emit(&[], sink, emit_info, state);
// branch if f_tmp < rd
Inst::FpuRRR {
frm: None,
alu_op: if ty == F32 {
FpuOPRRR::FltS
} else {
FpuOPRRR::FltD
},
rd: int_tmp,
rs1: f_tmp.to_reg(),
rs2: rd.to_reg(),
}
.emit(&[], sink, emit_info, state);
Inst::CondBr {
taken: CondBrTarget::Label(label_x),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::NotEqual,
rs1: int_tmp.to_reg(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
// Convert to an integer.
Inst::FpuRR {
alu_op: FpuOPRR::float_convert_2_int_op(ty, true, I64),
frm: Some(op.to_frm()),
rd: int_tmp,
rs: rs,
}
.emit(&[], sink, emit_info, state);
// Convert back to a float.
Inst::FpuRR {
alu_op: if ty == F32 {
FpuOPRR::FcvtSL
} else {
FpuOPRR::FcvtDL
},
frm: Some(op.to_frm()),
rd,
rs: int_tmp.to_reg(),
}
.emit(&[], sink, emit_info, state);
// copy sign.
Inst::FpuRRR {
alu_op: if ty == F32 {
FpuOPRRR::FsgnjS
} else {
FpuOPRRR::FsgnjD
},
frm: None,
rd,
rs1: rd.to_reg(),
rs2: rs,
}
.emit(&[], sink, emit_info, state);
// jump over.
Inst::gen_jump(label_jump_over).emit(&[], sink, emit_info, state);
// Here the input is NaN; adding it to itself quiets any signaling NaN.
sink.bind_label(label_nan, &mut state.ctrl_plane);
Inst::FpuRRR {
alu_op: if ty == F32 {
FpuOPRRR::FaddS
} else {
FpuOPRRR::FaddD
},
frm: None,
rd: rd,
rs1: rs,
rs2: rs,
}
.emit(&[], sink, emit_info, state);
Inst::gen_jump(label_jump_over).emit(&[], sink, emit_info, state);
// Here the input is already an integer; select the original x.
sink.bind_label(label_x, &mut state.ctrl_plane);
Inst::gen_move(rd, rs, ty).emit(&[], sink, emit_info, state);
sink.bind_label(label_jump_over, &mut state.ctrl_plane);
}
&Inst::FloatSelect {
op,
rd,
tmp,
rs1,
rs2,
ty,
} => {
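// Implements fmin/fmax with explicit NaN and signed-zero handling.
// In pseudocode, roughly:
//
// if rs1 is NaN or rs2 is NaN:
//     rd = a fixed NaN bit pattern
// else:
//     rd = fmin/fmax(rs1, rs2)
//     if rs1 == 0.0 && rs2 == 0.0:
//         // fix the sign: min(+0, -0) is -0 and max(+0, -0) is +0, so
//         // combine the raw bit patterns with OR (min) or AND (max).
//         rd = from_bits(op == Max ? bits(rs1) & bits(rs2)
//                                  : bits(rs1) | bits(rs2))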
let label_nan = sink.get_label();
let label_jump_over = sink.get_label();
// check if rs1 is nan.
Inst::emit_not_nan(tmp, rs1, ty).emit(&[], sink, emit_info, state);
Inst::CondBr {
taken: CondBrTarget::Label(label_nan),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::Equal,
rs1: tmp.to_reg(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
// check if rs2 is nan.
Inst::emit_not_nan(tmp, rs2, ty).emit(&[], sink, emit_info, state);
Inst::CondBr {
taken: CondBrTarget::Label(label_nan),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::Equal,
rs1: tmp.to_reg(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
// Here neither rs1 nor rs2 is NaN.
Inst::FpuRRR {
alu_op: op.to_fpuoprrr(ty),
frm: None,
rd: rd,
rs1: rs1,
rs2: rs2,
}
.emit(&[], sink, emit_info, state);
// Special handling for +0 and -0.
{
// Check whether rs1 and rs2 are both zero.
let label_done = sink.get_label();
{
// If rs1 is non-zero, the result is already correct; skip the sign fix-up.
let mut insts = Inst::emit_if_float_not_zero(
tmp,
rs1,
ty,
CondBrTarget::Label(label_done),
CondBrTarget::Fallthrough,
);
insts.extend(Inst::emit_if_float_not_zero(
tmp,
rs2,
ty,
CondBrTarget::Label(label_done),
CondBrTarget::Fallthrough,
));
insts
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
}
Inst::FpuRR {
alu_op: FpuOPRR::move_f_to_x_op(ty),
frm: None,
rd: tmp,
rs: rs1,
}
.emit(&[], sink, emit_info, state);
Inst::FpuRR {
alu_op: FpuOPRR::move_f_to_x_op(ty),
frm: None,
rd: writable_spilltmp_reg(),
rs: rs2,
}
.emit(&[], sink, emit_info, state);
Inst::AluRRR {
alu_op: if op == FloatSelectOP::Max {
AluOPRRR::And
} else {
AluOPRRR::Or
},
rd: tmp,
rs1: tmp.to_reg(),
rs2: spilltmp_reg(),
}
.emit(&[], sink, emit_info, state);
// move back to rd.
Inst::FpuRR {
alu_op: FpuOPRR::move_x_to_f_op(ty),
frm: None,
rd,
rs: tmp.to_reg(),
}
.emit(&[], sink, emit_info, state);
//
sink.bind_label(label_done, &mut state.ctrl_plane);
}
// We have the result; jump over.
Inst::gen_jump(label_jump_over).emit(&[], sink, emit_info, state);
// Here at least one input is NaN.
sink.bind_label(label_nan, &mut state.ctrl_plane);
op.snan_bits(tmp, ty)
.into_iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
// move to rd.
Inst::FpuRR {
alu_op: FpuOPRR::move_x_to_f_op(ty),
frm: None,
rd,
rs: tmp.to_reg(),
}
.emit(&[], sink, emit_info, state);
sink.bind_label(label_jump_over, &mut state.ctrl_plane);
}
&Inst::Popcnt {
sum,
tmp,
step,
rs,
ty,
} => {
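// Counts set bits with a test-and-shift loop. In pseudocode, roughly:
//
// sum = 0;
// step = ty.bits();
// tmp = 1 << (ty.bits() - 1);
// while step > 0 {
//     if rs & tmp != 0 { sum += 1; }
//     step -= 1;
//     tmp >>= 1;
// }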
// Initialize sum to 0.
Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state);
// Load the bit width of the type into step.
Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)).emit(
&[],
sink,
emit_info,
state,
);
// Set tmp to a mask with only the most significant bit of the type set.
Inst::load_imm12(tmp, Imm12::ONE).emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Slli,
rd: tmp,
rs: tmp.to_reg(),
imm12: Imm12::from_i16((ty.bits() - 1) as i16),
}
.emit(&[], sink, emit_info, state);
let label_done = sink.get_label();
let label_loop = sink.get_label();
sink.bind_label(label_loop, &mut state.ctrl_plane);
Inst::CondBr {
taken: CondBrTarget::Label(label_done),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::SignedLessThanOrEqual,
rs1: step.to_reg(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
// Test the current bit and increment sum if it is set.
{
Inst::AluRRR {
alu_op: AluOPRRR::And,
rd: writable_spilltmp_reg2(),
rs1: tmp.to_reg(),
rs2: rs,
}
.emit(&[], sink, emit_info, state);
let label_over = sink.get_label();
Inst::CondBr {
taken: CondBrTarget::Label(label_over),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::Equal,
rs1: zero_reg(),
rs2: spilltmp_reg2(),
},
}
.emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: sum,
rs: sum.to_reg(),
imm12: Imm12::ONE,
}
.emit(&[], sink, emit_info, state);
sink.bind_label(label_over, &mut state.ctrl_plane);
}
// Decrement step and shift tmp right by one.
{
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: step,
rs: step.to_reg(),
imm12: Imm12::from_i16(-1),
}
.emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Srli,
rd: tmp,
rs: tmp.to_reg(),
imm12: Imm12::ONE,
}
.emit(&[], sink, emit_info, state);
Inst::gen_jump(label_loop).emit(&[], sink, emit_info, state);
}
sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::Rev8 { rs, rd, tmp, step } => {
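// Byte-swaps the 64-bit value in `rs` with a shift-and-or loop.
// In pseudocode, roughly:
//
// rd = 0;
// tmp = rs;
// step = 56;
// while step >= 0 {
//     rd |= (tmp & 0xff) << step;
//     step -= 8;
//     tmp >>= 8;
// }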
// Initialize: rd = 0, tmp = rs.
Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state);
Inst::gen_move(tmp, rs, I64).emit(&[], sink, emit_info, state);
// Load 56 (the shift amount for the first byte processed) into step.
Inst::load_imm12(step, Imm12::from_i16(56)).emit(&[], sink, emit_info, state);
let label_done = sink.get_label();
let label_loop = sink.get_label();
sink.bind_label(label_loop, &mut state.ctrl_plane);
Inst::CondBr {
taken: CondBrTarget::Label(label_done),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::SignedLessThan,
rs1: step.to_reg(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Andi,
rd: writable_spilltmp_reg(),
rs: tmp.to_reg(),
imm12: Imm12::from_i16(255),
}
.emit(&[], sink, emit_info, state);
Inst::AluRRR {
alu_op: AluOPRRR::Sll,
rd: writable_spilltmp_reg(),
rs1: spilltmp_reg(),
rs2: step.to_reg(),
}
.emit(&[], sink, emit_info, state);
Inst::AluRRR {
alu_op: AluOPRRR::Or,
rd: rd,
rs1: rd.to_reg(),
rs2: spilltmp_reg(),
}
.emit(&[], sink, emit_info, state);
{
// Decrement step by 8.
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: step,
rs: step.to_reg(),
imm12: Imm12::from_i16(-8),
}
.emit(&[], sink, emit_info, state);
// Shift tmp right by 8 to expose the next byte.
Inst::AluRRImm12 {
alu_op: AluOPRRI::Srli,
rd: tmp,
rs: tmp.to_reg(),
imm12: Imm12::from_i16(8),
}
.emit(&[], sink, emit_info, state);
// loop.
Inst::gen_jump(label_loop).emit(&[], sink, emit_info, state);
}
sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::Cltz {
sum,
tmp,
step,
rs,
leading,
ty,
} => {
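// Counts leading (or trailing) zeros with a test-and-shift loop.
// In pseudocode, roughly:
//
// sum = 0;
// step = ty.bits();
// tmp = if leading { 1 << (ty.bits() - 1) } else { 1 };
// while step > 0 {
//     if rs & tmp != 0 { break; }
//     sum += 1;
//     step -= 1;
//     tmp = if leading { tmp >> 1 } else { tmp << 1 };
// }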
// Initialize sum to 0.
Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state);
// Load the bit width of the type into step.
Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)).emit(
&[],
sink,
emit_info,
state,
);
// Set tmp to the starting bit mask: the most significant bit for leading-zero counts, bit 0 for trailing-zero counts.
Inst::load_imm12(tmp, Imm12::ONE).emit(&[], sink, emit_info, state);
if leading {
Inst::AluRRImm12 {
alu_op: AluOPRRI::Slli,
rd: tmp,
rs: tmp.to_reg(),
imm12: Imm12::from_i16((ty.bits() - 1) as i16),
}
.emit(&[], sink, emit_info, state);
}
let label_done = sink.get_label();
let label_loop = sink.get_label();
sink.bind_label(label_loop, &mut state.ctrl_plane);
Inst::CondBr {
taken: CondBrTarget::Label(label_done),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::SignedLessThanOrEqual,
rs1: step.to_reg(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
// Test the current bit; stop as soon as a set bit is found, otherwise increment sum.
{
Inst::AluRRR {
alu_op: AluOPRRR::And,
rd: writable_spilltmp_reg2(),
rs1: tmp.to_reg(),
rs2: rs,
}
.emit(&[], sink, emit_info, state);
Inst::CondBr {
taken: CondBrTarget::Label(label_done),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::NotEqual,
rs1: zero_reg(),
rs2: spilltmp_reg2(),
},
}
.emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: sum,
rs: sum.to_reg(),
imm12: Imm12::ONE,
}
.emit(&[], sink, emit_info, state);
}
// Decrement step and advance the bit mask.
{
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: step,
rs: step.to_reg(),
imm12: Imm12::from_i16(-1),
}
.emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: if leading {
AluOPRRI::Srli
} else {
AluOPRRI::Slli
},
rd: tmp,
rs: tmp.to_reg(),
imm12: Imm12::ONE,
}
.emit(&[], sink, emit_info, state);
Inst::gen_jump(label_loop).emit(&[], sink, emit_info, state);
}
sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::Brev8 {
rs,
ty,
step,
tmp,
tmp2,
rd,
} => {
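// Reverses the bits within each byte of `rs`: `tmp` walks the source bits
// down from the most significant bit while `tmp2` walks the matching
// destination bits. In pseudocode, roughly:
//
// rd = 0;
// step = ty.bits();
// tmp = 1 << (ty.bits() - 1);
// tmp2 = 1 << (ty.bits() - 8);
// while step > 0 {
//     if rs & tmp != 0 { rd |= tmp2; }
//     step -= 1;
//     tmp >>= 1;
//     if step % 8 == 0 { tmp2 >>= 15; } else { tmp2 <<= 1; }
// }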
Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state);
Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)).emit(
&[],
sink,
emit_info,
state,
);
// Set tmp to a mask for the most significant bit, and tmp2 to a mask for the lowest bit of the most significant byte.
Inst::load_imm12(tmp, Imm12::ONE).emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Slli,
rd: tmp,
rs: tmp.to_reg(),
imm12: Imm12::from_i16((ty.bits() - 1) as i16),
}
.emit(&[], sink, emit_info, state);
Inst::load_imm12(tmp2, Imm12::ONE).emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Slli,
rd: tmp2,
rs: tmp2.to_reg(),
imm12: Imm12::from_i16((ty.bits() - 8) as i16),
}
.emit(&[], sink, emit_info, state);
let label_done = sink.get_label();
let label_loop = sink.get_label();
sink.bind_label(label_loop, &mut state.ctrl_plane);
Inst::CondBr {
taken: CondBrTarget::Label(label_done),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::SignedLessThanOrEqual,
rs1: step.to_reg(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
// Test the current source bit and, if set, set the corresponding destination bit in rd.
{
Inst::AluRRR {
alu_op: AluOPRRR::And,
rd: writable_spilltmp_reg2(),
rs1: tmp.to_reg(),
rs2: rs,
}
.emit(&[], sink, emit_info, state);
let label_over = sink.get_label();
Inst::CondBr {
taken: CondBrTarget::Label(label_over),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::Equal,
rs1: zero_reg(),
rs2: spilltmp_reg2(),
},
}
.emit(&[], sink, emit_info, state);
Inst::AluRRR {
alu_op: AluOPRRR::Or,
rd: rd,
rs1: rd.to_reg(),
rs2: tmp2.to_reg(),
}
.emit(&[], sink, emit_info, state);
sink.bind_label(label_over, &mut state.ctrl_plane);
}
// Decrement step and advance the source and destination bit masks.
{
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: step,
rs: step.to_reg(),
imm12: Imm12::from_i16(-1),
}
.emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Srli,
rd: tmp,
rs: tmp.to_reg(),
imm12: Imm12::ONE,
}
.emit(&[], sink, emit_info, state);
{
// Advance tmp2 to the destination bit for the next source bit:
// if (step % 8 == 0) then tmp2 = tmp2 >> 15
// if (step % 8 != 0) then tmp2 = tmp2 << 1
let label_over = sink.get_label();
let label_sll_1 = sink.get_label();
Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8)).emit(
&[],
sink,
emit_info,
state,
);
Inst::AluRRR {
alu_op: AluOPRRR::Rem,
rd: writable_spilltmp_reg2(),
rs1: step.to_reg(),
rs2: spilltmp_reg2(),
}
.emit(&[], sink, emit_info, state);
Inst::CondBr {
taken: CondBrTarget::Label(label_sll_1),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::NotEqual,
rs1: spilltmp_reg2(),
rs2: zero_reg(),
},
}
.emit(&[], sink, emit_info, state);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Srli,
rd: tmp2,
rs: tmp2.to_reg(),
imm12: Imm12::from_i16(15),
}
.emit(&[], sink, emit_info, state);
Inst::gen_jump(label_over).emit(&[], sink, emit_info, state);
sink.bind_label(label_sll_1, &mut state.ctrl_plane);
Inst::AluRRImm12 {
alu_op: AluOPRRI::Slli,
rd: tmp2,
rs: tmp2.to_reg(),
imm12: Imm12::ONE,
}
.emit(&[], sink, emit_info, state);
sink.bind_label(label_over, &mut state.ctrl_plane);
}
Inst::gen_jump(label_loop).emit(&[], sink, emit_info, state);
}
sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::StackProbeLoop {
guard_size,
probe_count,
tmp: guard_size_tmp,
} => {
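// Probes the stack one guard-size step at a time below SP.
// In pseudocode, roughly:
//
// step = guard_size * probe_count;
// while step > guard_size {
//     *(sp - step) = 0;            // byte store touches the page
//     step -= guard_size;
// }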
let step = writable_spilltmp_reg();
Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
let loop_start = sink.get_label();
let label_done = sink.get_label();
sink.bind_label(loop_start, &mut state.ctrl_plane);
Inst::CondBr {
taken: CondBrTarget::Label(label_done),
not_taken: CondBrTarget::Fallthrough,
kind: IntegerCompare {
kind: IntCC::UnsignedLessThanOrEqual,
rs1: step.to_reg(),
rs2: guard_size_tmp.to_reg(),
},
}
.emit(&[], sink, emit_info, state);
// Compute the probe address: sp - step.
Inst::AluRRR {
alu_op: AluOPRRR::Sub,
rd: writable_spilltmp_reg2(),
rs1: stack_reg(),
rs2: step.to_reg(),
}
.emit(&[], sink, emit_info, state);
Inst::Store {
to: AMode::RegOffset(spilltmp_reg2(), 0, I8),
op: StoreOP::Sb,
flags: MemFlags::new(),
src: zero_reg(),
}
.emit(&[], sink, emit_info, state);
// Decrement step by the guard size.
Inst::AluRRR {
alu_op: AluOPRRR::Sub,
rd: step,
rs1: step.to_reg(),
rs2: guard_size_tmp.to_reg(),
}
.emit(&[], sink, emit_info, state);
Inst::gen_jump(loop_start).emit(&[], sink, emit_info, state);
sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::VecAluRRRImm5 {
op,
vd,
vd_src,
imm,
vs2,
ref mask,
..
} => {
debug_assert_eq!(vd.to_reg(), vd_src);
sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
}
&Inst::VecAluRRRR {
op,
vd,
vd_src,
vs1,
vs2,
ref mask,
..
} => {
debug_assert_eq!(vd.to_reg(), vd_src);
sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
}
&Inst::VecAluRRR {
op,
vd,
vs1,
vs2,
ref mask,
..
} => {
sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
}
&Inst::VecAluRRImm5 {
op,
vd,
imm,
vs2,
ref mask,
..
} => {
sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
}
&Inst::VecAluRR {
op,
vd,
vs,
ref mask,
..
} => {
sink.put4(encode_valu_rr(op, vd, vs, *mask));
}
&Inst::VecAluRImm5 {
op,
vd,
imm,
ref mask,
..
} => {
sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
}
&Inst::VecSetState { rd, ref vstate } => {
sink.put4(encode_vcfg_imm(
0x57,
rd.to_reg(),
vstate.avl.unwrap_static(),
&vstate.vtype,
));
// Update the current vector emit state.
state.vstate = EmitVState::Known(vstate.clone());
}
&Inst::VecLoad {
eew,
to,
ref from,
ref mask,
flags,
..
} => {
// Vector loads don't support immediate offsets, so we may need to materialize the address in a register.
let addr = match from {
VecAMode::UnitStride { base } => {
let base_reg = base.get_base_register();
let offset = base.get_offset_with_state(state);
// Reg+0 Offset can be directly encoded
if let (Some(base_reg), 0) = (base_reg, offset) {
base_reg
} else {
// Otherwise, load the address into a register and load from that.
let tmp = writable_spilltmp_reg();
Inst::LoadAddr {
rd: tmp,
mem: base.clone(),
}
.emit(&[], sink, emit_info, state);
tmp.to_reg()
}
}
};
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put4(encode_vmem_load(
0x07,
to.to_reg(),
eew,
addr,
from.lumop(),
*mask,
from.mop(),
from.nf(),
));
}
&Inst::VecStore {
eew,
ref to,
from,
ref mask,
flags,
..
} => {
// Vector stores don't support immediate offsets, so we may need to materialize the address in a register.
let addr = match to {
VecAMode::UnitStride { base } => {
let base_reg = base.get_base_register();
let offset = base.get_offset_with_state(state);
// Reg+0 Offset can be directly encoded
if let (Some(base_reg), 0) = (base_reg, offset) {
base_reg
} else {
// Otherwise, load the address into a register and store through it.
let tmp = writable_spilltmp_reg();
Inst::LoadAddr {
rd: tmp,
mem: base.clone(),
}
.emit(&[], sink, emit_info, state);
tmp.to_reg()
}
}
};
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
// Register the offset at which the actual store instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
sink.put4(encode_vmem_store(
0x27,
from,
eew,
addr,
to.sumop(),
*mask,
to.mop(),
to.nf(),
));
}
};
}
fn allocate(self, allocs: &mut AllocationConsumer) -> Self {
fn alloc_value_regs(
origin: &ValueRegs<Reg>,
alloc: &mut AllocationConsumer,
) -> ValueRegs<Reg> {
match origin.regs().len() {
1 => ValueRegs::one(alloc.next(origin.regs()[0])),
2 => ValueRegs::two(alloc.next(origin.regs()[0]), alloc.next(origin.regs()[1])),
_ => unreachable!(),
}
}
fn alloc_writable_value_regs(
origin: &ValueRegs<Writable<Reg>>,
alloc: &mut AllocationConsumer,
) -> ValueRegs<Writable<Reg>> {
alloc_value_regs(&origin.map(|r| r.to_reg()), alloc).map(Writable::from_reg)
}
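// Note: `allocs.next*()` consumes allocations positionally, so the calls in
// each arm below must mirror the order in which that instruction's operands
// were collected.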
match self {
Inst::Nop0 => self,
Inst::Nop4 => self,
Inst::RawData { .. } => self,
Inst::Lui { rd, imm } => Inst::Lui {
rd: allocs.next_writable(rd),
imm,
},
Inst::LoadInlineConst { rd, ty, imm } => Inst::LoadInlineConst {
rd: allocs.next_writable(rd),
ty,
imm,
},
Inst::FpuRR {
frm,
alu_op,
rd,
rs,
} => Inst::FpuRR {
rs: allocs.next(rs),
rd: allocs.next_writable(rd),
frm,
alu_op,
},
Inst::FpuRRRR {
alu_op,
rd,
rs1,
rs2,
rs3,
frm,
} => Inst::FpuRRRR {
rs1: allocs.next(rs1),
rs2: allocs.next(rs2),
rs3: allocs.next(rs3),
rd: allocs.next_writable(rd),
alu_op,
frm,
},
Inst::FpuRRR {
alu_op,
frm,
rd,
rs1,
rs2,
} => Inst::FpuRRR {
alu_op,
frm,
rs1: allocs.next(rs1),
rs2: allocs.next(rs2),
rd: allocs.next_writable(rd),
},
Inst::Unwind { .. } => self,
Inst::DummyUse { reg } => Inst::DummyUse {
reg: allocs.next(reg),
},
Inst::AluRRR {
alu_op,
rd,
rs1,
rs2,
} => Inst::AluRRR {
alu_op,
rs1: allocs.next(rs1),
rs2: allocs.next(rs2),
rd: allocs.next_writable(rd),
},
Inst::AluRRImm12 {
alu_op,
rd,
rs,
imm12,
} => Inst::AluRRImm12 {
alu_op,
rs: allocs.next(rs),
rd: allocs.next_writable(rd),
imm12,
},
Inst::CsrReg { op, rd, rs, csr } => Inst::CsrReg {
op,
rs: allocs.next(rs),
rd: allocs.next_writable(rd),
csr,
},
Inst::CsrImm { op, rd, csr, imm } => Inst::CsrImm {
op,
rd: allocs.next_writable(rd),
csr,
imm,
},
Inst::Load {
rd,
op,
from,
flags,
} => Inst::Load {
from: from.clone().with_allocs(allocs),
rd: allocs.next_writable(rd),
op,
flags,
},
Inst::Store { op, src, flags, to } => Inst::Store {
op,
flags,
to: to.clone().with_allocs(allocs),
src: allocs.next(src),
},
Inst::Args { .. } => self,
Inst::Rets { .. } => self,
Inst::Ret { .. } => self,
Inst::Extend {
rd,
rn,
signed,
from_bits,
to_bits,
} => Inst::Extend {
rn: allocs.next(rn),
rd: allocs.next_writable(rd),
signed,
from_bits,
to_bits,
},
Inst::AdjustSp { .. } => self,
Inst::Call { .. } => self,
Inst::CallInd { mut info } => {
info.rn = allocs.next(info.rn);
Inst::CallInd { info }
}
Inst::ReturnCall { callee, info } => {
for u in &info.uses {
let _ = allocs.next(u.vreg);
}
Inst::ReturnCall { callee, info }
}
Inst::ReturnCallInd { callee, info } => {
let callee = allocs.next(callee);
for u in &info.uses {
let _ = allocs.next(u.vreg);
}
Inst::ReturnCallInd { callee, info }
}
Inst::Jal { .. } => self,
Inst::CondBr {
taken,
not_taken,
mut kind,
} => {
kind.rs1 = allocs.next(kind.rs1);
kind.rs2 = allocs.next(kind.rs2);
Inst::CondBr {
taken,
not_taken,
kind,
}
}
Inst::Mov { rd, rm, ty } => Inst::Mov {
ty,
rm: allocs.next(rm),
rd: allocs.next_writable(rd),
},
Inst::MovFromPReg { rd, rm } => {
debug_assert!([px_reg(2), px_reg(8)].contains(&rm));
let rd = allocs.next_writable(rd);
Inst::MovFromPReg { rd, rm }
}
Inst::BrTable {
index,
tmp1,
tmp2,
targets,
} => Inst::BrTable {
index: allocs.next(index),
tmp1: allocs.next_writable(tmp1),
tmp2: allocs.next_writable(tmp2),
targets,
},
Inst::VirtualSPOffsetAdj { .. } => self,
Inst::Atomic {
op,
rd,
addr,
src,
amo,
} => Inst::Atomic {
op,
amo,
addr: allocs.next(addr),
src: allocs.next(src),
rd: allocs.next_writable(rd),
},
Inst::Fence { .. } => self,
Inst::Auipc { rd, imm } => Inst::Auipc {
rd: allocs.next_writable(rd),
imm,
},
Inst::LoadAddr { rd, mem } => Inst::LoadAddr {
mem: mem.with_allocs(allocs),
rd: allocs.next_writable(rd),
},
Inst::Select {
ref dst,
condition,
ref x,
ref y,
} => {
let mut condition: IntegerCompare = condition.clone();
condition.rs1 = allocs.next(condition.rs1);
condition.rs2 = allocs.next(condition.rs2);
let x = alloc_value_regs(x, allocs);
let y = alloc_value_regs(y, allocs);
let dst = alloc_writable_value_regs(dst, allocs);
Inst::Select {
dst,
condition,
x,
y,
}
}
Inst::Jalr { rd, base, offset } => {
// `base` is expected to already be a real register, so no allocation is consumed for it.
debug_assert!(base.is_real());
Inst::Jalr {
rd: allocs.next_writable(rd),
base,
offset,
}
}
Inst::EBreak => self,
Inst::Icmp {
cc,
rd,
ref a,
ref b,
ty,
} => Inst::Icmp {
cc,
a: alloc_value_regs(a, allocs),
b: alloc_value_regs(b, allocs),
rd: allocs.next_writable(rd),
ty,
},
Inst::AtomicCas {
offset,
t0,
dst,
e,
addr,
v,
ty,
} => Inst::AtomicCas {
ty,
offset: allocs.next(offset),
e: allocs.next(e),
addr: allocs.next(addr),
v: allocs.next(v),
t0: allocs.next_writable(t0),
dst: allocs.next_writable(dst),
},
Inst::AtomicRmwLoop {
offset,
op,
dst,
ty,
p,
x,
t0,
} => Inst::AtomicRmwLoop {
op,
ty,
offset: allocs.next(offset),
p: allocs.next(p),
x: allocs.next(x),
t0: allocs.next_writable(t0),
dst: allocs.next_writable(dst),
},
Inst::FcvtToInt {
is_sat,
rd,
rs,
is_signed,
in_type,
out_type,
tmp,
} => Inst::FcvtToInt {
is_sat,
is_signed,
in_type,
out_type,
rs: allocs.next(rs),
tmp: allocs.next_writable(tmp),
rd: allocs.next_writable(rd),
},
Inst::LoadExtName { rd, name, offset } => Inst::LoadExtName {
rd: allocs.next_writable(rd),
name,
offset,
},
Inst::ElfTlsGetAddr { rd, name } => {
let rd = allocs.next_writable(rd);
debug_assert_eq!(a0(), rd.to_reg());
Inst::ElfTlsGetAddr { rd, name }
}
Inst::TrapIf {
rs1,
rs2,
cc,
trap_code,
} => Inst::TrapIf {
rs1: allocs.next(rs1),
rs2: allocs.next(rs2),
cc,
trap_code,
},
Inst::Udf { .. } => self,
Inst::AtomicLoad { rd, ty, p } => Inst::AtomicLoad {
ty,
p: allocs.next(p),
rd: allocs.next_writable(rd),
},
Inst::AtomicStore { src, ty, p } => Inst::AtomicStore {
ty,
src: allocs.next(src),
p: allocs.next(p),
},
Inst::FloatRound {
op,
rd,
int_tmp,
f_tmp,
rs,
ty,
} => Inst::FloatRound {
op,
ty,
rs: allocs.next(rs),
int_tmp: allocs.next_writable(int_tmp),
f_tmp: allocs.next_writable(f_tmp),
rd: allocs.next_writable(rd),
},
Inst::FloatSelect {
op,
rd,
tmp,
rs1,
rs2,
ty,
} => Inst::FloatSelect {
op,
ty,
rs1: allocs.next(rs1),
rs2: allocs.next(rs2),
tmp: allocs.next_writable(tmp),
rd: allocs.next_writable(rd),
},
Inst::Popcnt {
sum,
tmp,
step,
rs,
ty,
} => Inst::Popcnt {
rs: allocs.next(rs),
tmp: allocs.next_writable(tmp),
step: allocs.next_writable(step),
sum: allocs.next_writable(sum),
ty,
},
Inst::Rev8 { rs, rd, tmp, step } => Inst::Rev8 {
rs: allocs.next(rs),
tmp: allocs.next_writable(tmp),
step: allocs.next_writable(step),
rd: allocs.next_writable(rd),
},
Inst::Cltz {
sum,
tmp,
step,
rs,
leading,
ty,
} => Inst::Cltz {
rs: allocs.next(rs),
tmp: allocs.next_writable(tmp),
step: allocs.next_writable(step),
sum: allocs.next_writable(sum),
leading,
ty,
},
Inst::Brev8 {
rs,
ty,
step,
tmp,
tmp2,
rd,
} => Inst::Brev8 {
rs: allocs.next(rs),
step: allocs.next_writable(step),
tmp: allocs.next_writable(tmp),
tmp2: allocs.next_writable(tmp2),
rd: allocs.next_writable(rd),
ty,
},
Inst::StackProbeLoop { .. } => self,
Inst::VecAluRRRImm5 {
op,
vd,
vd_src,
imm,
vs2,
mask,
vstate,
} => Inst::VecAluRRRImm5 {
op,
vs2: allocs.next(vs2),
vd_src: allocs.next(vd_src),
vd: allocs.next_writable(vd),
mask: mask.with_allocs(allocs),
imm,
vstate,
},
Inst::VecAluRRRR {
op,
vd,
vd_src,
vs1,
vs2,
mask,
vstate,
} => Inst::VecAluRRRR {
op,
vs1: allocs.next(vs1),
vs2: allocs.next(vs2),
vd_src: allocs.next(vd_src),
vd: allocs.next_writable(vd),
mask: mask.with_allocs(allocs),
vstate,
},
Inst::VecAluRRR {
op,
vd,
vs1,
vs2,
mask,
vstate,
} => Inst::VecAluRRR {
op,
vs1: allocs.next(vs1),
vs2: allocs.next(vs2),
vd: allocs.next_writable(vd),
mask: mask.with_allocs(allocs),
vstate,
},
Inst::VecAluRRImm5 {
op,
vd,
imm,
vs2,
mask,
vstate,
} => Inst::VecAluRRImm5 {
op,
imm,
vs2: allocs.next(vs2),
vd: allocs.next_writable(vd),
mask: mask.with_allocs(allocs),
vstate,
},
Inst::VecAluRR {
op,
vd,
vs,
mask,
vstate,
} => Inst::VecAluRR {
op,
vs: allocs.next(vs),
vd: allocs.next_writable(vd),
mask: mask.with_allocs(allocs),
vstate,
},
Inst::VecAluRImm5 {
op,
vd,
imm,
mask,
vstate,
} => Inst::VecAluRImm5 {
vd: allocs.next_writable(vd),
mask: mask.with_allocs(allocs),
op,
imm,
vstate,
},
Inst::VecSetState { rd, vstate } => Inst::VecSetState {
rd: allocs.next_writable(rd),
vstate,
},
Inst::VecLoad {
eew,
to,
from,
mask,
flags,
vstate,
} => Inst::VecLoad {
eew,
from: from.clone().with_allocs(allocs),
to: allocs.next_writable(to),
mask: mask.with_allocs(allocs),
flags,
vstate,
},
Inst::VecStore {
eew,
to,
from,
mask,
flags,
vstate,
} => Inst::VecStore {
eew,
to: to.clone().with_allocs(allocs),
from: allocs.next(from),
mask: mask.with_allocs(allocs),
flags,
vstate,
},
}
}
}
fn emit_return_call_common_sequence(
sink: &mut MachBuffer<Inst>,
emit_info: &EmitInfo,
state: &mut EmitState,
new_stack_arg_size: u32,
old_stack_arg_size: u32,
) {
// We are emitting a dynamic number of instructions and might need an
// island. We emit four instructions regardless of how many stack arguments
// we have, up to two instructions for the actual call, and then two
// instructions per word of stack argument space.
let new_stack_words = new_stack_arg_size / 8;
let insts = 4 + 2 + 2 * new_stack_words;
let space_needed = insts * u32::try_from(Inst::UNCOMPRESSED_INSTRUCTION_SIZE).unwrap();
if sink.island_needed(space_needed) {
let jump_around_label = sink.get_label();
Inst::gen_jump(jump_around_label).emit(&[], sink, emit_info, state);
sink.emit_island(space_needed + 4, &mut state.ctrl_plane);
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
}
// Copy the new frame on top of our current frame.
//
// The current stack layout is the following:
//
// | ... |
// +---------------------+
// | ... |
// | stack arguments |
// | ... |
// current | return address |
// frame | old FP | <-- FP
// | ... |
// | old stack slots |
// | ... |
// +---------------------+
// | ... |
// new | new stack arguments |
// frame | ... | <-- SP
// +---------------------+
//
// We need to restore the old FP, restore the return address from the stack
// to the link register, copy the new stack arguments over the old stack
// arguments, adjust SP to point to the new stack arguments, and then jump
// to the callee (which will push the old FP and RA again). Note that the
// actual jump happens outside this helper function.
assert_eq!(
new_stack_arg_size % 8,
0,
"size of new stack arguments must be 8-byte aligned"
);
// The delta from our frame pointer to the (eventual) stack pointer value
// when we jump to the tail callee. This is the difference in size of stack
// arguments as well as accounting for the two words we pushed onto the
// stack upon entry to this function (the return address and old frame
// pointer).
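// For example (hypothetical sizes): with old_stack_arg_size = 16 and
// new_stack_arg_size = 8, fp_to_callee_sp = 16 - 8 + 16 = 24, so the
// callee's SP is established below at FP + 24.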
let fp_to_callee_sp = i64::from(old_stack_arg_size) - i64::from(new_stack_arg_size) + 16;
let tmp1 = regs::writable_spilltmp_reg();
let tmp2 = regs::writable_spilltmp_reg2();
// Restore the return address to the link register, and load the old FP into
// a temporary register.
//
// We can't put the old FP into the FP register until after we copy the
// stack arguments into place, since that uses address modes that are
// relative to our current FP.
//
// Note that the FP is saved in the function prologue for all non-leaf
// functions, even when `preserve_frame_pointers=false`. Note also that
// `return_call` instructions make it so that a function is considered
// non-leaf. Therefore we always have an FP to restore here.
Inst::gen_load(
writable_link_reg(),
AMode::FPOffset(8, I64),
I64,
MemFlags::trusted(),
)
.emit(&[], sink, emit_info, state);
Inst::gen_load(tmp1, AMode::FPOffset(0, I64), I64, MemFlags::trusted()).emit(
&[],
sink,
emit_info,
state,
);
// Copy the new stack arguments over the old stack arguments.
for i in (0..new_stack_words).rev() {
// Load the `i`th new stack argument word from the temporary stack
// space.
Inst::gen_load(
tmp2,
AMode::SPOffset(i64::from(i * 8), types::I64),
types::I64,
ir::MemFlags::trusted(),
)
.emit(&[], sink, emit_info, state);
// Store it to its final destination on the stack, overwriting our
// current frame.
Inst::gen_store(
AMode::FPOffset(fp_to_callee_sp + i64::from(i * 8), types::I64),
tmp2.to_reg(),
types::I64,
ir::MemFlags::trusted(),
)
.emit(&[], sink, emit_info, state);
}
// Initialize the SP for the tail callee, deallocating the temporary stack
// argument space and our current frame at the same time.
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd: regs::writable_stack_reg(),
rs: regs::fp_reg(),
imm12: Imm12::maybe_from_i64(fp_to_callee_sp).unwrap(),
}
.emit(&[], sink, emit_info, state);
// Move the old FP value from the temporary into the FP register.
Inst::Mov {
ty: types::I64,
rd: regs::writable_fp_reg(),
rm: tmp1.to_reg(),
}
.emit(&[], sink, emit_info, state);
state.virtual_sp_offset -= i64::from(new_stack_arg_size);
trace!(
"return_call[_ind] adjusts virtual sp offset by {} -> {}",
new_stack_arg_size,
state.virtual_sp_offset
);
}