| //! Riscv64 ISA: binary code emission. |
| |
| use crate::binemit::StackMap; |
| use crate::ir::{self, LibCall, RelSourceLoc, TrapCode}; |
| use crate::isa::riscv64::inst::*; |
| use crate::isa::riscv64::lower::isle::generated_code::{ |
| CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, ZcbMemOp, |
| }; |
| use crate::machinst::{AllocationConsumer, Reg, Writable}; |
| use crate::trace; |
| use cranelift_control::ControlPlane; |
| use regalloc2::Allocation; |
| |
/// Constant emission-time information: compiler-wide settings plus the
/// RISC-V specific ISA flags (used e.g. to decide whether compressed
/// encodings such as Zca/Zcb/Zcd are available).
pub struct EmitInfo {
    /// Shared (target-independent) compilation flags.
    shared_flag: settings::Flags,
    /// RISC-V specific ISA flags (extension availability: M, Zba, Zbb, Zca, ...).
    isa_flags: super::super::riscv_settings::Flags,
}
| |
impl EmitInfo {
    /// Create a new `EmitInfo` from the shared compilation flags and the
    /// RISC-V ISA-specific flags.
    pub(crate) fn new(
        shared_flag: settings::Flags,
        isa_flags: super::super::riscv_settings::Flags,
    ) -> Self {
        Self {
            shared_flag,
            isa_flags,
        }
    }
}
| |
| pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 { |
| u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() |
| } |
| |
| pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 { |
| let real_reg = m.to_real_reg().unwrap().hw_enc(); |
| debug_assert!(real_reg >= 8 && real_reg < 16); |
| let compressed_reg = real_reg - 8; |
| u32::try_from(compressed_reg).unwrap() |
| } |
| |
/// Emission-time knowledge about the state of the vector unit.
#[derive(Clone, Debug, PartialEq, Default)]
pub enum EmitVState {
    /// The vector state is not known (e.g. at the start of a basic block,
    /// where control flow may arrive with any vector configuration).
    #[default]
    Unknown,
    /// The vector state is known to be exactly this `VState`.
    Known(VState),
}
| |
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
    /// Additional offset of the virtual stack pointer relative to the nominal
    /// stack pointer; starts at 0 (presumably adjusted via
    /// `Inst::VirtualSPOffsetAdj` — confirm in `emit_uncompressed`).
    pub(crate) virtual_sp_offset: i64,
    /// Distance from the nominal stack pointer to the frame pointer;
    /// initialized from the callee's frame size (see `MachInstEmitState::new`).
    pub(crate) nominal_sp_to_fp: i64,
    /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
    stack_map: Option<StackMap>,
    /// Current source-code location corresponding to instruction to be emitted.
    cur_srcloc: RelSourceLoc,
    /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
    /// optimized away at compiletime. See [cranelift_control].
    ctrl_plane: ControlPlane,
    /// Vector State
    /// Controls the current state of the vector unit at the emission point.
    vstate: EmitVState,
}
| |
impl EmitState {
    /// Take (and clear) the pending safepoint stack map, if one was
    /// registered via `pre_safepoint()`.
    fn take_stack_map(&mut self) -> Option<StackMap> {
        self.stack_map.take()
    }

    /// Reset per-instruction state after an instruction has been emitted.
    fn clear_post_insn(&mut self) {
        self.stack_map = None;
    }

    /// The source location associated with the instruction currently being
    /// emitted, as set by `pre_sourceloc()`.
    fn cur_srcloc(&self) -> RelSourceLoc {
        self.cur_srcloc
    }
}
| |
impl MachInstEmitState<Inst> for EmitState {
    fn new(
        abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
        ctrl_plane: ControlPlane,
    ) -> Self {
        EmitState {
            virtual_sp_offset: 0,
            // FP sits a full frame above the nominal SP.
            nominal_sp_to_fp: abi.frame_size() as i64,
            stack_map: None,
            cur_srcloc: RelSourceLoc::default(),
            ctrl_plane,
            // Nothing is known about the vector unit at function entry.
            vstate: EmitVState::Unknown,
        }
    }

    // Record the stack map to attach to the upcoming safepoint instruction.
    fn pre_safepoint(&mut self, stack_map: StackMap) {
        self.stack_map = Some(stack_map);
    }

    // Record the source location for the next emitted instruction.
    fn pre_sourceloc(&mut self, srcloc: RelSourceLoc) {
        self.cur_srcloc = srcloc;
    }

    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
        &mut self.ctrl_plane
    }

    fn take_ctrl_plane(self) -> ControlPlane {
        self.ctrl_plane
    }

    fn on_new_block(&mut self) {
        // Reset the vector state: control flow may enter this block with the
        // vector unit in any configuration.
        self.vstate = EmitVState::Unknown;
    }
}
| |
| impl Inst { |
| /// Load int mask. |
| /// If ty is int then 0xff in rd. |
| pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> { |
| let mut insts = SmallInstVec::new(); |
| assert!(ty.is_int() && ty.bits() <= 64); |
| match ty { |
| I64 => { |
| insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1))); |
| } |
| I32 | I16 => { |
| insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1))); |
| insts.push(Inst::Extend { |
| rd: rd, |
| rn: rd.to_reg(), |
| signed: false, |
| from_bits: ty.bits() as u8, |
| to_bits: 64, |
| }); |
| } |
| I8 => { |
| insts.push(Inst::load_imm12(rd, Imm12::from_i16(255))); |
| } |
| _ => unreachable!("ty:{:?}", ty), |
| } |
| insts |
| } |
    /// Construct a bitwise NOT of `rs` into `rd`, implemented as
    /// `xori rd, rs, -1` (XOR with all-ones inverts every bit).
    pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Xori,
            rd,
            rs,
            imm12: Imm12::from_i16(-1),
        }
    }
| |
| // emit a float is not a nan. |
| pub(crate) fn emit_not_nan(rd: Writable<Reg>, rs: Reg, ty: Type) -> Inst { |
| Inst::FpuRRR { |
| alu_op: if ty == F32 { |
| FpuOPRRR::FeqS |
| } else { |
| FpuOPRRR::FeqD |
| }, |
| frm: None, |
| rd: rd, |
| rs1: rs, |
| rs2: rs, |
| } |
| } |
| |
| pub(crate) fn emit_fabs(rd: Writable<Reg>, rs: Reg, ty: Type) -> Inst { |
| Inst::FpuRRR { |
| alu_op: if ty == F32 { |
| FpuOPRRR::FsgnjxS |
| } else { |
| FpuOPRRR::FsgnjxD |
| }, |
| frm: None, |
| rd: rd, |
| rs1: rs, |
| rs2: rs, |
| } |
| } |
| /// If a float is zero. |
| pub(crate) fn emit_if_float_not_zero( |
| tmp: Writable<Reg>, |
| rs: Reg, |
| ty: Type, |
| taken: CondBrTarget, |
| not_taken: CondBrTarget, |
| ) -> SmallInstVec<Inst> { |
| let mut insts = SmallInstVec::new(); |
| let class_op = if ty == F32 { |
| FpuOPRR::FclassS |
| } else { |
| FpuOPRR::FclassD |
| }; |
| insts.push(Inst::FpuRR { |
| alu_op: class_op, |
| frm: None, |
| rd: tmp, |
| rs: rs, |
| }); |
| insts.push(Inst::AluRRImm12 { |
| alu_op: AluOPRRI::Andi, |
| rd: tmp, |
| rs: tmp.to_reg(), |
| imm12: Imm12::from_i16(FClassResult::is_zero_bits() as i16), |
| }); |
| insts.push(Inst::CondBr { |
| taken, |
| not_taken, |
| kind: IntegerCompare { |
| kind: IntCC::Equal, |
| rs1: tmp.to_reg(), |
| rs2: zero_reg(), |
| }, |
| }); |
| insts |
| } |
| |
    /// Lower an integer-compare-and-branch.
    ///
    /// For types up to 64 bits this is a single conditional branch. For i128
    /// (register pair: regs()[0] = low half, regs()[1] = high half) this emits
    /// a short sequence that decides on the high halves first and falls back
    /// to the low halves when the high halves are equal.
    pub(crate) fn lower_br_icmp(
        cc: IntCC,
        a: ValueRegs<Reg>,
        b: ValueRegs<Reg>,
        taken: CondBrTarget,
        not_taken: CondBrTarget,
        ty: Type,
    ) -> SmallInstVec<Inst> {
        let mut insts = SmallInstVec::new();
        if ty.bits() <= 64 {
            let rs1 = a.only_reg().unwrap();
            let rs2 = b.only_reg().unwrap();
            let inst = Inst::CondBr {
                taken,
                not_taken,
                kind: IntegerCompare { kind: cc, rs1, rs2 },
            };
            insts.push(inst);
            return insts;
        }
        // compare i128
        // Compare of the low halves with the given condition.
        let low = |cc: IntCC| -> IntegerCompare {
            IntegerCompare {
                rs1: a.regs()[0],
                rs2: b.regs()[0],
                kind: cc,
            }
        };
        // Compare of the high halves with the given condition.
        let high = |cc: IntCC| -> IntegerCompare {
            IntegerCompare {
                rs1: a.regs()[1],
                rs2: b.regs()[1],
                kind: cc,
            }
        };
        match cc {
            IntCC::Equal => {
                // If the high halves differ the values cannot be equal:
                // go to not_taken; otherwise fall through.
                insts.push(Inst::CondBr {
                    taken: not_taken,
                    not_taken: CondBrTarget::Fallthrough,
                    kind: high(IntCC::NotEqual),
                });
                // High halves are equal: equality is decided by the low halves.
                insts.push(Inst::CondBr {
                    taken,
                    not_taken,
                    kind: low(IntCC::Equal),
                });
            }

            IntCC::NotEqual => {
                // If the high halves differ the whole values must differ,
                // so go straight to taken; otherwise fall through.
                insts.push(Inst::CondBr {
                    taken,
                    not_taken: CondBrTarget::Fallthrough, // no branch
                    kind: high(IntCC::NotEqual),
                });

                // High halves are equal: inequality is decided by the low halves.
                insts.push(Inst::CondBr {
                    taken,
                    not_taken,
                    kind: low(IntCC::NotEqual),
                });
            }
            IntCC::SignedGreaterThanOrEqual
            | IntCC::SignedLessThanOrEqual
            | IntCC::UnsignedGreaterThanOrEqual
            | IntCC::UnsignedLessThanOrEqual
            | IntCC::SignedGreaterThan
            | IntCC::SignedLessThan
            | IntCC::UnsignedLessThan
            | IntCC::UnsignedGreaterThan => {
                // If the high halves already satisfy the strict (without-equal)
                // comparison, the whole comparison is decided: branch to taken.
                insts.push(Inst::CondBr {
                    taken,
                    not_taken: CondBrTarget::Fallthrough,
                    kind: high(cc.without_equal()),
                });
                // High halves differ but did not satisfy the strict comparison:
                // the comparison is false, branch to not_taken.
                insts.push(Inst::CondBr {
                    taken: not_taken,
                    not_taken: CondBrTarget::Fallthrough,
                    kind: high(IntCC::NotEqual),
                });
                // High halves are equal: the low halves decide, compared
                // unsigned (the sign lives in the high half).
                insts.push(Inst::CondBr {
                    taken,
                    not_taken,
                    kind: low(cc.unsigned()),
                });
            }
        }
        insts
    }
| |
    /// Returns Some(VState) if this instruction is expecting a specific vector state
    /// before emission.
    fn expected_vstate(&self) -> Option<&VState> {
        match self {
            // Non-vector instructions neither depend on nor change the
            // vector state.
            Inst::Nop0
            | Inst::Nop4
            | Inst::BrTable { .. }
            | Inst::Auipc { .. }
            | Inst::Lui { .. }
            | Inst::LoadInlineConst { .. }
            | Inst::AluRRR { .. }
            | Inst::FpuRRR { .. }
            | Inst::AluRRImm12 { .. }
            | Inst::CsrReg { .. }
            | Inst::CsrImm { .. }
            | Inst::Load { .. }
            | Inst::Store { .. }
            | Inst::Args { .. }
            | Inst::Rets { .. }
            | Inst::Ret { .. }
            | Inst::Extend { .. }
            | Inst::AdjustSp { .. }
            | Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::ReturnCall { .. }
            | Inst::ReturnCallInd { .. }
            | Inst::Jal { .. }
            | Inst::CondBr { .. }
            | Inst::LoadExtName { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::LoadAddr { .. }
            | Inst::VirtualSPOffsetAdj { .. }
            | Inst::Mov { .. }
            | Inst::MovFromPReg { .. }
            | Inst::Fence { .. }
            | Inst::EBreak
            | Inst::Udf { .. }
            | Inst::FpuRR { .. }
            | Inst::FpuRRRR { .. }
            | Inst::Jalr { .. }
            | Inst::Atomic { .. }
            | Inst::Select { .. }
            | Inst::AtomicCas { .. }
            | Inst::Icmp { .. }
            | Inst::FcvtToInt { .. }
            | Inst::RawData { .. }
            | Inst::AtomicStore { .. }
            | Inst::AtomicLoad { .. }
            | Inst::AtomicRmwLoop { .. }
            | Inst::TrapIf { .. }
            | Inst::Unwind { .. }
            | Inst::DummyUse { .. }
            | Inst::FloatRound { .. }
            | Inst::FloatSelect { .. }
            | Inst::Popcnt { .. }
            | Inst::Rev8 { .. }
            | Inst::Cltz { .. }
            | Inst::Brev8 { .. }
            | Inst::StackProbeLoop { .. } => None,

            // VecSetState does not expect any vstate, rather it updates it.
            Inst::VecSetState { .. } => None,

            // `vmv` instructions copy a set of registers and ignore vstate.
            Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,

            Inst::VecAluRR { vstate, .. } |
            Inst::VecAluRRR { vstate, .. } |
            Inst::VecAluRRRR { vstate, .. } |
            Inst::VecAluRImm5 { vstate, .. } |
            Inst::VecAluRRImm5 { vstate, .. } |
            Inst::VecAluRRRImm5 { vstate, .. } |
            // TODO: Unit-stride loads and stores only need the AVL to be correct, not
            // the full vtype. A future optimization could be to decouple these two when
            // updating vstate. This would allow us to avoid emitting a VecSetState in
            // some cases.
            Inst::VecLoad { vstate, .. }
            | Inst::VecStore { vstate, .. } => Some(vstate),
        }
    }
| } |
| |
impl MachInstEmit for Inst {
    type State = EmitState;
    type Info = EmitInfo;

    fn emit(
        &self,
        allocs: &[Allocation],
        sink: &mut MachBuffer<Inst>,
        emit_info: &Self::Info,
        state: &mut EmitState,
    ) {
        // Transform this into an instruction with all the physical regs
        let mut allocs = AllocationConsumer::new(allocs);
        let inst = self.clone().allocate(&mut allocs);

        // Check if we need to update the vector state before emitting this instruction
        if let Some(expected) = inst.expected_vstate() {
            if state.vstate != EmitVState::Known(expected.clone()) {
                // Update the vector state.
                Inst::VecSetState {
                    rd: writable_zero_reg(),
                    vstate: expected.clone(),
                }
                .emit(&[], sink, emit_info, state);
            }
        }

        // N.B.: we *must* not exceed the "worst-case size" used to compute
        // where to insert islands, except when islands are explicitly triggered
        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
        // to allow disabling the check for `JTSequence`, which is always
        // emitted following an `EmitIsland`.
        let mut start_off = sink.cur_offset();

        // First try to emit this as a compressed instruction
        let res = inst.try_emit_compressed(sink, emit_info, state, &mut start_off);
        if res.is_none() {
            // If we can't lets emit it as a normal instruction
            inst.emit_uncompressed(sink, emit_info, state, &mut start_off);
        }

        // Verify the invariant described above: no single instruction may
        // emit more bytes than `worst_case_size`.
        let end_off = sink.cur_offset();
        assert!(
            (end_off - start_off) <= Inst::worst_case_size(),
            "Inst:{:?} length:{} worst_case_size:{}",
            self,
            end_off - start_off,
            Inst::worst_case_size()
        );
    }

    // Pretty-print this instruction with its allocations applied.
    fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String {
        let mut allocs = AllocationConsumer::new(allocs);
        self.print_with_state(state, &mut allocs)
    }
}
| |
| impl Inst { |
    /// Tries to emit this instruction in a compressed (16-bit) form; returns
    /// `None` (without emitting anything) if no compressed encoding applies.
    fn try_emit_compressed(
        &self,
        sink: &mut MachBuffer<Inst>,
        emit_info: &EmitInfo,
        state: &mut EmitState,
        start_off: &mut u32,
    ) -> Option<()> {
        let has_m = emit_info.isa_flags.has_m();
        let has_zba = emit_info.isa_flags.has_zba();
        let has_zbb = emit_info.isa_flags.has_zbb();
        let has_zca = emit_info.isa_flags.has_zca();
        let has_zcb = emit_info.isa_flags.has_zcb();
        let has_zcd = emit_info.isa_flags.has_zcd();

        // Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc..) require Zca
        // to be enabled, so check it early.
        if !has_zca {
            return None;
        }

        // Only x8..x15 are addressable by the 3-bit register fields used in
        // most compressed instruction formats.
        fn reg_is_compressible(r: Reg) -> bool {
            r.to_real_reg()
                .map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
                .unwrap_or(false)
        }

        match *self {
            // C.ADD
            Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd,
                rs1,
                rs2,
            } if rd.to_reg() == rs1 && rs1 != zero_reg() && rs2 != zero_reg() => {
                sink.put2(encode_cr_type(CrOp::CAdd, rd, rs2));
            }

            // C.MV
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != rs
                && rd.to_reg() != zero_reg()
                && rs != zero_reg()
                && imm12.as_i16() == 0 =>
            {
                sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
            }

            // CA Ops
            Inst::AluRRR {
                alu_op:
                    alu_op @ (AluOPRRR::And
                    | AluOPRRR::Or
                    | AluOPRRR::Xor
                    | AluOPRRR::Sub
                    | AluOPRRR::Addw
                    | AluOPRRR::Subw
                    | AluOPRRR::Mul),
                rd,
                rs1,
                rs2,
            } if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
                let op = match alu_op {
                    AluOPRRR::And => CaOp::CAnd,
                    AluOPRRR::Or => CaOp::COr,
                    AluOPRRR::Xor => CaOp::CXor,
                    AluOPRRR::Sub => CaOp::CSub,
                    AluOPRRR::Addw => CaOp::CAddw,
                    AluOPRRR::Subw => CaOp::CSubw,
                    // c.mul additionally requires the Zcb and M extensions.
                    AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
                    _ => return None,
                };

                sink.put2(encode_ca_type(op, rd, rs2));
            }

            // c.j
            //
            // We don't have a separate JAL as that is only available in RV32C
            Inst::Jal { label } => {
                sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
                sink.add_uncond_branch(*start_off, *start_off + 2, label);
                sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
            }

            // c.jr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJr, base));
            }

            // c.jalr
            Inst::Jalr { rd, base, offset }
                if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
            {
                sink.put2(encode_cr2_type(CrOp::CJalr, base));
            }

            // c.ebreak
            Inst::EBreak => {
                sink.put2(encode_cr_type(
                    CrOp::CEbreak,
                    writable_zero_reg(),
                    zero_reg(),
                ));
            }

            // c.unimp
            Inst::Udf { trap_code } => {
                sink.add_trap(trap_code);
                if let Some(s) = state.take_stack_map() {
                    sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s);
                }
                sink.put2(0x0000);
            }

            // c.addi16sp
            //
            // c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
            // c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),
            // where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used
            // to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp
            // is only valid when nzimm≠0; the code point with nzimm=0 is reserved.
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 16) == 0
                && Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
            {
                let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
                sink.put2(encode_c_addi16sp(imm6));
            }

            // c.addi4spn
            //
            // c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
            // immediate, scaled by 4, to the stack pointer, x2, and writes the result to
            // rd. This instruction is used to generate pointers to stack-allocated variables
            // and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
            // the code points with nzuimm=0 are reserved.
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if reg_is_compressible(rd.to_reg())
                && rs == stack_reg()
                && imm12.as_i16() != 0
                && (imm12.as_i16() % 4) == 0
                && u8::try_from(imm12.as_i16() / 4).is_ok() =>
            {
                let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
                sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
            }

            // c.li
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() != zero_reg() && rs == zero_reg() && imm12.as_i16() != 0 => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
            }

            // c.addi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
            }

            // c.addiw
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addiw,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
            }

            // c.lui
            //
            // c.lui loads the non-zero 6-bit immediate field into bits 17–12
            // of the destination register, clears the bottom 12 bits, and
            // sign-extends bit 17 into all higher bits of the destination.
            Inst::Lui { rd, imm: imm20 }
                if rd.to_reg() != zero_reg()
                    && rd.to_reg() != stack_reg()
                    && imm20.as_i32() != 0 =>
            {
                // Check that the top bits are sign extended
                let imm = imm20.as_i32() << 14 >> 14;
                if imm != imm20.as_i32() {
                    return None;
                }
                let imm6 = Imm6::maybe_from_i32(imm)?;
                sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
            }

            // c.slli
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
            }

            // c.srli / c.srai
            Inst::AluRRImm12 {
                alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
                let op = match op {
                    AluOPRRI::Srli => CbOp::CSrli,
                    AluOPRRI::Srai => CbOp::CSrai,
                    _ => unreachable!(),
                };

                // The shift amount is unsigned, but we encode it as signed.
                let shift = imm12.as_i16() & 0x3f;
                let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
                sink.put2(encode_cb_type(op, rd, imm6));
            }

            // c.zextb
            //
            // This is an alias for `andi rd, rd, 0xff`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0xff =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
            }

            // c.andi
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Andi,
                rd,
                rs,
                imm12,
            } if rd.to_reg() == rs && reg_is_compressible(rs) => {
                let imm6 = Imm6::maybe_from_imm12(imm12)?;
                sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
            }

            // Stack Based Loads
            Inst::Load {
                rd,
                op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
                from,
                flags,
            } if from.get_base_register() == Some(stack_reg())
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the load size.
                let offset = from.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Some additional constraints on these instructions.
                //
                // Integer loads are not allowed to target x0, but floating point loads
                // are, since f0 is not a special register.
                //
                // Floating point loads are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let rd_is_zero = rd.to_reg() == zero_reg();
                let op = match op {
                    LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
                    LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
                    LoadOP::Fld if has_zcd => CiOp::CFldsp,
                    _ => return None,
                };

                let srcloc = state.cur_srcloc();
                if !srcloc.is_default() && !flags.notrap() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(TrapCode::HeapOutOfBounds);
                }
                sink.put2(encode_ci_sp_load(op, rd, imm6));
            }

            // Regular Loads
            Inst::Load {
                rd,
                op:
                    op
                    @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
                from,
                flags,
            } if reg_is_compressible(rd.to_reg())
                && from
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (from.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = from.get_base_register().unwrap();

                // We encode the offset in multiples of the store size.
                let offset = from.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.lw / c.ld / c.fld instructions are available in the standard Zca
                // extension using the CL format.
                //
                // c.lbu / c.lhu / c.lh are only available in the Zcb extension and
                // are also encoded differently. Technically they each have a different
                // format, but they are similar enough that we can group them.
                let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
                let encoded = if is_zcb_load {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        LoadOP::Lbu => ZcbMemOp::CLbu,
                        LoadOP::Lhu => ZcbMemOp::CLhu,
                        LoadOP::Lh => ZcbMemOp::CLh,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_load(op, rd, base, imm2)
                } else {
                    // Floating point loads are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        LoadOP::Lw => ClOp::CLw,
                        LoadOP::Ld => ClOp::CLd,
                        LoadOP::Fld if has_zcd => ClOp::CFld,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cl_type(op, rd, base, imm5)
                };

                let srcloc = state.cur_srcloc();
                if !srcloc.is_default() && !flags.notrap() {
                    // Register the offset at which the actual load instruction starts.
                    sink.add_trap(TrapCode::HeapOutOfBounds);
                }
                sink.put2(encoded);
            }

            // Stack Based Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
                to,
                flags,
            } if to.get_base_register() == Some(stack_reg())
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let imm6 = u8::try_from(offset / op.size())
                    .ok()
                    .and_then(Uimm6::maybe_from_u8)?;

                // Floating point stores are not included in the base Zca extension
                // but in a separate Zcd extension. Both of these are part of the C Extension.
                let op = match op {
                    StoreOP::Sw => CssOp::CSwsp,
                    StoreOP::Sd => CssOp::CSdsp,
                    StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
                    _ => return None,
                };

                let srcloc = state.cur_srcloc();
                if !srcloc.is_default() && !flags.notrap() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(TrapCode::HeapOutOfBounds);
                }
                sink.put2(encode_css_type(op, src, imm6));
            }

            // Regular Stores
            Inst::Store {
                src,
                op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
                to,
                flags,
            } if reg_is_compressible(src)
                && to
                    .get_base_register()
                    .map(reg_is_compressible)
                    .unwrap_or(false)
                && (to.get_offset_with_state(state) % op.size()) == 0 =>
            {
                let base = to.get_base_register().unwrap();

                // We encode the offset in multiples of the store size.
                let offset = to.get_offset_with_state(state);
                let offset = u8::try_from(offset / op.size()).ok()?;

                // We mix two different formats here.
                //
                // c.sw / c.sd / c.fsd instructions are available in the standard Zca
                // extension using the CL format.
                //
                // c.sb / c.sh are only available in the Zcb extension and are also
                // encoded differently.
                let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
                let encoded = if is_zcb_store {
                    if !has_zcb {
                        return None;
                    }

                    let op = match op {
                        StoreOP::Sh => ZcbMemOp::CSh,
                        StoreOP::Sb => ZcbMemOp::CSb,
                        _ => unreachable!(),
                    };

                    // Byte stores & loads have 2 bits of immediate offset. Halfword stores
                    // and loads only have 1 bit.
                    let imm2 = Uimm2::maybe_from_u8(offset)?;
                    if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
                        return None;
                    }

                    encode_zcbmem_store(op, src, base, imm2)
                } else {
                    // Floating point stores are not included in the base Zca extension
                    // but in a separate Zcd extension. Both of these are part of the C Extension.
                    let op = match op {
                        StoreOP::Sw => CsOp::CSw,
                        StoreOP::Sd => CsOp::CSd,
                        StoreOP::Fsd if has_zcd => CsOp::CFsd,
                        _ => return None,
                    };
                    let imm5 = Uimm5::maybe_from_u8(offset)?;

                    encode_cs_type(op, src, base, imm5)
                };

                let srcloc = state.cur_srcloc();
                if !srcloc.is_default() && !flags.notrap() {
                    // Register the offset at which the actual store instruction starts.
                    sink.add_trap(TrapCode::HeapOutOfBounds);
                }
                sink.put2(encoded);
            }

            // c.not
            //
            // This is an alias for `xori rd, rd, -1`
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Xori,
                rd,
                rs,
                imm12,
            } if has_zcb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == -1 =>
            {
                sink.put2(encode_cszn_type(CsznOp::CNot, rd));
            }

            // c.sext.b / c.sext.h / c.zext.h
            //
            // These are all the extend instructions present in `Zcb`, they
            // also require `Zbb` since they aren't available in the base ISA.
            Inst::AluRRImm12 {
                alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
                rd,
                rs,
                imm12,
            } if has_zcb
                && has_zbb
                && rd.to_reg() == rs
                && reg_is_compressible(rs)
                && imm12.as_i16() == 0 =>
            {
                let op = match alu_op {
                    AluOPRRI::Sextb => CsznOp::CSextb,
                    AluOPRRI::Sexth => CsznOp::CSexth,
                    AluOPRRI::Zexth => CsznOp::CZexth,
                    _ => unreachable!(),
                };
                sink.put2(encode_cszn_type(op, rd));
            }

            // c.zext.w
            //
            // This is an alias for `add.uw rd, rd, zero`
            Inst::AluRRR {
                alu_op: AluOPRRR::Adduw,
                rd,
                rs1,
                rs2,
            } if has_zcb
                && has_zba
                && rd.to_reg() == rs1
                && reg_is_compressible(rs1)
                && rs2 == zero_reg() =>
            {
                sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
            }

            // Everything else has no compressed encoding.
            _ => return None,
        }

        return Some(());
    }
| |
| fn emit_uncompressed( |
| &self, |
| sink: &mut MachBuffer<Inst>, |
| emit_info: &EmitInfo, |
| state: &mut EmitState, |
| start_off: &mut u32, |
| ) { |
| match self { |
| &Inst::Nop0 => { |
| // do nothing |
| } |
| // Addi x0, x0, 0 |
| &Inst::Nop4 => { |
| let x = Inst::AluRRImm12 { |
| alu_op: AluOPRRI::Addi, |
| rd: Writable::from_reg(zero_reg()), |
| rs: zero_reg(), |
| imm12: Imm12::ZERO, |
| }; |
| x.emit(&[], sink, emit_info, state) |
| } |
| &Inst::RawData { ref data } => { |
| // Right now we only put a u32 or u64 in this instruction. |
| // It is not very long, no need to check if need `emit_island`. |
| // If data is very long , this is a bug because RawData is typecial |
| // use to load some data and rely on some positon in the code stream. |
| // and we may exceed `Inst::worst_case_size`. |
| // for more information see https://github.com/bytecodealliance/wasmtime/pull/5612. |
| sink.put_data(&data[..]); |
| } |
| &Inst::Lui { rd, ref imm } => { |
| let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12); |
| sink.put4(x); |
| } |
| &Inst::LoadInlineConst { rd, ty, imm } => { |
| let data = &imm.to_le_bytes()[..ty.bytes() as usize]; |
| |
| let label_data: MachLabel = sink.get_label(); |
| let label_end: MachLabel = sink.get_label(); |
| |
| // Load into rd |
| Inst::Load { |
| rd, |
| op: LoadOP::from_type(ty), |
| flags: MemFlags::new(), |
| from: AMode::Label(label_data), |
| } |
| .emit(&[], sink, emit_info, state); |
| |
| // Jump over the inline pool |
| Inst::gen_jump(label_end).emit(&[], sink, emit_info, state); |
| |
| // Emit the inline data |
| sink.bind_label(label_data, &mut state.ctrl_plane); |
| Inst::RawData { data: data.into() }.emit(&[], sink, emit_info, state); |
| |
| sink.bind_label(label_end, &mut state.ctrl_plane); |
| } |
| &Inst::FpuRR { |
| frm, |
| alu_op, |
| rd, |
| rs, |
| } => { |
| let x = alu_op.op_code() |
| | reg_to_gpr_num(rd.to_reg()) << 7 |
| | alu_op.funct3(frm) << 12 |
| | reg_to_gpr_num(rs) << 15 |
| | alu_op.rs2_funct5() << 20 |
| | alu_op.funct7() << 25; |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && alu_op.is_convert_to_int() { |
| sink.add_trap(TrapCode::BadConversionToInteger); |
| } |
| sink.put4(x); |
| } |
| &Inst::FpuRRRR { |
| alu_op, |
| rd, |
| rs1, |
| rs2, |
| rs3, |
| frm, |
| } => { |
| let x = alu_op.op_code() |
| | reg_to_gpr_num(rd.to_reg()) << 7 |
| | alu_op.funct3(frm) << 12 |
| | reg_to_gpr_num(rs1) << 15 |
| | reg_to_gpr_num(rs2) << 20 |
| | alu_op.funct2() << 25 |
| | reg_to_gpr_num(rs3) << 27; |
| |
| sink.put4(x); |
| } |
| &Inst::FpuRRR { |
| alu_op, |
| frm, |
| rd, |
| rs1, |
| rs2, |
| } => { |
| let x: u32 = alu_op.op_code() |
| | reg_to_gpr_num(rd.to_reg()) << 7 |
| | (alu_op.funct3(frm)) << 12 |
| | reg_to_gpr_num(rs1) << 15 |
| | reg_to_gpr_num(rs2) << 20 |
| | alu_op.funct7() << 25; |
| sink.put4(x); |
| } |
| &Inst::Unwind { ref inst } => { |
| sink.add_unwind(inst.clone()); |
| } |
| &Inst::DummyUse { .. } => { |
| // This has already been handled by Inst::allocate. |
| } |
| &Inst::AluRRR { |
| alu_op, |
| rd, |
| rs1, |
| rs2, |
| } => { |
| let (rs1, rs2) = if alu_op.reverse_rs() { |
| (rs2, rs1) |
| } else { |
| (rs1, rs2) |
| }; |
| |
| sink.put4(encode_r_type( |
| alu_op.op_code(), |
| rd, |
| alu_op.funct3(), |
| rs1, |
| rs2, |
| alu_op.funct7(), |
| )); |
| } |
| &Inst::AluRRImm12 { |
| alu_op, |
| rd, |
| rs, |
| imm12, |
| } => { |
| let x = alu_op.op_code() |
| | reg_to_gpr_num(rd.to_reg()) << 7 |
| | alu_op.funct3() << 12 |
| | reg_to_gpr_num(rs) << 15 |
| | alu_op.imm12(imm12) << 20; |
| sink.put4(x); |
| } |
| &Inst::CsrReg { op, rd, rs, csr } => { |
| sink.put4(encode_csr_reg(op, rd, rs, csr)); |
| } |
| &Inst::CsrImm { op, rd, csr, imm } => { |
| sink.put4(encode_csr_imm(op, rd, csr, imm)); |
| } |
&Inst::Load {
    rd,
    op,
    from,
    flags,
} => {
    // Lower the AMode into a concrete (base register, imm12 offset)
    // pair, materializing the address or emitting label relocations
    // as needed, then encode the actual I-type load.
    let base = from.get_base_register();
    let offset = from.get_offset_with_state(state);
    let offset_imm12 = Imm12::maybe_from_i64(offset);
    let label = from.get_label_with_sink(sink);

    let (addr, imm12) = match (base, offset_imm12, label) {
        // When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
        (Some(base), Some(imm12), None) => (base, imm12),

        // Otherwise, if the offset does not fit into a imm12, we need to materialize it into a
        // register and load from that.
        (Some(_), None, None) => {
            let tmp = writable_spilltmp_reg();
            Inst::LoadAddr { rd: tmp, mem: from }.emit(&[], sink, emit_info, state);
            (tmp.to_reg(), Imm12::ZERO)
        }

        // If the AMode contains a label we can emit an internal relocation that gets
        // resolved with the correct address later.
        (None, Some(imm), Some(label)) => {
            debug_assert_eq!(imm.as_i16(), 0);

            // Get the current PC.
            sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
            Inst::Auipc {
                rd,
                imm: Imm20::ZERO,
            }
            .emit_uncompressed(sink, emit_info, state, start_off);

            // Emit a relocation for the load. This patches the offset into the instruction.
            sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);

            // Imm12 here is meaningless since it's going to get replaced.
            (rd.to_reg(), Imm12::ZERO)
        }

        // These cases are impossible with the current AModes that we have. We either
        // always have a register, or always have a label. Never both, and never neither.
        (None, None, None)
        | (None, Some(_), None)
        | (Some(_), None, Some(_))
        | (Some(_), Some(_), Some(_))
        | (None, None, Some(_)) => {
            unreachable!("Invalid load address")
        }
    };

    let srcloc = state.cur_srcloc();
    if !srcloc.is_default() && !flags.notrap() {
        // Register the offset at which the actual load instruction starts.
        sink.add_trap(TrapCode::HeapOutOfBounds);
    }

    sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
}
&Inst::Store { op, src, flags, to } => {
    // Same address lowering as Load, but stores never use a label
    // AMode, so only the Reg+Offset cases apply.
    let base = to.get_base_register();
    let offset = to.get_offset_with_state(state);
    let offset_imm12 = Imm12::maybe_from_i64(offset);

    let (addr, imm12) = match (base, offset_imm12) {
        // If the offset fits into an imm12 we can directly encode it.
        (Some(base), Some(imm12)) => (base, imm12),
        // Otherwise materialize the full address into a register and store through it.
        _ => {
            let tmp = writable_spilltmp_reg();
            Inst::LoadAddr { rd: tmp, mem: to }.emit(&[], sink, emit_info, state);
            (tmp.to_reg(), Imm12::ZERO)
        }
    };

    let srcloc = state.cur_srcloc();
    if !srcloc.is_default() && !flags.notrap() {
        // Register the offset at which the actual store instruction starts.
        sink.add_trap(TrapCode::HeapOutOfBounds);
    }

    sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
}
&Inst::Args { .. } | &Inst::Rets { .. } => {
    // Nothing: this is a pseudoinstruction that serves
    // only to constrain registers at a certain point.
}
&Inst::Ret {} => {
    // RISC-V does not have a dedicated ret instruction, instead we emit the equivalent
    // `jalr x0, x1, 0` that jumps to the return address.
    Inst::Jalr {
        rd: writable_zero_reg(),
        base: link_reg(),
        offset: Imm12::ZERO,
    }
    .emit(&[], sink, emit_info, state);
}
| |
| &Inst::Extend { |
| rd, |
| rn, |
| signed, |
| from_bits, |
| to_bits: _to_bits, |
| } => { |
| let mut insts = SmallInstVec::new(); |
| let shift_bits = (64 - from_bits) as i16; |
| let is_u8 = || from_bits == 8 && signed == false; |
| if is_u8() { |
| // special for u8. |
| insts.push(Inst::AluRRImm12 { |
| alu_op: AluOPRRI::Andi, |
| rd, |
| rs: rn, |
| imm12: Imm12::from_i16(255), |
| }); |
| } else { |
| insts.push(Inst::AluRRImm12 { |
| alu_op: AluOPRRI::Slli, |
| rd, |
| rs: rn, |
| imm12: Imm12::from_i16(shift_bits), |
| }); |
| insts.push(Inst::AluRRImm12 { |
| alu_op: if signed { |
| AluOPRRI::Srai |
| } else { |
| AluOPRRI::Srli |
| }, |
| rd, |
| rs: rd.to_reg(), |
| imm12: Imm12::from_i16(shift_bits), |
| }); |
| } |
| insts |
| .into_iter() |
| .for_each(|i| i.emit(&[], sink, emit_info, state)); |
| } |
&Inst::AdjustSp { amount } => {
    // Adjust the stack pointer by `amount` bytes. Small amounts fit in
    // a single `addi`; larger ones are materialized into the spill
    // temporary and added.
    if let Some(imm) = Imm12::maybe_from_i64(amount) {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
            rd: writable_stack_reg(),
            rs: stack_reg(),
            imm12: imm,
        }
        .emit(&[], sink, emit_info, state);
    } else {
        let tmp = writable_spilltmp_reg();
        let mut insts = Inst::load_constant_u64(tmp, amount as u64);
        insts.push(Inst::AluRRR {
            alu_op: AluOPRRR::Add,
            rd: writable_stack_reg(),
            rs1: tmp.to_reg(),
            rs2: stack_reg(),
        });
        insts
            .into_iter()
            .for_each(|i| i.emit(&[], sink, emit_info, state));
    }
}
&Inst::Call { ref info } => {
    // Direct call. User functions get a relocated auipc+jalr pair;
    // libcalls and other symbolic destinations go through an indirect
    // call on a loaded address.
    match info.dest {
        ExternalName::User { .. } => {
            if info.opcode.is_call() {
                sink.add_call_site(info.opcode);
            }
            sink.add_reloc(Reloc::RiscvCall, &info.dest, 0);
            if let Some(s) = state.take_stack_map() {
                // The auipc+jalr pair below is 8 bytes.
                sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s);
            }
            Inst::construct_auipc_and_jalr(
                Some(writable_link_reg()),
                writable_link_reg(),
                0,
            )
            .into_iter()
            .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
        }
        ExternalName::LibCall(..)
        | ExternalName::TestCase { .. }
        | ExternalName::KnownSymbol(..) => {
            // Use an indirect call; it is simpler.
            // Load the external name's address...
            Inst::LoadExtName {
                rd: writable_spilltmp_reg2(),
                name: Box::new(info.dest.clone()),
                offset: 0,
            }
            .emit(&[], sink, emit_info, state);

            // ...then call through it.
            Inst::CallInd {
                info: Box::new(CallIndInfo {
                    rn: spilltmp_reg2(),
                    // This doesn't really matter but we might as well send
                    // the correct info.
                    uses: info.uses.clone(),
                    defs: info.defs.clone(),
                    clobbers: info.clobbers,
                    opcode: Opcode::CallIndirect,
                    caller_callconv: info.caller_callconv,
                    callee_callconv: info.callee_callconv,
                    // Send this as 0 to avoid updating the pop size twice.
                    callee_pop_size: 0,
                }),
            }
            .emit(&[], sink, emit_info, state);
        }
    }

    // Account for arguments the callee pops on our behalf.
    let callee_pop_size = i64::from(info.callee_pop_size);
    state.virtual_sp_offset -= callee_pop_size;
    trace!(
        "call adjusts virtual sp offset by {callee_pop_size} -> {}",
        state.virtual_sp_offset
    );
}
&Inst::CallInd { ref info } => {
    // Indirect call through the register in `info.rn`.
    let start_offset = sink.cur_offset();

    Inst::Jalr {
        rd: writable_link_reg(),
        base: info.rn,
        offset: Imm12::ZERO,
    }
    .emit(&[], sink, emit_info, state);

    if let Some(s) = state.take_stack_map() {
        sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s);
    }

    if info.opcode.is_call() {
        sink.add_call_site(info.opcode);
    }

    // Account for arguments the callee pops on our behalf.
    let callee_pop_size = i64::from(info.callee_pop_size);
    state.virtual_sp_offset -= callee_pop_size;
    trace!(
        "call adjusts virtual sp offset by {callee_pop_size} -> {}",
        state.virtual_sp_offset
    );
}

&Inst::ReturnCall {
    ref callee,
    ref info,
} => {
    // Tail call to a direct destination: tear down the frame, then
    // jump (no link register) via a relocated auipc+jalr pair.
    emit_return_call_common_sequence(
        sink,
        emit_info,
        state,
        info.new_stack_arg_size,
        info.old_stack_arg_size,
    );

    sink.add_call_site(ir::Opcode::ReturnCall);
    sink.add_reloc(Reloc::RiscvCall, &**callee, 0);
    Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
        .into_iter()
        .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));

    // `emit_return_call_common_sequence` emits an island if
    // necessary, so we can safely disable the worst-case-size check
    // in this case.
    *start_off = sink.cur_offset();
}

&Inst::ReturnCallInd { callee, ref info } => {
    // Tail call through a register: tear down the frame, then jump.
    emit_return_call_common_sequence(
        sink,
        emit_info,
        state,
        info.new_stack_arg_size,
        info.old_stack_arg_size,
    );

    Inst::Jalr {
        rd: writable_zero_reg(),
        base: callee,
        offset: Imm12::ZERO,
    }
    .emit(&[], sink, emit_info, state);

    // `emit_return_call_common_sequence` emits an island if
    // necessary, so we can safely disable the worst-case-size check
    // in this case.
    *start_off = sink.cur_offset();
}
&Inst::Jal { label } => {
    // Unconditional jump to a label. We emit a bare `jal` opcode word
    // (all offset bits zero); the Jal20 label-use patches the real
    // offset in at fixup time.
    sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
    sink.add_uncond_branch(*start_off, *start_off + 4, label);
    sink.put4(0b1101111);
}
&Inst::CondBr {
    taken,
    not_taken,
    kind,
} => {
    match taken {
        CondBrTarget::Label(label) => {
            // Record the inverted form too, so the buffer can flip the
            // branch if the taken target ends up being the fallthrough.
            let code = kind.emit();
            let code_inverse = kind.inverse().emit().to_le_bytes();
            sink.use_label_at_offset(*start_off, label, LabelUse::B12);
            sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
            sink.put4(code);
        }
        CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
    }

    match not_taken {
        CondBrTarget::Label(label) => {
            Inst::gen_jump(label).emit(&[], sink, emit_info, state)
        }
        CondBrTarget::Fallthrough => {}
    };
}
| |
| &Inst::Mov { rd, rm, ty } => { |
| debug_assert_eq!(rd.to_reg().class(), rm.class()); |
| if rd.to_reg() == rm { |
| return; |
| } |
| |
| match rm.class() { |
| RegClass::Int => Inst::AluRRImm12 { |
| alu_op: AluOPRRI::Addi, |
| rd: rd, |
| rs: rm, |
| imm12: Imm12::ZERO, |
| }, |
| RegClass::Float => Inst::FpuRRR { |
| alu_op: if ty == F32 { |
| FpuOPRRR::FsgnjS |
| } else { |
| FpuOPRRR::FsgnjD |
| }, |
| frm: None, |
| rd: rd, |
| rs1: rm, |
| rs2: rm, |
| }, |
| RegClass::Vector => Inst::VecAluRRImm5 { |
| op: VecAluOpRRImm5::VmvrV, |
| vd: rd, |
| vs2: rm, |
| // Imm 0 means copy 1 register. |
| imm: Imm5::maybe_from_i8(0).unwrap(), |
| mask: VecOpMasking::Disabled, |
| // Vstate for this instruction is ignored. |
| vstate: VState::from_type(ty), |
| }, |
| } |
| .emit(&[], sink, emit_info, state); |
| } |
| |
&Inst::MovFromPReg { rd, rm } => {
    // Copy a physical register into a virtual one as a plain I64 move.
    Inst::gen_move(rd, Reg::from(rm), I64).emit(&[], sink, emit_info, state);
}

&Inst::BrTable {
    index,
    tmp1,
    tmp2,
    ref targets,
} => {
    let ext_index = writable_spilltmp_reg();

    let label_compute_target = sink.get_label();

    // The default target is passed in as the 0th element of `targets`
    // separate it here for clarity.
    let default_target = targets[0];
    let targets = &targets[1..];

    // We are going to potentially emit a large amount of instructions, so ensure that we emit an island
    // now if we need one.
    //
    // The worse case PC calculations are 12 instructions. And each entry in the jump table is 2 instructions.
    // Check if we need to emit a jump table here to support that jump.
    let inst_count = 12 + (targets.len() * 2);
    let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
    if sink.island_needed(distance) {
        // Skip over the island so that fallthrough execution is not
        // disturbed by the island's contents.
        let jump_around_label = sink.get_label();
        Inst::gen_jump(jump_around_label).emit(&[], sink, emit_info, state);
        sink.emit_island(distance + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // We emit a bounds check on the index, if the index is larger than the number of
    // jump table entries, we jump to the default block. Otherwise we compute a jump
    // offset by multiplying the index by 8 (the size of each entry) and then jump to
    // that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially.
    //
    // Build the following sequence:
    //
    // extend_index:
    //     zext.w  ext_index, index
    // bounds_check:
    //     li      tmp, n_labels
    //     bltu    ext_index, tmp, compute_target
    // jump_to_default_block:
    //     auipc   pc, 0
    //     jalr    zero, pc, default_block
    // compute_target:
    //     auipc   pc, 0
    //     slli    tmp, ext_index, 3
    //     add     pc, pc, tmp
    //     jalr    zero, pc, 0x10
    // jump_table:
    //     ; This repeats for each entry in the jumptable
    //     auipc   pc, 0
    //     jalr    zero, pc, block_target

    // Extend the index to 64 bits.
    //
    // This prevents us branching on the top 32 bits of the index, which
    // are undefined.
    Inst::Extend {
        rd: ext_index,
        rn: index,
        signed: false,
        from_bits: 32,
        to_bits: 64,
    }
    .emit(&[], sink, emit_info, state);

    // Bounds check.
    //
    // Check if the index passed in is larger than the number of jumptable
    // entries that we have. If it is, we fallthrough to a jump into the
    // default block.
    Inst::load_constant_u32(tmp2, targets.len() as u64)
        .iter()
        .for_each(|i| i.emit(&[], sink, emit_info, state));
    Inst::CondBr {
        taken: CondBrTarget::Label(label_compute_target),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::UnsignedLessThan,
            rs1: ext_index.to_reg(),
            rs2: tmp2.to_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);

    // Out of bounds: jump to the default target.
    sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
    Inst::construct_auipc_and_jalr(None, tmp2, 0)
        .iter()
        .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));

    // Compute the jump table offset.
    // We need to emit a PC relative offset,
    sink.bind_label(label_compute_target, &mut state.ctrl_plane);

    // Get the current PC.
    Inst::Auipc {
        rd: tmp1,
        imm: Imm20::ZERO,
    }
    .emit_uncompressed(sink, emit_info, state, start_off);

    // These instructions must be emitted as uncompressed since we
    // are manually computing the offset from the PC.

    // Multiply the index by 8, since that is the size in
    // bytes of each jump table entry
    Inst::AluRRImm12 {
        alu_op: AluOPRRI::Slli,
        rd: tmp2,
        rs: ext_index.to_reg(),
        imm12: Imm12::from_i16(3),
    }
    .emit_uncompressed(sink, emit_info, state, start_off);

    // Calculate the base of the jump, PC + the offset from above.
    Inst::AluRRR {
        alu_op: AluOPRRR::Add,
        rd: tmp1,
        rs1: tmp1.to_reg(),
        rs2: tmp2.to_reg(),
    }
    .emit_uncompressed(sink, emit_info, state, start_off);

    // Jump to the middle of the jump table.
    // We add a 16 byte offset here, since we used 4 instructions
    // since the AUIPC that was used to get the PC.
    Inst::Jalr {
        rd: writable_zero_reg(),
        base: tmp1.to_reg(),
        offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
    }
    .emit_uncompressed(sink, emit_info, state, start_off);

    // Emit the jump table.
    //
    // Each entry is a auipc + jalr to the target block. We also start with a island
    // if necessary.

    // Emit the jumps back to back
    for target in targets.iter() {
        sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);

        Inst::construct_auipc_and_jalr(None, tmp2, 0)
            .iter()
            .for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
    }

    // We've just emitted an island that is safe up to *here*.
    // Mark it as such so that we don't needlessly emit additional islands.
    *start_off = sink.cur_offset();
}
| |
&Inst::VirtualSPOffsetAdj { amount } => {
    // Pseudo-instruction: only updates the emission-time bookkeeping of
    // the virtual SP offset; emits no machine code.
    crate::trace!(
        "virtual sp offset adjusted by {} -> {}",
        amount,
        state.virtual_sp_offset + amount
    );
    state.virtual_sp_offset += amount;
}
| &Inst::Atomic { |
| op, |
| rd, |
| addr, |
| src, |
| amo, |
| } => { |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() { |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| let x = op.op_code() |
| | reg_to_gpr_num(rd.to_reg()) << 7 |
| | op.funct3() << 12 |
| | reg_to_gpr_num(addr) << 15 |
| | reg_to_gpr_num(src) << 20 |
| | op.funct7(amo) << 25; |
| |
| sink.put4(x); |
| } |
| &Inst::Fence { pred, succ } => { |
| let x = 0b0001111 |
| | 0b00000 << 7 |
| | 0b000 << 12 |
| | 0b00000 << 15 |
| | (succ as u32) << 20 |
| | (pred as u32) << 24; |
| |
| sink.put4(x); |
| } |
&Inst::Auipc { rd, imm } => {
    // `auipc rd, imm`: rd := PC + (imm << 12).
    sink.put4(enc_auipc(rd, imm));
}

&Inst::LoadAddr { rd, mem } => {
    // Materialize the effective address of `mem` into `rd`.
    let base = mem.get_base_register();
    let offset = mem.get_offset_with_state(state);
    let offset_imm12 = Imm12::maybe_from_i64(offset);

    match (mem, base, offset_imm12) {
        // Small offset from a base register: a single addi.
        (_, Some(rs), Some(imm12)) => {
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs,
                imm12,
            }
            .emit(&[], sink, emit_info, state);
        }
        // Large offset: load the constant into rd, then add the base.
        (_, Some(rs), None) => {
            let mut insts = Inst::load_constant_u64(rd, offset as u64);
            insts.push(Inst::AluRRR {
                alu_op: AluOPRRR::Add,
                rd,
                rs1: rd.to_reg(),
                rs2: rs,
            });
            insts
                .into_iter()
                .for_each(|inst| inst.emit(&[], sink, emit_info, state));
        }
        (AMode::Const(addr), None, _) => {
            // Get an address label for the constant and recurse.
            let label = sink.get_label_for_constant(addr);
            Inst::LoadAddr {
                rd,
                mem: AMode::Label(label),
            }
            .emit(&[], sink, emit_info, state);
        }
        (AMode::Label(label), None, _) => {
            // PC-relative address: auipc (hi20) + addi (lo12), each
            // patched by its own label-use relocation.

            // Get the current PC.
            sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
            let inst = Inst::Auipc {
                rd,
                imm: Imm20::ZERO,
            };
            inst.emit_uncompressed(sink, emit_info, state, start_off);

            // Emit an add to the address with a relocation.
            // This later gets patched up with the correct offset.
            sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Addi,
                rd,
                rs: rd.to_reg(),
                imm12: Imm12::ZERO,
            }
            .emit_uncompressed(sink, emit_info, state, start_off);
        }
        (amode, _, _) => {
            unimplemented!("LoadAddr: {:?}", amode);
        }
    }
}
| |
&Inst::Select {
    ref dst,
    condition,
    ref x,
    ref y,
} => {
    // Lower a select as a diamond: branch on the condition, move `x`
    // into `dst` on the true path and `y` on the false path.
    let label_true = sink.get_label();
    let label_false = sink.get_label();
    let label_end = sink.get_label();
    Inst::CondBr {
        taken: CondBrTarget::Label(label_true),
        not_taken: CondBrTarget::Label(label_false),
        kind: condition,
    }
    .emit(&[], sink, emit_info, state);
    sink.bind_label(label_true, &mut state.ctrl_plane);
    // True path: select the first value.
    for i in gen_moves(dst.regs(), x.regs()) {
        i.emit(&[], sink, emit_info, state);
    }
    Inst::gen_jump(label_end).emit(&[], sink, emit_info, state);

    // False path: select the second value.
    sink.bind_label(label_false, &mut state.ctrl_plane);
    for i in gen_moves(dst.regs(), y.regs()) {
        i.emit(&[], sink, emit_info, state);
    }

    sink.bind_label(label_end, &mut state.ctrl_plane);
}
&Inst::Jalr { rd, base, offset } => {
    // Indirect jump-and-link: rd := PC+4, PC := base + offset.
    sink.put4(enc_jalr(rd, base, offset));
}
&Inst::EBreak => {
    // `ebreak` breakpoint instruction (fixed encoding).
    sink.put4(0x00100073);
}
&Inst::Icmp { cc, rd, a, b, ty } => {
    // Materialize a comparison result as 0/1 in `rd` by branching to
    // either a `li rd, 1` or a `li rd, 0`.
    let label_true = sink.get_label();
    let label_false = sink.get_label();
    let label_end = sink.get_label();

    Inst::lower_br_icmp(
        cc,
        a,
        b,
        CondBrTarget::Label(label_true),
        CondBrTarget::Label(label_false),
        ty,
    )
    .into_iter()
    .for_each(|i| i.emit(&[], sink, emit_info, state));

    sink.bind_label(label_true, &mut state.ctrl_plane);
    Inst::load_imm12(rd, Imm12::ONE).emit(&[], sink, emit_info, state);
    Inst::gen_jump(label_end).emit(&[], sink, emit_info, state);
    sink.bind_label(label_false, &mut state.ctrl_plane);
    Inst::load_imm12(rd, Imm12::ZERO).emit(&[], sink, emit_info, state);
    sink.bind_label(label_end, &mut state.ctrl_plane);
}
&Inst::AtomicCas {
    offset,
    t0,
    dst,
    e,
    addr,
    v,
    ty,
} => {
    // Compare-and-swap loop. For sub-word types the narrow value is
    // extracted from / merged back into the containing word using
    // `offset` as the bit offset within the word.
    //
    // # addr holds address of memory location
    // # e holds expected value
    // # v holds desired value
    // # dst holds return value
    // cas:
    //     lr.w dst, (addr)       # Load original value.
    //     bne dst, e, fail       # Doesn't match, so fail.
    //     sc.w t0, v, (addr)     # Try to update.
    //     bnez t0, cas           # If the store failed, retry.
    // fail:
    let fail_label = sink.get_label();
    let cas_lebel = sink.get_label();
    sink.bind_label(cas_lebel, &mut state.ctrl_plane);
    Inst::Atomic {
        op: AtomicOP::load_op(ty),
        rd: dst,
        addr,
        src: zero_reg(),
        amo: AMO::SeqCst,
    }
    .emit(&[], sink, emit_info, state);
    if ty.bits() < 32 {
        // Sub-word: pull the narrow value out of the loaded word.
        AtomicOP::extract(dst, offset, dst.to_reg(), ty)
            .iter()
            .for_each(|i| i.emit(&[], sink, emit_info, state));
    } else if ty.bits() == 32 {
        // Zero-extend the 32-bit load so it compares cleanly against `e`.
        Inst::Extend {
            rd: dst,
            rn: dst.to_reg(),
            signed: false,
            from_bits: 32,
            to_bits: 64,
        }
        .emit(&[], sink, emit_info, state);
    }
    // Current value != expected: bail out without storing.
    Inst::CondBr {
        taken: CondBrTarget::Label(fail_label),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::NotEqual,
            rs1: e,
            rs2: dst.to_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
    let store_value = if ty.bits() < 32 {
        // Sub-word: reload the containing word into t0...
        Inst::Atomic {
            op: AtomicOP::load_op(ty),
            rd: t0,
            addr,
            src: zero_reg(),
            amo: AMO::SeqCst,
        }
        .emit(&[], sink, emit_info, state);
        // ...and splice the new narrow value into it.
        AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
            .iter()
            .for_each(|i| i.emit(&[], sink, emit_info, state));
        t0.to_reg()
    } else {
        v
    };
    Inst::Atomic {
        op: AtomicOP::store_op(ty),
        rd: t0,
        addr,
        src: store_value,
        amo: AMO::SeqCst,
    }
    .emit(&[], sink, emit_info, state);
    // Check whether our value was actually stored; nonzero t0 means the
    // conditional store failed, so retry the whole sequence.
    Inst::CondBr {
        taken: CondBrTarget::Label(cas_lebel),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::NotEqual,
            rs1: t0.to_reg(),
            rs2: zero_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
    sink.bind_label(fail_label, &mut state.ctrl_plane);
}
&Inst::AtomicRmwLoop {
    offset,
    op,
    dst,
    ty,
    p,
    x,
    t0,
} => {
    // Generic read-modify-write loop: load, compute the new value into
    // a register, store conditionally, retry on failure. Sub-word types
    // extract/merge through the containing word (see AtomicCas).
    let retry = sink.get_label();
    sink.bind_label(retry, &mut state.ctrl_plane);
    // Load the old value.
    Inst::Atomic {
        op: AtomicOP::load_op(ty),
        rd: dst,
        addr: p,
        src: zero_reg(),
        amo: AMO::SeqCst,
    }
    .emit(&[], sink, emit_info, state);

    // Compute the value to store, per RMW op.
    let store_value: Reg = match op {
        crate::ir::AtomicRmwOp::Add
        | crate::ir::AtomicRmwOp::Sub
        | crate::ir::AtomicRmwOp::And
        | crate::ir::AtomicRmwOp::Or
        | crate::ir::AtomicRmwOp::Xor => {
            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
                .iter()
                .for_each(|i| i.emit(&[], sink, emit_info, state));
            Inst::AluRRR {
                alu_op: match op {
                    crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
                    crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
                    crate::ir::AtomicRmwOp::And => AluOPRRR::And,
                    crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
                    crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
                    _ => unreachable!(),
                },
                rd: t0,
                rs1: dst.to_reg(),
                rs2: x,
            }
            .emit(&[], sink, emit_info, state);
            Inst::Atomic {
                op: AtomicOP::load_op(ty),
                rd: writable_spilltmp_reg2(),
                addr: p,
                src: zero_reg(),
                amo: AMO::SeqCst,
            }
            .emit(&[], sink, emit_info, state);
            AtomicOP::merge(
                writable_spilltmp_reg2(),
                writable_spilltmp_reg(),
                offset,
                t0.to_reg(),
                ty,
            )
            .iter()
            .for_each(|i| i.emit(&[], sink, emit_info, state));
            spilltmp_reg2()
        }
        crate::ir::AtomicRmwOp::Nand => {
            if ty.bits() < 32 {
                AtomicOP::extract(dst, offset, dst.to_reg(), ty)
                    .iter()
                    .for_each(|i| i.emit(&[], sink, emit_info, state));
            }
            // nand = not(and): compute the AND, then bitwise invert.
            Inst::AluRRR {
                alu_op: AluOPRRR::And,
                rd: t0,
                rs1: x,
                rs2: dst.to_reg(),
            }
            .emit(&[], sink, emit_info, state);
            Inst::construct_bit_not(t0, t0.to_reg()).emit(&[], sink, emit_info, state);
            if ty.bits() < 32 {
                Inst::Atomic {
                    op: AtomicOP::load_op(ty),
                    rd: writable_spilltmp_reg2(),
                    addr: p,
                    src: zero_reg(),
                    amo: AMO::SeqCst,
                }
                .emit(&[], sink, emit_info, state);
                AtomicOP::merge(
                    writable_spilltmp_reg2(),
                    writable_spilltmp_reg(),
                    offset,
                    t0.to_reg(),
                    ty,
                )
                .iter()
                .for_each(|i| i.emit(&[], sink, emit_info, state));
                spilltmp_reg2()
            } else {
                t0.to_reg()
            }
        }

        crate::ir::AtomicRmwOp::Umin
        | crate::ir::AtomicRmwOp::Umax
        | crate::ir::AtomicRmwOp::Smin
        | crate::ir::AtomicRmwOp::Smax => {
            // Min/max: compare old value against x and keep the winner.
            let label_select_dst = sink.get_label();
            let label_select_done = sink.get_label();
            // Signed variants need sign-extended extraction for the compare.
            if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
            {
                AtomicOP::extract(dst, offset, dst.to_reg(), ty)
            } else {
                AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
            }
            .iter()
            .for_each(|i| i.emit(&[], sink, emit_info, state));
            Inst::lower_br_icmp(
                match op {
                    crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
                    crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
                    crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
                    crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
                    _ => unreachable!(),
                },
                ValueRegs::one(dst.to_reg()),
                ValueRegs::one(x),
                CondBrTarget::Label(label_select_dst),
                CondBrTarget::Fallthrough,
                ty,
            )
            .iter()
            .for_each(|i| i.emit(&[], sink, emit_info, state));
            // Here we select x.
            Inst::gen_move(t0, x, I64).emit(&[], sink, emit_info, state);
            Inst::gen_jump(label_select_done).emit(&[], sink, emit_info, state);
            sink.bind_label(label_select_dst, &mut state.ctrl_plane);
            Inst::gen_move(t0, dst.to_reg(), I64).emit(&[], sink, emit_info, state);
            sink.bind_label(label_select_done, &mut state.ctrl_plane);
            Inst::Atomic {
                op: AtomicOP::load_op(ty),
                rd: writable_spilltmp_reg2(),
                addr: p,
                src: zero_reg(),
                amo: AMO::SeqCst,
            }
            .emit(&[], sink, emit_info, state);
            AtomicOP::merge(
                writable_spilltmp_reg2(),
                writable_spilltmp_reg(),
                offset,
                t0.to_reg(),
                ty,
            )
            .iter()
            .for_each(|i| i.emit(&[], sink, emit_info, state));
            spilltmp_reg2()
        }
        crate::ir::AtomicRmwOp::Xchg => {
            // Exchange: the stored value is just x merged into the word.
            AtomicOP::extract(dst, offset, dst.to_reg(), ty)
                .iter()
                .for_each(|i| i.emit(&[], sink, emit_info, state));
            Inst::Atomic {
                op: AtomicOP::load_op(ty),
                rd: writable_spilltmp_reg2(),
                addr: p,
                src: zero_reg(),
                amo: AMO::SeqCst,
            }
            .emit(&[], sink, emit_info, state);
            AtomicOP::merge(
                writable_spilltmp_reg2(),
                writable_spilltmp_reg(),
                offset,
                x,
                ty,
            )
            .iter()
            .for_each(|i| i.emit(&[], sink, emit_info, state));
            spilltmp_reg2()
        }
    };

    Inst::Atomic {
        op: AtomicOP::store_op(ty),
        rd: t0,
        addr: p,
        src: store_value,
        amo: AMO::SeqCst,
    }
    .emit(&[], sink, emit_info, state);

    // If the store was not successful, retry.
    Inst::CondBr {
        taken: CondBrTarget::Label(retry),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::NotEqual,
            rs1: t0.to_reg(),
            rs2: zero_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
}
| |
&Inst::FcvtToInt {
    is_sat,
    rd,
    rs,
    is_signed,
    in_type,
    out_type,
    tmp,
} => {
    // Float-to-int conversion. NaN inputs branch to a separate path
    // (produce 0 when saturating, trap otherwise); non-saturating
    // conversions additionally range-check and trap on overflow.
    let label_nan = sink.get_label();
    let label_jump_over = sink.get_label();
    // Check whether the input is NaN (rd == 0 afterwards means NaN).
    Inst::emit_not_nan(rd, rs, in_type).emit(&[], sink, emit_info, state);
    // Jump to the NaN path.
    Inst::CondBr {
        taken: CondBrTarget::Label(label_nan),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::Equal,
            rs2: zero_reg(),
            rs1: rd.to_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);

    if !is_sat {
        // Non-saturating: trap if the value falls outside the
        // representable range of the output type.
        let f32_bounds = f32_cvt_to_int_bounds(is_signed, out_type.bits() as u8);
        let f64_bounds = f64_cvt_to_int_bounds(is_signed, out_type.bits() as u8);
        if in_type == F32 {
            Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.0), |_| {
                writable_spilltmp_reg()
            })
        } else {
            Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.0), |_| {
                writable_spilltmp_reg()
            })
        }
        .iter()
        .for_each(|i| i.emit(&[], sink, emit_info, state));

        let le_op = if in_type == F32 {
            FpuOPRRR::FleS
        } else {
            FpuOPRRR::FleD
        };

        // rd := rs <= tmp (true means below the lower bound).
        Inst::FpuRRR {
            alu_op: le_op,
            frm: None,
            rd,
            rs1: rs,
            rs2: tmp.to_reg(),
        }
        .emit(&[], sink, emit_info, state);
        Inst::TrapIf {
            cc: IntCC::NotEqual,
            rs1: rd.to_reg(),
            rs2: zero_reg(),
            trap_code: TrapCode::IntegerOverflow,
        }
        .emit(&[], sink, emit_info, state);

        if in_type == F32 {
            Inst::load_fp_constant32(tmp, f32_bits(f32_bounds.1), |_| {
                writable_spilltmp_reg()
            })
        } else {
            Inst::load_fp_constant64(tmp, f64_bits(f64_bounds.1), |_| {
                writable_spilltmp_reg()
            })
        }
        .iter()
        .for_each(|i| i.emit(&[], sink, emit_info, state));

        // rd := rs >= tmp (true means above the upper bound).
        Inst::FpuRRR {
            alu_op: le_op,
            frm: None,
            rd,
            rs1: tmp.to_reg(),
            rs2: rs,
        }
        .emit(&[], sink, emit_info, state);

        Inst::TrapIf {
            cc: IntCC::NotEqual,
            rs1: rd.to_reg(),
            rs2: zero_reg(),
            trap_code: TrapCode::IntegerOverflow,
        }
        .emit(&[], sink, emit_info, state);
    }
    // Convert to int normally (round toward zero).
    Inst::FpuRR {
        frm: Some(FRM::RTZ),
        alu_op: FpuOPRR::float_convert_2_int_op(in_type, is_signed, out_type),
        rd,
        rs,
    }
    .emit(&[], sink, emit_info, state);
    if out_type.bits() < 32 && is_signed {
        // Narrow signed result: rebuild the value so that the sign bit
        // of the narrow type is replicated correctly.
        // Load the value-part mask.
        Inst::load_constant_u32(
            writable_spilltmp_reg(),
            if 16 == out_type.bits() {
                (u16::MAX >> 1) as u64
            } else {
                // I8
                (u8::MAX >> 1) as u64
            },
        )
        .into_iter()
        .for_each(|x| x.emit(&[], sink, emit_info, state));
        // Keep the value part.
        Inst::AluRRR {
            alu_op: AluOPRRR::And,
            rd: writable_spilltmp_reg(),
            rs1: rd.to_reg(),
            rs2: spilltmp_reg(),
        }
        .emit(&[], sink, emit_info, state);
        // Extract the sign bit.
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Srli,
            rd: rd,
            rs: rd.to_reg(),
            imm12: Imm12::from_i16(31),
        }
        .emit(&[], sink, emit_info, state);
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Slli,
            rd: rd,
            rs: rd.to_reg(),
            imm12: Imm12::from_i16(if 16 == out_type.bits() {
                15
            } else {
                // I8
                7
            }),
        }
        .emit(&[], sink, emit_info, state);
        // Combine the sign bit and the value part into the result.
        Inst::AluRRR {
            alu_op: AluOPRRR::Or,
            rd: rd,
            rs1: rd.to_reg(),
            rs2: spilltmp_reg(),
        }
        .emit(&[], sink, emit_info, state);
    }

    // We already have the result; jump over the NaN path.
    Inst::gen_jump(label_jump_over).emit(&[], sink, emit_info, state);
    // NaN path: move 0 into the rd register (saturating) or trap.
    sink.bind_label(label_nan, &mut state.ctrl_plane);
    if is_sat {
        Inst::load_imm12(rd, Imm12::ZERO).emit(&[], sink, emit_info, state);
    } else {
        // Non-saturating conversion of NaN is a trap.
        Inst::Udf {
            trap_code: TrapCode::BadConversionToInteger,
        }
        .emit(&[], sink, emit_info, state);
    }
    // Bind jump_over.
    sink.bind_label(label_jump_over, &mut state.ctrl_plane);
}

&Inst::LoadExtName {
    rd,
    ref name,
    offset,
} => {
    // Load an external symbol's absolute address: embed an 8-byte
    // relocated pool entry inline and `ld` from it, jumping over the
    // data so it is never executed.
    let label_data = sink.get_label();
    let label_end = sink.get_label();

    // Load the value from a label
    Inst::Load {
        rd,
        op: LoadOP::Ld,
        flags: MemFlags::trusted(),
        from: AMode::Label(label_data),
    }
    .emit(&[], sink, emit_info, state);

    // Jump over the data
    Inst::gen_jump(label_end).emit(&[], sink, emit_info, state);

    sink.bind_label(label_data, &mut state.ctrl_plane);
    sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
    sink.put8(0);

    sink.bind_label(label_end, &mut state.ctrl_plane);
}

&Inst::ElfTlsGetAddr { rd, ref name } => {
    // RISC-V's TLS GD model is slightly different from other arches.
    //
    // We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
    // of the address relative to the GOT entry. This relocation points to
    // the symbol as usual.
    //
    // However when loading the bottom 12bits of the address, we need to
    // use a label that points to the previous AUIPC instruction.
    //
    // label:
    //    auipc a0,0               # R_RISCV_TLS_GD_HI20 (symbol)
    //    addi  a0,a0,0            # R_RISCV_PCREL_LO12_I (label)
    //
    // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic

    // Create the label that is going to be published to the final binary object.
    let auipc_label = sink.get_label();
    sink.bind_label(auipc_label, &mut state.ctrl_plane);

    // Get the current PC.
    sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
    Inst::Auipc {
        rd: rd,
        imm: Imm20::from_i32(0),
    }
    .emit_uncompressed(sink, emit_info, state, start_off);

    // The `addi` here, points to the `auipc` label instead of directly to the symbol.
    sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
    Inst::AluRRImm12 {
        alu_op: AluOPRRI::Addi,
        rd: rd,
        rs: rd.to_reg(),
        imm12: Imm12::from_i16(0),
    }
    .emit_uncompressed(sink, emit_info, state, start_off);

    // Finally, call __tls_get_addr to resolve the address at runtime.
    Inst::Call {
        info: Box::new(CallInfo {
            dest: ExternalName::LibCall(LibCall::ElfTlsGetAddr),
            uses: smallvec![],
            defs: smallvec![],
            opcode: crate::ir::Opcode::TlsValue,
            caller_callconv: CallConv::SystemV,
            callee_callconv: CallConv::SystemV,
            callee_pop_size: 0,
            clobbers: PRegSet::empty(),
        }),
    }
    .emit_uncompressed(sink, emit_info, state, start_off);
}
| |
&Inst::TrapIf {
    rs1,
    rs2,
    cc,
    trap_code,
} => {
    // Conditional trap: branch past the trap opcode when the condition
    // does not hold, otherwise fall through into the `Udf` trap.
    let label_end = sink.get_label();
    let cond = IntegerCompare { kind: cc, rs1, rs2 };

    // Jump over the trap if the condition is false.
    Inst::CondBr {
        taken: CondBrTarget::Label(label_end),
        not_taken: CondBrTarget::Fallthrough,
        kind: cond.inverse(),
    }
    .emit(&[], sink, emit_info, state);
    Inst::Udf { trap_code }.emit(&[], sink, emit_info, state);

    sink.bind_label(label_end, &mut state.ctrl_plane);
}
&Inst::Udf { trap_code } => {
    // Unconditional trap: record the trap (and any pending safepoint
    // stack map) at the offset of the upcoming trap opcode, then emit
    // the trap opcode bytes themselves.
    sink.add_trap(trap_code);
    if let Some(s) = state.take_stack_map() {
        sink.add_stack_map(
            StackMapExtent::UpcomingBytes(Inst::TRAP_OPCODE.len() as u32),
            s,
        );
    }
    sink.put_data(Inst::TRAP_OPCODE);
}
&Inst::AtomicLoad { rd, ty, p } => {
    // Sequentially-consistent atomic load, implemented as
    // `fence rw,rw ; load ; fence r,rw` — the standard RISC-V fence
    // mapping for an SC load.
    // emit the fence.
    Inst::Fence {
        pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
        succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
    }
    .emit(&[], sink, emit_info, state);
    // load.
    Inst::Load {
        rd: rd,
        op: LoadOP::from_type(ty),
        flags: MemFlags::new(),
        from: AMode::RegOffset(p, 0, ty),
    }
    .emit(&[], sink, emit_info, state);
    // trailing `fence r,rw` keeps later accesses after the load.
    Inst::Fence {
        pred: Inst::FENCE_REQ_R,
        succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
    }
    .emit(&[], sink, emit_info, state);
}
&Inst::AtomicStore { src, ty, p } => {
    // Sequentially-consistent atomic store, implemented as
    // `fence rw,w ; store` — the standard RISC-V fence mapping for an
    // SC store.
    Inst::Fence {
        pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
        succ: Inst::FENCE_REQ_W,
    }
    .emit(&[], sink, emit_info, state);
    Inst::Store {
        to: AMode::RegOffset(p, 0, ty),
        op: StoreOP::from_type(ty),
        flags: MemFlags::new(),
        src,
    }
    .emit(&[], sink, emit_info, state);
}
&Inst::FloatRound {
    op,
    rd,
    int_tmp,
    f_tmp,
    rs,
    ty,
} => {
    // Round `rs` to an integer-valued float using rounding mode `op`.
    // This sequence is ported from the glibc ceil/floor/... implementations:
    // round by converting to an integer and back, with special cases for
    // NaN inputs and for magnitudes too large to need rounding.
    let label_nan = sink.get_label();
    let label_x = sink.get_label();
    let label_jump_over = sink.get_label();
    // check if is nan.
    Inst::emit_not_nan(int_tmp, rs, ty).emit(&[], sink, emit_info, state);
    Inst::CondBr {
        taken: CondBrTarget::Label(label_nan),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::Equal,
            rs1: int_tmp.to_reg(),
            rs2: zero_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
    // Raw bits of 2^MANTISSA_DIGITS: the smallest positive float at (or
    // beyond) which every representable value is already an integer.
    fn max_value_need_round(ty: Type) -> u64 {
        match ty {
            F32 => {
                let x: u64 = 1 << f32::MANTISSA_DIGITS;
                let x = x as f32;
                let x = u32::from_le_bytes(x.to_le_bytes());
                x as u64
            }
            F64 => {
                let x: u64 = 1 << f64::MANTISSA_DIGITS;
                let x = x as f64;
                u64::from_le_bytes(x.to_le_bytes())
            }
            _ => unreachable!(),
        }
    }
    // load max value need to round.
    if ty == F32 {
        Inst::load_fp_constant32(f_tmp, max_value_need_round(ty) as u32, &mut |_| {
            writable_spilltmp_reg()
        })
    } else {
        Inst::load_fp_constant64(f_tmp, max_value_need_round(ty), &mut |_| {
            writable_spilltmp_reg()
        })
    }
    .into_iter()
    .for_each(|i| i.emit(&[], sink, emit_info, state));

    // get abs value.
    Inst::emit_fabs(rd, rs, ty).emit(&[], sink, emit_info, state);

    // branch if f_tmp < rd
    Inst::FpuRRR {
        frm: None,
        alu_op: if ty == F32 {
            FpuOPRRR::FltS
        } else {
            FpuOPRRR::FltD
        },
        rd: int_tmp,
        rs1: f_tmp.to_reg(),
        rs2: rd.to_reg(),
    }
    .emit(&[], sink, emit_info, state);

    // |rs| > 2^MANTISSA_DIGITS: already integral, return rs unchanged.
    Inst::CondBr {
        taken: CondBrTarget::Label(label_x),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::NotEqual,
            rs1: int_tmp.to_reg(),
            rs2: zero_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);

    //convert to int, using the requested rounding mode.
    Inst::FpuRR {
        alu_op: FpuOPRR::float_convert_2_int_op(ty, true, I64),
        frm: Some(op.to_frm()),
        rd: int_tmp,
        rs: rs,
    }
    .emit(&[], sink, emit_info, state);
    //convert back.
    Inst::FpuRR {
        alu_op: if ty == F32 {
            FpuOPRR::FcvtSL
        } else {
            FpuOPRR::FcvtDL
        },
        frm: Some(op.to_frm()),
        rd,
        rs: int_tmp.to_reg(),
    }
    .emit(&[], sink, emit_info, state);
    // copy sign, so e.g. rounding -0.5 keeps the negative sign.
    Inst::FpuRRR {
        alu_op: if ty == F32 {
            FpuOPRRR::FsgnjS
        } else {
            FpuOPRRR::FsgnjD
        },
        frm: None,
        rd,
        rs1: rd.to_reg(),
        rs2: rs,
    }
    .emit(&[], sink, emit_info, state);
    // jump over.
    Inst::gen_jump(label_jump_over).emit(&[], sink, emit_info, state);
    // here is nan: `rs + rs` propagates a NaN result into rd.
    sink.bind_label(label_nan, &mut state.ctrl_plane);
    Inst::FpuRRR {
        alu_op: if ty == F32 {
            FpuOPRRR::FaddS
        } else {
            FpuOPRRR::FaddD
        },
        frm: None,
        rd: rd,
        rs1: rs,
        rs2: rs,
    }
    .emit(&[], sink, emit_info, state);
    Inst::gen_jump(label_jump_over).emit(&[], sink, emit_info, state);
    // here select origin x.
    sink.bind_label(label_x, &mut state.ctrl_plane);
    Inst::gen_move(rd, rs, ty).emit(&[], sink, emit_info, state);
    sink.bind_label(label_jump_over, &mut state.ctrl_plane);
}
| |
&Inst::FloatSelect {
    op,
    rd,
    tmp,
    rs1,
    rs2,
    ty,
} => {
    // Float min/max with explicit NaN handling: if either input is NaN,
    // a NaN bit pattern (from `op.snan_bits`) is materialized into `rd`;
    // otherwise the hardware min/max result is used, with extra handling
    // so that min/max over (+0, -0) picks the correctly-signed zero.
    let label_nan = sink.get_label();
    let label_jump_over = sink.get_label();
    // check if rs1 is nan.
    Inst::emit_not_nan(tmp, rs1, ty).emit(&[], sink, emit_info, state);
    Inst::CondBr {
        taken: CondBrTarget::Label(label_nan),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::Equal,
            rs1: tmp.to_reg(),
            rs2: zero_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
    // check if rs2 is nan.
    Inst::emit_not_nan(tmp, rs2, ty).emit(&[], sink, emit_info, state);
    Inst::CondBr {
        taken: CondBrTarget::Label(label_nan),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::Equal,
            rs1: tmp.to_reg(),
            rs2: zero_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
    // here rs1 and rs2 is not nan.
    Inst::FpuRRR {
        alu_op: op.to_fpuoprrr(ty),
        frm: None,
        rd: rd,
        rs1: rs1,
        rs2: rs2,
    }
    .emit(&[], sink, emit_info, state);
    // special handle for +0 or -0.
    {
        // check is rs1 and rs2 all equal to zero.
        let label_done = sink.get_label();
        {
            // if rs1 == 0
            let mut insts = Inst::emit_if_float_not_zero(
                tmp,
                rs1,
                ty,
                CondBrTarget::Label(label_done),
                CondBrTarget::Fallthrough,
            );
            insts.extend(Inst::emit_if_float_not_zero(
                tmp,
                rs2,
                ty,
                CondBrTarget::Label(label_done),
                CondBrTarget::Fallthrough,
            ));
            insts
                .iter()
                .for_each(|i| i.emit(&[], sink, emit_info, state));
        }
        // Both inputs are zero: combine their raw bit patterns so the
        // result carries the right sign (And for max, Or for min —
        // only the sign bit can differ between +0 and -0).
        Inst::FpuRR {
            alu_op: FpuOPRR::move_f_to_x_op(ty),
            frm: None,
            rd: tmp,
            rs: rs1,
        }
        .emit(&[], sink, emit_info, state);
        Inst::FpuRR {
            alu_op: FpuOPRR::move_f_to_x_op(ty),
            frm: None,
            rd: writable_spilltmp_reg(),
            rs: rs2,
        }
        .emit(&[], sink, emit_info, state);
        Inst::AluRRR {
            alu_op: if op == FloatSelectOP::Max {
                AluOPRRR::And
            } else {
                AluOPRRR::Or
            },
            rd: tmp,
            rs1: tmp.to_reg(),
            rs2: spilltmp_reg(),
        }
        .emit(&[], sink, emit_info, state);
        // move back to rd.
        Inst::FpuRR {
            alu_op: FpuOPRR::move_x_to_f_op(ty),
            frm: None,
            rd,
            rs: tmp.to_reg(),
        }
        .emit(&[], sink, emit_info, state);
        // end of the ±0 special case.
        sink.bind_label(label_done, &mut state.ctrl_plane);
    }
    // we have the result, jump over.
    Inst::gen_jump(label_jump_over).emit(&[], sink, emit_info, state);
    // here is nan.
    sink.bind_label(label_nan, &mut state.ctrl_plane);
    op.snan_bits(tmp, ty)
        .into_iter()
        .for_each(|i| i.emit(&[], sink, emit_info, state));
    // move to rd.
    Inst::FpuRR {
        alu_op: FpuOPRR::move_x_to_f_op(ty),
        frm: None,
        rd,
        rs: tmp.to_reg(),
    }
    .emit(&[], sink, emit_info, state);
    sink.bind_label(label_jump_over, &mut state.ctrl_plane);
}
&Inst::Popcnt {
    sum,
    tmp,
    step,
    rs,
    ty,
} => {
    // Naive bit-at-a-time population count:
    //   sum  = 0
    //   step = ty.bits()            (loop counter)
    //   tmp  = 1 << (ty.bits() - 1) (probe mask, MSB first)
    //   loop: if step <= 0 -> done
    //         if rs & tmp != 0 { sum += 1 }
    //         step -= 1; tmp >>= 1; repeat
    // load 0 to sum , init.
    Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state);
    // load ty.bits() into step (loop counter).
    Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)).emit(
        &[],
        sink,
        emit_info,
        state,
    );
    // build the probe mask: tmp = 1 << (ty.bits() - 1).
    Inst::load_imm12(tmp, Imm12::ONE).emit(&[], sink, emit_info, state);
    Inst::AluRRImm12 {
        alu_op: AluOPRRI::Slli,
        rd: tmp,
        rs: tmp.to_reg(),
        imm12: Imm12::from_i16((ty.bits() - 1) as i16),
    }
    .emit(&[], sink, emit_info, state);
    let label_done = sink.get_label();
    let label_loop = sink.get_label();
    sink.bind_label(label_loop, &mut state.ctrl_plane);
    Inst::CondBr {
        taken: CondBrTarget::Label(label_done),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::SignedLessThanOrEqual,
            rs1: step.to_reg(),
            rs2: zero_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
    // test and add sum.
    {
        Inst::AluRRR {
            alu_op: AluOPRRR::And,
            rd: writable_spilltmp_reg2(),
            rs1: tmp.to_reg(),
            rs2: rs,
        }
        .emit(&[], sink, emit_info, state);
        let label_over = sink.get_label();
        Inst::CondBr {
            taken: CondBrTarget::Label(label_over),
            not_taken: CondBrTarget::Fallthrough,
            kind: IntegerCompare {
                kind: IntCC::Equal,
                rs1: zero_reg(),
                rs2: spilltmp_reg2(),
            },
        }
        .emit(&[], sink, emit_info, state);
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
            rd: sum,
            rs: sum.to_reg(),
            imm12: Imm12::ONE,
        }
        .emit(&[], sink, emit_info, state);
        sink.bind_label(label_over, &mut state.ctrl_plane);
    }
    // set step and tmp.
    {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
            rd: step,
            rs: step.to_reg(),
            imm12: Imm12::from_i16(-1),
        }
        .emit(&[], sink, emit_info, state);
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Srli,
            rd: tmp,
            rs: tmp.to_reg(),
            imm12: Imm12::ONE,
        }
        .emit(&[], sink, emit_info, state);
        Inst::gen_jump(label_loop).emit(&[], sink, emit_info, state);
    }
    sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::Rev8 { rs, rd, tmp, step } => {
    // Byte-reverse a 64-bit value with a loop:
    //   rd = 0; tmp = rs; step = 56
    //   while step >= 0: rd |= (tmp & 0xff) << step; tmp >>= 8; step -= 8
    // init.
    Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state);
    Inst::gen_move(tmp, rs, I64).emit(&[], sink, emit_info, state);
    // load 56 to step.
    Inst::load_imm12(step, Imm12::from_i16(56)).emit(&[], sink, emit_info, state);
    let label_done = sink.get_label();
    let label_loop = sink.get_label();
    sink.bind_label(label_loop, &mut state.ctrl_plane);
    Inst::CondBr {
        taken: CondBrTarget::Label(label_done),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::SignedLessThan,
            rs1: step.to_reg(),
            rs2: zero_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
    // extract the current low byte of tmp.
    Inst::AluRRImm12 {
        alu_op: AluOPRRI::Andi,
        rd: writable_spilltmp_reg(),
        rs: tmp.to_reg(),
        imm12: Imm12::from_i16(255),
    }
    .emit(&[], sink, emit_info, state);
    // shift it up into its mirrored position.
    Inst::AluRRR {
        alu_op: AluOPRRR::Sll,
        rd: writable_spilltmp_reg(),
        rs1: spilltmp_reg(),
        rs2: step.to_reg(),
    }
    .emit(&[], sink, emit_info, state);

    // accumulate into the result.
    Inst::AluRRR {
        alu_op: AluOPRRR::Or,
        rd: rd,
        rs1: rd.to_reg(),
        rs2: spilltmp_reg(),
    }
    .emit(&[], sink, emit_info, state);

    {
        // reset step
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
            rd: step,
            rs: step.to_reg(),
            imm12: Imm12::from_i16(-8),
        }
        .emit(&[], sink, emit_info, state);
        //reset tmp: expose the next byte.
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Srli,
            rd: tmp,
            rs: tmp.to_reg(),
            imm12: Imm12::from_i16(8),
        }
        .emit(&[], sink, emit_info, state);
        // loop.
        Inst::gen_jump(label_loop).emit(&[], sink, emit_info, state);
    }

    sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::Cltz {
    sum,
    tmp,
    step,
    rs,
    leading,
    ty,
} => {
    // Count leading (`leading == true`) or trailing zeros with a probe
    // loop:
    //   sum = 0; step = ty.bits()
    //   tmp = probe bit: MSB when counting leading zeros, LSB otherwise
    //   loop: exit when the probe finds a set bit (rs & tmp != 0) or
    //         all bits are scanned; else sum += 1 and move the probe
    //         toward the other end.
    // load 0 to sum , init.
    Inst::gen_move(sum, zero_reg(), I64).emit(&[], sink, emit_info, state);
    // load ty.bits() into step (loop counter).
    Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)).emit(
        &[],
        sink,
        emit_info,
        state,
    );
    // probe starts at the LSB; move it to the MSB for leading-zero count.
    Inst::load_imm12(tmp, Imm12::ONE).emit(&[], sink, emit_info, state);
    if leading {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Slli,
            rd: tmp,
            rs: tmp.to_reg(),
            imm12: Imm12::from_i16((ty.bits() - 1) as i16),
        }
        .emit(&[], sink, emit_info, state);
    }
    let label_done = sink.get_label();
    let label_loop = sink.get_label();
    sink.bind_label(label_loop, &mut state.ctrl_plane);
    Inst::CondBr {
        taken: CondBrTarget::Label(label_done),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::SignedLessThanOrEqual,
            rs1: step.to_reg(),
            rs2: zero_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
    // test and add sum.
    {
        Inst::AluRRR {
            alu_op: AluOPRRR::And,
            rd: writable_spilltmp_reg2(),
            rs1: tmp.to_reg(),
            rs2: rs,
        }
        .emit(&[], sink, emit_info, state);
        // first set bit found: the count is complete.
        Inst::CondBr {
            taken: CondBrTarget::Label(label_done),
            not_taken: CondBrTarget::Fallthrough,
            kind: IntegerCompare {
                kind: IntCC::NotEqual,
                rs1: zero_reg(),
                rs2: spilltmp_reg2(),
            },
        }
        .emit(&[], sink, emit_info, state);
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
            rd: sum,
            rs: sum.to_reg(),
            imm12: Imm12::ONE,
        }
        .emit(&[], sink, emit_info, state);
    }
    // set step and tmp.
    {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
            rd: step,
            rs: step.to_reg(),
            imm12: Imm12::from_i16(-1),
        }
        .emit(&[], sink, emit_info, state);
        Inst::AluRRImm12 {
            alu_op: if leading {
                AluOPRRI::Srli
            } else {
                AluOPRRI::Slli
            },
            rd: tmp,
            rs: tmp.to_reg(),
            imm12: Imm12::ONE,
        }
        .emit(&[], sink, emit_info, state);
        Inst::gen_jump(label_loop).emit(&[], sink, emit_info, state);
    }
    sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::Brev8 {
    rs,
    ty,
    step,
    tmp,
    tmp2,
    rd,
} => {
    // Reverse the bits within each byte (brev8) with a loop:
    //   tmp  = read probe; starts at the MSB and moves right one bit
    //          per iteration;
    //   tmp2 = write probe; starts at the LSB of the top byte, moves
    //          left within a byte, and drops by 15 bits at each byte
    //          boundary (net effect: down one byte, back to its LSB);
    //   rd   = accumulates the per-byte-reversed result.
    Inst::gen_move(rd, zero_reg(), I64).emit(&[], sink, emit_info, state);
    Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16)).emit(
        &[],
        sink,
        emit_info,
        state,
    );
    // tmp = 1 << (ty.bits() - 1): the MSB read probe.
    Inst::load_imm12(tmp, Imm12::ONE).emit(&[], sink, emit_info, state);
    Inst::AluRRImm12 {
        alu_op: AluOPRRI::Slli,
        rd: tmp,
        rs: tmp.to_reg(),
        imm12: Imm12::from_i16((ty.bits() - 1) as i16),
    }
    .emit(&[], sink, emit_info, state);
    // tmp2 = 1 << (ty.bits() - 8): LSB of the most-significant byte.
    Inst::load_imm12(tmp2, Imm12::ONE).emit(&[], sink, emit_info, state);
    Inst::AluRRImm12 {
        alu_op: AluOPRRI::Slli,
        rd: tmp2,
        rs: tmp2.to_reg(),
        imm12: Imm12::from_i16((ty.bits() - 8) as i16),
    }
    .emit(&[], sink, emit_info, state);

    let label_done = sink.get_label();
    let label_loop = sink.get_label();
    sink.bind_label(label_loop, &mut state.ctrl_plane);
    Inst::CondBr {
        taken: CondBrTarget::Label(label_done),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::SignedLessThanOrEqual,
            rs1: step.to_reg(),
            rs2: zero_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
    // test and set bit.
    {
        Inst::AluRRR {
            alu_op: AluOPRRR::And,
            rd: writable_spilltmp_reg2(),
            rs1: tmp.to_reg(),
            rs2: rs,
        }
        .emit(&[], sink, emit_info, state);
        let label_over = sink.get_label();
        Inst::CondBr {
            taken: CondBrTarget::Label(label_over),
            not_taken: CondBrTarget::Fallthrough,
            kind: IntegerCompare {
                kind: IntCC::Equal,
                rs1: zero_reg(),
                rs2: spilltmp_reg2(),
            },
        }
        .emit(&[], sink, emit_info, state);
        Inst::AluRRR {
            alu_op: AluOPRRR::Or,
            rd: rd,
            rs1: rd.to_reg(),
            rs2: tmp2.to_reg(),
        }
        .emit(&[], sink, emit_info, state);
        sink.bind_label(label_over, &mut state.ctrl_plane);
    }
    // set step and tmp.
    {
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Addi,
            rd: step,
            rs: step.to_reg(),
            imm12: Imm12::from_i16(-1),
        }
        .emit(&[], sink, emit_info, state);
        Inst::AluRRImm12 {
            alu_op: AluOPRRI::Srli,
            rd: tmp,
            rs: tmp.to_reg(),
            imm12: Imm12::ONE,
        }
        .emit(&[], sink, emit_info, state);
        {
            // reset tmp2:
            // if (step % 8 == 0) then tmp2 = tmp2 >> 15  (move to the next lower byte)
            // if (step % 8 != 0) then tmp2 = tmp2 << 1
            let label_over = sink.get_label();
            let label_sll_1 = sink.get_label();
            Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8)).emit(
                &[],
                sink,
                emit_info,
                state,
            );
            Inst::AluRRR {
                alu_op: AluOPRRR::Rem,
                rd: writable_spilltmp_reg2(),
                rs1: step.to_reg(),
                rs2: spilltmp_reg2(),
            }
            .emit(&[], sink, emit_info, state);
            Inst::CondBr {
                taken: CondBrTarget::Label(label_sll_1),
                not_taken: CondBrTarget::Fallthrough,
                kind: IntegerCompare {
                    kind: IntCC::NotEqual,
                    rs1: spilltmp_reg2(),
                    rs2: zero_reg(),
                },
            }
            .emit(&[], sink, emit_info, state);
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Srli,
                rd: tmp2,
                rs: tmp2.to_reg(),
                imm12: Imm12::from_i16(15),
            }
            .emit(&[], sink, emit_info, state);
            Inst::gen_jump(label_over).emit(&[], sink, emit_info, state);
            sink.bind_label(label_sll_1, &mut state.ctrl_plane);
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd: tmp2,
                rs: tmp2.to_reg(),
                imm12: Imm12::ONE,
            }
            .emit(&[], sink, emit_info, state);
            sink.bind_label(label_over, &mut state.ctrl_plane);
        }
        Inst::gen_jump(label_loop).emit(&[], sink, emit_info, state);
    }
    sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::StackProbeLoop {
    guard_size,
    probe_count,
    tmp: guard_size_tmp,
} => {
    // Probe `probe_count` guard pages below SP, one byte-store per page:
    //   step = guard_size * probe_count   (total offset, counts down)
    //   while step > guard_size: sb zero, (sp - step); step -= guard_size
    let step = writable_spilltmp_reg();
    Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
        .iter()
        .for_each(|i| i.emit(&[], sink, emit_info, state));
    Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
        .iter()
        .for_each(|i| i.emit(&[], sink, emit_info, state));

    let loop_start = sink.get_label();
    let label_done = sink.get_label();
    sink.bind_label(loop_start, &mut state.ctrl_plane);
    Inst::CondBr {
        taken: CondBrTarget::Label(label_done),
        not_taken: CondBrTarget::Fallthrough,
        kind: IntegerCompare {
            kind: IntCC::UnsignedLessThanOrEqual,
            rs1: step.to_reg(),
            rs2: guard_size_tmp.to_reg(),
        },
    }
    .emit(&[], sink, emit_info, state);
    // compute address: spilltmp2 = sp - step.
    Inst::AluRRR {
        alu_op: AluOPRRR::Sub,
        rd: writable_spilltmp_reg2(),
        rs1: stack_reg(),
        rs2: step.to_reg(),
    }
    .emit(&[], sink, emit_info, state);
    // touch the page with a single byte store of zero.
    Inst::Store {
        to: AMode::RegOffset(spilltmp_reg2(), 0, I8),
        op: StoreOP::Sb,
        flags: MemFlags::new(),
        src: zero_reg(),
    }
    .emit(&[], sink, emit_info, state);
    // reset step.
    Inst::AluRRR {
        alu_op: AluOPRRR::Sub,
        rd: step,
        rs1: step.to_reg(),
        rs2: guard_size_tmp.to_reg(),
    }
    .emit(&[], sink, emit_info, state);
    Inst::gen_jump(loop_start).emit(&[], sink, emit_info, state);
    sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::VecAluRRRImm5 {
    op,
    vd,
    vd_src,
    imm,
    vs2,
    ref mask,
    ..
} => {
    // Destructive form: `vd` is both a source and the destination, so
    // regalloc must have assigned them the same register.
    debug_assert_eq!(vd.to_reg(), vd_src);

    sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
}
&Inst::VecAluRRRR {
    op,
    vd,
    vd_src,
    vs1,
    vs2,
    ref mask,
    ..
} => {
    // Destructive four-operand form: `vd` doubles as a source operand.
    debug_assert_eq!(vd.to_reg(), vd_src);

    sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
}
&Inst::VecAluRRR {
    op,
    vd,
    vs1,
    vs2,
    ref mask,
    ..
} => {
    sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
}
&Inst::VecAluRRImm5 {
    op,
    vd,
    imm,
    vs2,
    ref mask,
    ..
} => {
    sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
}
&Inst::VecAluRR {
    op,
    vd,
    vs,
    ref mask,
    ..
} => {
    sink.put4(encode_valu_rr(op, vd, vs, *mask));
}
&Inst::VecAluRImm5 {
    op,
    vd,
    imm,
    ref mask,
    ..
} => {
    sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
}
&Inst::VecSetState { rd, ref vstate } => {
    // Vector-configuration instruction (major opcode 0x57) with a
    // static AVL and the requested vtype.
    sink.put4(encode_vcfg_imm(
        0x57,
        rd.to_reg(),
        vstate.avl.unwrap_static(),
        &vstate.vtype,
    ));

    // Update the current vector emit state.
    state.vstate = EmitVState::Known(vstate.clone());
}
| |
&Inst::VecLoad {
    eew,
    to,
    ref from,
    ref mask,
    flags,
    ..
} => {
    // Vector Loads don't support immediate offsets, so we need to load it into a register.
    let addr = match from {
        VecAMode::UnitStride { base } => {
            let base_reg = base.get_base_register();
            let offset = base.get_offset_with_state(state);

            // Reg+0 Offset can be directly encoded
            if let (Some(base_reg), 0) = (base_reg, offset) {
                base_reg
            } else {
                // Otherwise load the address into a reg and load from it.
                let tmp = writable_spilltmp_reg();
                Inst::LoadAddr {
                    rd: tmp,
                    mem: base.clone(),
                }
                .emit(&[], sink, emit_info, state);
                tmp.to_reg()
            }
        }
    };

    let srcloc = state.cur_srcloc();
    if !srcloc.is_default() && !flags.notrap() {
        // Register the offset at which the actual load instruction starts.
        sink.add_trap(TrapCode::HeapOutOfBounds);
    }

    // 0x07 is the LOAD-FP major opcode shared by vector loads.
    sink.put4(encode_vmem_load(
        0x07,
        to.to_reg(),
        eew,
        addr,
        from.lumop(),
        *mask,
        from.mop(),
        from.nf(),
    ));
}
| |
&Inst::VecStore {
    eew,
    ref to,
    from,
    ref mask,
    flags,
    ..
} => {
    // Vector Stores don't support immediate offsets, so we need to load it into a register.
    let addr = match to {
        VecAMode::UnitStride { base } => {
            let base_reg = base.get_base_register();
            let offset = base.get_offset_with_state(state);

            // Reg+0 Offset can be directly encoded
            if let (Some(base_reg), 0) = (base_reg, offset) {
                base_reg
            } else {
                // Otherwise load the address into a reg and store through it.
                let tmp = writable_spilltmp_reg();
                Inst::LoadAddr {
                    rd: tmp,
                    mem: base.clone(),
                }
                .emit(&[], sink, emit_info, state);
                tmp.to_reg()
            }
        }
    };

    let srcloc = state.cur_srcloc();
    if !srcloc.is_default() && !flags.notrap() {
        // Register the offset at which the actual store instruction starts.
        sink.add_trap(TrapCode::HeapOutOfBounds);
    }

    // 0x27 is the STORE-FP major opcode shared by vector stores.
    sink.put4(encode_vmem_store(
        0x27,
        from,
        eew,
        addr,
        to.sumop(),
        *mask,
        to.mop(),
        to.nf(),
    ));
}
| }; |
| } |
| |
| fn allocate(self, allocs: &mut AllocationConsumer) -> Self { |
| fn alloc_value_regs( |
| orgin: &ValueRegs<Reg>, |
| alloc: &mut AllocationConsumer, |
| ) -> ValueRegs<Reg> { |
| match orgin.regs().len() { |
| 1 => ValueRegs::one(alloc.next(orgin.regs()[0])), |
| 2 => ValueRegs::two(alloc.next(orgin.regs()[0]), alloc.next(orgin.regs()[1])), |
| _ => unreachable!(), |
| } |
| } |
| |
| fn alloc_writable_value_regs( |
| origin: &ValueRegs<Writable<Reg>>, |
| alloc: &mut AllocationConsumer, |
| ) -> ValueRegs<Writable<Reg>> { |
| alloc_value_regs(&origin.map(|r| r.to_reg()), alloc).map(Writable::from_reg) |
| } |
| |
| match self { |
| Inst::Nop0 => self, |
| Inst::Nop4 => self, |
| Inst::RawData { .. } => self, |
| Inst::Lui { rd, imm } => Inst::Lui { |
| rd: allocs.next_writable(rd), |
| imm, |
| }, |
| Inst::LoadInlineConst { rd, ty, imm } => Inst::LoadInlineConst { |
| rd: allocs.next_writable(rd), |
| ty, |
| imm, |
| }, |
| Inst::FpuRR { |
| frm, |
| alu_op, |
| rd, |
| rs, |
| } => Inst::FpuRR { |
| rs: allocs.next(rs), |
| rd: allocs.next_writable(rd), |
| frm, |
| alu_op, |
| }, |
| Inst::FpuRRRR { |
| alu_op, |
| rd, |
| rs1, |
| rs2, |
| rs3, |
| frm, |
| } => Inst::FpuRRRR { |
| rs1: allocs.next(rs1), |
| rs2: allocs.next(rs2), |
| rs3: allocs.next(rs3), |
| rd: allocs.next_writable(rd), |
| alu_op, |
| frm, |
| }, |
| Inst::FpuRRR { |
| alu_op, |
| frm, |
| rd, |
| rs1, |
| rs2, |
| } => Inst::FpuRRR { |
| alu_op, |
| frm, |
| rs1: allocs.next(rs1), |
| rs2: allocs.next(rs2), |
| rd: allocs.next_writable(rd), |
| }, |
| Inst::Unwind { .. } => self, |
| Inst::DummyUse { reg } => Inst::DummyUse { |
| reg: allocs.next(reg), |
| }, |
| Inst::AluRRR { |
| alu_op, |
| rd, |
| rs1, |
| rs2, |
| } => Inst::AluRRR { |
| alu_op, |
| rs1: allocs.next(rs1), |
| rs2: allocs.next(rs2), |
| rd: allocs.next_writable(rd), |
| }, |
| Inst::AluRRImm12 { |
| alu_op, |
| rd, |
| rs, |
| imm12, |
| } => Inst::AluRRImm12 { |
| alu_op, |
| rs: allocs.next(rs), |
| rd: allocs.next_writable(rd), |
| imm12, |
| }, |
| Inst::CsrReg { op, rd, rs, csr } => Inst::CsrReg { |
| op, |
| rs: allocs.next(rs), |
| rd: allocs.next_writable(rd), |
| csr, |
| }, |
| Inst::CsrImm { op, rd, csr, imm } => Inst::CsrImm { |
| op, |
| rd: allocs.next_writable(rd), |
| csr, |
| imm, |
| }, |
| Inst::Load { |
| rd, |
| op, |
| from, |
| flags, |
| } => Inst::Load { |
| from: from.clone().with_allocs(allocs), |
| rd: allocs.next_writable(rd), |
| op, |
| flags, |
| }, |
| Inst::Store { op, src, flags, to } => Inst::Store { |
| op, |
| flags, |
| to: to.clone().with_allocs(allocs), |
| src: allocs.next(src), |
| }, |
| |
| Inst::Args { .. } => self, |
| Inst::Rets { .. } => self, |
| Inst::Ret { .. } => self, |
| |
| Inst::Extend { |
| rd, |
| rn, |
| signed, |
| from_bits, |
| to_bits, |
| } => Inst::Extend { |
| rn: allocs.next(rn), |
| rd: allocs.next_writable(rd), |
| signed, |
| from_bits, |
| to_bits, |
| }, |
| Inst::AdjustSp { .. } => self, |
| |
| Inst::Call { .. } => self, |
| Inst::CallInd { mut info } => { |
| info.rn = allocs.next(info.rn); |
| Inst::CallInd { info } |
| } |
| |
| Inst::ReturnCall { callee, info } => { |
| for u in &info.uses { |
| let _ = allocs.next(u.vreg); |
| } |
| |
| Inst::ReturnCall { callee, info } |
| } |
| |
| Inst::ReturnCallInd { callee, info } => { |
| let callee = allocs.next(callee); |
| |
| for u in &info.uses { |
| let _ = allocs.next(u.vreg); |
| } |
| |
| Inst::ReturnCallInd { callee, info } |
| } |
| |
| Inst::Jal { .. } => self, |
| |
| Inst::CondBr { |
| taken, |
| not_taken, |
| mut kind, |
| } => { |
| kind.rs1 = allocs.next(kind.rs1); |
| kind.rs2 = allocs.next(kind.rs2); |
| Inst::CondBr { |
| taken, |
| not_taken, |
| kind, |
| } |
| } |
| |
| Inst::Mov { rd, rm, ty } => Inst::Mov { |
| ty, |
| rm: allocs.next(rm), |
| rd: allocs.next_writable(rd), |
| }, |
| |
| Inst::MovFromPReg { rd, rm } => { |
| debug_assert!([px_reg(2), px_reg(8)].contains(&rm)); |
| let rd = allocs.next_writable(rd); |
| Inst::MovFromPReg { rd, rm } |
| } |
| |
| Inst::BrTable { |
| index, |
| tmp1, |
| tmp2, |
| targets, |
| } => Inst::BrTable { |
| index: allocs.next(index), |
| tmp1: allocs.next_writable(tmp1), |
| tmp2: allocs.next_writable(tmp2), |
| targets, |
| }, |
| |
| Inst::VirtualSPOffsetAdj { .. } => self, |
| Inst::Atomic { |
| op, |
| rd, |
| addr, |
| src, |
| amo, |
| } => Inst::Atomic { |
| op, |
| amo, |
| addr: allocs.next(addr), |
| src: allocs.next(src), |
| rd: allocs.next_writable(rd), |
| }, |
| Inst::Fence { .. } => self, |
| Inst::Auipc { rd, imm } => Inst::Auipc { |
| rd: allocs.next_writable(rd), |
| imm, |
| }, |
| |
| Inst::LoadAddr { rd, mem } => Inst::LoadAddr { |
| mem: mem.with_allocs(allocs), |
| rd: allocs.next_writable(rd), |
| }, |
| |
| Inst::Select { |
| ref dst, |
| condition, |
| ref x, |
| ref y, |
| } => { |
| let mut condition: IntegerCompare = condition.clone(); |
| condition.rs1 = allocs.next(condition.rs1); |
| condition.rs2 = allocs.next(condition.rs2); |
| let x = alloc_value_regs(x, allocs); |
| let y = alloc_value_regs(y, allocs); |
| let dst = alloc_writable_value_regs(dst, allocs); |
| |
| Inst::Select { |
| dst, |
| condition, |
| x, |
| y, |
| } |
| } |
| Inst::Jalr { rd, base, offset } => { |
| // For some reason this does not use base? |
| debug_assert!(base.is_real()); |
| Inst::Jalr { |
| rd: allocs.next_writable(rd), |
| base, |
| offset, |
| } |
| } |
| |
| Inst::EBreak => self, |
| |
| Inst::Icmp { |
| cc, |
| rd, |
| ref a, |
| ref b, |
| ty, |
| } => Inst::Icmp { |
| cc, |
| a: alloc_value_regs(a, allocs), |
| b: alloc_value_regs(b, allocs), |
| rd: allocs.next_writable(rd), |
| ty, |
| }, |
| |
| Inst::AtomicCas { |
| offset, |
| t0, |
| dst, |
| e, |
| addr, |
| v, |
| ty, |
| } => Inst::AtomicCas { |
| ty, |
| offset: allocs.next(offset), |
| e: allocs.next(e), |
| addr: allocs.next(addr), |
| v: allocs.next(v), |
| t0: allocs.next_writable(t0), |
| dst: allocs.next_writable(dst), |
| }, |
| |
| Inst::AtomicRmwLoop { |
| offset, |
| op, |
| dst, |
| ty, |
| p, |
| x, |
| t0, |
| } => Inst::AtomicRmwLoop { |
| op, |
| ty, |
| offset: allocs.next(offset), |
| p: allocs.next(p), |
| x: allocs.next(x), |
| t0: allocs.next_writable(t0), |
| dst: allocs.next_writable(dst), |
| }, |
| |
| Inst::FcvtToInt { |
| is_sat, |
| rd, |
| rs, |
| is_signed, |
| in_type, |
| out_type, |
| tmp, |
| } => Inst::FcvtToInt { |
| is_sat, |
| is_signed, |
| in_type, |
| out_type, |
| rs: allocs.next(rs), |
| tmp: allocs.next_writable(tmp), |
| rd: allocs.next_writable(rd), |
| }, |
| |
| Inst::LoadExtName { rd, name, offset } => Inst::LoadExtName { |
| rd: allocs.next_writable(rd), |
| name, |
| offset, |
| }, |
| |
| Inst::ElfTlsGetAddr { rd, name } => { |
| let rd = allocs.next_writable(rd); |
| debug_assert_eq!(a0(), rd.to_reg()); |
| Inst::ElfTlsGetAddr { rd, name } |
| } |
| |
| Inst::TrapIf { |
| rs1, |
| rs2, |
| cc, |
| trap_code, |
| } => Inst::TrapIf { |
| rs1: allocs.next(rs1), |
| rs2: allocs.next(rs2), |
| cc, |
| trap_code, |
| }, |
| |
| Inst::Udf { .. } => self, |
| |
| Inst::AtomicLoad { rd, ty, p } => Inst::AtomicLoad { |
| ty, |
| p: allocs.next(p), |
| rd: allocs.next_writable(rd), |
| }, |
| |
| Inst::AtomicStore { src, ty, p } => Inst::AtomicStore { |
| ty, |
| src: allocs.next(src), |
| p: allocs.next(p), |
| }, |
| |
| Inst::FloatRound { |
| op, |
| rd, |
| int_tmp, |
| f_tmp, |
| rs, |
| ty, |
| } => Inst::FloatRound { |
| op, |
| ty, |
| rs: allocs.next(rs), |
| int_tmp: allocs.next_writable(int_tmp), |
| f_tmp: allocs.next_writable(f_tmp), |
| rd: allocs.next_writable(rd), |
| }, |
| |
| Inst::FloatSelect { |
| op, |
| rd, |
| tmp, |
| rs1, |
| rs2, |
| ty, |
| } => Inst::FloatSelect { |
| op, |
| ty, |
| rs1: allocs.next(rs1), |
| rs2: allocs.next(rs2), |
| tmp: allocs.next_writable(tmp), |
| rd: allocs.next_writable(rd), |
| }, |
| |
| Inst::Popcnt { |
| sum, |
| tmp, |
| step, |
| rs, |
| ty, |
| } => Inst::Popcnt { |
| rs: allocs.next(rs), |
| tmp: allocs.next_writable(tmp), |
| step: allocs.next_writable(step), |
| sum: allocs.next_writable(sum), |
| ty, |
| }, |
| |
| Inst::Rev8 { rs, rd, tmp, step } => Inst::Rev8 { |
| rs: allocs.next(rs), |
| tmp: allocs.next_writable(tmp), |
| step: allocs.next_writable(step), |
| rd: allocs.next_writable(rd), |
| }, |
| |
| Inst::Cltz { |
| sum, |
| tmp, |
| step, |
| rs, |
| leading, |
| ty, |
| } => Inst::Cltz { |
| rs: allocs.next(rs), |
| tmp: allocs.next_writable(tmp), |
| step: allocs.next_writable(step), |
| sum: allocs.next_writable(sum), |
| leading, |
| ty, |
| }, |
| |
| Inst::Brev8 { |
| rs, |
| ty, |
| step, |
| tmp, |
| tmp2, |
| rd, |
| } => Inst::Brev8 { |
| rs: allocs.next(rs), |
| step: allocs.next_writable(step), |
| tmp: allocs.next_writable(tmp), |
| tmp2: allocs.next_writable(tmp2), |
| rd: allocs.next_writable(rd), |
| ty, |
| }, |
| |
| Inst::StackProbeLoop { .. } => self, |
| |
| Inst::VecAluRRRImm5 { |
| op, |
| vd, |
| vd_src, |
| imm, |
| vs2, |
| mask, |
| vstate, |
| } => Inst::VecAluRRRImm5 { |
| op, |
| vs2: allocs.next(vs2), |
| vd_src: allocs.next(vd_src), |
| vd: allocs.next_writable(vd), |
| mask: mask.with_allocs(allocs), |
| imm, |
| vstate, |
| }, |
| |
| Inst::VecAluRRRR { |
| op, |
| vd, |
| vd_src, |
| vs1, |
| vs2, |
| mask, |
| vstate, |
| } => Inst::VecAluRRRR { |
| op, |
| vs1: allocs.next(vs1), |
| vs2: allocs.next(vs2), |
| vd_src: allocs.next(vd_src), |
| vd: allocs.next_writable(vd), |
| mask: mask.with_allocs(allocs), |
| vstate, |
| }, |
| |
| Inst::VecAluRRR { |
| op, |
| vd, |
| vs1, |
| vs2, |
| mask, |
| vstate, |
| } => Inst::VecAluRRR { |
| op, |
| vs1: allocs.next(vs1), |
| vs2: allocs.next(vs2), |
| vd: allocs.next_writable(vd), |
| mask: mask.with_allocs(allocs), |
| vstate, |
| }, |
| |
| Inst::VecAluRRImm5 { |
| op, |
| vd, |
| imm, |
| vs2, |
| mask, |
| vstate, |
| } => Inst::VecAluRRImm5 { |
| op, |
| imm, |
| vs2: allocs.next(vs2), |
| vd: allocs.next_writable(vd), |
| mask: mask.with_allocs(allocs), |
| vstate, |
| }, |
| |
| Inst::VecAluRR { |
| op, |
| vd, |
| vs, |
| mask, |
| vstate, |
| } => Inst::VecAluRR { |
| op, |
| vs: allocs.next(vs), |
| vd: allocs.next_writable(vd), |
| mask: mask.with_allocs(allocs), |
| vstate, |
| }, |
| |
| Inst::VecAluRImm5 { |
| op, |
| vd, |
| imm, |
| mask, |
| vstate, |
| } => Inst::VecAluRImm5 { |
| vd: allocs.next_writable(vd), |
| mask: mask.with_allocs(allocs), |
| op, |
| imm, |
| vstate, |
| }, |
| |
| Inst::VecSetState { rd, vstate } => Inst::VecSetState { |
| rd: allocs.next_writable(rd), |
| vstate, |
| }, |
| |
| Inst::VecLoad { |
| eew, |
| to, |
| from, |
| mask, |
| flags, |
| vstate, |
| } => Inst::VecLoad { |
| eew, |
| from: from.clone().with_allocs(allocs), |
| to: allocs.next_writable(to), |
| mask: mask.with_allocs(allocs), |
| flags, |
| vstate, |
| }, |
| |
| Inst::VecStore { |
| eew, |
| to, |
| from, |
| mask, |
| flags, |
| vstate, |
| } => Inst::VecStore { |
| eew, |
| to: to.clone().with_allocs(allocs), |
| from: allocs.next(from), |
| mask: mask.with_allocs(allocs), |
| flags, |
| vstate, |
| }, |
| } |
| } |
| } |
| |
/// Emit the common prologue of a `return_call` / `return_call_indirect`
/// (tail call): restore the caller's return address and frame pointer,
/// copy the new stack arguments over the current frame's stack-argument
/// area, and point SP at the new argument area for the callee.
///
/// The actual jump to the tail callee is emitted by the caller of this
/// helper, after this sequence.
///
/// `new_stack_arg_size` / `old_stack_arg_size` are the byte sizes of the
/// callee's and the current function's stack-argument areas; the new size
/// must be 8-byte aligned (asserted below).
fn emit_return_call_common_sequence(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    new_stack_arg_size: u32,
    old_stack_arg_size: u32,
) {
    // We are emitting a dynamic number of instructions and might need an
    // island. We emit four instructions regardless of how many stack arguments
    // we have, up to two instructions for the actual call, and then two
    // instructions per word of stack argument space.
    let new_stack_words = new_stack_arg_size / 8;
    let insts = 4 + 2 + 2 * new_stack_words;
    let space_needed = insts * u32::try_from(Inst::UNCOMPRESSED_INSTRUCTION_SIZE).unwrap();
    if sink.island_needed(space_needed) {
        // Emit the island now, jumping over it so the fall-through path
        // continues with the tail-call sequence below.
        let jump_around_label = sink.get_label();
        Inst::gen_jump(jump_around_label).emit(&[], sink, emit_info, state);
        // NOTE(review): the extra 4 bytes presumably account for the jump
        // just emitted / island slack — confirm against MachBuffer docs.
        sink.emit_island(space_needed + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // Copy the new frame on top of our current frame.
    //
    // The current stack layout is the following:
    //
    //            | ...                 |
    //            +---------------------+
    //            | ...                 |
    //            | stack arguments     |
    //            | ...                 |
    //    current | return address      |
    //    frame   | old FP              | <-- FP
    //            | ...                 |
    //            | old stack slots     |
    //            | ...                 |
    //            +---------------------+
    //            | ...                 |
    //    new     | new stack arguments |
    //    frame   | ...                 | <-- SP
    //            +---------------------+
    //
    // We need to restore the old FP, restore the return address from the stack
    // to the link register, copy the new stack arguments over the old stack
    // arguments, adjust SP to point to the new stack arguments, and then jump
    // to the callee (which will push the old FP and RA again). Note that the
    // actual jump happens outside this helper function.

    assert_eq!(
        new_stack_arg_size % 8,
        0,
        "size of new stack arguments must be 8-byte aligned"
    );

    // The delta from our frame pointer to the (eventual) stack pointer value
    // when we jump to the tail callee. This is the difference in size of stack
    // arguments as well as accounting for the two words we pushed onto the
    // stack upon entry to this function (the return address and old frame
    // pointer).
    let fp_to_callee_sp = i64::from(old_stack_arg_size) - i64::from(new_stack_arg_size) + 16;

    let tmp1 = regs::writable_spilltmp_reg();
    let tmp2 = regs::writable_spilltmp_reg2();

    // Restore the return address to the link register, and load the old FP into
    // a temporary register.
    //
    // We can't put the old FP into the FP register until after we copy the
    // stack arguments into place, since that uses address modes that are
    // relative to our current FP.
    //
    // Note that the FP is saved in the function prologue for all non-leaf
    // functions, even when `preserve_frame_pointers=false`. Note also that
    // `return_call` instructions make it so that a function is considered
    // non-leaf. Therefore we always have an FP to restore here.

    // RA is at FP+8 and the saved FP at FP+0, per the frame layout diagram
    // above.
    Inst::gen_load(
        writable_link_reg(),
        AMode::FPOffset(8, I64),
        I64,
        MemFlags::trusted(),
    )
    .emit(&[], sink, emit_info, state);
    Inst::gen_load(tmp1, AMode::FPOffset(0, I64), I64, MemFlags::trusted()).emit(
        &[],
        sink,
        emit_info,
        state,
    );

    // Copy the new stack arguments over the old stack arguments.
    //
    // Iterate top-down: the destination window (FP-relative, above the
    // source) may overlap the source window (SP-relative), and since
    // `dest[i] = src[i] + fp_to_callee_sp` a store at index `i` can only
    // clobber source words at indices greater than `i`, which have already
    // been copied.
    for i in (0..new_stack_words).rev() {
        // Load the `i`th new stack argument word from the temporary stack
        // space.
        Inst::gen_load(
            tmp2,
            AMode::SPOffset(i64::from(i * 8), types::I64),
            types::I64,
            ir::MemFlags::trusted(),
        )
        .emit(&[], sink, emit_info, state);

        // Store it to its final destination on the stack, overwriting our
        // current frame.
        Inst::gen_store(
            AMode::FPOffset(fp_to_callee_sp + i64::from(i * 8), types::I64),
            tmp2.to_reg(),
            types::I64,
            ir::MemFlags::trusted(),
        )
        .emit(&[], sink, emit_info, state);
    }

    // Initialize the SP for the tail callee, deallocating the temporary stack
    // argument space and our current frame at the same time.
    //
    // NOTE(review): `Imm12::maybe_from_i64(..).unwrap()` assumes
    // `fp_to_callee_sp` fits a 12-bit signed immediate; presumably the
    // ABI bounds stack-argument sizes accordingly — confirm at callers.
    Inst::AluRRImm12 {
        alu_op: AluOPRRI::Addi,
        rd: regs::writable_stack_reg(),
        rs: regs::fp_reg(),
        imm12: Imm12::maybe_from_i64(fp_to_callee_sp).unwrap(),
    }
    .emit(&[], sink, emit_info, state);

    // Move the old FP value from the temporary into the FP register.
    Inst::Mov {
        ty: types::I64,
        rd: regs::writable_fp_reg(),
        rm: tmp1.to_reg(),
    }
    .emit(&[], sink, emit_info, state);

    // Keep the emit-state SP tracking consistent: the callee's argument
    // area is now "owned" by the callee, so it no longer counts toward our
    // virtual SP offset.
    state.virtual_sp_offset -= i64::from(new_stack_arg_size);
    trace!(
        "return_call[_ind] adjusts virtual sp offset by {} -> {}",
        new_stack_arg_size,
        state.virtual_sp_offset
    );
}