//! This module defines riscv64-specific machine instruction types.
// Some variants are not constructed yet, but we want to keep them as options for the future.
#![allow(dead_code)]
#![allow(non_camel_case_types)]
use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking};
use crate::binemit::{Addend, CodeOffset, Reloc};
pub use crate::ir::condcodes::IntCC;
use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64};
pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel};
use crate::isa::{CallConv, FunctionAlignment};
use crate::machinst::*;
use crate::{settings, CodegenError, CodegenResult};
pub use crate::ir::condcodes::FloatCC;
use alloc::vec::Vec;
use regalloc2::{PRegSet, RegClass, VReg};
use smallvec::{smallvec, SmallVec};
use std::boxed::Box;
use std::fmt::Write;
use std::string::{String, ToString};
pub mod regs;
pub use self::regs::*;
pub mod imms;
pub use self::imms::*;
pub mod args;
pub use self::args::*;
pub mod emit;
pub use self::emit::*;
pub mod vector;
pub use self::vector::*;
pub mod encode;
pub use self::encode::*;
pub mod unwind;
use crate::isa::riscv64::abi::Riscv64MachineDeps;
#[cfg(test)]
mod emit_tests;
use std::fmt::{Display, Formatter};
pub(crate) type OptionReg = Option<Reg>;
pub(crate) type OptionImm12 = Option<Imm12>;
pub(crate) type OptionUimm5 = Option<UImm5>;
pub(crate) type OptionFloatRoundingMode = Option<FRM>;
pub(crate) type VecU8 = Vec<u8>;
pub(crate) type VecWritableReg = Vec<Writable<Reg>>;
//=============================================================================
// Instructions (top level): definition
pub use crate::isa::riscv64::lower::isle::generated_code::{
AluOPRRI, AluOPRRR, AtomicOP, CsrImmOP, CsrRegOP, FClassResult, FFlagsException, FloatRoundOP,
FloatSelectOP, FpuOPRR, FpuOPRRR, FpuOPRRRR, LoadOP, MInst as Inst, StoreOP, CSR, FRM,
};
use crate::isa::riscv64::lower::isle::generated_code::{CjOp, MInst, VecAluOpRRImm5, VecAluOpRRR};
type BoxCallInfo = Box<CallInfo>;
type BoxCallIndInfo = Box<CallIndInfo>;
type BoxReturnCallInfo = Box<ReturnCallInfo>;
/// Additional information for (direct) Call instructions, left out of line to lower the size of
/// the Inst enum.
#[derive(Clone, Debug)]
pub struct CallInfo {
pub dest: ExternalName,
pub uses: CallArgList,
pub defs: CallRetList,
pub opcode: Opcode,
pub caller_callconv: CallConv,
pub callee_callconv: CallConv,
pub clobbers: PRegSet,
pub callee_pop_size: u32,
}
/// Additional information for CallInd instructions, left out of line to lower the size of the Inst
/// enum.
#[derive(Clone, Debug)]
pub struct CallIndInfo {
pub rn: Reg,
pub uses: CallArgList,
pub defs: CallRetList,
pub opcode: Opcode,
pub caller_callconv: CallConv,
pub callee_callconv: CallConv,
pub clobbers: PRegSet,
pub callee_pop_size: u32,
}
/// Additional information for `return_call[_ind]` instructions, left out of
/// line to lower the size of the `Inst` enum.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo {
pub uses: CallArgList,
pub opcode: Opcode,
pub old_stack_arg_size: u32,
pub new_stack_arg_size: u32,
}
/// A conditional branch target.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CondBrTarget {
/// An unresolved reference to a Label, as passed into
/// `lower_branch_group()`.
Label(MachLabel),
/// No jump; fall through to the next instruction.
Fallthrough,
}
impl CondBrTarget {
/// Return the target's label, if it is a label-based target.
pub(crate) fn as_label(self) -> Option<MachLabel> {
match self {
CondBrTarget::Label(l) => Some(l),
_ => None,
}
}
pub(crate) fn is_fallthrough(&self) -> bool {
self == &CondBrTarget::Fallthrough
}
}
impl Display for CondBrTarget {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
CondBrTarget::Label(l) => write!(f, "{}", l),
CondBrTarget::Fallthrough => write!(f, "0"),
}
}
}
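/// Encode an `auipc` instruction (U-type): `rd <- pc + (imm20 << 12)`.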
pub(crate) fn enc_auipc(rd: Writable<Reg>, imm: Imm20) -> u32 {
0b0010111 | reg_to_gpr_num(rd.to_reg()) << 7 | imm.bits() << 12
}
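/// Encode a `jalr` instruction (I-type): `rd <- pc + 4; pc <- (base + offset) & !1`.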
pub(crate) fn enc_jalr(rd: Writable<Reg>, base: Reg, offset: Imm12) -> u32 {
0b1100111
| reg_to_gpr_num(rd.to_reg()) << 7
| 0b000 << 12
| reg_to_gpr_num(base) << 15
| offset.bits() << 20
}
/// rd and src must have the same length.
pub(crate) fn gen_moves(rd: &[Writable<Reg>], src: &[Reg]) -> SmallInstVec<Inst> {
assert_eq!(rd.len(), src.len());
assert!(!rd.is_empty());
let mut insts = SmallInstVec::new();
for (dst, src) in rd.iter().zip(src.iter()) {
let ty = Inst::canonical_type_for_rc(dst.to_reg().class());
insts.push(Inst::gen_move(*dst, *src, ty));
}
insts
}
impl Inst {
/// RISC-V has multiple instruction sizes: 2 bytes for compressed
/// instructions and 4 bytes for regular ones; 6- and 8-byte encodings
/// are also being considered.
const UNCOMPRESSED_INSTRUCTION_SIZE: i32 = 4;
#[inline]
pub(crate) fn load_imm12(rd: Writable<Reg>, imm: Imm12) -> Inst {
Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd,
rs: zero_reg(),
imm12: imm,
}
}
/// Try to load an immediate using a `lui`+`addi` pair; returns `None` if
/// the value cannot be synthesized that way.
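///
/// A worked example (a sketch): 0x12345678 splits into `imm20 = 0x12345`
/// and `imm12 = 0x678`, so `lui rd, 0x12345` leaves 0x1234_5000 in `rd` and
/// `addi rd, rd, 0x678` completes the constant. When the low 12 bits of the
/// value are 0x800 or more, `imm20` is incremented and `imm12` goes negative.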
fn load_const_imm(rd: Writable<Reg>, value: u64) -> Option<SmallInstVec<Inst>> {
Inst::generate_imm(value).map(|(imm20, imm12)| {
let mut insts = SmallVec::new();
let imm20_is_zero = imm20.as_i32() == 0;
let imm12_is_zero = imm12.as_i16() == 0;
let rs = if !imm20_is_zero {
insts.push(Inst::Lui { rd, imm: imm20 });
rd.to_reg()
} else {
zero_reg()
};
// Emit the addi even when the whole value is zero; otherwise we
// would produce no instructions at all.
if !imm12_is_zero || (imm20_is_zero && imm12_is_zero) {
insts.push(Inst::AluRRImm12 {
alu_op: AluOPRRI::Addi,
rd,
rs,
imm12,
})
}
insts
})
}
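/// Load a 32-bit constant into `rd`, preferring a `lui`+`addi` pair and
/// falling back to an inline `LoadInlineConst` otherwise.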
pub(crate) fn load_constant_u32(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value);
insts.unwrap_or_else(|| {
smallvec![Inst::LoadInlineConst {
rd,
ty: I32,
imm: value
}]
})
}
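/// Load a 64-bit constant into `rd`, preferring a `lui`+`addi` pair and
/// falling back to an inline `LoadInlineConst` when the value cannot be
/// synthesized from immediates.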
pub fn load_constant_u64(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {
let insts = Inst::load_const_imm(rd, value);
insts.unwrap_or_else(|| {
smallvec![Inst::LoadInlineConst {
rd,
ty: I64,
imm: value
}]
})
}
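/// Generate an `auipc`+`jalr` pair that transfers control `offset` bytes
/// from the address of the `auipc`, optionally writing the return address
/// to `link`.
///
/// A worked example (a sketch): for `offset = 0x1004`, `generate_imm`
/// yields `imm20 = 1` and `imm12 = 4`, so `auipc tmp, 1` computes
/// `pc + 0x1000` and `jalr link, 4(tmp)` lands at `pc + 0x1004`.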
pub(crate) fn construct_auipc_and_jalr(
link: Option<Writable<Reg>>,
tmp: Writable<Reg>,
offset: i64,
) -> [Inst; 2] {
Inst::generate_imm(offset as u64)
.map(|(imm20, imm12)| {
let a = Inst::Auipc {
rd: tmp,
imm: imm20,
};
let b = Inst::Jalr {
rd: link.unwrap_or(writable_zero_reg()),
base: tmp.to_reg(),
offset: imm12,
};
[a, b]
})
.expect("code range is too big.")
}
/// Create instructions that load a 32-bit floating-point constant.
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u32,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
let mut insts = SmallVec::new();
let tmp = alloc_tmp(I64);
insts.extend(Self::load_constant_u32(tmp, const_data as u64));
insts.push(Inst::FpuRR {
frm: None,
alu_op: FpuOPRR::move_x_to_f_op(F32),
rd,
rs: tmp.to_reg(),
});
insts
}
/// Create instructions that load a 64-bit floating-point constant.
pub fn load_fp_constant64<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
const_data: u64,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
let mut insts = SmallInstVec::new();
let tmp = alloc_tmp(I64);
insts.extend(Self::load_constant_u64(tmp, const_data));
insts.push(Inst::FpuRR {
frm: None,
alu_op: FpuOPRR::move_x_to_f_op(F64),
rd,
rs: tmp.to_reg(),
});
insts
}
/// Generic constructor for a load (zero-extending where appropriate).
pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
if ty.is_vector() {
Inst::VecLoad {
eew: VecElementWidth::from_type(ty),
to: into_reg,
from: VecAMode::UnitStride { base: mem },
flags,
mask: VecOpMasking::Disabled,
vstate: VState::from_type(ty),
}
} else {
Inst::Load {
rd: into_reg,
op: LoadOP::from_type(ty),
from: mem,
flags,
}
}
}
/// Generic constructor for a store.
pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
if ty.is_vector() {
Inst::VecStore {
eew: VecElementWidth::from_type(ty),
to: VecAMode::UnitStride { base: mem },
from: from_reg,
flags,
mask: VecOpMasking::Disabled,
vstate: VState::from_type(ty),
}
} else {
Inst::Store {
src: from_reg,
op: StoreOP::from_type(ty),
to: mem,
flags,
}
}
}
}
//=============================================================================
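/// Collect the mask register as a use fixed to `v0` when masking is enabled.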
fn vec_mask_operands<F: Fn(VReg) -> VReg>(
mask: &VecOpMasking,
collector: &mut OperandCollector<'_, F>,
) {
match mask {
VecOpMasking::Enabled { reg } => {
collector.reg_fixed_use(*reg, pv_reg(0).into());
}
VecOpMasking::Disabled => {}
}
}
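/// As `vec_mask_operands`, but collects the mask as a late use, for
/// operations whose destination must not overlap the mask register.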
fn vec_mask_late_operands<F: Fn(VReg) -> VReg>(
mask: &VecOpMasking,
collector: &mut OperandCollector<'_, F>,
) {
match mask {
VecOpMasking::Enabled { reg } => {
collector.reg_fixed_late_use(*reg, pv_reg(0).into());
}
VecOpMasking::Disabled => {}
}
}
fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) {
match inst {
&Inst::Nop0 => {}
&Inst::Nop4 => {}
&Inst::BrTable {
index, tmp1, tmp2, ..
} => {
collector.reg_use(index);
collector.reg_early_def(tmp1);
collector.reg_early_def(tmp2);
}
&Inst::Auipc { rd, .. } => collector.reg_def(rd),
&Inst::Lui { rd, .. } => collector.reg_def(rd),
&Inst::LoadInlineConst { rd, .. } => collector.reg_def(rd),
&Inst::AluRRR { rd, rs1, rs2, .. } => {
collector.reg_use(rs1);
collector.reg_use(rs2);
collector.reg_def(rd);
}
&Inst::FpuRRR { rd, rs1, rs2, .. } => {
collector.reg_use(rs1);
collector.reg_use(rs2);
collector.reg_def(rd);
}
&Inst::AluRRImm12 { rd, rs, .. } => {
collector.reg_use(rs);
collector.reg_def(rd);
}
&Inst::CsrReg { rd, rs, .. } => {
collector.reg_use(rs);
collector.reg_def(rd);
}
&Inst::CsrImm { rd, .. } => {
collector.reg_def(rd);
}
&Inst::Load { rd, from, .. } => {
if let Some(r) = from.get_allocatable_register() {
collector.reg_use(r);
}
collector.reg_def(rd);
}
&Inst::Store { to, src, .. } => {
if let Some(r) = to.get_allocatable_register() {
collector.reg_use(r);
}
collector.reg_use(src);
}
&Inst::Args { ref args } => {
for arg in args {
collector.reg_fixed_def(arg.vreg, arg.preg);
}
}
&Inst::Rets { ref rets } => {
for ret in rets {
collector.reg_fixed_use(ret.vreg, ret.preg);
}
}
&Inst::Ret { .. } => {}
&Inst::Extend { rd, rn, .. } => {
collector.reg_use(rn);
collector.reg_def(rd);
}
&Inst::AdjustSp { .. } => {}
&Inst::Call { ref info } => {
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
for d in &info.defs {
collector.reg_fixed_def(d.vreg, d.preg);
}
collector.reg_clobbers(info.clobbers);
}
&Inst::CallInd { ref info } => {
if info.callee_callconv == CallConv::Tail {
// TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
// This shouldn't be a fixed register constraint.
collector.reg_fixed_use(info.rn, x_reg(5));
} else {
collector.reg_use(info.rn);
}
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
for d in &info.defs {
collector.reg_fixed_def(d.vreg, d.preg);
}
collector.reg_clobbers(info.clobbers);
}
&Inst::ReturnCall {
callee: _,
ref info,
} => {
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
}
&Inst::ReturnCallInd { ref info, callee } => {
collector.reg_use(callee);
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
}
&Inst::Jal { .. } => {
// JAL technically has a rd register, but we currently always
// hardcode it to x0.
}
&Inst::CondBr { kind, .. } => {
collector.reg_use(kind.rs1);
collector.reg_use(kind.rs2);
}
&Inst::LoadExtName { rd, .. } => {
collector.reg_def(rd);
}
&Inst::ElfTlsGetAddr { rd, .. } => {
// x10 is a0, which is both the first argument and the first return value.
collector.reg_fixed_def(rd, a0());
let mut clobbers = Riscv64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV);
clobbers.remove(px_reg(10));
collector.reg_clobbers(clobbers);
}
&Inst::LoadAddr { rd, mem } => {
if let Some(r) = mem.get_allocatable_register() {
collector.reg_use(r);
}
collector.reg_early_def(rd);
}
&Inst::VirtualSPOffsetAdj { .. } => {}
&Inst::Mov { rd, rm, .. } => {
collector.reg_use(rm);
collector.reg_def(rd);
}
&Inst::MovFromPReg { rd, rm } => {
debug_assert!([px_reg(2), px_reg(8)].contains(&rm));
collector.reg_def(rd);
}
&Inst::Fence { .. } => {}
&Inst::EBreak => {}
&Inst::Udf { .. } => {}
&Inst::FpuRR { rd, rs, .. } => {
collector.reg_use(rs);
collector.reg_def(rd);
}
&Inst::FpuRRRR {
rd, rs1, rs2, rs3, ..
} => {
collector.reg_uses(&[rs1, rs2, rs3]);
collector.reg_def(rd);
}
&Inst::Jalr { rd, base, .. } => {
collector.reg_use(base);
collector.reg_def(rd);
}
&Inst::Atomic { rd, addr, src, .. } => {
collector.reg_use(addr);
collector.reg_use(src);
collector.reg_def(rd);
}
&Inst::Select {
ref dst,
condition,
x,
y,
..
} => {
collector.reg_use(condition.rs1);
collector.reg_use(condition.rs2);
collector.reg_uses(x.regs());
collector.reg_uses(y.regs());
// If there's more than one destination register then use
// `reg_early_def` to prevent destination registers from overlapping
// with any operands. This ensures that the lowering doesn't have to
// deal with a situation such as when the input registers need to be
// swapped when moved to the destination.
//
// When there's only one destination register though don't use an
// early def because once the register is written no other inputs
// are read so it's ok for the destination to overlap the sources.
if dst.regs().len() > 1 {
for d in dst.regs() {
collector.reg_early_def(d.clone());
}
} else {
collector.reg_defs(dst.regs());
}
}
&Inst::AtomicCas {
offset,
t0,
dst,
e,
addr,
v,
..
} => {
collector.reg_uses(&[offset, e, addr, v]);
collector.reg_early_def(t0);
collector.reg_early_def(dst);
}
&Inst::Icmp { rd, a, b, .. } => {
collector.reg_uses(a.regs());
collector.reg_uses(b.regs());
collector.reg_def(rd);
}
&Inst::FcvtToInt { rd, rs, tmp, .. } => {
collector.reg_use(rs);
collector.reg_early_def(tmp);
collector.reg_early_def(rd);
}
&Inst::RawData { .. } => {}
&Inst::AtomicStore { src, p, .. } => {
collector.reg_use(src);
collector.reg_use(p);
}
&Inst::AtomicLoad { rd, p, .. } => {
collector.reg_use(p);
collector.reg_def(rd);
}
&Inst::AtomicRmwLoop {
offset,
dst,
p,
x,
t0,
..
} => {
collector.reg_uses(&[offset, p, x]);
collector.reg_early_def(t0);
collector.reg_early_def(dst);
}
&Inst::TrapIf { rs1, rs2, .. } => {
collector.reg_use(rs1);
collector.reg_use(rs2);
}
&Inst::Unwind { .. } => {}
&Inst::DummyUse { reg } => {
collector.reg_use(reg);
}
&Inst::FloatRound {
rd,
int_tmp,
f_tmp,
rs,
..
} => {
collector.reg_use(rs);
collector.reg_early_def(int_tmp);
collector.reg_early_def(f_tmp);
collector.reg_early_def(rd);
}
&Inst::FloatSelect {
rd, tmp, rs1, rs2, ..
} => {
collector.reg_uses(&[rs1, rs2]);
collector.reg_early_def(tmp);
collector.reg_early_def(rd);
}
&Inst::Popcnt {
sum, step, rs, tmp, ..
} => {
collector.reg_use(rs);
collector.reg_early_def(tmp);
collector.reg_early_def(step);
collector.reg_early_def(sum);
}
&Inst::Rev8 { rs, rd, tmp, step } => {
collector.reg_use(rs);
collector.reg_early_def(tmp);
collector.reg_early_def(step);
collector.reg_early_def(rd);
}
&Inst::Cltz {
sum, step, tmp, rs, ..
} => {
collector.reg_use(rs);
collector.reg_early_def(tmp);
collector.reg_early_def(step);
collector.reg_early_def(sum);
}
&Inst::Brev8 {
rs,
rd,
step,
tmp,
tmp2,
..
} => {
collector.reg_use(rs);
collector.reg_early_def(step);
collector.reg_early_def(tmp);
collector.reg_early_def(tmp2);
collector.reg_early_def(rd);
}
&Inst::StackProbeLoop { .. } => {
// `StackProbeLoop` uses a tmp register, but it is only generated by
// `gen_prologue`, which runs at emission time, after register
// allocation, so there is nothing to tell the register allocator
// about. The temporary is hardcoded to t3, a caller-saved register
// that, like `writable_spilltmp_reg`, is not used directly by the
// compiler.
}
&Inst::VecAluRRRR {
op,
vd,
vd_src,
vs1,
vs2,
ref mask,
..
} => {
debug_assert_eq!(vd_src.class(), RegClass::Vector);
debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
debug_assert_eq!(vs2.class(), RegClass::Vector);
debug_assert_eq!(vs1.class(), op.vs1_regclass());
collector.reg_late_use(vs1);
collector.reg_late_use(vs2);
collector.reg_use(vd_src);
collector.reg_reuse_def(vd, 2); // `vd` == `vd_src`.
vec_mask_late_operands(mask, collector);
}
&Inst::VecAluRRRImm5 {
op,
vd,
vd_src,
vs2,
ref mask,
..
} => {
debug_assert_eq!(vd_src.class(), RegClass::Vector);
debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
debug_assert_eq!(vs2.class(), RegClass::Vector);
// If the operation forbids source/destination overlap we need to
// ensure that the source and destination registers are different.
if op.forbids_overlaps(mask) {
collector.reg_late_use(vs2);
collector.reg_use(vd_src);
collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.
vec_mask_late_operands(mask, collector);
} else {
collector.reg_use(vs2);
collector.reg_use(vd_src);
collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.
vec_mask_operands(mask, collector);
}
}
&Inst::VecAluRRR {
op,
vd,
vs1,
vs2,
ref mask,
..
} => {
debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
debug_assert_eq!(vs2.class(), RegClass::Vector);
debug_assert_eq!(vs1.class(), op.vs1_regclass());
collector.reg_use(vs1);
collector.reg_use(vs2);
// If the operation forbids source/destination overlap, then we must
// register it as an early_def. This encodes the constraint that
// these must not overlap.
if op.forbids_overlaps(mask) {
collector.reg_early_def(vd);
} else {
collector.reg_def(vd);
}
vec_mask_operands(mask, collector);
}
&Inst::VecAluRRImm5 {
op,
vd,
vs2,
ref mask,
..
} => {
debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
debug_assert_eq!(vs2.class(), RegClass::Vector);
collector.reg_use(vs2);
// If the operation forbids source/destination overlap, then we must
// register it as an early_def. This encodes the constraint that
// these must not overlap.
if op.forbids_overlaps(mask) {
collector.reg_early_def(vd);
} else {
collector.reg_def(vd);
}
vec_mask_operands(mask, collector);
}
&Inst::VecAluRR {
op,
vd,
vs,
ref mask,
..
} => {
debug_assert_eq!(vd.to_reg().class(), op.dst_regclass());
debug_assert_eq!(vs.class(), op.src_regclass());
collector.reg_use(vs);
// If the operation forbids source/destination overlap, then we must
// register it as an early_def. This encodes the constraint that
// these must not overlap.
if op.forbids_overlaps(mask) {
collector.reg_early_def(vd);
} else {
collector.reg_def(vd);
}
vec_mask_operands(mask, collector);
}
&Inst::VecAluRImm5 {
op, vd, ref mask, ..
} => {
debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);
debug_assert!(!op.forbids_overlaps(mask));
collector.reg_def(vd);
vec_mask_operands(mask, collector);
}
&Inst::VecSetState { rd, .. } => {
collector.reg_def(rd);
}
&Inst::VecLoad {
to,
ref from,
ref mask,
..
} => {
if let Some(r) = from.get_allocatable_register() {
collector.reg_use(r);
}
collector.reg_def(to);
vec_mask_operands(mask, collector);
}
&Inst::VecStore {
ref to,
from,
ref mask,
..
} => {
if let Some(r) = to.get_allocatable_register() {
collector.reg_use(r);
}
collector.reg_use(from);
vec_mask_operands(mask, collector);
}
}
}
impl MachInst for Inst {
type LabelUse = LabelUse;
type ABIMachineSpec = Riscv64MachineDeps;
// An all-zero word decodes as an illegal instruction, so it reliably
// traps; see https://github.com/riscv/riscv-isa-manual/issues/850.
const TRAP_OPCODE: &'static [u8] = &[0; 4];
fn gen_dummy_use(reg: Reg) -> Self {
Inst::DummyUse { reg }
}
fn canonical_type_for_rc(rc: RegClass) -> Type {
match rc {
regalloc2::RegClass::Int => I64,
regalloc2::RegClass::Float => F64,
regalloc2::RegClass::Vector => I8X16,
}
}
fn is_safepoint(&self) -> bool {
match self {
&Inst::Call { .. }
| &Inst::CallInd { .. }
| &Inst::TrapIf { .. }
| &Inst::Udf { .. } => true,
_ => false,
}
}
fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) {
riscv64_get_operands(self, collector);
}
fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
match self {
Inst::Mov { rd, rm, .. } => Some((rd.clone(), rm.clone())),
_ => None,
}
}
fn is_included_in_clobbers(&self) -> bool {
match self {
&Inst::Args { .. } => false,
_ => true,
}
}
fn is_trap(&self) -> bool {
match self {
Self::Udf { .. } => true,
_ => false,
}
}
fn is_args(&self) -> bool {
match self {
Self::Args { .. } => true,
_ => false,
}
}
fn is_term(&self) -> MachTerminator {
match self {
&Inst::Jal { .. } => MachTerminator::Uncond,
&Inst::CondBr { .. } => MachTerminator::Cond,
&Inst::Jalr { .. } => MachTerminator::Uncond,
&Inst::Rets { .. } => MachTerminator::Ret,
&Inst::BrTable { .. } => MachTerminator::Indirect,
&Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
_ => MachTerminator::None,
}
}
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
Inst::Mov {
rd: to_reg,
rm: from_reg,
ty,
}
}
fn gen_nop(preferred_size: usize) -> Inst {
if preferred_size == 0 {
return Inst::Nop0;
}
// We can't emit a NOP (or any instruction) smaller than 4 bytes here.
assert!(preferred_size >= 4);
Inst::Nop4
}
fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
match ty {
I8 => Ok((&[RegClass::Int], &[I8])),
I16 => Ok((&[RegClass::Int], &[I16])),
I32 => Ok((&[RegClass::Int], &[I32])),
I64 => Ok((&[RegClass::Int], &[I64])),
R32 => panic!("32-bit reftype pointer should never be seen on riscv64"),
R64 => Ok((&[RegClass::Int], &[R64])),
F32 => Ok((&[RegClass::Float], &[F32])),
F64 => Ok((&[RegClass::Float], &[F64])),
I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
_ if ty.is_vector() => {
debug_assert!(ty.bits() <= 512);
// Here we only need to return a SIMD type with the same size as `ty`.
// We use these types for spills and reloads, so prefer types with lanes <= 31
// since that fits in the immediate field of `vsetivli`.
const SIMD_TYPES: [[Type; 1]; 6] = [
[types::I8X2],
[types::I8X4],
[types::I8X8],
[types::I8X16],
[types::I16X16],
[types::I32X16],
];
let idx = (ty.bytes().ilog2() - 1) as usize;
let ty = &SIMD_TYPES[idx][..];
Ok((&[RegClass::Vector], ty))
}
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty
))),
}
}
fn gen_jump(target: MachLabel) -> Inst {
Inst::Jal { label: target }
}
fn worst_case_size() -> CodeOffset {
// Calculated by the test function riscv64_worst_case_instruction_size().
124
}
fn ref_type_regclass(_settings: &settings::Flags) -> RegClass {
RegClass::Int
}
fn function_alignment() -> FunctionAlignment {
FunctionAlignment {
minimum: 2,
preferred: 4,
}
}
}
//=============================================================================
// Pretty-printing of instructions.
pub fn reg_name(reg: Reg) -> String {
match reg.to_real_reg() {
Some(real) => match real.class() {
RegClass::Int => match real.hw_enc() {
0 => "zero".into(),
1 => "ra".into(),
2 => "sp".into(),
3 => "gp".into(),
4 => "tp".into(),
5..=7 => format!("t{}", real.hw_enc() - 5),
8 => "fp".into(),
9 => "s1".into(),
10..=17 => format!("a{}", real.hw_enc() - 10),
18..=27 => format!("s{}", real.hw_enc() - 16),
28..=31 => format!("t{}", real.hw_enc() - 25),
_ => unreachable!(),
},
RegClass::Float => match real.hw_enc() {
0..=7 => format!("ft{}", real.hw_enc() - 0),
8..=9 => format!("fs{}", real.hw_enc() - 8),
10..=17 => format!("fa{}", real.hw_enc() - 10),
18..=27 => format!("fs{}", real.hw_enc() - 16),
28..=31 => format!("ft{}", real.hw_enc() - 20),
_ => unreachable!(),
},
RegClass::Vector => format!("v{}", real.hw_enc()),
},
None => {
format!("{:?}", reg)
}
}
}
impl Inst {
fn print_with_state(
&self,
_state: &mut EmitState,
allocs: &mut AllocationConsumer<'_>,
) -> String {
let format_reg = |reg: Reg, allocs: &mut AllocationConsumer<'_>| -> String {
let reg = allocs.next(reg);
reg_name(reg)
};
let format_vec_amode = |amode: &VecAMode, allocs: &mut AllocationConsumer<'_>| -> String {
match amode {
VecAMode::UnitStride { base } => base.to_string_with_alloc(allocs),
}
};
let format_mask = |mask: &VecOpMasking, allocs: &mut AllocationConsumer<'_>| -> String {
match mask {
VecOpMasking::Enabled { reg } => format!(",{}.t", format_reg(*reg, allocs)),
VecOpMasking::Disabled => String::new(),
}
};
let format_regs = |regs: &[Reg], allocs: &mut AllocationConsumer<'_>| -> String {
let mut x = if regs.len() > 1 {
String::from("[")
} else {
String::default()
};
regs.iter().for_each(|i| {
x.push_str(format_reg(i.clone(), allocs).as_str());
if *i != *regs.last().unwrap() {
x.push_str(",");
}
});
if regs.len() > 1 {
x.push_str("]");
}
x
};
let format_labels = |labels: &[MachLabel]| -> String {
if labels.is_empty() {
return String::from("[_]");
}
let mut x = String::from("[");
labels.iter().for_each(|l| {
x.push_str(
format!(
"{:?}{}",
l,
if l != labels.last().unwrap() { "," } else { "" },
)
.as_str(),
);
});
x.push_str("]");
x
};
fn format_frm(rounding_mode: Option<FRM>) -> String {
if let Some(r) = rounding_mode {
format!(",{}", r.to_static_str(),)
} else {
"".into()
}
}
let mut empty_allocs = AllocationConsumer::default();
match self {
&Inst::Nop0 => String::from("##zero length nop"),
&Inst::Nop4 => String::from("##fixed 4-size nop"),
&Inst::StackProbeLoop {
guard_size,
probe_count,
tmp,
} => {
let tmp = format_reg(tmp.to_reg(), allocs);
format!(
"inline_stack_probe##guard_size={} probe_count={} tmp={}",
guard_size, probe_count, tmp
)
}
&Inst::FloatRound {
op,
rd,
int_tmp,
f_tmp,
rs,
ty,
} => {
let rs = format_reg(rs, allocs);
let int_tmp = format_reg(int_tmp.to_reg(), allocs);
let f_tmp = format_reg(f_tmp.to_reg(), allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!(
"{} {},{}##int_tmp={} f_tmp={} ty={}",
op.op_name(),
rd,
rs,
int_tmp,
f_tmp,
ty
)
}
&Inst::FloatSelect {
op,
rd,
tmp,
rs1,
rs2,
ty,
} => {
let rs1 = format_reg(rs1, allocs);
let rs2 = format_reg(rs2, allocs);
let tmp = format_reg(tmp.to_reg(), allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!(
"f{}.{} {},{},{}##tmp={} ty={}",
op.op_name(),
if ty == F32 { "s" } else { "d" },
rd,
rs1,
rs2,
tmp,
ty
)
}
&Inst::AtomicStore { src, ty, p } => {
let src = format_reg(src, allocs);
let p = format_reg(p, allocs);
format!("atomic_store.{} {},({})", ty, src, p)
}
&Inst::DummyUse { reg } => {
let reg = format_reg(reg, allocs);
format!("dummy_use {}", reg)
}
&Inst::AtomicLoad { rd, ty, p } => {
let p = format_reg(p, allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!("atomic_load.{} {},({})", ty, rd, p)
}
&Inst::AtomicRmwLoop {
offset,
op,
dst,
ty,
p,
x,
t0,
} => {
let offset = format_reg(offset, allocs);
let p = format_reg(p, allocs);
let x = format_reg(x, allocs);
let t0 = format_reg(t0.to_reg(), allocs);
let dst = format_reg(dst.to_reg(), allocs);
format!(
"atomic_rmw.{} {} {},{},({})##t0={} offset={}",
ty, op, dst, x, p, t0, offset
)
}
&Inst::RawData { ref data } => match data.len() {
4 => {
let bytes: [u8; 4] = data[..].try_into().unwrap();
format!(".4byte 0x{:x}", u32::from_le_bytes(bytes))
}
8 => {
let bytes: [u8; 8] = data[..].try_into().unwrap();
format!(".8byte 0x{:x}", u64::from_le_bytes(bytes))
}
_ => {
format!(".data {:?}", data)
}
},
&Inst::Unwind { ref inst } => {
format!("unwind {:?}", inst)
}
&Inst::Brev8 {
rs,
ty,
step,
tmp,
tmp2,
rd,
} => {
let rs = format_reg(rs, allocs);
let step = format_reg(step.to_reg(), allocs);
let tmp = format_reg(tmp.to_reg(), allocs);
let tmp2 = format_reg(tmp2.to_reg(), allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!(
"brev8 {},{}##tmp={} tmp2={} step={} ty={}",
rd, rs, tmp, tmp2, step, ty
)
}
&Inst::Popcnt {
sum,
step,
rs,
tmp,
ty,
} => {
let rs = format_reg(rs, allocs);
let tmp = format_reg(tmp.to_reg(), allocs);
let step = format_reg(step.to_reg(), allocs);
let sum = format_reg(sum.to_reg(), allocs);
format!("popcnt {},{}##ty={} tmp={} step={}", sum, rs, ty, tmp, step)
}
&Inst::Rev8 { rs, rd, tmp, step } => {
let rs = format_reg(rs, allocs);
let tmp = format_reg(tmp.to_reg(), allocs);
let step = format_reg(step.to_reg(), allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!("rev8 {},{}##step={} tmp={}", rd, rs, step, tmp)
}
&Inst::Cltz {
sum,
step,
rs,
tmp,
ty,
leading,
} => {
let rs = format_reg(rs, allocs);
let tmp = format_reg(tmp.to_reg(), allocs);
let step = format_reg(step.to_reg(), allocs);
let sum = format_reg(sum.to_reg(), allocs);
format!(
"{} {},{}##ty={} tmp={} step={}",
if leading { "clz" } else { "ctz" },
sum,
rs,
ty,
tmp,
step
)
}
&Inst::FcvtToInt {
is_sat,
rd,
rs,
is_signed,
in_type,
out_type,
tmp,
} => {
let rs = format_reg(rs, allocs);
let tmp = format_reg(tmp.to_reg(), allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!(
"fcvt_to_{}int{}.{} {},{}##in_ty={} tmp={}",
if is_signed { "s" } else { "u" },
if is_sat { "_sat" } else { "" },
out_type,
rd,
rs,
in_type,
tmp
)
}
&Inst::AtomicCas {
offset,
t0,
dst,
e,
addr,
v,
ty,
} => {
let offset = format_reg(offset, allocs);
let e = format_reg(e, allocs);
let addr = format_reg(addr, allocs);
let v = format_reg(v, allocs);
let t0 = format_reg(t0.to_reg(), allocs);
let dst = format_reg(dst.to_reg(), allocs);
format!(
"atomic_cas.{} {},{},{},({})##t0={} offset={}",
ty, dst, e, v, addr, t0, offset,
)
}
&Inst::Icmp { cc, rd, a, b, ty } => {
let a = format_regs(a.regs(), allocs);
let b = format_regs(b.regs(), allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!("{} {},{},{}##ty={}", cc.to_static_str(), rd, a, b, ty)
}
&Inst::BrTable {
index,
tmp1,
tmp2,
ref targets,
} => {
format!(
"br_table {},{}##tmp1={},tmp2={}",
format_reg(index, allocs),
format_labels(&targets[..]),
format_reg(tmp1.to_reg(), allocs),
format_reg(tmp2.to_reg(), allocs),
)
}
&Inst::Auipc { rd, imm } => {
format!(
"auipc {},{}",
format_reg(rd.to_reg(), allocs),
imm.as_i32(),
)
}
&Inst::Jalr { rd, base, offset } => {
let base = format_reg(base, allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!("{} {},{}({})", "jalr", rd, offset.as_i16(), base)
}
&Inst::Lui { rd, ref imm } => {
format!(
"lui {},{}",
format_reg(rd.to_reg(), allocs),
imm.as_i32()
)
}
&Inst::LoadInlineConst { rd, imm, .. } => {
let rd = format_reg(rd.to_reg(), allocs);
let mut buf = String::new();
write!(&mut buf, "auipc {},0; ", rd).unwrap();
write!(&mut buf, "ld {},12({}); ", rd, rd).unwrap();
write!(&mut buf, "j {}; ", Inst::UNCOMPRESSED_INSTRUCTION_SIZE + 8).unwrap();
write!(&mut buf, ".8byte 0x{:x}", imm).unwrap();
buf
}
&Inst::AluRRR {
alu_op,
rd,
rs1,
rs2,
} => {
let rs1_s = format_reg(rs1, allocs);
let rs2_s = format_reg(rs2, allocs);
let rd_s = format_reg(rd.to_reg(), allocs);
match alu_op {
AluOPRRR::Adduw if rs2 == zero_reg() => {
format!("zext.w {},{}", rd_s, rs1_s)
}
_ => {
format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s)
}
}
}
&Inst::FpuRR {
frm,
alu_op,
rd,
rs,
} => {
let rs = format_reg(rs, allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!("{} {},{}{}", alu_op.op_name(), rd, rs, format_frm(frm))
}
&Inst::FpuRRR {
alu_op,
rd,
rs1,
rs2,
frm,
} => {
let rs1 = format_reg(rs1, allocs);
let rs2 = format_reg(rs2, allocs);
let rd = format_reg(rd.to_reg(), allocs);
let rs1_is_rs2 = rs1 == rs2;
if rs1_is_rs2 && alu_op.is_copy_sign() {
// `fsgnj` with identical source registers is the canonical
// register-move pseudo-instruction.
format!(
"fmv.{} {},{}",
if alu_op.is_32() { "s" } else { "d" },
rd,
rs1
)
} else if rs1_is_rs2 && alu_op.is_copy_neg_sign() {
format!(
"fneg.{} {},{}",
if alu_op.is_32() { "s" } else { "d" },
rd,
rs1
)
} else if rs1_is_rs2 && alu_op.is_copy_xor_sign() {
format!(
"fabs.{} {},{}",
if alu_op.is_32() { "s" } else { "d" },
rd,
rs1
)
} else {
format!(
"{} {},{},{}{}",
alu_op.op_name(),
rd,
rs1,
rs2,
format_frm(frm)
)
}
}
&Inst::FpuRRRR {
alu_op,
rd,
rs1,
rs2,
rs3,
frm,
} => {
let rs1 = format_reg(rs1, allocs);
let rs2 = format_reg(rs2, allocs);
let rs3 = format_reg(rs3, allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!(
"{} {},{},{},{}{}",
alu_op.op_name(),
rd,
rs1,
rs2,
rs3,
format_frm(frm)
)
}
&Inst::AluRRImm12 {
alu_op,
rd,
rs,
ref imm12,
} => {
let rs_s = format_reg(rs, allocs);
let rd = format_reg(rd.to_reg(), allocs);
// Some of these special cases are better known as their
// pseudo-instruction forms, so prefer printing those.
match (alu_op, rs, imm12) {
(AluOPRRI::Addi, rs, _) if rs == zero_reg() => {
return format!("li {},{}", rd, imm12.as_i16());
}
(AluOPRRI::Addiw, _, imm12) if imm12.as_i16() == 0 => {
return format!("sext.w {},{}", rd, rs_s);
}
(AluOPRRI::Xori, _, imm12) if imm12.as_i16() == -1 => {
return format!("not {},{}", rd, rs_s);
}
(AluOPRRI::SltiU, _, imm12) if imm12.as_i16() == 1 => {
return format!("seqz {},{}", rd, rs_s);
}
(alu_op, _, _) if alu_op.option_funct12().is_some() => {
format!("{} {},{}", alu_op.op_name(), rd, rs_s)
}
(alu_op, _, imm12) => {
format!("{} {},{},{}", alu_op.op_name(), rd, rs_s, imm12.as_i16())
}
}
}
&Inst::CsrReg { op, rd, rs, csr } => {
let rs_s = format_reg(rs, allocs);
let rd_s = format_reg(rd.to_reg(), allocs);
match (op, csr, rd) {
(CsrRegOP::CsrRW, CSR::Frm, rd) if rd.to_reg() == zero_reg() => {
format!("fsrm {rs_s}")
}
_ => {
format!("{op} {rd_s},{csr},{rs_s}")
}
}
}
&Inst::CsrImm { op, rd, csr, imm } => {
let rd_s = format_reg(rd.to_reg(), allocs);
match (op, csr, rd) {
(CsrImmOP::CsrRWI, CSR::Frm, rd) if rd.to_reg() != zero_reg() => {
format!("fsrmi {rd_s},{imm}")
}
_ => {
format!("{op} {rd_s},{csr},{imm}")
}
}
}
&Inst::Load {
rd,
op,
from,
flags: _flags,
} => {
let base = from.to_string_with_alloc(allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!("{} {},{}", op.op_name(), rd, base,)
}
&Inst::Store {
to,
src,
op,
flags: _flags,
} => {
let base = to.to_string_with_alloc(allocs);
let src = format_reg(src, allocs);
format!("{} {},{}", op.op_name(), src, base,)
}
&Inst::Args { ref args } => {
let mut s = "args".to_string();
let mut empty_allocs = AllocationConsumer::default();
for arg in args {
let preg = format_reg(arg.preg, &mut empty_allocs);
let def = format_reg(arg.vreg.to_reg(), allocs);
write!(&mut s, " {}={}", def, preg).unwrap();
}
s
}
&Inst::Rets { ref rets } => {
let mut s = "rets".to_string();
let mut empty_allocs = AllocationConsumer::default();
for ret in rets {
let preg = format_reg(ret.preg, &mut empty_allocs);
let vreg = format_reg(ret.vreg, allocs);
write!(&mut s, " {vreg}={preg}").unwrap();
}
s
}
&Inst::Ret {} => "ret".to_string(),
&MInst::Extend {
rd,
rn,
signed,
from_bits,
..
} => {
let rn = format_reg(rn, allocs);
let rd = format_reg(rd.to_reg(), allocs);
return if !signed && from_bits == 8 {
format!("andi {rd},{rn},255")
} else {
let op = if signed { "srai" } else { "srli" };
let shift_bits = (64 - from_bits) as i16;
format!("slli {rd},{rn},{shift_bits}; {op} {rd},{rd},{shift_bits}")
};
}
&MInst::AdjustSp { amount } => {
format!("{} sp,{:+}", "add", amount)
}
&MInst::Call { ref info } => format!("call {}", info.dest.display(None)),
&MInst::CallInd { ref info } => {
let rd = format_reg(info.rn, allocs);
format!("callind {}", rd)
}
&MInst::ReturnCall {
ref callee,
ref info,
} => {
let mut s = format!(
"return_call {callee:?} old_stack_arg_size:{} new_stack_arg_size:{}",
info.old_stack_arg_size, info.new_stack_arg_size
);
for ret in &info.uses {
let preg = format_reg(ret.preg, &mut empty_allocs);
let vreg = format_reg(ret.vreg, allocs);
write!(&mut s, " {vreg}={preg}").unwrap();
}
s
}
&MInst::ReturnCallInd { callee, ref info } => {
let callee = format_reg(callee, allocs);
let mut s = format!(
"return_call_ind {callee} old_stack_arg_size:{} new_stack_arg_size:{}",
info.old_stack_arg_size, info.new_stack_arg_size
);
for ret in &info.uses {
let preg = format_reg(ret.preg, &mut empty_allocs);
let vreg = format_reg(ret.vreg, allocs);
write!(&mut s, " {vreg}={preg}").unwrap();
}
s
}
&MInst::TrapIf {
rs1,
rs2,
cc,
trap_code,
} => {
let rs1 = format_reg(rs1, allocs);
let rs2 = format_reg(rs2, allocs);
format!("trap_if {trap_code}##({rs1} {cc} {rs2})")
}
&MInst::Jal { label } => {
format!("j {}", label.to_string())
}
&MInst::CondBr {
taken,
not_taken,
kind,
..
} => {
let rs1 = format_reg(kind.rs1, allocs);
let rs2 = format_reg(kind.rs2, allocs);
if not_taken.is_fallthrough() && taken.as_label().is_none() {
format!("{} {},{},0", kind.op_name(), rs1, rs2)
} else {
format!(
"{} {},{},taken({}),not_taken({})",
kind.op_name(),
rs1,
rs2,
taken,
not_taken
)
}
}
&MInst::Atomic {
op,
rd,
addr,
src,
amo,
} => {
let op_name = op.op_name(amo);
let addr = format_reg(addr, allocs);
let src = format_reg(src, allocs);
let rd = format_reg(rd.to_reg(), allocs);
if op.is_load() {
format!("{} {},({})", op_name, rd, addr)
} else {
format!("{} {},{},({})", op_name, rd, src, addr)
}
}
&MInst::LoadExtName {
rd,
ref name,
offset,
} => {
let rd = format_reg(rd.to_reg(), allocs);
format!("load_sym {},{}{:+}", rd, name.display(None), offset)
}
&Inst::ElfTlsGetAddr { rd, ref name } => {
let rd = format_reg(rd.to_reg(), allocs);
format!("elf_tls_get_addr {rd},{}", name.display(None))
}
&MInst::LoadAddr { ref rd, ref mem } => {
let rs = mem.to_string_with_alloc(allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!("load_addr {},{}", rd, rs)
}
&MInst::VirtualSPOffsetAdj { amount } => {
format!("virtual_sp_offset_adj {:+}", amount)
}
&MInst::Mov { rd, rm, ty } => {
let rd = format_reg(rd.to_reg(), allocs);
let rm = format_reg(rm, allocs);
let op = match ty {
F32 => "fmv.s",
F64 => "fmv.d",
ty if ty.is_vector() => "vmv1r.v",
_ => "mv",
};
format!("{op} {rd},{rm}")
}
&MInst::MovFromPReg { rd, rm } => {
let rd = format_reg(rd.to_reg(), allocs);
debug_assert!([px_reg(2), px_reg(8)].contains(&rm));
let rm = reg_name(Reg::from(rm));
format!("mv {},{}", rd, rm)
}
&MInst::Fence { pred, succ } => {
format!(
"fence {},{}",
Inst::fence_req_to_string(pred),
Inst::fence_req_to_string(succ),
)
}
&MInst::Select {
ref dst,
condition,
ref x,
ref y,
} => {
let c_rs1 = format_reg(condition.rs1, allocs);
let c_rs2 = format_reg(condition.rs2, allocs);
let x = format_regs(x.regs(), allocs);
let y = format_regs(y.regs(), allocs);
let dst = dst.map(|r| r.to_reg());
let dst = format_regs(dst.regs(), allocs);
format!(
"select {},{},{}##condition=({} {} {})",
dst,
x,
y,
c_rs1,
condition.kind.to_static_str(),
c_rs2
)
}
&MInst::Udf { trap_code } => format!("udf##trap_code={}", trap_code),
&MInst::EBreak {} => String::from("ebreak"),
&Inst::VecAluRRRR {
op,
vd,
vd_src,
vs1,
vs2,
ref mask,
ref vstate,
} => {
let vs1_s = format_reg(vs1, allocs);
let vs2_s = format_reg(vs2, allocs);
let vd_src_s = format_reg(vd_src, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);
let mask = format_mask(mask, allocs);
let vd_fmt = if vd_s != vd_src_s {
format!("{},{}", vd_s, vd_src_s)
} else {
vd_s
};
// Note: vs2 and vs1 here are opposite to the standard scalar ordering.
// This is noted in Section 10.1 of the RISC-V Vector spec.
format!("{op} {vd_fmt},{vs2_s},{vs1_s}{mask} {vstate}")
}
&Inst::VecAluRRRImm5 {
op,
vd,
imm,
vs2,
ref mask,
ref vstate,
..
} => {
let vs2_s = format_reg(vs2, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);
let mask = format_mask(mask, allocs);
// Some opcodes interpret the immediate as unsigned; show the
// correct number here.
let imm_s = if op.imm_is_unsigned() {
format!("{}", imm.bits())
} else {
format!("{}", imm)
};
format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}")
}
&Inst::VecAluRRR {
op,
vd,
vs1,
vs2,
ref mask,
ref vstate,
} => {
let vs1_s = format_reg(vs1, allocs);
let vs2_s = format_reg(vs2, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);
let mask = format_mask(mask, allocs);
// Note: vs2 and vs1 here are opposite to the standard scalar ordering.
// This is noted in Section 10.1 of the RISC-V Vector spec.
match (op, vs2, vs1) {
(VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => {
format!("vneg.v {vd_s},{vs2_s}{mask} {vstate}")
}
(VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => {
format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}")
}
(VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => {
format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}")
}
(VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => {
format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}")
}
_ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"),
}
}
&Inst::VecAluRRImm5 {
op,
vd,
imm,
vs2,
ref mask,
ref vstate,
} => {
let vs2_s = format_reg(vs2, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);
let mask = format_mask(mask, allocs);
// Some opcodes interpret the immediate as unsigned; show the
// correct number here.
let imm_s = if op.imm_is_unsigned() {
format!("{}", imm.bits())
} else {
format!("{}", imm)
};
match (op, imm) {
(VecAluOpRRImm5::VxorVI, imm) if imm == Imm5::maybe_from_i8(-1).unwrap() => {
format!("vnot.v {vd_s},{vs2_s}{mask} {vstate}")
}
_ => format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}"),
}
}
&Inst::VecAluRR {
op,
vd,
vs,
ref mask,
ref vstate,
} => {
let vs_s = format_reg(vs, allocs);
let vd_s = format_reg(vd.to_reg(), allocs);
let mask = format_mask(mask, allocs);
format!("{op} {vd_s},{vs_s}{mask} {vstate}")
}
&Inst::VecAluRImm5 {
op,
vd,
imm,
ref mask,
ref vstate,
} => {
let vd_s = format_reg(vd.to_reg(), allocs);
let mask = format_mask(mask, allocs);
format!("{op} {vd_s},{imm}{mask} {vstate}")
}
&Inst::VecSetState { rd, ref vstate } => {
let rd_s = format_reg(rd.to_reg(), allocs);
assert!(vstate.avl.is_static());
format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype)
}
Inst::VecLoad {
eew,
to,
from,
ref mask,
ref vstate,
..
} => {
let base = format_vec_amode(from, allocs);
let vd = format_reg(to.to_reg(), allocs);
let mask = format_mask(mask, allocs);
format!("vl{eew}.v {vd},{base}{mask} {vstate}")
}
Inst::VecStore {
eew,
to,
from,
ref mask,
ref vstate,
..
} => {
let dst = format_vec_amode(to, allocs);
let vs3 = format_reg(*from, allocs);
let mask = format_mask(mask, allocs);
format!("vs{eew}.v {vs3},{dst}{mask} {vstate}")
}
}
}
}
/// Different forms of label references for different instruction formats.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
/// 20-bit branch offset (unconditional branches). PC-relative; the offset
/// is `imm << 1` with a signed 20-bit immediate. Used in `jal`
/// instructions.
Jal20,
/// The unconditional jump instructions all use PC-relative
/// addressing to help support position independent code. The JALR
/// instruction was defined to enable a two-instruction sequence to
/// jump anywhere in a 32-bit absolute address range. A LUI
/// instruction can first load rs1 with the upper 20 bits of a
/// target address, then JALR can add in the lower bits. Similarly,
/// AUIPC then JALR can jump anywhere in a 32-bit pc-relative
/// address range.
PCRel32,
/// All branch instructions use the B-type instruction format. The
/// 12-bit B-immediate encodes signed offsets in multiples of 2, and
/// is added to the current pc to give the target address. The
/// conditional branch range is ±4 KiB.
B12,
/// Equivalent to the `R_RISCV_PCREL_HI20` relocation. Allows setting
/// the immediate field of an `auipc` instruction.
PCRelHi20,
/// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to
/// the final address, instead of the `PCREL_HI20` label. Allows setting
/// the immediate field of I Type instructions such as `addi` or `lw`.
///
/// Since we currently don't support offsets in labels, this relocation has
/// an implicit offset of 4.
PCRelLo12I,
/// 11-bit PC-relative jump offset. Equivalent to the `RVC_JUMP` relocation.
RVCJump,
}
impl MachInstLabelUse for LabelUse {
/// Alignment for veneer code. Every Riscv64 instruction must be
/// 4-byte-aligned.
const ALIGN: CodeOffset = 4;
/// Maximum PC-relative range (positive), inclusive.
fn max_pos_range(self) -> CodeOffset {
match self {
LabelUse::Jal20 => ((1 << 19) - 1) * 2,
LabelUse::PCRelLo12I | LabelUse::PCRelHi20 | LabelUse::PCRel32 => {
Inst::imm_max() as CodeOffset
}
LabelUse::B12 => ((1 << 11) - 1) * 2,
LabelUse::RVCJump => ((1 << 10) - 1) * 2,
}
}
/// Maximum PC-relative range (negative).
fn max_neg_range(self) -> CodeOffset {
match self {
LabelUse::PCRel32 => Inst::imm_min().abs() as CodeOffset,
_ => self.max_pos_range() + 2,
}
}
/// Size of window into code needed to do the patch.
fn patch_size(self) -> CodeOffset {
match self {
LabelUse::RVCJump => 2,
LabelUse::Jal20 | LabelUse::B12 | LabelUse::PCRelHi20 | LabelUse::PCRelLo12I => 4,
LabelUse::PCRel32 => 8,
}
}
/// Perform the patch.
fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
assert!(use_offset % 2 == 0);
assert!(label_offset % 2 == 0);
let offset = (label_offset as i64) - (use_offset as i64);
// re-check range
assert!(
offset >= -(self.max_neg_range() as i64) && offset <= (self.max_pos_range() as i64),
"{:?} offset '{}' use_offset:'{}' label_offset:'{}' must not exceed max range.",
self,
offset,
use_offset,
label_offset,
);
self.patch_raw_offset(buffer, offset);
}
/// Is a veneer supported for this label reference type?
fn supports_veneer(self) -> bool {
match self {
Self::Jal20 | Self::B12 | Self::RVCJump => true,
_ => false,
}
}
/// How large is the veneer, if supported?
fn veneer_size(self) -> CodeOffset {
match self {
Self::B12 | Self::Jal20 | Self::RVCJump => 8,
_ => unreachable!(),
}
}
fn worst_case_veneer_size() -> CodeOffset {
8
}
/// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
/// an offset and label-use for the veneer's use of the original label.
fn generate_veneer(
self,
buffer: &mut [u8],
veneer_offset: CodeOffset,
) -> (CodeOffset, LabelUse) {
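// Emit `auipc spilltmp, 0; jalr zero, 0(spilltmp)`; the returned
// `PCRel32` label-use later patches both zero immediates with the
// real offset to the target.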
let base = writable_spilltmp_reg();
{
let x = enc_auipc(base, Imm20::ZERO).to_le_bytes();
buffer[0] = x[0];
buffer[1] = x[1];
buffer[2] = x[2];
buffer[3] = x[3];
}
{
let x = enc_jalr(writable_zero_reg(), base.to_reg(), Imm12::ZERO).to_le_bytes();
buffer[4] = x[0];
buffer[5] = x[1];
buffer[6] = x[2];
buffer[7] = x[3];
}
(veneer_offset, Self::PCRel32)
}
fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
match (reloc, addend) {
(Reloc::RiscvCall, _) => Some(Self::PCRel32),
_ => None,
}
}
}
impl LabelUse {
fn offset_in_range(self, offset: i64) -> bool {
let min = -(self.max_neg_range() as i64);
let max = self.max_pos_range() as i64;
offset >= min && offset <= max
}
fn patch_raw_offset(self, buffer: &mut [u8], offset: i64) {
let insn = match self {
LabelUse::RVCJump => u16::from_le_bytes(buffer[..2].try_into().unwrap()) as u32,
_ => u32::from_le_bytes(buffer[..4].try_into().unwrap()),
};
match self {
LabelUse::Jal20 => {
let offset = offset as u32;
// Scatter the J-type immediate: imm[19:12] to bits 19:12,
// imm[11] to bit 20, imm[10:1] to bits 30:21, and imm[20] to
// bit 31.
let v = ((offset >> 12 & 0b1111_1111) << 12)
| ((offset >> 11 & 0b1) << 20)
| ((offset >> 1 & 0b11_1111_1111) << 21)
| ((offset >> 20 & 0b1) << 31);
buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
}
LabelUse::PCRel32 => {
let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]);
Inst::generate_imm(offset as u64)
.map(|(imm20, imm12)| {
// Encode the value to OR in using zero_reg(): the real
// register fields are already present in the original
// encoded instruction, and OR-ing in zeroes leaves them
// unchanged.
buffer[0..4].clone_from_slice(&u32::to_le_bytes(
insn | enc_auipc(writable_zero_reg(), imm20),
));
buffer[4..8].clone_from_slice(&u32::to_le_bytes(
insn2 | enc_jalr(writable_zero_reg(), zero_reg(), imm12),
));
})
// The range was checked in `patch`, so failure here is a
// compiler bug.
.expect("offset was range-checked in `patch`; this is a compiler bug");
}
LabelUse::B12 => {
let offset = offset as u32;
// Scatter the B-type immediate: imm[11] to bit 7, imm[4:1] to
// bits 11:8, imm[10:5] to bits 30:25, and imm[12] to bit 31.
let v = ((offset >> 11 & 0b1) << 7)
| ((offset >> 1 & 0b1111) << 8)
| ((offset >> 5 & 0b11_1111) << 25)
| ((offset >> 12 & 0b1) << 31);
buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
}
LabelUse::PCRelHi20 => {
// See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
//
// We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the
// Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an
// offset of 2048, we need to land at the next page and subtract instead.
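//
// A worked example (a sketch): for offset 0x1800, hi20 becomes
// (0x1800 + 0x800) >> 12 = 2 and the companion lo12 is -0x800,
// since (2 << 12) - 2048 = 0x1800.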
let offset = offset as u32;
let hi20 = offset.wrapping_add(0x800) >> 12;
let insn = (insn & 0xFFF) | (hi20 << 12);
buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn));
}
LabelUse::PCRelLo12I => {
// `offset` is the offset from the current instruction to the target address.
//
// However we are trying to compute the offset to the target address from the previous instruction.
// The previous instruction should be the one that contains the PCRelHi20 relocation and
// stores/references the program counter (`auipc` usually).
//
// Since we are trying to compute the offset from the previous instruction, we can
// represent it as offset = target_address - (current_instruction_address - 4)
// which is equivalent to offset = target_address - current_instruction_address + 4.
//
// Thus we need to add 4 to the offset here.
let lo12 = (offset + 4) as u32 & 0xFFF;
let insn = (insn & 0xFFFFF) | (lo12 << 20);
buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn));
}
LabelUse::RVCJump => {
debug_assert!(offset & 1 == 0);
// We currently only support this for the C.J operation, so assert that is the opcode in
// the buffer.
debug_assert_eq!(insn & 0xFFFF, 0xA001);
buffer[0..2].clone_from_slice(&u16::to_le_bytes(encode_cj_type(
CjOp::CJ,
Imm12::from_i16(i16::try_from(offset).unwrap()),
)));
}
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn label_use_max_range() {
assert!(LabelUse::B12.max_neg_range() == LabelUse::B12.max_pos_range() + 2);
assert!(LabelUse::Jal20.max_neg_range() == LabelUse::Jal20.max_pos_range() + 2);
assert!(LabelUse::PCRel32.max_pos_range() == (Inst::imm_max() as CodeOffset));
assert!(LabelUse::PCRel32.max_neg_range() == (Inst::imm_min().abs() as CodeOffset));
assert!(LabelUse::B12.max_pos_range() == ((1 << 11) - 1) * 2);
}
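#[test]
fn auipc_jalr_encodings() {
// A sanity sketch for the raw encodings used in veneers:
// `auipc t0,0` assembles to 0x0000_0297 and `jalr zero,0(t0)` to
// 0x0002_8067.
let t0 = Writable::from_reg(x_reg(5));
assert_eq!(enc_auipc(t0, Imm20::ZERO), 0x0000_0297);
assert_eq!(
enc_jalr(writable_zero_reg(), t0.to_reg(), Imm12::ZERO),
0x0002_8067
);
}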
}