;; vendor/cranelift-codegen/src/isa/riscv64/inst.isle - toolchain/rustc - Git at Google

 ;; Instruction formats.
 ;;
 ;; `MInst` is the machine-instruction enum for this backend. Each variant
 ;; lists its operands as `(name Type)` fields; the helper constructors in this
 ;; file build variants as `MInst.<Variant>` and pass them to `emit`.
 (type MInst
   (enum
     ;; A no-op of zero size.
     (Nop0)
     ;; NOTE(review): presumably a 4-byte no-op (by analogy with `Nop0`) —
     ;; confirm against the emitter.
     (Nop4)

     ;; Load upper immediate: a destination register and a 20-bit immediate.
     (Lui
       (rd WritableReg)
       (imm Imm20))

     ;; Load the 64-bit constant `imm`, of type `ty`, into `rd`.
     (LoadInlineConst
       (rd WritableReg)
       (ty Type)
       (imm u64))

     ;; `auipc`: a destination register and a 20-bit immediate.
      (Auipc
       (rd WritableReg)
       (imm Imm20))

     ;; An FPU operation with one register source and a register destination.
     (FpuRR
       (alu_op FpuOPRR)
       (frm OptionFloatRoundingMode)
       (rd WritableReg)
       (rs Reg))


     ;; An ALU operation with two register sources and a register destination.
     (AluRRR
       (alu_op AluOPRRR)
       (rd WritableReg)
       (rs1 Reg)
       (rs2 Reg))

     ;; An FPU operation with two register sources and a register destination.
     (FpuRRR
       (alu_op FpuOPRRR)
       (frm OptionFloatRoundingMode)
       (rd WritableReg)
       (rs1 Reg)
       (rs2 Reg))

     ;; An FPU operation with three register sources and a register destination.
     (FpuRRRR
       (alu_op FpuOPRRRR)
       (frm OptionFloatRoundingMode)
       (rd WritableReg)
       (rs1 Reg)
       (rs2 Reg)
       (rs3 Reg))

     ;; An ALU operation with a register source and an immediate-12 source, and a register
     ;; destination.
     (AluRRImm12
       (alu_op AluOPRRI)
       (rd WritableReg)
       (rs Reg)
       (imm12 Imm12))

     ;; A CSR reading or writing instruction with a register source and a register destination.
     (CsrReg
       (op CsrRegOP)
       (rd WritableReg)
       (rs Reg)
       (csr CSR))

     ;; A CSR writing instruction with an immediate source and a register destination.
     (CsrImm
       (op CsrImmOP)
       (rd WritableReg)
       (imm UImm5)
       (csr CSR))

     ;; A load from the address `from` into `rd`.
     (Load
       (rd WritableReg)
       (op LoadOP)
       (flags MemFlags)
       (from AMode))
     ;; A store of `src` to the address `to`.
     (Store
       (to AMode)
       (op StoreOP)
       (flags MemFlags)
       (src Reg))

     ;; A pseudo-instruction that captures register arguments in vregs.
     (Args
       (args VecArgPair))

     ;; A pseudo-instruction that moves vregs to return registers.
     (Rets
       (rets VecRetPair))

     (Ret)

     ;; Extend `rn` from `from_bits` to `to_bits` wide into `rd`; `signed`
     ;; selects between the signed and unsigned form.
      (Extend
       (rd WritableReg)
       (rn Reg)
       (signed bool)
       (from_bits u8)
       (to_bits u8))

     (AdjustSp
       (amount i64))
     (Call
       (info BoxCallInfo))

       ;; A machine indirect-call instruction.
     (CallInd
       (info BoxCallIndInfo))

     ;; A direct return-call macro instruction.
     (ReturnCall
       (callee BoxExternalName)
       (info BoxReturnCallInfo))

     ;; An indirect return-call macro instruction.
     (ReturnCallInd
       (callee Reg)
       (info BoxReturnCallInfo))

     ;; Emits a trap with the given trap code if the comparison succeeds.
     (TrapIf
       (rs1 Reg)
       (rs2 Reg)
       (cc IntCC)
       (trap_code TrapCode))

     (Jal
       ;; (rd WritableReg) don't use
       (label MachLabel))

     (CondBr
       (taken CondBrTarget)
       (not_taken CondBrTarget)
       (kind IntegerCompare))

     ;; Load an inline symbol reference.
     (LoadExtName
       (rd WritableReg)
       (name BoxExternalName)
       (offset i64))

     ;; Load a TLS symbol address.
     (ElfTlsGetAddr
       (rd WritableReg)
       (name BoxExternalName))

     ;; Load address referenced by `mem` into `rd`.
     (LoadAddr
       (rd WritableReg)
       (mem AMode))

     ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This
     ;; controls how AMode::NominalSPOffset args are lowered.
     (VirtualSPOffsetAdj
       (amount i64))

     ;; A MOV instruction. These are encoded as OrR's (AluRRR form) but we
     ;; keep them separate at the `Inst` level for better pretty-printing
     ;; and faster `is_move()` logic.
     (Mov
       (rd WritableReg)
       (rm Reg)
       (ty Type))

     ;; A MOV instruction, but where the source register is a non-allocatable
     ;; PReg. It's important that the register be non-allocatable, as regalloc2
     ;; will not see it as used.
     (MovFromPReg
       (rd WritableReg)
       (rm PReg))

     (Fence
       (pred FenceReq)
       (succ FenceReq))

     (EBreak)

     ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at
     ;; runtime.
     (Udf
       (trap_code TrapCode))
     ;; A jump-and-link-register operation.
     (Jalr
       ;; Plain unconditional jumps (assembler pseudo-op J) are encoded as a JAL with rd=x0.
       (rd WritableReg)
       (base Reg)
       (offset Imm12))

     ;; Atomic operations.
     (Atomic
       (op AtomicOP)
       (rd WritableReg)
       (addr Reg)
       (src Reg)
       (amo AMO))
     ;; An atomic store.
     (AtomicStore
       (src Reg)
       (ty Type)
       (p Reg))
     ;; An atomic load.
     (AtomicLoad
       (rd WritableReg)
       (ty Type)
       (p Reg))

     ;; An atomic read-modify-write implemented with a loop (an atomic nand,
     ;; for example, needs a loop to implement).
     (AtomicRmwLoop
       (offset Reg)
       (op AtomicRmwOp)
       (dst WritableReg)
       (ty Type)
       (p Reg)
       (x Reg)
       (t0 WritableReg))

     ;; Select `x` or `y` based on `condition`.
     (Select
       (dst WritableValueRegs)
       (condition IntegerCompare)
       (x ValueRegs)
       (y ValueRegs))

     (BrTable
       (index Reg)
       (tmp1 WritableReg)
       (tmp2 WritableReg)
       (targets VecMachLabel))

     ;; Atomic compare-and-set operation.
     (AtomicCas
       (offset Reg)
       (t0 WritableReg)
       (dst WritableReg)
       (e Reg)
       (addr Reg)
       (v Reg)
       (ty Type))
     ;; An integer compare.
     (Icmp
       (cc IntCC)
       (rd WritableReg)
       (a ValueRegs)
       (b ValueRegs)
       (ty Type))
     ;; Float-to-integer conversion from `in_type` to `out_type`, with flags
     ;; for signedness (`is_signed`) and saturation (`is_sat`).
     (FcvtToInt
       (is_sat bool)
       (rd WritableReg)
       (tmp WritableReg) ;; a float register to load bounds.
       (rs Reg)
       (is_signed bool)
       (in_type Type)
       (out_type Type))

     (RawData (data VecU8))

     ;; An unwind pseudo-instruction.
        (Unwind
         (inst UnwindInst))

     ;; A dummy use, useful to keep a value alive.
        (DummyUse
         (reg Reg))
     ;; Floating-point rounding; `op` picks one of the `FloatRoundOP` modes.
     ;; Takes one integer temporary and one float temporary.
     (FloatRound
       (op FloatRoundOP)
       (rd WritableReg)
       (int_tmp WritableReg)
       (f_tmp WritableReg)
       (rs Reg)
       (ty Type))
     ;; Floating-point select; `op` is a `FloatSelectOP` (max or min).
     (FloatSelect
       (op FloatSelectOP)
       (rd WritableReg)
       ;; an integer register
       (tmp WritableReg)
       (rs1 Reg)
       (rs2 Reg)
       (ty Type))

     ;; Population count for targets that do not support extension B;
     ;; implemented by iteration.
     (Popcnt
       (sum WritableReg)
       (step WritableReg)
       (tmp WritableReg)
       (rs Reg)
       (ty Type))

     ;; Counting leading or trailing zeros.
     (Cltz
       ;; leading or trailing.
       (leading bool)
       (sum WritableReg)
       (step WritableReg)
       (tmp WritableReg)
       (rs Reg)
       (ty Type))
     ;; Byte-reverse register.
     (Rev8
       (rs Reg)
       (step WritableReg)
       (tmp WritableReg)
       (rd WritableReg))
     ;; NOTE(review): undocumented in the original; presumably the bit-reverse
     ;; counterpart of `Rev8` — confirm against the emitter.
     (Brev8
       (rs Reg)
       (ty Type)
       (step WritableReg)
       (tmp WritableReg)
       (tmp2 WritableReg)
       (rd WritableReg))
     (StackProbeLoop
       (guard_size u32)
       (probe_count u32)
       (tmp WritableReg))

     ;; Vector instructions. Each carries a masking mode (`mask`) and the
     ;; vector state (`vstate`) alongside its operands.

     (VecAluRRRR
       (op VecAluOpRRRR)
       (vd WritableReg)
       (vd_src Reg)
       (vs2 Reg)
       (vs1 Reg)
       (mask VecOpMasking)
       (vstate VState))

     (VecAluRRRImm5
       (op VecAluOpRRRImm5)
       (vd WritableReg)
       (vd_src Reg)
       (vs2 Reg)
       (imm Imm5)
       (mask VecOpMasking)
       (vstate VState))

     (VecAluRRR
       (op VecAluOpRRR)
       (vd WritableReg)
       (vs2 Reg)
       (vs1 Reg)
       (mask VecOpMasking)
       (vstate VState))

     (VecAluRRImm5
       (op VecAluOpRRImm5)
       (vd WritableReg)
       (vs2 Reg)
       (imm Imm5)
       (mask VecOpMasking)
       (vstate VState))

     (VecAluRR
       (op VecAluOpRR)
       (vd WritableReg)
       (vs Reg)
       (mask VecOpMasking)
       (vstate VState))

     (VecAluRImm5
       (op VecAluOpRImm5)
       (vd WritableReg)
       (imm Imm5)
       (mask VecOpMasking)
       (vstate VState))

     (VecSetState
       (rd WritableReg)
       (vstate VState))

     (VecLoad
       (eew VecElementWidth)
       (to WritableReg)
       (from VecAMode)
       (flags MemFlags)
       (mask VecOpMasking)
       (vstate VState))

     (VecStore
       (eew VecElementWidth)
       (to VecAMode)
       (from Reg)
       (flags MemFlags)
       (mask VecOpMasking)
       (vstate VState))
 ))


 ;; Operation selector carried by `MInst.FloatSelect`.
 (type FloatSelectOP (enum
   (Max)
   (Min)
 ))

 ;; Rounding-operation selector carried by `MInst.FloatRound`.
 (type FloatRoundOP (enum
   (Nearest)
   (Ceil)
   (Floor)
   (Trunc)
 ))

 ;; Opcodes carried by `MInst.Atomic`. The same set of operations is listed
 ;; twice: once with a `W` suffix and once with a `D` suffix.
 ;; NOTE(review): presumably `W`/`D` are the 32-bit word and 64-bit
 ;; doubleword forms of the RISC-V "A" extension — confirm.
 (type AtomicOP (enum
   (LrW)
   (ScW)
   (AmoswapW)
   (AmoaddW)
   (AmoxorW)
   (AmoandW)
   (AmoorW)
   (AmominW)
   (AmomaxW)
   (AmominuW)
   (AmomaxuW)
   (LrD)
   (ScD)
   (AmoswapD)
   (AmoaddD)
   (AmoxorD)
   (AmoandD)
   (AmoorD)
   (AmominD)
   (AmomaxD)
   (AmominuD)
   (AmomaxuD)
 ))

 ;; Opcodes carried by `MInst.FpuRRRR` (three register sources).
 (type FpuOPRRRR (enum
   ;; float32
   (FmaddS)
   (FmsubS)
   (FnmsubS)
   (FnmaddS)
   ;; float64
   (FmaddD)
   (FmsubD)
   (FnmsubD)
   (FnmaddD)
 ))

 ;; Floating-point classes. The number in each comment is the index the
 ;; original comments assign to that class.
 ;; NOTE(review): presumably these are the result bit positions of the
 ;; `fclass` instruction — confirm.
 (type FClassResult (enum
   ;; 0 rs1 is −∞.
   (NegInfinite)
   ;; 1 rs1 is a negative normal number.
   (NegNormal)
   ;; 2 rs1 is a negative subnormal number.
   (NegSubNormal)
   ;; 3 rs1 is −0.
   (NegZero)
   ;; 4 rs1 is +0.
   (PosZero)
   ;; 5 rs1 is a positive subnormal number.
   (PosSubNormal)
   ;; 6 rs1 is a positive normal number.
   (PosNormal)
   ;; 7 rs1 is +∞.
   (PosInfinite)
   ;; 8 rs1 is a signaling NaN.
   (SNaN)
   ;; 9 rs1 is a quiet NaN.
   (QNaN)
 ))

 ;; Opcodes carried by `MInst.FpuRR` (one register source), grouped by the
 ;; extension that provides them.
 (type FpuOPRR (enum
   ;; RV32F Standard Extension
   (FsqrtS)
   (FcvtWS)
   (FcvtWuS)
   (FmvXW)
   (FclassS)
   (FcvtSw)
   (FcvtSwU)
   (FmvWX)


   ;; RV64F Standard Extension (in addition to RV32F)
   (FcvtLS)
   (FcvtLuS)
   (FcvtSL)
   (FcvtSLU)


   ;; RV64D Standard Extension (in addition to RV32D)
   (FcvtLD)
   (FcvtLuD)
   (FmvXD)
   (FcvtDL)
   (FcvtDLu)
   (FmvDX)

   ;; RV32D Standard Extension
   (FsqrtD)
   (FcvtSD)
   (FcvtDS)
   (FclassD)
   (FcvtWD)
   (FcvtWuD)
   (FcvtDW)
   (FcvtDWU)
   ;; NOTE(review): the original ends with a stray "bitmapip" marker
   ;; (presumably "bitmanip"); no variants follow it.

 ))

 ;; Opcodes carried by `MInst.Load`.
 (type LoadOP (enum
   (Lb)
   (Lh)
   (Lw)
   (Lbu)
   (Lhu)
   (Lwu)
   (Ld)
   (Flw)
   (Fld)
 ))

 ;; Opcodes carried by `MInst.Store`.
 (type StoreOP (enum
   (Sb)
   (Sh)
   (Sw)
   (Sd)
   (Fsw)
   (Fsd)
 ))

 ;; Opcodes carried by `MInst.AluRRR` (two register sources), grouped by the
 ;; ISA extension that provides them.
 (type AluOPRRR (enum
   ;; base set
   (Add)
   (Sub)
   (Sll)
   (Slt)
   (SltU)
   (Sgt)
   (Sgtu)
   (Xor)
   (Srl)
   (Sra)
   (Or)
   (And)

   ;; RV64I Base Instruction Set (in addition to RV32I)
   (Addw)
   (Subw)
   (Sllw)
   (Srlw)
   (Sraw)


   ;; RV32M Standard Extension
   (Mul)
   (Mulh)
   (Mulhsu)
   (Mulhu)
   (Div)
   (DivU)
   (Rem)
   (RemU)

   ;; RV64M Standard Extension (in addition to RV32M)
   (Mulw)
   (Divw)
   (Divuw)
   (Remw)
   (Remuw)

   ;; Zba: Address Generation Instructions
   (Adduw)
   (Sh1add)
   (Sh1adduw)
   (Sh2add)
   (Sh2adduw)
   (Sh3add)
   (Sh3adduw)

   ;; Zbb: Bit Manipulation Instructions
   (Andn)
   (Orn)
   (Xnor)
   (Max)
   (Maxu)
   (Min)
   (Minu)
   (Rol)
   (Rolw)
   (Ror)
   (Rorw)

   ;; Zbs: Single-bit instructions
   (Bclr)
   (Bext)
   (Binv)
   (Bset)

   ;; Zbc: Carry-less multiplication
   (Clmul)
   (Clmulh)
   (Clmulr)

   ;; Zbkb: Bit-manipulation for Cryptography
   (Pack)
   (Packw)
   (Packh)
 ))


 ;; Opcodes carried by `MInst.FpuRRR` (two register sources). The `S` and `D`
 ;; suffixed groups list the same operations for the F and D extensions.
 (type FpuOPRRR (enum
   ;; RV32F Standard Extension
   (FaddS)
   (FsubS)
   (FmulS)
   (FdivS)

   (FsgnjS)
   (FsgnjnS)
   (FsgnjxS)
   (FminS)
   (FmaxS)
   (FeqS)
   (FltS)
   (FleS)

   ;; RV32D Standard Extension
   (FaddD)
   (FsubD)
   (FmulD)
   (FdivD)
   (FsgnjD)
   (FsgnjnD)
   (FsgnjxD)
   (FminD)
   (FmaxD)
   (FeqD)
   (FltD)
   (FleD)
 ))


 ;; Opcodes carried by `MInst.AluRRImm12` (one register source plus an
 ;; `Imm12`), grouped by the ISA extension that provides them.
 (type AluOPRRI (enum
   ;; Base ISA
   (Addi)
   (Slti)
   (SltiU)
   (Xori)
   (Ori)
   (Andi)
   (Slli)
   (Srli)
   (Srai)
   (Addiw)
   (Slliw)
   (SrliW)
   (Sraiw)

   ;; Zba: Address Generation Instructions
   (SlliUw)

   ;; Zbb: Bit Manipulation Instructions
   (Clz)
   (Clzw)
   (Ctz)
   (Ctzw)
   (Cpop)
   (Cpopw)
   (Sextb)
   (Sexth)
   (Zexth)
   (Rori)
   (Roriw)
   (Rev8)
   (Brev8)
   (Orcb)

   ;; Zbs: Single-bit instructions
   (Bclri)
   (Bexti)
   (Binvi)
   (Bseti)
 ))

 ;; The three opcode spaces named `C0`, `C1` and `C2`.
 ;; NOTE(review): presumably the RISC-V compressed-instruction quadrants —
 ;; confirm against the encoder.
 (type COpcodeSpace (enum
   (C0)
   (C1)
   (C2)
 ))

 ;; Opcodes for the CR compressed instruction format
 (type CrOp (enum
   (CMv)
   (CAdd)
   (CJr)
   (CJalr)
   ;; c.ebreak technically isn't a CR format instruction, but its encoding
   ;; lines up with this format.
   (CEbreak)
 ))

 ;; Opcodes for the CA compressed instruction format
 (type CaOp (enum
   (CAnd)
   (COr)
   (CXor)
   (CSub)
   (CAddw)
   (CSubw)
   (CMul)
 ))

 ;; Opcodes for the CJ compressed instruction format
 (type CjOp (enum
   (CJ)
 ))

 ;; Opcodes for the CI compressed instruction format
 (type CiOp (enum
   (CAddi)
   (CAddiw)
   (CAddi16sp)
   (CSlli)
   (CLi)
   (CLui)
   (CLwsp)
   (CLdsp)
   (CFldsp)
 ))

 ;; Opcodes for the CIW compressed instruction format
 (type CiwOp (enum
   (CAddi4spn)
 ))

 ;; Opcodes for the CB compressed instruction format
 (type CbOp (enum
   (CSrli)
   (CSrai)
   (CAndi)
 ))

 ;; Opcodes for the CSS compressed instruction format
 (type CssOp (enum
   (CSwsp)
   (CSdsp)
   (CFsdsp)
 ))

 ;; Opcodes for the CS compressed instruction format
 (type CsOp (enum
   (CSw)
   (CSd)
   (CFsd)
 ))

 ;; Opcodes for the CL compressed instruction format
 (type ClOp (enum
   (CLw)
   (CLd)
   (CFld)
 ))

 ;; Opcodes for the CSZN compressed instruction format
 (type CsznOp (enum
   (CNot)
   (CZextb)
   (CZexth)
   (CZextw)
   (CSextb)
   (CSexth)
 ))

 ;; This is a mix of all Zcb memory addressing instructions.
 ;;
 ;; Technically they are split across 4 different formats.
 ;; But they are all very similar, so we just group them all together.
 (type ZcbMemOp (enum
   (CLbu)
   (CLhu)
   (CLh)
   (CSb)
   (CSh)
 ))


 ;; Opcodes carried by `MInst.CsrReg` (CSR access with a register source).
 (type CsrRegOP (enum
   ;; Atomic Read/Write CSR
   (CsrRW)
   ;; Atomic Read and Set Bits in CSR
   (CsrRS)
   ;; Atomic Read and Clear Bits in CSR
   (CsrRC)
 ))

 ;; Opcodes carried by `MInst.CsrImm` (CSR access with a `UImm5` source).
 (type CsrImmOP (enum
   ;; Atomic Read/Write CSR (Immediate Source)
   (CsrRWI)
   ;; Atomic Read and Set Bits in CSR (Immediate Source)
   (CsrRSI)
   ;; Atomic Read and Clear Bits in CSR (Immediate Source)
   (CsrRCI)
 ))

 ;; Enum of the known CSR registers; this is the `csr` operand of
 ;; `MInst.CsrReg` and `MInst.CsrImm`.
 (type CSR (enum
   ;; Floating-Point Dynamic Rounding Mode
   (Frm)
 ))


 ;; Floating-point rounding modes.
 (type FRM (enum
   ;; Round to Nearest, ties to Even
   (RNE)
   ;; Round towards Zero
   (RTZ)
   ;; Round Down (towards −∞)
   (RDN)
   ;; Round Up (towards +∞)
   (RUP)
   ;; Round to Nearest, ties to Max Magnitude
   (RMM)
   ;; In an instruction's rm field, selects the dynamic rounding mode;
   ;; in the Rounding Mode register, invalid.
   (Fcsr)
 ))

 ;; Map an `FRM` to a `UImm5`. The term is pure, is implemented outside this
 ;; file, and is registered as the implicit `FRM` → `UImm5` conversion.
 (decl pure frm_bits (FRM) UImm5)
 (extern constructor frm_bits frm_bits)
 (convert FRM UImm5 frm_bits)

 ;; Floating-point exception flags.
 (type FFlagsException (enum
   ;; Invalid Operation
   (NV)
   ;; Divide by Zero
   (DZ)
   ;; Overflow
   (OF)
   ;; Underflow
   (UF)
   ;; Inexact
   (NX)
 ))

 ;; Fence requirement bits: input, output, read, write.
 ;;;; SI SO SR SW
 ;;;; PI PO PR PW
 ;; Only the lowest four bits are used. This is the type of the `pred` and
 ;; `succ` operands of `MInst.Fence`.
 (type FenceReq (primitive u8))

 ;; Types declared as primitives: ISLE treats them as opaque and their
 ;; definitions live outside this file.
 (type BoxCallInfo (primitive BoxCallInfo))
 (type BoxCallIndInfo (primitive BoxCallIndInfo))
 (type BoxReturnCallInfo (primitive BoxReturnCallInfo))
 (type IntegerCompare (primitive IntegerCompare))
 (type AMode (primitive AMode))
 (type OptionReg (primitive OptionReg))
 (type OptionImm12 (primitive OptionImm12))
 (type OptionUimm5 (primitive OptionUimm5))
 (type Imm12 (primitive Imm12))
 (type UImm5 (primitive UImm5))
 (type Imm5 (primitive Imm5))
 (type Imm20 (primitive Imm20))
 (type Imm3 (primitive Imm3))
 (type CondBrTarget (primitive CondBrTarget))
 (type OptionFloatRoundingMode (primitive OptionFloatRoundingMode))
 (type VecU8 (primitive VecU8))
 (type AMO (primitive AMO))
 ;; Declared as an extern enum with no variants listed here.
 (type VecMachLabel extern (enum))


 ;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; One read-only and one writable newtype per register class. The
 ;; `*_new` constructors below assert the class: X = Integer, F = Float,
 ;; V = Vector.
 (type XReg (primitive XReg))
 (type WritableXReg (primitive WritableXReg))
 (type FReg (primitive FReg))
 (type WritableFReg (primitive WritableFReg))
 (type VReg (primitive VReg))
 (type WritableVReg (primitive WritableVReg))

 ;; Wrap a `Reg` as an `XReg`.
 ;;
 ;; Asserts that the register has an Integer RegClass. Also registered as
 ;; the implicit `Reg` → `XReg` conversion.
 (decl xreg_new (Reg) XReg)
 (extern constructor xreg_new xreg_new)
 (convert Reg XReg xreg_new)

 ;; Wrap a `WritableReg` as a `WritableXReg`.
 ;;
 ;; Asserts that the register has an Integer RegClass. Also registered as
 ;; the implicit `WritableReg` → `WritableXReg` conversion.
 (decl writable_xreg_new (WritableReg) WritableXReg)
 (extern constructor writable_xreg_new writable_xreg_new)
 (convert WritableReg WritableXReg writable_xreg_new)

 ;; Place a `Value` in a register and view that register as an `XReg`.
 ;;
 ;; Asserts that the value goes into an XReg.
 (decl put_in_xreg (Value) XReg)
 (rule (put_in_xreg value)
   (xreg_new (put_in_reg value)))
 (convert Value XReg put_in_xreg)

 ;; Build an `InstOutput` from one `XReg`.
 (decl output_xreg (XReg) InstOutput)
 (rule (output_xreg reg)
   (output_reg reg))
 (convert XReg InstOutput output_xreg)

 ;; `WritableXReg` → `XReg`.
 (decl pure writable_xreg_to_xreg (WritableXReg) XReg)
 (extern constructor writable_xreg_to_xreg writable_xreg_to_xreg)
 (convert WritableXReg XReg writable_xreg_to_xreg)

 ;; `WritableXReg` → `WritableReg`.
 (decl pure writable_xreg_to_writable_reg (WritableXReg) WritableReg)
 (extern constructor writable_xreg_to_writable_reg writable_xreg_to_writable_reg)
 (convert WritableXReg WritableReg writable_xreg_to_writable_reg)

 ;; `WritableXReg` → `Reg`, going through `WritableReg`.
 (decl pure writable_xreg_to_reg (WritableXReg) Reg)
 (rule (writable_xreg_to_reg wreg)
   (writable_xreg_to_writable_reg wreg))
 (convert WritableXReg Reg writable_xreg_to_reg)

 ;; `XReg` → `Reg`.
 (decl pure xreg_to_reg (XReg) Reg)
 (extern constructor xreg_to_reg xreg_to_reg)
 (convert XReg Reg xreg_to_reg)

 ;; Convert a `XReg` to a `ValueRegs`.
 ;;
 ;; The register is handed to `value_reg`. The implicit `XReg` → `ValueRegs`
 ;; conversion is registered with this term itself, so the conversion's
 ;; declared types match the term's signature (mirroring how
 ;; `writable_xreg_to_value_regs` is registered).
 (decl xreg_to_value_regs (XReg) ValueRegs)
 (rule (xreg_to_value_regs x) (value_reg x))
 (convert XReg ValueRegs xreg_to_value_regs)

 ;; `WritableXReg` → `ValueRegs`, by handing the register to `value_reg`.
 (decl writable_xreg_to_value_regs (WritableXReg) ValueRegs)
 (rule (writable_xreg_to_value_regs wreg)
   (value_reg wreg))
 (convert WritableXReg ValueRegs writable_xreg_to_value_regs)

 ;; Allocate a fresh `WritableXReg`; the temporary is requested with type
 ;; `$I64`.
 (decl temp_writable_xreg () WritableXReg)
 (rule (temp_writable_xreg)
   (temp_writable_reg $I64))


 ;; Wrap a `Reg` as an `FReg`.
 ;;
 ;; Asserts that the register has a Float RegClass. Also registered as the
 ;; implicit `Reg` → `FReg` conversion.
 (decl freg_new (Reg) FReg)
 (extern constructor freg_new freg_new)
 (convert Reg FReg freg_new)

 ;; Wrap a `WritableReg` as a `WritableFReg`.
 ;;
 ;; Asserts that the register has a Float RegClass. Also registered as the
 ;; implicit `WritableReg` → `WritableFReg` conversion.
 (decl writable_freg_new (WritableReg) WritableFReg)
 (extern constructor writable_freg_new writable_freg_new)
 (convert WritableReg WritableFReg writable_freg_new)

 ;; Place a `Value` in a register and view that register as an `FReg`.
 ;;
 ;; Asserts that the value goes into an FReg.
 (decl put_in_freg (Value) FReg)
 (rule (put_in_freg value)
   (freg_new (put_in_reg value)))
 (convert Value FReg put_in_freg)

 ;; Build an `InstOutput` from one `FReg`.
 (decl output_freg (FReg) InstOutput)
 (rule (output_freg reg)
   (output_reg reg))
 (convert FReg InstOutput output_freg)

 ;; `WritableFReg` → `FReg`.
 (decl pure writable_freg_to_freg (WritableFReg) FReg)
 (extern constructor writable_freg_to_freg writable_freg_to_freg)
 (convert WritableFReg FReg writable_freg_to_freg)

 ;; `WritableFReg` → `WritableReg`.
 (decl pure writable_freg_to_writable_reg (WritableFReg) WritableReg)
 (extern constructor writable_freg_to_writable_reg writable_freg_to_writable_reg)
 (convert WritableFReg WritableReg writable_freg_to_writable_reg)

 ;; `WritableFReg` → `Reg`, going through `WritableReg`.
 (decl pure writable_freg_to_reg (WritableFReg) Reg)
 (rule (writable_freg_to_reg wreg)
   (writable_freg_to_writable_reg wreg))
 (convert WritableFReg Reg writable_freg_to_reg)

 ;; `FReg` → `Reg`.
 (decl pure freg_to_reg (FReg) Reg)
 (extern constructor freg_to_reg freg_to_reg)
 (convert FReg Reg freg_to_reg)

 ;; Convert a `FReg` to a `ValueRegs`.
 ;;
 ;; The register is handed to `value_reg`. The implicit `FReg` → `ValueRegs`
 ;; conversion must name this term: `xreg_to_reg` takes an `XReg` and returns
 ;; a `Reg`, so it matches neither end of this conversion.
 (decl freg_to_value_regs (FReg) ValueRegs)
 (rule (freg_to_value_regs x) (value_reg x))
 (convert FReg ValueRegs freg_to_value_regs)

 ;; `WritableFReg` → `ValueRegs`, by handing the register to `value_reg`.
 (decl writable_freg_to_value_regs (WritableFReg) ValueRegs)
 (rule (writable_freg_to_value_regs wreg)
   (value_reg wreg))
 (convert WritableFReg ValueRegs writable_freg_to_value_regs)

 ;; Allocate a fresh `WritableFReg`; the temporary is requested with type
 ;; `$F64`.
 (decl temp_writable_freg () WritableFReg)
 (rule (temp_writable_freg)
   (temp_writable_reg $F64))


 ;; Wrap a `Reg` as a `VReg`.
 ;;
 ;; Asserts that the register has a Vector RegClass. Also registered as the
 ;; implicit `Reg` → `VReg` conversion.
 (decl vreg_new (Reg) VReg)
 (extern constructor vreg_new vreg_new)
 (convert Reg VReg vreg_new)

 ;; Wrap a `WritableReg` as a `WritableVReg`.
 ;;
 ;; Asserts that the register has a Vector RegClass. Also registered as the
 ;; implicit `WritableReg` → `WritableVReg` conversion.
 (decl writable_vreg_new (WritableReg) WritableVReg)
 (extern constructor writable_vreg_new writable_vreg_new)
 (convert WritableReg WritableVReg writable_vreg_new)

 ;; Place a `Value` in a register and view that register as a `VReg`.
 ;;
 ;; Asserts that the value goes into a VReg.
 (decl put_in_vreg (Value) VReg)
 (rule (put_in_vreg value)
   (vreg_new (put_in_reg value)))
 (convert Value VReg put_in_vreg)

 ;; Build an `InstOutput` from one `VReg`.
 (decl output_vreg (VReg) InstOutput)
 (rule (output_vreg reg)
   (output_reg reg))
 (convert VReg InstOutput output_vreg)

 ;; `WritableVReg` → `VReg`.
 (decl pure writable_vreg_to_vreg (WritableVReg) VReg)
 (extern constructor writable_vreg_to_vreg writable_vreg_to_vreg)
 (convert WritableVReg VReg writable_vreg_to_vreg)

 ;; `WritableVReg` → `WritableReg`.
 (decl pure writable_vreg_to_writable_reg (WritableVReg) WritableReg)
 (extern constructor writable_vreg_to_writable_reg writable_vreg_to_writable_reg)
 (convert WritableVReg WritableReg writable_vreg_to_writable_reg)

 ;; `WritableVReg` → `Reg`, going through `WritableReg`.
 (decl pure writable_vreg_to_reg (WritableVReg) Reg)
 (rule (writable_vreg_to_reg wreg)
   (writable_vreg_to_writable_reg wreg))
 (convert WritableVReg Reg writable_vreg_to_reg)

 ;; `VReg` → `Reg`.
 (decl pure vreg_to_reg (VReg) Reg)
 (extern constructor vreg_to_reg vreg_to_reg)
 (convert VReg Reg vreg_to_reg)

 ;; Convert a `VReg` to a `ValueRegs`.
 ;;
 ;; The register is handed to `value_reg`. The implicit `VReg` → `ValueRegs`
 ;; conversion must name this term: `xreg_to_reg` takes an `XReg` and returns
 ;; a `Reg`, so it matches neither end of this conversion.
 (decl vreg_to_value_regs (VReg) ValueRegs)
 (rule (vreg_to_value_regs x) (value_reg x))
 (convert VReg ValueRegs vreg_to_value_regs)

 ;; `WritableVReg` → `ValueRegs`, by handing the register to `value_reg`.
 (decl writable_vreg_to_value_regs (WritableVReg) ValueRegs)
 (rule (writable_vreg_to_value_regs wreg)
   (value_reg wreg))
 (convert WritableVReg ValueRegs writable_vreg_to_value_regs)

 ;; Allocate a fresh `WritableVReg`; the temporary is requested with type
 ;; `$I8X16`.
 (decl temp_writable_vreg () WritableVReg)
 (rule (temp_writable_vreg)
   (temp_writable_reg $I8X16))


 ;; Converters

 ;; `u8` → `i32`. Implemented outside this file and registered as the
 ;; implicit conversion between the two types.
 (convert u8 i32 u8_as_i32)
 (decl u8_as_i32 (u8) i32)
 (extern constructor u8_as_i32 u8_as_i32)

 ;; ISA Extension helpers
 ;;
 ;; One pure, zero-argument predicate per extension name (`m`, `v`, `zbkb`,
 ;; `zba`, `zbb`, `zbc`, `zbs`). Each returns a `bool` and is implemented
 ;; outside this file.
 ;; NOTE(review): presumably each reports whether the named extension is
 ;; enabled for the current target — confirm in the Rust implementation.

 (decl pure has_m () bool)
 (extern constructor has_m has_m)

 (decl pure has_v () bool)
 (extern constructor has_v has_v)

 (decl pure has_zbkb () bool)
 (extern constructor has_zbkb has_zbkb)

 (decl pure has_zba () bool)
 (extern constructor has_zba has_zba)

 (decl pure has_zbb () bool)
 (extern constructor has_zbb has_zbb)

 (decl pure has_zbc () bool)
 (extern constructor has_zbc has_zbc)

 (decl pure has_zbs () bool)
 (extern constructor has_zbs has_zbs)

 ;; Emit an `MInst.FloatRound` applying `op` to `src` at type `ty`, and
 ;; return the register that receives the result.
 ;;
 ;; Three temporaries are allocated, in this order: the destination (of type
 ;; `ty`), an integer scratch register and a float scratch register.
 (decl gen_float_round (FloatRoundOP Reg Type) Reg)
 (rule (gen_float_round op src ty)
   (let ((dst WritableReg (temp_writable_reg ty))
         (int_scratch WritableXReg (temp_writable_xreg))
         (float_scratch WritableFReg (temp_writable_freg))
         (_ Unit
            (emit (MInst.FloatRound op dst int_scratch float_scratch src ty))))
     (writable_reg_to_reg dst)))

 ;; Emit an `MInst.FloatSelect` applying `op` to `lhs` and `rhs` at type
 ;; `ty`, and return the register that receives the result.
 ;;
 ;; Two temporaries are allocated, in this order: the destination (of type
 ;; `ty`) and an integer scratch register.
 (decl gen_float_select (FloatSelectOP Reg Reg Type) Reg)
 (rule (gen_float_select op lhs rhs ty)
   (let ((dst WritableReg (temp_writable_reg ty))
         (int_scratch WritableXReg (temp_writable_xreg))
         (_ Unit
            (emit (MInst.FloatSelect op dst int_scratch lhs rhs ty))))
     (writable_reg_to_reg dst)))


 ;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; RV32I Base Integer Instruction Set

 ;; Emit `add`.
 ;; rd ← rs1 + rs2
 (decl rv_add (XReg XReg) XReg)
 (rule (rv_add lhs rhs) (alu_rrr (AluOPRRR.Add) lhs rhs))

 ;; Emit `addi` ("Add Immediate").
 ;; rd ← rs1 + sext(imm)
 (decl rv_addi (XReg Imm12) XReg)
 (rule (rv_addi src immediate) (alu_rr_imm12 (AluOPRRI.Addi) src immediate))

 ;; Emit `sub`.
 ;; rd ← rs1 - rs2
 (decl rv_sub (XReg XReg) XReg)
 (rule (rv_sub lhs rhs) (alu_rrr (AluOPRRR.Sub) lhs rhs))

 ;; Emit `neg`, which is a mnemonic for `sub rd, zero, rs1`.
 (decl rv_neg (XReg) XReg)
 (rule (rv_neg src) (alu_rrr (AluOPRRR.Sub) (zero_reg) src))

 ;; Emit `sll` ("Shift Left Logical").
 ;; rd ← rs1 << rs2
 (decl rv_sll (XReg XReg) XReg)
 (rule (rv_sll value amount) (alu_rrr (AluOPRRR.Sll) value amount))

 ;; Emit `slli` ("Shift Left Logical Immediate").
 ;; rd ← rs1 << uext(imm)
 (decl rv_slli (XReg Imm12) XReg)
 (rule (rv_slli value amount) (alu_rr_imm12 (AluOPRRI.Slli) value amount))

 ;; Emit `srl` ("Shift Right Logical").
 ;; rd ← rs1 >> rs2
 (decl rv_srl (XReg XReg) XReg)
 (rule (rv_srl value amount) (alu_rrr (AluOPRRR.Srl) value amount))

 ;; Emit `srli` ("Shift Right Logical Immediate").
 ;; rd ← rs1 >> uext(imm)
 (decl rv_srli (XReg Imm12) XReg)
 (rule (rv_srli value amount) (alu_rr_imm12 (AluOPRRI.Srli) value amount))

 ;; Emit `sra` ("Shift Right Arithmetic").
 ;; rd ← rs1 >> rs2
 (decl rv_sra (XReg XReg) XReg)
 (rule (rv_sra value amount) (alu_rrr (AluOPRRR.Sra) value amount))

 ;; Emit `srai` ("Shift Right Arithmetic Immediate").
 ;; rd ← rs1 >> uext(imm)
 (decl rv_srai (XReg Imm12) XReg)
 (rule (rv_srai value amount) (alu_rr_imm12 (AluOPRRI.Srai) value amount))

 ;; Emit `or`.
 ;; rd ← rs1 ∨ rs2
 (decl rv_or (XReg XReg) XReg)
 (rule (rv_or lhs rhs) (alu_rrr (AluOPRRR.Or) lhs rhs))

 ;; Emit `ori` ("Or Immediate"). The 12-bit immediate is sign-extended.
 ;; rd ← rs1 ∨ sext(imm)
 (decl rv_ori (XReg Imm12) XReg)
 (rule (rv_ori src immediate) (alu_rr_imm12 (AluOPRRI.Ori) src immediate))

 ;; Emit `xor`.
 ;; rd ← rs1 ⊕ rs2
 (decl rv_xor (XReg XReg) XReg)
 (rule (rv_xor lhs rhs) (alu_rrr (AluOPRRR.Xor) lhs rhs))

 ;; Emit `xori` ("Exclusive Or Immediate"). The 12-bit immediate is
 ;; sign-extended.
 ;; rd ← rs1 ⊕ sext(imm)
 (decl rv_xori (XReg Imm12) XReg)
 (rule (rv_xori src immediate) (alu_rr_imm12 (AluOPRRI.Xori) src immediate))

 ;; Emit `not`, which is a mnemonic for `xori rd, rs1, -1`.
 (decl rv_not (XReg) XReg)
 (rule (rv_not src) (rv_xori src (imm12_const -1)))

 ;; Emit `and`.
 ;; rd ← rs1 ∧ rs2
 (decl rv_and (XReg XReg) XReg)
 (rule (rv_and lhs rhs) (alu_rrr (AluOPRRR.And) lhs rhs))

 ;; Emit `andi` ("And Immediate"). The 12-bit immediate is sign-extended.
 ;; rd ← rs1 ∧ sext(imm)
 (decl rv_andi (XReg Imm12) XReg)
 (rule (rv_andi src immediate) (alu_rr_imm12 (AluOPRRI.Andi) src immediate))

 ;; Emit `sltu` ("Set Less Than Unsigned").
 ;; rd ← rs1 < rs2
 (decl rv_sltu (XReg XReg) XReg)
 (rule (rv_sltu lhs rhs) (alu_rrr (AluOPRRR.SltU) lhs rhs))

 ;; Emit `snez`, which is a mnemonic for `sltu rd, zero, rs`.
 (decl rv_snez (XReg) XReg)
 (rule (rv_snez src) (rv_sltu (zero_reg) src))

 ;; Emit `sltiu` ("Set Less Than Immediate Unsigned").
 ;; rd ← rs1 < imm
 (decl rv_sltiu (XReg Imm12) XReg)
 (rule (rv_sltiu src immediate) (alu_rr_imm12 (AluOPRRI.SltiU) src immediate))

 ;; Emit `seqz`, which is a mnemonic for `sltiu rd, rs, 1`.
 (decl rv_seqz (XReg) XReg)
 (rule (rv_seqz src) (rv_sltiu src (imm12_const 1)))


 ;; RV64I Base Integer Instruction Set
 ;; Unlike the RV32I instructions, these exist only in the 64-bit ISA.

 ;; Emit `addw` ("Add Word").
 ;; rd ← sext32(rs1 + rs2)
 (decl rv_addw (XReg XReg) XReg)
 (rule (rv_addw lhs rhs) (alu_rrr (AluOPRRR.Addw) lhs rhs))

 ;; Emit `addiw` ("Add Word Immediate").
 ;; rd ← sext32(rs1 + sext(imm))
 (decl rv_addiw (XReg Imm12) XReg)
 (rule (rv_addiw src immediate) (alu_rr_imm12 (AluOPRRI.Addiw) src immediate))

 ;; Emit `sext.w` ("Sign Extend Word"), which is a mnemonic for
 ;; `addiw rd, rs, 0`.
 (decl rv_sextw (XReg) XReg)
 (rule (rv_sextw src) (rv_addiw src (imm12_const 0)))

 ;; Emit `subw` ("Subtract Word").
 ;; rd ← sext32(rs1 - rs2)
 (decl rv_subw (XReg XReg) XReg)
 (rule (rv_subw lhs rhs) (alu_rrr (AluOPRRR.Subw) lhs rhs))

 ;; Emit `sllw` ("Shift Left Logical Word").
 ;; rd ← sext32(uext32(rs1) << rs2)
 (decl rv_sllw (XReg XReg) XReg)
 (rule (rv_sllw value amount) (alu_rrr (AluOPRRR.Sllw) value amount))

 ;; Emit `slliw` ("Shift Left Logical Immediate Word").
 ;; rd ← sext32(uext32(rs1) << imm)
 (decl rv_slliw (XReg Imm12) XReg)
 (rule (rv_slliw value amount) (alu_rr_imm12 (AluOPRRI.Slliw) value amount))

 ;; Emit `srlw` ("Shift Right Logical Word").
 ;; rd ← sext32(uext32(rs1) >> rs2)
 (decl rv_srlw (XReg XReg) XReg)
 (rule (rv_srlw value amount) (alu_rrr (AluOPRRR.Srlw) value amount))

 ;; Emit `srliw` ("Shift Right Logical Immediate Word").
 ;; rd ← sext32(uext32(rs1) >> imm)
 (decl rv_srliw (XReg Imm12) XReg)
 (rule (rv_srliw value amount) (alu_rr_imm12 (AluOPRRI.SrliW) value amount))

 ;; Emit `sraw` ("Shift Right Arithmetic Word").
 ;; rd ← sext32(rs1 >> rs2)
 (decl rv_sraw (XReg XReg) XReg)
 (rule (rv_sraw value amount) (alu_rrr (AluOPRRR.Sraw) value amount))

 ;; Emit `sraiw` ("Shift Right Arithmetic Immediate Word").
 ;; rd ← sext32(rs1 >> imm)
 (decl rv_sraiw (XReg Imm12) XReg)
 (rule (rv_sraiw value amount) (alu_rr_imm12 (AluOPRRI.Sraiw) value amount))


 ;; RV32M Extension
 ;; TODO: Enable these instructions only when we have the M extension

 ;; Emit `mul`.
 ;; rd ← rs1 × rs2
 (decl rv_mul (XReg XReg) XReg)
 (rule (rv_mul lhs rhs) (alu_rrr (AluOPRRR.Mul) lhs rhs))

 ;; Emit `mulh` ("Multiply High Signed Signed").
 ;; rd ← (sext(rs1) × sext(rs2)) » xlen
 (decl rv_mulh (XReg XReg) XReg)
 (rule (rv_mulh lhs rhs) (alu_rrr (AluOPRRR.Mulh) lhs rhs))

 ;; Emit `mulhu` ("Multiply High Unsigned Unsigned").
 ;; rd ← (uext(rs1) × uext(rs2)) » xlen
 (decl rv_mulhu (XReg XReg) XReg)
 (rule (rv_mulhu lhs rhs) (alu_rrr (AluOPRRR.Mulhu) lhs rhs))

 ;; Emit `div`.
 ;; rd ← rs1 ÷ rs2
 (decl rv_div (XReg XReg) XReg)
 (rule (rv_div dividend divisor) (alu_rrr (AluOPRRR.Div) dividend divisor))

 ;; Emit `divu` ("Divide Unsigned").
 ;; rd ← rs1 ÷ rs2
 (decl rv_divu (XReg XReg) XReg)
 (rule (rv_divu dividend divisor) (alu_rrr (AluOPRRR.DivU) dividend divisor))

 ;; Emit `rem`.
 ;; rd ← rs1 mod rs2
 (decl rv_rem (XReg XReg) XReg)
 (rule (rv_rem dividend divisor) (alu_rrr (AluOPRRR.Rem) dividend divisor))

 ;; Emit `remu` ("Remainder Unsigned").
 ;; rd ← rs1 mod rs2
 (decl rv_remu (XReg XReg) XReg)
 (rule (rv_remu dividend divisor) (alu_rrr (AluOPRRR.RemU) dividend divisor))

 ;; RV64M Extension
 ;; TODO: Enable these instructions only when we have the M extension

 ;; Emit `mulw` ("Multiply Word"). The 32-bit product is sign-extended.
 ;; rd ← sext32(rs1 × rs2)
 (decl rv_mulw (XReg XReg) XReg)
 (rule (rv_mulw lhs rhs) (alu_rrr (AluOPRRR.Mulw) lhs rhs))

 ;; Emit `divw` ("Divide Word").
 ;; rd ← sext32(rs1) ÷ sext32(rs2)
 (decl rv_divw (XReg XReg) XReg)
 (rule (rv_divw dividend divisor) (alu_rrr (AluOPRRR.Divw) dividend divisor))

 ;; Emit `divuw` ("Divide Unsigned Word"). The 32-bit quotient is
 ;; sign-extended.
 ;; rd ← sext32(uext32(rs1) ÷ uext32(rs2))
 (decl rv_divuw (XReg XReg) XReg)
 (rule (rv_divuw dividend divisor) (alu_rrr (AluOPRRR.Divuw) dividend divisor))

 ;; Emit `remw` ("Remainder Word").
 ;; rd ← sext32(rs1) mod sext32(rs2)
 (decl rv_remw (XReg XReg) XReg)
 (rule (rv_remw dividend divisor) (alu_rrr (AluOPRRR.Remw) dividend divisor))

 ;; Emit `remuw` ("Remainder Unsigned Word"). The 32-bit remainder is
 ;; sign-extended.
 ;; rd ← sext32(uext32(rs1) mod uext32(rs2))
 (decl rv_remuw (XReg XReg) XReg)
 (rule (rv_remuw dividend divisor) (alu_rrr (AluOPRRR.Remuw) dividend divisor))


 ;; F and D Extensions
 ;; TODO: Enable these instructions only when we have the F or D extensions
 ;;
 ;; Every helper in this group takes the float type as its first argument and
 ;; picks the single-precision (`S`) or double-precision (`D`) opcode from it.

 ;; Emits `fadd.s` / `fadd.d`.
 (decl rv_fadd (Type FReg FReg) FReg)
 (rule (rv_fadd $F32 lhs rhs)
   (fpu_rrr (FpuOPRRR.FaddS) $F32 lhs rhs))
 (rule (rv_fadd $F64 lhs rhs)
   (fpu_rrr (FpuOPRRR.FaddD) $F64 lhs rhs))

 ;; Emits `fsub.s` / `fsub.d`.
 (decl rv_fsub (Type FReg FReg) FReg)
 (rule (rv_fsub $F32 lhs rhs)
   (fpu_rrr (FpuOPRRR.FsubS) $F32 lhs rhs))
 (rule (rv_fsub $F64 lhs rhs)
   (fpu_rrr (FpuOPRRR.FsubD) $F64 lhs rhs))

 ;; Emits `fmul.s` / `fmul.d`.
 (decl rv_fmul (Type FReg FReg) FReg)
 (rule (rv_fmul $F32 lhs rhs)
   (fpu_rrr (FpuOPRRR.FmulS) $F32 lhs rhs))
 (rule (rv_fmul $F64 lhs rhs)
   (fpu_rrr (FpuOPRRR.FmulD) $F64 lhs rhs))

 ;; Emits `fdiv.s` / `fdiv.d`.
 (decl rv_fdiv (Type FReg FReg) FReg)
 (rule (rv_fdiv $F32 lhs rhs)
   (fpu_rrr (FpuOPRRR.FdivS) $F32 lhs rhs))
 (rule (rv_fdiv $F64 lhs rhs)
   (fpu_rrr (FpuOPRRR.FdivD) $F64 lhs rhs))

 ;; Emits `fsqrt.s` / `fsqrt.d`.
 (decl rv_fsqrt (Type FReg) FReg)
 (rule (rv_fsqrt $F32 src)
   (fpu_rr (FpuOPRR.FsqrtS) $F32 src))
 (rule (rv_fsqrt $F64 src)
   (fpu_rr (FpuOPRR.FsqrtD) $F64 src))

 ;; Emits `fmadd.s` / `fmadd.d`; the three sources are forwarded in order.
 (decl rv_fmadd (Type FReg FReg FReg) FReg)
 (rule (rv_fmadd $F32 a b c)
   (fpu_rrrr (FpuOPRRRR.FmaddS) $F32 a b c))
 (rule (rv_fmadd $F64 a b c)
   (fpu_rrrr (FpuOPRRRR.FmaddD) $F64 a b c))

 ;; Emits `fmv.x.w`: float register → integer register.
 ;; The destination temporary is allocated as `$I32`.
 (decl rv_fmvxw (FReg) XReg)
 (rule (rv_fmvxw src)
   (fpu_rr (FpuOPRR.FmvXW) $I32 src))

 ;; Emits `fmv.x.d`: float register → integer register.
 ;; The destination temporary is allocated as `$I64`.
 (decl rv_fmvxd (FReg) XReg)
 (rule (rv_fmvxd src)
   (fpu_rr (FpuOPRR.FmvXD) $I64 src))

 ;; Emits `fmv.w.x`: integer register → float register.
 ;; The destination temporary is allocated as `$F32`.
 (decl rv_fmvwx (XReg) FReg)
 (rule (rv_fmvwx src)
   (fpu_rr (FpuOPRR.FmvWX) $F32 src))

 ;; Emits `fmv.d.x`: integer register → float register.
 ;; The destination temporary is allocated as `$F64`.
 (decl rv_fmvdx (XReg) FReg)
 (rule (rv_fmvdx src)
   (fpu_rr (FpuOPRR.FmvDX) $F64 src))

 ;; Helper for emitting the `fcvt.d.s` ("Float Convert Single to Double") instruction.
 ;; In RISC-V `fcvt` mnemonics the first suffix names the destination format,
 ;; so this widens a single into a double. The type passed to `fpu_rr` is the
 ;; type of the destination temporary, hence `$F64`.
 (decl rv_fcvtds (FReg) FReg)
 (rule (rv_fcvtds rs1) (fpu_rr (FpuOPRR.FcvtDS) $F64 rs1))

 ;; Helper for emitting the `fcvt.s.d` ("Float Convert Double to Single") instruction.
 ;; This narrows a double into a single, so the destination temporary is `$F32`.
 (decl rv_fcvtsd (FReg) FReg)
 (rule (rv_fcvtsd rs1) (fpu_rr (FpuOPRR.FcvtSD) $F32 rs1))

 ;; Integer → float conversions.
 ;;
 ;; The type handed to `fpu_rr` is the type of the destination temporary, so
 ;; the `fcvt.s.*` helpers use `$F32` and the `fcvt.d.*` helpers use `$F64`.

 ;; Helper for emitting the `fcvt.s.w` instruction (signed 32-bit int → single).
 (decl rv_fcvtsw (XReg) FReg)
 (rule (rv_fcvtsw rs1) (fpu_rr (FpuOPRR.FcvtSw) $F32 rs1))

 ;; Helper for emitting the `fcvt.s.wu` instruction (unsigned 32-bit int → single).
 (decl rv_fcvtswu (XReg) FReg)
 (rule (rv_fcvtswu rs1) (fpu_rr (FpuOPRR.FcvtSwU) $F32 rs1))

 ;; Helper for emitting the `fcvt.d.w` instruction (signed 32-bit int → double).
 (decl rv_fcvtdw (XReg) FReg)
 (rule (rv_fcvtdw rs1) (fpu_rr (FpuOPRR.FcvtDW) $F64 rs1))

 ;; Helper for emitting the `fcvt.d.wu` instruction (unsigned 32-bit int → double).
 (decl rv_fcvtdwu (XReg) FReg)
 (rule (rv_fcvtdwu rs1) (fpu_rr (FpuOPRR.FcvtDWU) $F64 rs1))

 ;; Helper for emitting the `fcvt.s.l` instruction (signed 64-bit int → single).
 (decl rv_fcvtsl (XReg) FReg)
 (rule (rv_fcvtsl rs1) (fpu_rr (FpuOPRR.FcvtSL) $F32 rs1))

 ;; Helper for emitting the `fcvt.s.lu` instruction (unsigned 64-bit int → single).
 (decl rv_fcvtslu (XReg) FReg)
 (rule (rv_fcvtslu rs1) (fpu_rr (FpuOPRR.FcvtSLU) $F32 rs1))

 ;; Helper for emitting the `fcvt.d.l` instruction (signed 64-bit int → double).
 (decl rv_fcvtdl (XReg) FReg)
 (rule (rv_fcvtdl rs1) (fpu_rr (FpuOPRR.FcvtDL) $F64 rs1))

 ;; Helper for emitting the `fcvt.d.lu` instruction (unsigned 64-bit int → double).
 (decl rv_fcvtdlu (XReg) FReg)
 (rule (rv_fcvtdlu rs1) (fpu_rr (FpuOPRR.FcvtDLu) $F64 rs1))

 ;; Emits `fsgnj` ("Floating Point Sign Injection").
 ;; Result is `rs1` carrying the sign bit of `rs2`, i.e. `copysign`.
 (decl rv_fsgnj (Type FReg FReg) FReg)
 (rule (rv_fsgnj $F32 mag sign)
   (fpu_rrr (FpuOPRRR.FsgnjS) $F32 mag sign))
 (rule (rv_fsgnj $F64 mag sign)
   (fpu_rrr (FpuOPRRR.FsgnjD) $F64 mag sign))

 ;; Emits `fsgnjn` ("Floating Point Sign Injection Negated").
 ;; Result is `rs1` carrying the inverted sign bit of `rs2`.
 ;; With `rs1 == rs2` this is `neg`.
 (decl rv_fsgnjn (Type FReg FReg) FReg)
 (rule (rv_fsgnjn $F32 mag sign)
   (fpu_rrr (FpuOPRRR.FsgnjnS) $F32 mag sign))
 (rule (rv_fsgnjn $F64 mag sign)
   (fpu_rrr (FpuOPRRR.FsgnjnD) $F64 mag sign))

 ;; Emits `fneg` ("Floating Point Negate"), the mnemonic for
 ;; `fsgnjn rd, rs1, rs1`.
 (decl rv_fneg (Type FReg) FReg)
 (rule (rv_fneg ty src)
   (rv_fsgnjn ty src src))

 ;; Emits `fsgnjx` ("Floating Point Sign Injection Exclusive").
 ;; Result is `rs1` whose sign is the XOR of the sign bits of `rs1` and `rs2`.
 ;; With `rs1 == rs2` this is `fabs`.
 (decl rv_fsgnjx (Type FReg FReg) FReg)
 (rule (rv_fsgnjx $F32 mag sign)
   (fpu_rrr (FpuOPRRR.FsgnjxS) $F32 mag sign))
 (rule (rv_fsgnjx $F64 mag sign)
   (fpu_rrr (FpuOPRRR.FsgnjxD) $F64 mag sign))

 ;; Emits `fabs` ("Floating Point Absolute"), the mnemonic for
 ;; `fsgnjx rd, rs1, rs1`.
 (decl rv_fabs (Type FReg) FReg)
 (rule (rv_fabs ty src)
   (rv_fsgnjx ty src src))

 ;; Float comparisons. The result lands in an integer register, so the
 ;; destination temporary is always allocated as `$I64`.

 ;; Emits `feq` ("Float Equal").
 (decl rv_feq (Type FReg FReg) XReg)
 (rule (rv_feq $F32 lhs rhs)
   (fpu_rrr (FpuOPRRR.FeqS) $I64 lhs rhs))
 (rule (rv_feq $F64 lhs rhs)
   (fpu_rrr (FpuOPRRR.FeqD) $I64 lhs rhs))

 ;; Emits `flt` ("Float Less Than").
 (decl rv_flt (Type FReg FReg) XReg)
 (rule (rv_flt $F32 lhs rhs)
   (fpu_rrr (FpuOPRRR.FltS) $I64 lhs rhs))
 (rule (rv_flt $F64 lhs rhs)
   (fpu_rrr (FpuOPRRR.FltD) $I64 lhs rhs))

 ;; Emits `fle` ("Float Less Than or Equal").
 (decl rv_fle (Type FReg FReg) XReg)
 (rule (rv_fle $F32 lhs rhs)
   (fpu_rrr (FpuOPRRR.FleS) $I64 lhs rhs))
 (rule (rv_fle $F64 lhs rhs)
   (fpu_rrr (FpuOPRRR.FleD) $I64 lhs rhs))

 ;; Emits `fgt` ("Float Greater Than").
 ;; Implemented as `flt` with the operands swapped.
 (decl rv_fgt (Type FReg FReg) XReg)
 (rule (rv_fgt ty lhs rhs)
   (rv_flt ty rhs lhs))

 ;; Emits `fge` ("Float Greater Than or Equal").
 ;; Implemented as `fle` with the operands swapped.
 (decl rv_fge (Type FReg FReg) XReg)
 (rule (rv_fge ty lhs rhs)
   (rv_fle ty rhs lhs))


 ;; `Zba` Extension Instructions

 ;; Emits `adduw` ("Add Unsigned Word").
 ;; Only the first operand is zero-extended from 32 bits.
 ;; rd ← uext32(rs1) + rs2
 (decl rv_adduw (XReg XReg) XReg)
 (rule (rv_adduw lhs rhs) (alu_rrr (AluOPRRR.Adduw) lhs rhs))

 ;; Emits `zext.w` ("Zero Extend Word"), the mnemonic for
 ;; `adduw rd, rs1, zero`.
 ;; rd ← uext32(rs1)
 (decl rv_zextw (XReg) XReg)
 (rule (rv_zextw src) (rv_adduw src (zero_reg)))

 ;; Emits `slli.uw` ("Shift Left Logical Immediate Unsigned Word").
 ;; rd ← uext32(rs1) << imm
 (decl rv_slliuw (XReg Imm12) XReg)
 (rule (rv_slliuw src amt) (alu_rr_imm12 (AluOPRRI.SlliUw) src amt))


 ;; `Zbb` Extension Instructions

 ;; Emits `andn` ("And Negated").
 ;; rd ← rs1 ∧ ~(rs2)
 (decl rv_andn (XReg XReg) XReg)
 (rule (rv_andn lhs rhs) (alu_rrr (AluOPRRR.Andn) lhs rhs))

 ;; Emits `orn` ("Or Negated").
 ;; rd ← rs1 ∨ ~(rs2)
 (decl rv_orn (XReg XReg) XReg)
 (rule (rv_orn lhs rhs) (alu_rrr (AluOPRRR.Orn) lhs rhs))

 ;; Emits `clz` ("Count Leading Zero Bits").
 (decl rv_clz (XReg) XReg)
 (rule (rv_clz src) (alu_rr_funct12 (AluOPRRI.Clz) src))

 ;; Emits `clzw` ("Count Leading Zero Bits in Word").
 (decl rv_clzw (XReg) XReg)
 (rule (rv_clzw src) (alu_rr_funct12 (AluOPRRI.Clzw) src))

 ;; Emits `ctz` ("Count Trailing Zero Bits").
 (decl rv_ctz (XReg) XReg)
 (rule (rv_ctz src) (alu_rr_funct12 (AluOPRRI.Ctz) src))

 ;; Emits `ctzw` ("Count Trailing Zero Bits in Word").
 (decl rv_ctzw (XReg) XReg)
 (rule (rv_ctzw src) (alu_rr_funct12 (AluOPRRI.Ctzw) src))

 ;; Emits `cpop` ("Count Population").
 (decl rv_cpop (XReg) XReg)
 (rule (rv_cpop src) (alu_rr_funct12 (AluOPRRI.Cpop) src))

 ;; Emits `cpopw` ("Count Population in Word").
 (decl rv_cpopw (XReg) XReg)
 (rule (rv_cpopw src) (alu_rr_funct12 (AluOPRRI.Cpopw) src))

 ;; Emits `max` (signed maximum).
 (decl rv_max (XReg XReg) XReg)
 (rule (rv_max lhs rhs) (alu_rrr (AluOPRRR.Max) lhs rhs))

 ;; Emits `maxu` (unsigned maximum).
 (decl rv_maxu (XReg XReg) XReg)
 (rule (rv_maxu lhs rhs) (alu_rrr (AluOPRRR.Maxu) lhs rhs))

 ;; Helper for emitting the `min` (signed minimum) instruction.
 ;; Must select the `Min` opcode; `Max` here would silently compute the
 ;; opposite result.
 (decl rv_min (XReg XReg) XReg)
 (rule (rv_min rs1 rs2)
   (alu_rrr (AluOPRRR.Min) rs1 rs2))

 ;; Emits `minu` (unsigned minimum).
 (decl rv_minu (XReg XReg) XReg)
 (rule (rv_minu lhs rhs) (alu_rrr (AluOPRRR.Minu) lhs rhs))

 ;; Emits `sext.b`. The immediate field is not an operand; zero is passed.
 (decl rv_sextb (XReg) XReg)
 (rule (rv_sextb src) (alu_rr_imm12 (AluOPRRI.Sextb) src (imm12_const 0)))

 ;; Emits `sext.h`. The immediate field is not an operand; zero is passed.
 (decl rv_sexth (XReg) XReg)
 (rule (rv_sexth src) (alu_rr_imm12 (AluOPRRI.Sexth) src (imm12_const 0)))

 ;; Emits `zext.h`. The immediate field is not an operand; zero is passed.
 (decl rv_zexth (XReg) XReg)
 (rule (rv_zexth src) (alu_rr_imm12 (AluOPRRI.Zexth) src (imm12_const 0)))

 ;; Emits `rol` ("Rotate Left").
 (decl rv_rol (XReg XReg) XReg)
 (rule (rv_rol src amt) (alu_rrr (AluOPRRR.Rol) src amt))

 ;; Emits `rolw` ("Rotate Left Word").
 (decl rv_rolw (XReg XReg) XReg)
 (rule (rv_rolw src amt) (alu_rrr (AluOPRRR.Rolw) src amt))

 ;; Emits `ror` ("Rotate Right").
 (decl rv_ror (XReg XReg) XReg)
 (rule (rv_ror src amt) (alu_rrr (AluOPRRR.Ror) src amt))

 ;; Emits `rorw` ("Rotate Right Word").
 (decl rv_rorw (XReg XReg) XReg)
 (rule (rv_rorw src amt) (alu_rrr (AluOPRRR.Rorw) src amt))

 ;; Emits `rev8` ("Byte Reverse").
 (decl rv_rev8 (XReg) XReg)
 (rule (rv_rev8 src) (alu_rr_funct12 (AluOPRRI.Rev8) src))

 ;; Emits `brev8` ("Bit Reverse Inside Bytes").
 ;; TODO: This instruction is mentioned in some older versions of the
 ;; spec, but has since disappeared, we should follow up on this.
 ;; It probably was renamed to `rev.b` which seems to be the closest match.
 (decl rv_brev8 (XReg) XReg)
 (rule (rv_brev8 src) (alu_rr_funct12 (AluOPRRI.Brev8) src))

 ;; Emits `bseti` ("Single-Bit Set Immediate").
 ;; The immediate is the index of the bit to set.
 (decl rv_bseti (XReg Imm12) XReg)
 (rule (rv_bseti src bit) (alu_rr_imm12 (AluOPRRI.Bseti) src bit))


 ;; `Zbkb` Extension Instructions

 ;; Emits `pack` ("Pack low halves of registers").
 (decl rv_pack (XReg XReg) XReg)
 (rule (rv_pack lo hi) (alu_rrr (AluOPRRR.Pack) lo hi))

 ;; Emits `packw` ("Pack low 16-bits of registers").
 (decl rv_packw (XReg XReg) XReg)
 (rule (rv_packw lo hi) (alu_rrr (AluOPRRR.Packw) lo hi))


 ;; `Zicsr` Extension Instructions

 ;; Emits `csrrwi`: writes a 5-bit immediate to a CSR and returns the
 ;; destination register of the instruction.
 (decl rv_csrrwi (CSR UImm5) XReg)
 (rule (rv_csrrwi target value)
   (csr_imm (CsrImmOP.CsrRWI) target value))

 ;; `fsrmi`: `csrrwi` specialised to the `frm` CSR.
 (decl rv_fsrmi (FRM) XReg)
 (rule (rv_fsrmi mode)
   (rv_csrrwi (CSR.Frm) mode))


 ;; Emits `csrw`, the form of `csrrw` whose destination register is
 ;; always `x0`.
 (decl rv_csrw (CSR XReg) Unit)
 (rule (rv_csrw target src)
   (csr_reg_dst_zero (CsrRegOP.CsrRW) target src))

 ;; `fsrm`: `csrw` specialised to the `frm` CSR.
 (decl rv_fsrm (XReg) Unit)
 (rule (rv_fsrm src)
   (rv_csrw (CSR.Frm) src))


 ;; Mask for shift amounts on the given type: the bit width of the type's
 ;; lane type, minus one.
 (decl pure shift_mask (Type) u64)
 (rule (shift_mask t)
   (u64_sub (ty_bits (lane_type t)) 1))

 ;; External extractor used when materializing constants: matches an `i64`
 ;; and, on success, binds an `Imm20` / `Imm12` pair. The `imm` rules pair
 ;; these with `lui` and `addi` respectively.
 (decl i64_generate_imm (Imm20 Imm12) i64)
 (extern extractor i64_generate_imm i64_generate_imm)

 ;; External extractor: matches an `i64` and binds a `u64` base plus an
 ;; `Imm12` shift amount. The `imm` rules feed the base to `lui` and then
 ;; shift the result left with `slli`.
 (decl i64_shift_for_lui (u64 Imm12) i64)
 (extern extractor i64_shift_for_lui i64_shift_for_lui)

 ;; External extractor: matches an `i64` and binds a smaller `i64` plus an
 ;; `Imm12` shift amount. The `imm` rules materialize the smaller constant and
 ;; shift it left with `slli`.
 (decl i64_shift (i64 Imm12) i64)
 (extern extractor i64_shift i64_shift)

 ;; Immediate Loading rules
 ;; TODO: Loading the zero reg directly causes a bunch of regalloc errors, we should look into it.
 ;; TODO: Load floats using `fld` instead of `ld`
 ;;
 ;; The rules are tried from the highest priority to the lowest, so the
 ;; cheapest sequences come first and the constant-pool load is the fallback.
 (decl imm (Type u64) Reg)

 ;; Priority 5: references are loaded as the integer of the same width.
 (rule 5 (imm $R32 bits) (imm $I32 bits))
 (rule 5 (imm $R64 bits) (imm $I64 bits))

 ;; Priority 5: floats are loaded as integers and then moved into an F register.
 (rule 5 (imm $F32 bits) (gen_bitcast (imm $I32 bits) $I32 $F32))
 (rule 5 (imm $F64 bits) (gen_bitcast (imm $I64 bits) $I64 $F64))

 ;; Priority 4: the upper 20-bit part is zero, so a single `addi` from the
 ;; zero register is enough.
 (rule 4 (imm (ty_int ty) bits)
   (if-let (i64_generate_imm (imm20_is_zero) lo12) (i64_sextend_u64 ty bits))
   (rv_addi (zero_reg) lo12))

 ;; Priority 3: the low 12-bit part is zero, so a single LUI is enough.
 ;; LUI takes a 20 bit immediate, places it on bits 13 to 32 of the register.
 ;; In RV64 this value is then sign extended to 64bits.
 (rule 3 (imm (ty_int ty) bits)
   (if-let (i64_generate_imm hi20 (imm12_is_zero)) (i64_sextend_u64 ty bits))
   (rv_lui hi20))

 ;; Priority 2: `lui` followed by `addi` covers all 32-bit immediates and
 ;; some 64-bit immediates as well.
 (rule 2 (imm (ty_int ty) bits)
   (if-let (i64_generate_imm hi20 lo12) (i64_sextend_u64 ty bits))
   (rv_addi (rv_lui hi20) lo12))

 ;; Priority 1: if the non-zero bits of the immediate fit in 20 bits, use
 ;; LUI followed by a left shift.
 (rule 1 (imm (ty_int ty) bits)
   (if-let (i64_shift_for_lui (imm20_from_u64 hi20) amt) (i64_sextend_u64 ty bits))
   (rv_slli (rv_lui hi20) amt))

 ;; Priority 0: combine one of the rules above with a shift-left. This chops
 ;; off all trailing zeros from the input constant and then checks, via the
 ;; `i64_generate_imm` matcher, whether the remaining constant can itself be
 ;; built by one of the rules above. If so, `imm` recurses to materialize the
 ;; smaller constant, which is then shifted left to form the requested one.
 (rule 0 (imm (ty_int ty) bits)
   (if-let (i64_shift reduced amt) (i64_sextend_u64 ty bits))  ;; constant to make
   (if-let (i64_generate_imm _ _) reduced)                     ;; can the smaller constant be made?
   (rv_slli (imm ty (i64_as_u64 reduced)) amt))

 ;; Priority -1: otherwise fall back to loading the immediate from the
 ;; constant pool.
 (rule -1 (imm (ty_int ty) bits)
   (gen_load
     (gen_const_amode (emit_u64_le_const bits))
     (LoadOP.Ld)
     (mem_flags_trusted)))

 ;; Imm12 Rules

 ;; The `Imm12` constant zero.
 (decl pure imm12_zero () Imm12)
 (rule (imm12_zero)
   (imm12_const 0))

 ;; Builds an `Imm12` from an `i32` (implemented externally).
 (decl pure imm12_const (i32) Imm12)
 (extern constructor imm12_const imm12_const)

 ;; Loads an `i32` that is representable as an `Imm12` into a register by
 ;; adding it to the zero register.
 (decl load_imm12 (i32) Reg)
 (rule (load_imm12 value)
   (rv_addi (zero_reg) (imm12_const value)))

 ;; for load immediate
 (decl imm_from_bits (u64) Imm12)
 (extern constructor imm_from_bits imm_from_bits)

 ;; External constructor taking an `i64` and producing an `Imm12`.
 (decl imm_from_neg_bits (i64) Imm12)
 (extern constructor imm_from_neg_bits imm_from_neg_bits)

 ;; External constructor taking two `i32` values and producing an `Imm12`.
 (decl imm12_const_add (i32 i32) Imm12)
 (extern constructor imm12_const_add imm12_const_add)

 ;; External constructor taking an `Imm12` and a `u64` and producing an `Imm12`.
 (decl imm12_and (Imm12 u64) Imm12)
 (extern constructor imm12_and imm12_and)

 ;; Imm12 Extractors

 ;; Goes straight from a `Value` to an `Imm12` when the value is an `iconst`
 ;; whose constant is accepted by `imm12_from_i64`.
 (decl imm12_from_value (Imm12) Value)
 (extractor (imm12_from_value imm12)
   (i64_from_iconst (imm12_from_i64 imm12)))

 ;; External extractor: matches a `u64` and binds an `Imm12`.
 (decl imm12_from_u64 (Imm12) u64)
 (extern extractor imm12_from_u64 imm12_from_u64)

 ;; External extractor: matches an `i64` and binds an `Imm12`.
 (decl imm12_from_i64 (Imm12) i64)
 (extern extractor imm12_from_i64 imm12_from_i64)

 ;; Partial constructor form of `imm12_from_u64`: yields the `Imm12` only
 ;; when that extractor matches.
 (decl pure partial u64_to_imm12 (u64) Imm12)
 (rule (u64_to_imm12 (imm12_from_u64 imm12))
   imm12)

 ;; Extractor that matches if an `Imm12` is zero.
 (decl pure imm12_is_zero () Imm12)
 (extern extractor imm12_is_zero imm12_is_zero)

 ;; Imm20

 ;; Extractor that matches if a Imm20 is zero
 (decl pure imm20_is_zero () Imm20)
 (extern extractor imm20_is_zero imm20_is_zero)

 ;; External extractor: matches a `u64` and binds an `Imm20`.
 (decl imm20_from_u64 (Imm20) u64)
 (extern extractor imm20_from_u64 imm20_from_u64)

 ;; External extractor: matches an `i64` and binds an `Imm20`.
 (decl imm20_from_i64 (Imm20) i64)
 (extern extractor imm20_from_i64 imm20_from_i64)


 ;; Imm5 Extractors

 ;; External extractor: matches a `u64` and binds an `Imm5`.
 (decl imm5_from_u64 (Imm5) u64)
 (extern extractor imm5_from_u64 imm5_from_u64)

 ;; External extractor: matches an `i64` and binds an `Imm5`.
 (decl imm5_from_i64 (Imm5) i64)
 (extern extractor imm5_from_i64 imm5_from_i64)

 ;; Construct a Imm5 from an i8
 (decl pure partial i8_to_imm5 (i8) Imm5)
 (extern constructor i8_to_imm5 i8_to_imm5)

 ;; Constructor that matches a `Value` equivalent to a replicated Imm5 on all lanes.
 (decl pure partial replicated_imm5 (Value) Imm5)

 ;; Case 1: a `splat` of an `iconst` accepted by `imm5_from_i64`.
 (rule (replicated_imm5 (splat (i64_from_iconst (imm5_from_i64 lane))))
   lane)

 ;; Case 2: a `vconst` whose 128-bit pattern is one byte repeated, narrowed
 ;; step by step (128 → 64 → 32 → 16 → 8 bits), with that byte, reinterpreted
 ;; as `i8`, accepted by `i8_to_imm5`.
 (rule (replicated_imm5 (vconst (u128_from_constant bits128)))
   (if-let (u128_replicated_u64 bits64) bits128)
   (if-let (u64_replicated_u32 bits32) bits64)
   (if-let (u32_replicated_u16 bits16) bits32)
   (if-let (u16_replicated_u8 bits8) bits16)
   (if-let lane (i8_to_imm5 (u8_as_i8 bits8)))
   lane)

 ;; UImm5 Helpers

 ;; Constructor that matches a `Value` equivalent to a replicated UImm5 on all lanes.
 (decl pure partial replicated_uimm5 (Value) UImm5)

 ;; Case 1: a `splat` of a value accepted by `uimm5_from_value`.
 (rule (replicated_uimm5 (splat (uimm5_from_value lane)))
   lane)

 ;; Case 2: a `vconst` whose 128-bit pattern is one byte repeated, narrowed
 ;; step by step (128 → 64 → 32 → 16 → 8 bits), with that byte accepted by
 ;; `uimm5_from_u8`.
 (rule 1 (replicated_uimm5 (vconst (u128_from_constant bits128)))
   (if-let (u128_replicated_u64 bits64) bits128)
   (if-let (u64_replicated_u32 bits32) bits64)
   (if-let (u32_replicated_u16 bits16) bits32)
   (if-let (u16_replicated_u8 bits8) bits16)
   (if-let (uimm5_from_u8 lane) bits8)
   lane)

 ;; Helper to go directly from a `Value`, when it's an `iconst`, to an `UImm5`.
 (decl uimm5_from_value (UImm5) Value)
 (extractor (uimm5_from_value uimm)
   (iconst (u64_from_imm64 (uimm5_from_u64 uimm))))

 ;; Extract a `UImm5` from an `u8`.
 (decl pure partial uimm5_from_u8 (UImm5) u8)
 (extern extractor uimm5_from_u8 uimm5_from_u8)

 ;; Extract a `UImm5` from an `u64`.
 (decl pure partial uimm5_from_u64 (UImm5) u64)
 (extern extractor uimm5_from_u64 uimm5_from_u64)

 ;; Convert a `u64` into an `UImm5`; partial, so it only succeeds when
 ;; `uimm5_from_u64` matches.
 (decl pure partial u64_to_uimm5 (u64) UImm5)
 (rule (u64_to_uimm5 (uimm5_from_u64 uimm))
   uimm)

 ;; External constructor turning a `UImm5` into an `Imm5`.
 (decl uimm5_bitcast_to_imm5 (UImm5) Imm5)
 (extern constructor uimm5_bitcast_to_imm5 uimm5_bitcast_to_imm5)

 ;; Float Helpers

 ;; Returns the bitpattern of the Canonical NaN for the given type.
 ;; Only `$F32` and `$F64` are handled; the `$F32` pattern is returned in the
 ;; low 32 bits of the `u64`.
 (decl pure canonical_nan_u64 (Type) u64)
 (rule (canonical_nan_u64 $F32) 0x7fc00000)
 (rule (canonical_nan_u64 $F64) 0x7ff8000000000000)

 ;; External constructor supplying the rounding-mode operand that the
 ;; `fpu_rr`, `fpu_rrr` and `fpu_rrrr` helpers place in every FPU instruction.
 (decl gen_default_frm () OptionFloatRoundingMode)
 (extern constructor gen_default_frm gen_default_frm)

 ;; Emits an `MInst.FpuRR` with the default rounding mode.
 ;; `ty` is the type of the freshly allocated destination register.
 (decl fpu_rr (FpuOPRR Type Reg) Reg)
 (rule (fpu_rr opcode ty rs)
   (let ((rd WritableReg (temp_writable_reg ty))
         (_ Unit (emit (MInst.FpuRR opcode (gen_default_frm) rd rs))))
     rd))

 ;; Emits an `MInst.AluRRR` into a fresh integer register.
 (decl alu_rrr (AluOPRRR Reg Reg) Reg)
 (rule (alu_rrr opcode rs1 rs2)
   (let ((rd WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.AluRRR opcode rd rs1 rs2))))
     rd))

 ;; Emits an `MInst.FpuRRR` with the default rounding mode.
 ;; `ty` is the type of the freshly allocated destination register.
 (decl fpu_rrr (FpuOPRRR Type Reg Reg) Reg)
 (rule (fpu_rrr opcode ty rs1 rs2)
   (let ((rd WritableReg (temp_writable_reg ty))
         (_ Unit (emit (MInst.FpuRRR opcode (gen_default_frm) rd rs1 rs2))))
     rd))

 ;; Emits an `MInst.FpuRRRR` with the default rounding mode.
 ;; `ty` is the type of the freshly allocated destination register.
 (decl fpu_rrrr (FpuOPRRRR Type Reg Reg Reg) Reg)
 (rule (fpu_rrrr opcode ty rs1 rs2 rs3)
   (let ((rd WritableReg (temp_writable_reg ty))
         (_ Unit (emit (MInst.FpuRRRR opcode (gen_default_frm) rd rs1 rs2 rs3))))
     rd))


 ;; Emits an `MInst.AluRRImm12` into a fresh integer register.
 (decl alu_rr_imm12 (AluOPRRI Reg Imm12) Reg)
 (rule (alu_rr_imm12 opcode rs imm12)
   (let ((rd WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.AluRRImm12 opcode rd rs imm12))))
     rd))

 ;; Some instructions use the imm12 field as funct12, so callers do not
 ;; need to supply an imm12 parameter; a zero immediate is passed instead.
 (decl alu_rr_funct12 (AluOPRRI Reg) Reg)
 (rule (alu_rr_funct12 opcode rs)
   (let ((rd WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.AluRRImm12 opcode rd rs (imm12_zero)))))
     rd))

 ;; Emits an `MInst.Lui` into a fresh integer register.
 ;; TODO: This should be something like `emit_u_type`. And should share the
 ;; `MInst` with `auipc` since these instructions share the U-Type format.
 (decl rv_lui (Imm20) XReg)
 (rule (rv_lui upper)
   (let ((rd WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.Lui rd upper))))
     rd))

 ;; Emits an `MInst.CsrImm` whose destination is a fresh integer register.
 ;; Note the helper takes `csr` before `imm`, while the instruction stores
 ;; them in the opposite order.
 (decl csr_imm (CsrImmOP CSR UImm5) XReg)
 (rule (csr_imm opcode target value)
   (let ((rd WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.CsrImm opcode rd value target))))
     rd))

 ;; Emits an `MInst.CsrReg` whose destination is the zero register, so
 ;; nothing is returned to the caller.
 (decl csr_reg_dst_zero (CsrRegOP CSR XReg) Unit)
 (rule (csr_reg_dst_zero opcode target src)
   (emit (MInst.CsrReg opcode (writable_zero_reg) src target)))


 ;; Picks the add-immediate opcode for a type: `addiw` when the type fits in
 ;; 32 bits (higher priority), otherwise `addi` for anything up to 64 bits.
 (decl select_addi (Type) AluOPRRI)
 (rule 1 (select_addi (fits_in_32 _))
   (AluOPRRI.Addiw))
 (rule (select_addi (fits_in_64 _))
   (AluOPRRI.Addi))


 ;; Bitwise NOT for scalar floats, `$I128`, and integer/reference scalars of
 ;; up to 64 bits.
 (decl gen_bnot (Type ValueRegs) ValueRegs)

 ;; Scalar floats: move the bits to an integer register, invert them, and
 ;; move the result back to a float register.
 (rule 2 (gen_bnot (ty_scalar_float ty) regs)
   (let ((fsrc FReg (value_regs_get regs 0))
         (as_int XReg (move_f_to_x fsrc ty))
         (flipped XReg (rv_not as_int))
         (fdst FReg (move_x_to_f flipped (float_int_of_same_size ty))))
     (value_reg fdst)))

 ;; `$I128`: invert each 64-bit half independently.
 (rule 1 (gen_bnot $I128 regs)
   (let ((lo_half XReg (rv_not (value_regs_get regs 0)))
         (hi_half XReg (rv_not (value_regs_get regs 1))))
     (value_regs lo_half hi_half)))

 ;; Integer and reference scalars up to 64 bits: a single `not`.
 (rule 0 (gen_bnot (ty_int_ref_scalar_64 _) regs)
   (rv_not (value_regs_get regs 0)))


 ;; Bitwise AND. `$I128` is handled as two independent 64-bit halves; any
 ;; type that fits in 64 bits uses a single `and` on register 0.
 (decl gen_and (Type ValueRegs ValueRegs) ValueRegs)
 (rule 1 (gen_and $I128 lhs rhs)
   (value_regs
     (rv_and (value_regs_get lhs 0) (value_regs_get rhs 0))
     (rv_and (value_regs_get lhs 1) (value_regs_get rhs 1))))

 (rule 0 (gen_and (fits_in_64 _) lhs rhs)
   (rv_and (value_regs_get lhs 0) (value_regs_get rhs 0)))


 ;; AND a register with a `u64` constant. Uses `andi` when the constant is
 ;; accepted by `imm12_from_u64`; otherwise the constant is materialized as an
 ;; `$I64` and a register-register `and` is used.
 (decl gen_andi (XReg u64) XReg)
 (rule 1 (gen_andi src (imm12_from_u64 mask))
   (rv_andi src mask))

 (rule 0 (gen_andi src mask)
   (rv_and src (imm $I64 mask)))


 ;; Bitwise OR, structured exactly like `gen_and`.
 (decl gen_or (Type ValueRegs ValueRegs) ValueRegs)
 (rule 1 (gen_or $I128 lhs rhs)
   (value_regs
     (rv_or (value_regs_get lhs 0) (value_regs_get rhs 0))
     (rv_or (value_regs_get lhs 1) (value_regs_get rhs 1))))

 (rule 0 (gen_or (fits_in_64 _) lhs rhs)
   (rv_or (value_regs_get lhs 0) (value_regs_get rhs 0)))

 ;; Reverses the bits of a value of the given integer type.
 (decl lower_bit_reverse (Reg Type) Reg)

 ;; `$I8`: `gen_brev8` alone is used.
 (rule (lower_bit_reverse src $I8)
   (gen_brev8 src $I8))

 ;; `$I16`: `gen_brev8`, then `gen_rev8`, then a logical right shift by 48
 ;; to bring the result down from the top of the 64-bit register.
 (rule (lower_bit_reverse src $I16)
   (let ((bits_rev XReg (gen_brev8 src $I16))
         (bytes_rev XReg (gen_rev8 bits_rev))
         (shifted XReg (rv_srli bytes_rev (imm12_const 48))))
     shifted))

 ;; `$I32`: same as `$I16`, but the final logical right shift is by 32.
 (rule (lower_bit_reverse src $I32)
   (let ((bits_rev XReg (gen_brev8 src $I32))
         (bytes_rev XReg (gen_rev8 bits_rev))
         (shifted XReg (rv_srli bytes_rev (imm12_const 32))))
     shifted))

 ;; `$I64`: `gen_rev8` first, then `gen_brev8`; no shift is needed.
 (rule (lower_bit_reverse src $I64)
   (let ((bytes_rev XReg (gen_rev8 src)))
     (gen_brev8 bytes_rev $I64)))


 ;; Count trailing zeros.
 (decl lower_ctz (Type Reg) Reg)

 ;; Fallback (lowest priority): the generic `gen_cltz` sequence with
 ;; `leading` set to `$false`.
 (rule (lower_ctz ty src)
   (gen_cltz $false src ty))

 ;; With Zbb, types of at most 16 bits: set the bit at index `ty_bits` first,
 ;; so `ctzw` stops there when the value itself is zero.
 (rule 1 (lower_ctz (fits_in_16 ty) src)
   (if-let $true (has_zbb))
   (let ((guarded Reg (gen_bseti src (ty_bits ty))))
     (rv_ctzw guarded)))

 ;; With Zbb, `$I32` maps directly onto `ctzw`.
 (rule 2 (lower_ctz $I32 src)
   (if-let $true (has_zbb))
   (rv_ctzw src))

 ;; With Zbb, `$I64` maps directly onto `ctz`.
 (rule 2 (lower_ctz $I64 src)
   (if-let $true (has_zbb))
   (rv_ctz src))

 ;; Emits the `MInst.Cltz` pseudo-instruction and returns its `sum` register.
 ;; Two further temporaries (`step` and `tmp`) are allocated for it.
 ;; `lower_ctz` passes `$false` for the `leading` flag; presumably `$true`
 ;; selects a leading-zero count — confirm against the instruction's emission.
 ;; NOTE(review): the comment previously here described counting both halves
 ;; of an i128 separately; nothing in this helper does that.
 (decl gen_cltz (bool XReg Type) XReg)
 (rule (gen_cltz leading src ty)
   (let ((scratch WritableXReg (temp_writable_xreg))
         (stride WritableXReg (temp_writable_xreg))
         (total WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.Cltz leading total stride scratch src ty))))
     total))

 ;; Performs a zero extension of the given value
 (decl zext (Value) XReg)

 ;; Priority 0, the most generic case for types of at most 32 bits: shift
 ;; left by (64 - bits), then shift right logically by the same amount.
 (rule 0 (zext v @ (value_type (fits_in_32 ty)))
   (let ((amt Imm12 (imm_from_bits (u64_sub 64 (ty_bits ty)))))
     (rv_srli (rv_slli v amt) amt)))

 ;; Priority 1: an 8-bit value is zero extended with one `andi`.
 (rule 1 (zext v @ (value_type $I8))
   (rv_andi v (imm12_const 0xff)))

 ;; No point in trying to use `packh` here to zero extend 8 bit values
 ;; since we can just use `andi` instead which is part of the base ISA.

 ;; Priority 1: with `zbkb`, `packw` against the zero register zero extends
 ;; 16 bit values.
 (rule 1 (zext v @ (value_type $I16))
   (if-let $true (has_zbkb))
   (rv_packw v (zero_reg)))

 ;; Priority 1: with `zbkb`, `pack` against the zero register zero extends
 ;; 32 bit values.
 (rule 1 (zext v @ (value_type $I32))
   (if-let $true (has_zbkb))
   (rv_pack v (zero_reg)))

 ;; Priority 2: with `zbb` there is a dedicated `zext.h` instruction.
 (rule 2 (zext v @ (value_type $I16))
   (if-let $true (has_zbb))
   (rv_zexth v))

 ;; Priority 2: with `zba` there is a `zext.w` instruction.
 (rule 2 (zext v @ (value_type $I32))
   (if-let $true (has_zba))
   (rv_zextw v))

 ;; Priority 3: skip the zero extension entirely for values whose
 ;; representation is already the full register width.
 (rule 3 (zext v)
   (if (val_already_extended v))
   v)

 ;; Performs a signed extension of the given value
 (decl sext (Value) XReg)

 ;; Priority 0, same base case as `zext`: shift left by (64 - bits), then
 ;; shift right arithmetically by the same amount.
 (rule 0 (sext v @ (value_type (fits_in_32 ty)))
   (let ((amt Imm12 (imm_from_bits (u64_sub 64 (ty_bits ty)))))
     (rv_srai (rv_slli v amt) amt)))

 ;; Priority 1: with `zbb` there is a dedicated `sext.b` instruction.
 (rule 1 (sext v @ (value_type $I8))
   (if-let $true (has_zbb))
   (rv_sextb v))

 ;; Priority 1: with `zbb` there is a dedicated `sext.h` instruction.
 (rule 1 (sext v @ (value_type $I16))
   (if-let $true (has_zbb))
   (rv_sexth v))

 ;; Priority 1: sign extending from 32 to 64 bits is an `addiw val 0`,
 ;; also known as `sext.w`.
 (rule 1 (sext v @ (value_type $I32))
   (rv_sextw v))

 ;; Priority 2: skip the sign extension entirely for values whose
 ;; representation is already the full register width.
 (rule 2 (sext v)
   (if (val_already_extended v))
   v)

 ;; Matcher that succeeds (returning `$true`) when a value's representation
 ;; is already sign or zero extended to the full 64-bit register. `zext` and
 ;; `sext` use it to skip the extension instruction entirely in those cases.
 (decl pure partial val_already_extended (Value) bool)

 ;; A 64-bit value already occupies the whole register.
 (rule 0 (val_already_extended (value_type $I64))
   $true)

 ;; When extending our backend always extends to the full register width, so
 ;; there's no need to extend-an-extend.
 (rule 1 (val_already_extended (uextend _))
   $true)
 (rule 1 (val_already_extended (sextend _))
   $true)

 ;; The result of `icmp`/`fcmp` is zero or one, meaning that it's already sign
 ;; extended to the full register width.
 (rule 1 (val_already_extended (icmp _ _ _))
   $true)
 (rule 1 (val_already_extended (fcmp _ _ _))
   $true)

 ;; Kind of integer extension: zero (`Zero`) or sign (`Signed`).
 (type ExtendOp (enum (Zero) (Signed)))

 ;; Applies the same register-register ALU opcode to the low halves and to
 ;; the high halves of two 128-bit values, independently of each other.
 (decl lower_b128_binary (AluOPRRR ValueRegs ValueRegs) ValueRegs)
 (rule (lower_b128_binary opcode lhs rhs)
   (let ((lo_half XReg (alu_rrr opcode (value_regs_get lhs 0) (value_regs_get rhs 0)))
         (hi_half XReg (alu_rrr opcode (value_regs_get lhs 1) (value_regs_get rhs 1))))
     (value_regs lo_half hi_half)))

 ;; High part of a signed multiply.
 (decl lower_smlhi (Type XReg XReg) XReg)

 ;; `$I64`: `mulh` produces the high 64 bits directly.
 (rule 1 (lower_smlhi $I64 lhs rhs)
   (rv_mulh lhs rhs))

 ;; Other types: multiply with `mul`, then shift right logically by the bit
 ;; width of the type.
 ;; NOTE(review): presumably callers pass operands already sign extended to
 ;; 64 bits — confirm at the call sites.
 (rule (lower_smlhi ty lhs rhs)
   (let ((product XReg (rv_mul lhs rhs)))
     (rv_srli product (imm12_const (ty_bits ty)))))


 ;; Rotate left. `$I64` and `$I32` use `rol` / `rolw` when Zbb is present;
 ;; everything else goes through the shift-based `lower_rotl_shift`.
 (decl lower_rotl (Type XReg XReg) XReg)

 ;; `$I64` with Zbb.
 (rule 1 (lower_rotl $I64 src amt)
   (if-let $true (has_zbb))
   (rv_rol src amt))

 ;; `$I64` without Zbb.
 (rule (lower_rotl $I64 src amt)
   (if-let $false (has_zbb))
   (lower_rotl_shift $I64 src amt))

 ;; `$I32` with Zbb.
 (rule 1 (lower_rotl $I32 src amt)
   (if-let $true (has_zbb))
   (rv_rolw src amt))

 ;; `$I32` without Zbb.
 (rule (lower_rotl $I32 src amt)
   (if-let $false (has_zbb))
   (lower_rotl_shift $I32 src amt))

 ;; Any remaining type (lowest priority).
 (rule -1 (lower_rotl ty src amt)
   (lower_rotl_shift ty src amt))

 ;;; Rotate left implemented with plain shifts.
 (decl lower_rotl_shift (Type XReg XReg) XReg)

 ;;; Used for I8 and I16, and for I32/I64 when Zbb is unavailable.
 ;;; Computes (src << shamt) | (src >> (len - shamt)); the right-shifted
 ;;; part is replaced by zero when `shamt` is zero.
 (rule (lower_rotl_shift ty src amt)
   (let ((amounts ValueRegs (gen_shamt ty amt))
         (sh Reg (value_regs_get amounts 0))
         (inv_sh Reg (value_regs_get amounts 1))
         ;; bits that move up
         (upper Reg (rv_sll src sh))
         ;; bits that wrap around to the bottom
         (wrapped Reg (rv_srl src inv_sh))
         (lower Reg (gen_select_xreg (cmp_eqz sh) (zero_reg) wrapped)))
     (rv_or upper lower)))


 ;;;; Constructs the shift amounts used by the shift-based rotate lowerings.
 ;;;; The i128 rotates are also implemented with shifts, so they call this too.
 ;;;; Returns two registers: the shift amount and (ty_bits - "shift amount").
 ;;;; If ty_bits is greater than 64, as for i128, the shift amount falls back
 ;;;; to 64 because this is a 64-bit platform.
 (decl gen_shamt (Type XReg) ValueRegs)
 (extern constructor gen_shamt gen_shamt)

 ;; Rotate right. `$I64` and `$I32` use `ror` / `rorw` when Zbb is present;
 ;; everything else goes through the shift-based `lower_rotr_shift`.
 (decl lower_rotr (Type XReg XReg) XReg)

 ;; `$I64` with Zbb.
 (rule 1 (lower_rotr $I64 src amt)
   (if-let $true (has_zbb))
   (rv_ror src amt))

 ;; `$I64` without Zbb.
 (rule (lower_rotr $I64 src amt)
   (if-let $false (has_zbb))
   (lower_rotr_shift $I64 src amt))

 ;; `$I32` with Zbb.
 (rule 1 (lower_rotr $I32 src amt)
   (if-let $true (has_zbb))
   (rv_rorw src amt))

 ;; `$I32` without Zbb.
 (rule (lower_rotr $I32 src amt)
   (if-let $false (has_zbb))
   (lower_rotr_shift $I32 src amt))

 ;; Any remaining type (lowest priority).
 (rule -1 (lower_rotr ty src amt)
   (lower_rotr_shift ty src amt))

 ;;; Rotate right implemented with plain shifts.
 (decl lower_rotr_shift (Type XReg XReg) XReg)

 ;;; Computes (src >> shamt) | (src << (len - shamt)); the left-shifted
 ;;; part is replaced by zero when `shamt` is zero.
 (rule (lower_rotr_shift ty src amt)
   (let ((amounts ValueRegs (gen_shamt ty amt))
         (sh XReg (value_regs_get amounts 0))
         (inv_sh XReg (value_regs_get amounts 1))
         ;; bits that move down
         (lower XReg (rv_srl src sh))
         ;; bits that wrap around to the top
         (wrapped XReg (rv_sll src inv_sh))
         (upper XReg (gen_select_xreg (cmp_eqz sh) (zero_reg) wrapped)))
     (rv_or lower upper)))


 ;; bseti: Set a single bit in a register, indexed by a constant.
 ;;
 ;; Exactly one of the three rules below applies for any input:
 ;;  * Zbs present: a single `bseti` with the bit index as immediate.
 ;;  * No Zbs, bit <= 10: `ori` with the mask `1 << bit`.
 ;;  * No Zbs, bit > 10: materialize the mask in a register and `or` it in.
 ;;
 ;; The `ori` immediate is a signed 12-bit value (-2048..=2047), so the
 ;; largest single-bit mask it can hold is `1 << 10`. `1 << 11` would set the
 ;; sign bit of the immediate and `1 << 12` does not fit at all, which is why
 ;; the cut-off is 10 and not 12.
 (decl gen_bseti (Reg u64) Reg)
 (rule (gen_bseti val bit)
   (if-let $false (has_zbs))
   (if-let $false (u64_le bit 10))
   (let ((const XReg (imm $I64 (u64_shl 1 bit))))
     (rv_or val const)))

 (rule (gen_bseti val bit)
   (if-let $false (has_zbs))
   (if-let $true (u64_le bit 10))
   (rv_ori val (imm12_const (u64_as_i32 (u64_shl 1 bit)))))

 (rule (gen_bseti val bit)
   (if-let $true (has_zbs))
   (rv_bseti val (imm12_const (u64_as_i32 bit))))


 ;; Emits the `MInst.Popcnt` pseudo-instruction, always with type `$I64`, and
 ;; returns its `sum` register. Two further temporaries (`step` and `tmp`) are
 ;; allocated for it.
 (decl gen_popcnt (XReg) Reg)
 (rule (gen_popcnt src)
   (let ((scratch WritableXReg (temp_writable_xreg))
         (stride WritableXReg (temp_writable_xreg))
         (total WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.Popcnt total stride scratch src $I64))))
     (writable_reg_to_reg total)))


 ;; Rotate left of a 128-bit value held in two 64-bit registers.
 ;; Both halves are first rotated as if the amount were below 64, and the
 ;; halves are swapped afterwards when the amount is 64 or more.
 (decl lower_i128_rotl (ValueRegs ValueRegs) ValueRegs)
 (rule (lower_i128_rotl val amt)
   (let (;; shift amount and its complement, from the low register of `amt`
         (amounts ValueRegs (gen_shamt $I128 (value_regs_get amt 0)))
         (sh XReg (value_regs_get amounts 0))
         (inv_sh XReg (value_regs_get amounts 1))
         ;; low half: own bits shifted up, plus bits carried in from the high half
         (lo_up XReg (rv_sll (value_regs_get val 0) sh))
         (lo_carry XReg (rv_srl (value_regs_get val 1) inv_sh))
         ;;; if the shift amount is 0 the carried-in part would overflow, so use zero instead.
         (lo_carry_sel XReg (gen_select_xreg (cmp_eqz sh) (zero_reg) lo_carry))
         (lo_half XReg (rv_or lo_up lo_carry_sel))
         ;; high half: own bits shifted up, plus bits carried in from the low half
         (hi_up XReg (rv_sll (value_regs_get val 1) sh))
         (hi_carry XReg (rv_srl (value_regs_get val 0) inv_sh))
         (hi_carry_sel XReg (gen_select_xreg (cmp_eqz sh) (zero_reg) hi_carry))
         (hi_half XReg (rv_or hi_up hi_carry_sel))
         ;; operands for the "amount >= 64" test
         (sixty_four XReg (imm $I64 64))
         (amt_mod_128 XReg (rv_andi (value_regs_get amt 0) (imm12_const 127))))
     ;; So far only a rotation by less than 64 bits has been applied.
     ;; If the amount is greater than or equal to 64, swap low and high.
     (gen_select_regs
       (cmp_geu amt_mod_128 sixty_four)
       (value_regs hi_half lo_half)
       (value_regs lo_half hi_half))))


 ;; Rotate-right of a 128-bit value held in a register pair.
 ;;
 ;; The two halves are first rotated by the amount that `gen_shamt` returns,
 ;; then swapped when the requested amount (masked with 127) is >= 64.
 (decl lower_i128_rotr (ValueRegs ValueRegs) ValueRegs)
 (rule (lower_i128_rotr val amt)
   (let ((amounts ValueRegs (gen_shamt $I128 (value_regs_get amt 0)))
         (shift XReg (value_regs_get amounts 0))
         (inv_shift XReg (value_regs_get amounts 1))
         ;; Low half of the partial result.
         (lo_shifted XReg (rv_srl (value_regs_get val 0) shift))
         (lo_carry XReg (rv_sll (value_regs_get val 1) inv_shift))
         ;; If the shift is zero, the carried-in part would overflow, so
         ;; zero is selected instead.
         (lo_carry_sel XReg (gen_select_xreg (cmp_eqz shift) (zero_reg) lo_carry))
         (lo XReg (rv_or lo_shifted lo_carry_sel))
         ;; High half of the partial result, built the same way.
         (hi_shifted XReg (rv_srl (value_regs_get val 1) shift))
         (hi_carry XReg (rv_sll (value_regs_get val 0) inv_shift))
         (hi_carry_sel XReg (gen_select_xreg (cmp_eqz shift) (zero_reg) hi_carry))
         (hi XReg (rv_or hi_shifted hi_carry_sel))
         ;; Operands for the "amount >= 64" test.
         (sixty_four XReg (imm $I64 64))
         (amt_mod_128 XReg (rv_andi (value_regs_get amt 0) (imm12_const 127))))
     ;; So far only a rotate by less than 64 bits has been performed.
     ;; If the amount is greater than or equal to 64, swap low and high.
     (gen_select_regs
       (cmp_geu amt_mod_128 sixty_four)
       (value_regs hi lo)
       (value_regs lo hi))))

 ;; Generates an AMode that points to a register plus an offset.
 ;; Implemented externally; arguments are the base register, the offset and a type.
 (decl gen_reg_offset_amode (Reg i64 Type) AMode)
 (extern constructor gen_reg_offset_amode gen_reg_offset_amode)

 ;; Generates an AMode that is an offset from the stack pointer.
 (decl gen_sp_offset_amode (i64 Type) AMode)
 (extern constructor gen_sp_offset_amode gen_sp_offset_amode)

 ;; Generates an AMode that is an offset from the frame pointer.
 (decl gen_fp_offset_amode (i64 Type) AMode)
 (extern constructor gen_fp_offset_amode gen_fp_offset_amode)

 ;; Generates an AMode that points to a stack slot + offset.
 (decl gen_stack_slot_amode (StackSlot i64 Type) AMode)
 (extern constructor gen_stack_slot_amode gen_stack_slot_amode)

 ;; Generates an AMode that points to a constant in the constant pool.
 (decl gen_const_amode (VCodeConstant) AMode)
 (extern constructor gen_const_amode gen_const_amode)


 ;; Tries to match a Value + Offset into an AMode.
 (decl amode (Value i32 Type) AMode)

 ;; Default: hand the address and offset to `amode_inner` unchanged.
 (rule 0 (amode base off ty) (amode_inner base off ty))

 ;; When the address is an `iadd` with a constant operand, fold that constant
 ;; into the amode offset, provided the 32-bit addition does not fail.
 ;;
 ;; We can't recurse into `amode` again since that could cause stack overflows.
 ;; See: https://github.com/bytecodealliance/wasmtime/pull/6968
 (rule 1 (amode (iadd base (iconst (simm32 k))) off ty)
   (if-let folded (s32_add_fallible k off))
   (amode_inner base folded ty))
 (rule 2 (amode (iadd (iconst (simm32 k)) base) off ty)
   (if-let folded (s32_add_fallible k off))
   (amode_inner base folded ty))


 ;; The normal rules for generating an AMode once `amode` has folded any
 ;; constant offset.
 (decl amode_inner (Value i32 Type) AMode)

 ;; Simplest case: the address value lowers to Reg+Offset.
 (rule 0 (amode_inner base @ (value_type (ty_addr64 _)) off ty)
   (gen_reg_offset_amode base off ty))

 ;; A `get_frame_pointer` address becomes an offset from the frame pointer.
 (rule 1 (amode_inner (get_frame_pointer) off ty)
   (gen_fp_offset_amode off ty))

 ;; A `get_stack_pointer` address becomes an offset from the stack pointer.
 (rule 1 (amode_inner (get_stack_pointer) off ty)
   (gen_sp_offset_amode off ty))

 ;; A `stack_addr` address becomes a stack-slot amode, provided the slot offset
 ;; and the amode offset can be added without failing.
 (rule 1 (amode_inner (stack_addr slot slot_off) off ty)
   (if-let total (s32_add_fallible slot_off off))
   (gen_stack_slot_amode slot total ty))


 ;; Returns a canonical register type for a LoadOP. We only return I64 or F64:
 ;; $F64 for the floating-point loads (`Fld` and `Flw`), $I64 for every other
 ;; op via the lower-priority wildcard rule.
 (decl load_op_reg_type (LoadOP) Type)
 (rule 1 (load_op_reg_type (LoadOP.Fld)) $F64)
 (rule 1 (load_op_reg_type (LoadOP.Flw)) $F64)
 (rule 0 (load_op_reg_type _) $I64)

 ;; Helper to load from memory: allocates a temporary whose type is chosen by
 ;; `load_op_reg_type`, emits `MInst.Load` into it and returns it.
 (decl gen_load (AMode LoadOP MemFlags) Reg)
 (rule (gen_load addr load_kind mem_flags)
   (let ((rd WritableReg (temp_writable_reg (load_op_reg_type load_kind)))
         (_ Unit (emit (MInst.Load rd load_kind mem_flags addr))))
     rd))

 ;; Helper to store to memory: wraps `MInst.Store` as a side effect with no
 ;; result.
 (decl gen_store (AMode StoreOP MemFlags Reg) InstOutput)
 (rule (gen_store addr store_kind mem_flags value)
   (side_effect
     (SideEffectNoResult.Inst (MInst.Store addr store_kind mem_flags value))))


 ;; Externally implemented extractor over `Type`. It is fallible (not declared
 ;; `infallible`).
 ;; NOTE(review): presumably matches only types usable in atomic operations --
 ;; confirm against the Rust implementation.
 (decl valid_atomic_transaction (Type) Type)
 (extern extractor valid_atomic_transaction valid_atomic_transaction)

 ;; Helper that constructs an atomic instruction: emits `MInst.Atomic` with a
 ;; fresh integer temporary as destination and returns that temporary.
 (decl gen_atomic (AtomicOP Reg Reg AMO) Reg)
 (rule (gen_atomic atomic_op address operand ordering)
   (let ((result WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.Atomic atomic_op result address operand ordering))))
     result))

 ;; Maps a (type, `AtomicRmwOp`) pair to the matching `AtomicOP` variant.
 ;; $I32 selects the `W` variants and $I64 the `D` variants. Only the
 ;; operations listed below have a rule here.
 (decl get_atomic_rmw_op (Type AtomicRmwOp) AtomicOP)

 ;; Add
 (rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Add)) (AtomicOP.AmoaddW))
 (rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Add)) (AtomicOP.AmoaddD))

 ;; And
 (rule (get_atomic_rmw_op $I32 (AtomicRmwOp.And)) (AtomicOP.AmoandW))
 (rule (get_atomic_rmw_op $I64 (AtomicRmwOp.And)) (AtomicOP.AmoandD))

 ;; Or
 (rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Or)) (AtomicOP.AmoorW))
 (rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Or)) (AtomicOP.AmoorD))

 ;; Signed max
 (rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Smax)) (AtomicOP.AmomaxW))
 (rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Smax)) (AtomicOP.AmomaxD))

 ;; Signed min
 (rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Smin)) (AtomicOP.AmominW))
 (rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Smin)) (AtomicOP.AmominD))

 ;; Unsigned max
 (rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Umax)) (AtomicOP.AmomaxuW))
 (rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Umax)) (AtomicOP.AmomaxuD))

 ;; Unsigned min
 (rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Umin)) (AtomicOP.AmominuW))
 (rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Umin)) (AtomicOP.AmominuD))

 ;; Exchange
 (rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Xchg)) (AtomicOP.AmoswapW))
 (rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Xchg)) (AtomicOP.AmoswapD))

 ;; Xor
 (rule (get_atomic_rmw_op $I32 (AtomicRmwOp.Xor)) (AtomicOP.AmoxorW))
 (rule (get_atomic_rmw_op $I64 (AtomicRmwOp.Xor)) (AtomicOP.AmoxorD))

 ;; Externally implemented constructor that takes no arguments and returns an
 ;; `AMO` value.
 ;; NOTE(review): presumably the ordering bits used for atomic instructions --
 ;; confirm against the Rust implementation.
 (decl atomic_amo () AMO)
 (extern constructor atomic_amo atomic_amo)


 ;; Emits `MInst.AtomicLoad` of type `ty` from the address in `addr` into a
 ;; fresh integer temporary and returns that register.
 (decl gen_atomic_load (Reg Type) Reg)
 (rule (gen_atomic_load addr ty)
   (let ((loaded WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.AtomicLoad loaded ty addr))))
     (writable_reg_to_reg loaded)))

 ;; Wraps `MInst.AtomicStore` of `value` (type `ty`) to the address in `addr`
 ;; as a side effect with no result.
 (decl gen_atomic_store (Reg Type Reg) InstOutput)
 (rule (gen_atomic_store addr ty value)
   (side_effect
     (SideEffectNoResult.Inst (MInst.AtomicStore value ty addr))))


 ;; Externally implemented constructor taking a stack slot and an `Offset32`
 ;; and returning a register.
 ;; NOTE(review): presumably the register holds the computed address -- confirm
 ;; against the Rust implementation.
 (decl gen_stack_addr (StackSlot Offset32) Reg)
 (extern constructor gen_stack_addr gen_stack_addr)

 ;; Selects between two integer registers based on an `IntegerCompare`.
 ;;
 ;; When the two selected values are exactly the two compared operands and Zbb
 ;; is available, less-than / greater-than selections (with or without
 ;; equality) use the min/max helpers. Otherwise a generic `MInst.Select` is
 ;; emitted.
 (decl gen_select_xreg (IntegerCompare XReg XReg) XReg)

 ;; Unsigned less-than -> `rv_minu`.
 (rule 1 (gen_select_xreg (int_compare_decompose cond lhs rhs) lhs rhs)
   (if-let (IntCC.UnsignedLessThan) (intcc_without_eq cond))
   (if-let $true (has_zbb))
   (rv_minu lhs rhs))

 ;; Signed less-than -> `rv_min`.
 (rule 1 (gen_select_xreg (int_compare_decompose cond lhs rhs) lhs rhs)
   (if-let (IntCC.SignedLessThan) (intcc_without_eq cond))
   (if-let $true (has_zbb))
   (rv_min lhs rhs))

 ;; Unsigned greater-than -> `rv_maxu`.
 (rule 1 (gen_select_xreg (int_compare_decompose cond lhs rhs) lhs rhs)
   (if-let (IntCC.UnsignedGreaterThan) (intcc_without_eq cond))
   (if-let $true (has_zbb))
   (rv_maxu lhs rhs))

 ;; Signed greater-than -> `rv_max`.
 (rule 1 (gen_select_xreg (int_compare_decompose cond lhs rhs) lhs rhs)
   (if-let (IntCC.SignedGreaterThan) (intcc_without_eq cond))
   (if-let $true (has_zbb))
   (rv_max lhs rhs))

 ;; Generic fallback.
 (rule 0 (gen_select_xreg cond if_true if_false)
   (let ((result WritableReg (temp_writable_xreg))
         (_ Unit (emit (MInst.Select result cond if_true if_false))))
     (writable_reg_to_reg result)))


 ;; Selects between two vector registers based on an `IntegerCompare`, via
 ;; `MInst.Select` into a fresh vector temporary.
 (decl gen_select_vreg (IntegerCompare VReg VReg) VReg)
 (rule (gen_select_vreg cond if_true if_false)
   (let ((result WritableReg (temp_writable_vreg))
         (_ Unit (emit (MInst.Select result
                                     cond
                                     (vreg_to_reg if_true)
                                     (vreg_to_reg if_false)))))
     (writable_reg_to_reg result)))
 ;; Selects between two float registers based on an `IntegerCompare`, via
 ;; `MInst.Select` into a fresh float temporary.
 (decl gen_select_freg (IntegerCompare FReg FReg) FReg)
 (rule (gen_select_freg cond if_true if_false)
   (let ((result WritableReg (temp_writable_freg))
         (_ Unit (emit (MInst.Select result
                                     cond
                                     (freg_to_reg if_true)
                                     (freg_to_reg if_false)))))
     (writable_reg_to_reg result)))
 ;; Selects between two register pairs based on an `IntegerCompare`, via
 ;; `MInst.Select` into two fresh integer temporaries.
 (decl gen_select_regs (IntegerCompare ValueRegs ValueRegs) ValueRegs)
 (rule (gen_select_regs cond if_true if_false)
   (let ((out_lo WritableReg (temp_writable_xreg))
         (out_hi WritableReg (temp_writable_xreg))
         (_ Unit (emit (MInst.Select (writable_value_regs out_lo out_hi)
                                     cond
                                     if_true
                                     if_false))))
     (value_regs out_lo out_hi)))

 ;; Wraps `MInst.Udf` carrying the given trap code as a side effect with no
 ;; result.
 (decl udf (TrapCode) InstOutput)
 (rule (udf trap_code)
   (side_effect (SideEffectNoResult.Inst (MInst.Udf trap_code))))

 ;; Externally implemented constructor mapping a `Type` to a `LoadOP`.
 (decl load_op (Type) LoadOP)
 (extern constructor load_op load_op)

 ;; Externally implemented constructor mapping a `Type` to a `StoreOP`.
 (decl store_op (Type) StoreOP)
 (extern constructor store_op store_op)


 ;; Loads an external name: externally implemented constructor taking an
 ;; `ExternalName` and an `i64`, returning a register.
 ;; NOTE(review): the `i64` is presumably an offset added to the symbol
 ;; address -- confirm against the Rust implementation.
 (decl load_ext_name (ExternalName i64) Reg)
 (extern constructor load_ext_name load_ext_name)

 ;; Emits `MInst.ElfTlsGetAddr` for `symbol` into a fresh $I64 temporary and
 ;; returns that temporary.
 (decl elf_tls_get_addr (ExternalName) Reg)
 (rule (elf_tls_get_addr symbol)
   (let ((result WritableReg (temp_writable_reg $I64))
         (_ Unit (emit (MInst.ElfTlsGetAddr result symbol))))
     result))

 ;; Emits the `MInst.FcvtToInt` pseudo-instruction and returns its integer
 ;; result register.
 ;;
 ;; Arguments, in order: saturating flag, float source, signedness flag,
 ;; input type, output type. A float temporary is allocated and passed to the
 ;; instruction alongside the result.
 (decl gen_fcvt_int (bool FReg bool Type Type) XReg)
 (rule (gen_fcvt_int saturating src signed from_ty to_ty)
   (let ((dst WritableReg (temp_writable_reg to_ty))
         (scratch WritableFReg (temp_writable_freg))
         (_ Unit (emit (MInst.FcvtToInt saturating dst scratch src signed from_ty to_ty))))
     (writable_reg_to_reg dst)))

 ;; Float binary operations that are carried out on integer registers:
 ;;   1. move both operands into x registers,
 ;;   2. apply the integer ALU operation,
 ;;   3. move the result back into a float register.
 (decl lower_float_binary (AluOPRRR FReg FReg Type) FReg)
 (rule (lower_float_binary alu_op lhs rhs ty)
   (let ((lhs_bits XReg (move_f_to_x lhs ty))
         (rhs_bits XReg (move_f_to_x rhs ty))
         (result_bits XReg (alu_rrr alu_op lhs_bits rhs_bits)))
     (move_x_to_f result_bits (float_int_of_same_size ty))))


 ;; 128-bit subtraction `minuend - subtrahend` on register pairs
 ;; (element 0 is the low half, element 1 the high half).
 (decl i128_sub (ValueRegs ValueRegs) ValueRegs)
 (rule (i128_sub minuend subtrahend)
   (let (;; Low halves.
         (diff_lo XReg (rv_sub (value_regs_get minuend 0) (value_regs_get subtrahend 0)))
         ;; A borrow occurred iff the low difference is unsigned-greater than
         ;; the original low half of the minuend.
         (borrow_bit XReg (rv_sltu (value_regs_get minuend 0) diff_lo))
         ;; High halves, before the borrow is applied.
         (diff_hi_raw XReg (rv_sub (value_regs_get minuend 1) (value_regs_get subtrahend 1)))
         ;; Apply the borrow.
         (diff_hi XReg (rv_sub diff_hi_raw borrow_bit)))
     (value_regs diff_lo diff_hi)))

 ;; Integer scalar zero registers: externally implemented constructor taking a
 ;; `Type` and returning `ValueRegs`.
 (decl int_zero_reg (Type) ValueRegs)
 (extern constructor int_zero_reg int_zero_reg)

 ;; Consumes an `IntegerCompare`, producing a conditional branch
 ;; (`MInst.CondBr`) on its result with the two given targets.
 (decl cond_br (IntegerCompare CondBrTarget CondBrTarget) SideEffectNoResult)
 (rule (cond_br condition taken not_taken)
   (SideEffectNoResult.Inst (MInst.CondBr taken not_taken condition)))

 ;; Helper for emitting the `j` mnemonic, an unconditional jump to a label.
 ;; Built from `MInst.Jal`.
 (decl rv_j (MachLabel) SideEffectNoResult)
 (rule (rv_j target)
   (SideEffectNoResult.Inst (MInst.Jal target)))

 ;; Construct an `IntegerCompare` value from a condition code and two integer
 ;; registers. Implemented externally.
 (decl int_compare (IntCC XReg XReg) IntegerCompare)
 (extern constructor int_compare int_compare)

 ;; Extract the components of an `IntegerCompare`: the condition code and the
 ;; two compared registers. Declared infallible, so it always matches.
 (decl int_compare_decompose (IntCC XReg XReg) IntegerCompare)
 (extern extractor infallible int_compare_decompose int_compare_decompose)

 ;; Converts a `MachLabel` into a `CondBrTarget`. Registered below as an
 ;; implicit conversion between the two types.
 (decl label_to_br_target (MachLabel) CondBrTarget)
 (extern constructor label_to_br_target label_to_br_target)
 (convert MachLabel CondBrTarget label_to_br_target)

 ;; Shorthand constructors for `IntegerCompare` values. Each one is a thin
 ;; wrapper over `int_compare` with a fixed condition code.

 ;; reg == 0
 (decl cmp_eqz (XReg) IntegerCompare)
 (rule (cmp_eqz reg) (int_compare (IntCC.Equal) reg (zero_reg)))

 ;; reg != 0
 (decl cmp_nez (XReg) IntegerCompare)
 (rule (cmp_nez reg) (int_compare (IntCC.NotEqual) reg (zero_reg)))

 ;; lhs == rhs
 (decl cmp_eq (XReg XReg) IntegerCompare)
 (rule (cmp_eq lhs rhs) (int_compare (IntCC.Equal) lhs rhs))

 ;; lhs != rhs
 (decl cmp_ne (XReg XReg) IntegerCompare)
 (rule (cmp_ne lhs rhs) (int_compare (IntCC.NotEqual) lhs rhs))

 ;; lhs < rhs (signed)
 (decl cmp_lt (XReg XReg) IntegerCompare)
 (rule (cmp_lt lhs rhs) (int_compare (IntCC.SignedLessThan) lhs rhs))

 ;; reg < 0 (signed)
 (decl cmp_ltz (XReg) IntegerCompare)
 (rule (cmp_ltz reg) (int_compare (IntCC.SignedLessThan) reg (zero_reg)))

 ;; lhs > rhs (signed)
 (decl cmp_gt (XReg XReg) IntegerCompare)
 (rule (cmp_gt lhs rhs) (int_compare (IntCC.SignedGreaterThan) lhs rhs))

 ;; lhs >= rhs (signed)
 (decl cmp_ge (XReg XReg) IntegerCompare)
 (rule (cmp_ge lhs rhs) (int_compare (IntCC.SignedGreaterThanOrEqual) lhs rhs))

 ;; lhs <= rhs (signed)
 (decl cmp_le (XReg XReg) IntegerCompare)
 (rule (cmp_le lhs rhs) (int_compare (IntCC.SignedLessThanOrEqual) lhs rhs))

 ;; lhs > rhs (unsigned)
 (decl cmp_gtu (XReg XReg) IntegerCompare)
 (rule (cmp_gtu lhs rhs) (int_compare (IntCC.UnsignedGreaterThan) lhs rhs))

 ;; lhs >= rhs (unsigned)
 (decl cmp_geu (XReg XReg) IntegerCompare)
 (rule (cmp_geu lhs rhs) (int_compare (IntCC.UnsignedGreaterThanOrEqual) lhs rhs))

 ;; lhs < rhs (unsigned)
 (decl cmp_ltu (XReg XReg) IntegerCompare)
 (rule (cmp_ltu lhs rhs) (int_compare (IntCC.UnsignedLessThan) lhs rhs))

 ;; lhs <= rhs (unsigned)
 (decl cmp_leu (XReg XReg) IntegerCompare)
 (rule (cmp_leu lhs rhs) (int_compare (IntCC.UnsignedLessThanOrEqual) lhs rhs))

 ;; Helper to generate an `IntegerCompare` which represents the "truthy" value
 ;; of the input provided.
 ;;
 ;; This is used in `Select` and `brif` for example to generate conditional
 ;; branches. The returned comparison, when taken, represents that `Value` is
 ;; nonzero. When not taken the input `Value` is zero.
 (decl lower_int_compare (Value) IntegerCompare)

 ;; Base case - convert to a "truthy" value and compare it against zero.
 ;;
 ;; Note that non-64-bit types need to be extended since the upper bits from
 ;; Cranelift's point of view are undefined. Favor a zero extension for 8-bit
 ;; types because that's a single `andi` instruction, but favor sign-extension
 ;; for 16 and 32-bit types because many RISC-V instructions which operate on
 ;; the low 32 bits of registers sign-extend their results. Additionally the
 ;; base 64-bit ISA has a single instruction for sign-extending from 32 to
 ;; 64 bits, which makes that a bit cheaper if used.
 (rule 0 (lower_int_compare v @ (value_type (fits_in_64 _)))
   (cmp_nez (sext v)))
 (rule 1 (lower_int_compare v @ (value_type $I8))
   (cmp_nez (zext v)))
 (rule 1 (lower_int_compare v @ (value_type $I128))
   (cmp_nez (rv_or (value_regs_get v 0) (value_regs_get v 1))))

 ;; If the input value is itself an `icmp` we can avoid generating the result
 ;; of the `icmp` and instead move the comparison directly into the
 ;; `IntegerCompare` that's returned. Note that comparisons compare full
 ;; registers, so an extension matching the signedness of the comparison is
 ;; required: zero-extension by default, sign-extension for signed condition
 ;; codes.
 ;;
 ;; Also note that as a small optimization `Equal` and `NotEqual` use
 ;; sign-extension for 32-bit values since the same result is produced with
 ;; either zero-or-sign extension and many values are already sign-extended
 ;; given the RV64 instruction set (e.g. `addw` adds 32-bit values and sign
 ;; extends), theoretically resulting in more efficient codegen.
 (rule 2 (lower_int_compare (maybe_uextend (icmp cc lhs rhs @ (value_type (fits_in_64 _)))))
   (int_compare cc (zext lhs) (zext rhs)))
 (rule 3 (lower_int_compare (maybe_uextend (icmp cc lhs rhs @ (value_type (fits_in_64 _)))))
   (if (signed_cond_code cc))
   (int_compare cc (sext lhs) (sext rhs)))
 (rule 4 (lower_int_compare (maybe_uextend (icmp cc @ (IntCC.Equal) lhs rhs @ (value_type $I32))))
   (int_compare cc (sext lhs) (sext rhs)))
 (rule 4 (lower_int_compare (maybe_uextend (icmp cc @ (IntCC.NotEqual) lhs rhs @ (value_type $I32))))
   (int_compare cc (sext lhs) (sext rhs)))

 ;; If the input is an `fcmp` then the `FCmp` return value is directly
 ;; convertible to `IntegerCompare` which can shave off an instruction from the
 ;; fallback lowering above.
 (rule 2 (lower_int_compare (maybe_uextend (fcmp cc lhs @ (value_type ty) rhs)))
   (emit_fcmp cc ty lhs rhs))

 ;; Lowers a branch instruction given its target labels. Declared `partial`:
 ;; only the instruction shapes matched below are handled here.
 (decl partial lower_branch (Inst MachLabelSlice) Unit)

 ;; Unconditional `jump`: a single `j` to the only target.
 (rule (lower_branch (jump _) (single_target dest))
   (emit_side_effect (rv_j dest)))

 ;; `brif`: conditional branch on the truthiness of the condition value.
 (rule (lower_branch (brif cond _ _) (two_targets taken not_taken))
   (emit_side_effect (cond_br (lower_int_compare cond) taken not_taken)))

 ;; `br_table`: handled by an externally implemented constructor.
 (decl lower_br_table (Reg MachLabelSlice) Unit)
 (extern constructor lower_br_table lower_br_table)

 (rule (lower_branch (br_table idx _) labels)
   (lower_br_table idx labels))

 ;; Externally implemented constructor that takes no arguments and returns a
 ;; register.
 ;; NOTE(review): presumably yields the return address -- confirm against the
 ;; Rust implementation.
 (decl load_ra () Reg)
 (extern constructor load_ra load_ra)


 ;; Generates a bitcast instruction.
 ;; Args are: src, src_ty, dst_ty
 ;;
 ;; Same-width float<->int pairs use the matching `fmv` helper; any other
 ;; combination returns the source register unchanged.
 (decl gen_bitcast (Reg Type Type) Reg)
 ;; Float -> integer.
 (rule 1 (gen_bitcast src $F32 $I32) (rv_fmvxw src))
 (rule 1 (gen_bitcast src $F64 $I64) (rv_fmvxd src))
 ;; Integer -> float.
 (rule 1 (gen_bitcast src $I32 $F32) (rv_fmvwx src))
 (rule 1 (gen_bitcast src $I64 $F64) (rv_fmvdx src))
 ;; Everything else: no instruction needed.
 (rule (gen_bitcast src _ _) src)

 ;; Moves the bits of a float register into an integer register. The type
 ;; argument is the float type of the source.
 (decl move_f_to_x (FReg Type) XReg)
 (rule (move_f_to_x src $F32) (gen_bitcast src $F32 $I32))
 (rule (move_f_to_x src $F64) (gen_bitcast src $F64 $I64))

 ;; Moves the bits of an integer register into a float register. The type
 ;; argument is the integer type of the source.
 (decl move_x_to_f (XReg Type) FReg)
 (rule (move_x_to_f src $I32) (gen_bitcast src $I32 $F32))
 (rule (move_x_to_f src $I64) (gen_bitcast src $I64 $F64))

 ;; Maps a float type to the integer type of the same width.
 (decl float_int_of_same_size (Type) Type)
 (rule (float_int_of_same_size $F32) $I32)
 (rule (float_int_of_same_size $F64) $I64)


 ;; Produces the `rev8` of `src`: the `rv_rev8` helper when Zbb is available,
 ;; otherwise the `MInst.Rev8` pseudo-instruction with two extra temporaries.
 (decl gen_rev8 (XReg) XReg)

 (rule 1 (gen_rev8 src)
   (if-let $true (has_zbb))
   (rv_rev8 src))

 (rule (gen_rev8 src)
   (if-let $false (has_zbb))
   (let ((result WritableXReg (temp_writable_xreg))
         (scratch WritableXReg (temp_writable_xreg))
         (counter WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.Rev8 src counter scratch result))))
     (writable_reg_to_reg result)))


 ;; Produces the `brev8` of `src`: the `rv_brev8` helper when Zbkb is
 ;; available (the type is ignored), otherwise the `MInst.Brev8`
 ;; pseudo-instruction, which takes the type and three extra temporaries.
 (decl gen_brev8 (Reg Type) Reg)

 (rule 1 (gen_brev8 src _)
   (if-let $true (has_zbkb))
   (rv_brev8 src))

 (rule (gen_brev8 src ty)
   (if-let $false (has_zbkb))
   (let ((scratch_a WritableXReg (temp_writable_xreg))
         (scratch_b WritableXReg (temp_writable_xreg))
         (counter WritableXReg (temp_writable_xreg))
         (result WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.Brev8 src ty counter scratch_a scratch_b result))))
     (writable_reg_to_reg result)))

 ;; Negates the operand; equivalent to `0 - x`.
 (decl neg (Type ValueRegs) ValueRegs)

 ;; Integer types of at most 64 bits: a single `rv_neg` on the first register.
 (rule 1 (neg (fits_in_64 (ty_int ty)) operand)
   (value_reg (rv_neg (value_regs_get operand 0))))

 ;; 128-bit: subtract the operand from a zero register pair.
 (rule 2 (neg $I128 operand)
   (i128_sub (value_regs_zero) operand))


 ;; Builds an instruction sequence that traps if the comparison succeeds.
 (decl gen_trapif (IntCC XReg XReg TrapCode) InstOutput)
 (rule (gen_trapif cc lhs rhs code)
   (side_effect
     (SideEffectNoResult.Inst (MInst.TrapIf lhs rhs cc code))))

 ;; Builds an instruction sequence that traps if the input is non-zero.
 (decl gen_trapnz (XReg TrapCode) InstOutput)
 (rule (gen_trapnz value code)
   (gen_trapif (IntCC.NotEqual) value (zero_reg) code))

 ;; Builds an instruction sequence that traps if the input is zero.
 (decl gen_trapz (XReg TrapCode) InstOutput)
 (rule (gen_trapz value code)
   (gen_trapif (IntCC.Equal) value (zero_reg) code))

 ;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Direct call: externally implemented constructor taking the signature, the
 ;; callee's external name, its relocation distance and a slice of values.
 (decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput)
 (extern constructor gen_call gen_call)

 ;; Indirect call: externally implemented constructor taking the signature, a
 ;; callee `Value` and a slice of values.
 (decl gen_call_indirect (SigRef Value ValueSlice) InstOutput)
 (extern constructor gen_call_indirect gen_call_indirect)

 ;; Imitates the aarch64 `madd` instruction: computes `n * m + a` as a
 ;; multiply followed by an add.
 (decl madd (XReg XReg XReg) XReg)
 (rule (madd lhs rhs addend)
   (let ((product XReg (rv_mul lhs rhs)))
     (rv_add product addend)))

 ;;;; Helpers for bmask ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Generates either 0 if `Value` is zero or -1 otherwise.
 (decl gen_bmask (Value) XReg)

 ;; Up to 64 bits: sign-extend, test for non-zero, then negate the 0/1 result.
 (rule 0 (gen_bmask v @ (value_type (fits_in_64 _)))
   (let ((is_set XReg (rv_snez (sext v))))
     (rv_neg is_set)))

 ;; 128 bits: OR the two halves together before the non-zero test.
 (rule 1 (gen_bmask v @ (value_type $I128))
   (let ((is_set XReg (rv_snez (rv_or (value_regs_get v 0) (value_regs_get v 1)))))
     (rv_neg is_set)))

 ;; Lowers `bmask` to the requested output type. For $I128 the same mask
 ;; register is used for both halves of the result.
 (decl lower_bmask (Value Type) ValueRegs)
 (rule 0 (lower_bmask v (fits_in_64 _))
   (value_reg (gen_bmask v)))
 (rule 1 (lower_bmask v $I128)
   (let ((mask XReg (gen_bmask v)))
     (value_regs mask mask)))

 ;;;; Helpers for physical registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Emits `MInst.MovFromPReg` from the physical register `src` into a fresh
 ;; integer temporary and returns that temporary.
 (decl gen_mov_from_preg (PReg) Reg)

 (rule (gen_mov_from_preg src)
   (let ((dst WritableXReg (temp_writable_xreg))
         (_ Unit (emit (MInst.MovFromPReg dst src))))
     dst))

 ;; Externally implemented constructor returning a `PReg`.
 ;; NOTE(review): presumably the frame pointer register -- confirm.
 (decl fp_reg () PReg)
 (extern constructor fp_reg fp_reg)

 ;; Externally implemented constructor returning a `PReg`.
 ;; NOTE(review): presumably the stack pointer register -- confirm.
 (decl sp_reg () PReg)
 (extern constructor sp_reg sp_reg)

 ;; Helper for creating the zero register.
 (decl zero_reg () Reg)
 (extern constructor zero_reg zero_reg)

 ;; A register pair in which both registers are produced by `(imm $I64 0)`.
 (decl value_regs_zero () ValueRegs)
 (rule (value_regs_zero)
   (value_regs (imm $I64 0) (imm $I64 0)))

 ;; Writable counterpart of `zero_reg`. Implemented externally.
 (decl writable_zero_reg () WritableReg)
 (extern constructor writable_zero_reg writable_zero_reg)


 ;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Compares the value against itself with `rv_feq`.
 (decl is_not_nan (Type FReg) XReg)
 (rule (is_not_nan ty value) (rv_feq ty value value))

 ;; ANDs together the `is_not_nan` results of both operands.
 (decl ordered (Type FReg FReg) XReg)
 (rule (ordered ty lhs rhs)
   (rv_and (is_not_nan ty lhs) (is_not_nan ty rhs)))

 ;; Result of a floating point comparison: a register plus the polarity with
 ;; which it must be interpreted.
 (type FCmp
   (enum
     ;; The comparison succeeded if `r` is one
     (One (r XReg))
     ;; The comparison succeeded if `r` is zero
     (Zero (r XReg))))

 ;; Flips the polarity of an `FCmp` while keeping the same register.
 (decl fcmp_invert (FCmp) FCmp)
 (rule (fcmp_invert (FCmp.One reg)) (FCmp.Zero reg))
 (rule (fcmp_invert (FCmp.Zero reg)) (FCmp.One reg))

 ;; Turns an `FCmp` into an `IntegerCompare` against zero: not-equal-to-zero
 ;; for `One`, equal-to-zero for `Zero`. Registered below as an implicit
 ;; conversion.
 (decl fcmp_to_compare (FCmp) IntegerCompare)
 (rule (fcmp_to_compare (FCmp.One reg)) (cmp_nez reg))
 (rule (fcmp_to_compare (FCmp.Zero reg)) (cmp_eqz reg))
 (convert FCmp IntegerCompare fcmp_to_compare)

 ;; Compare two floating point numbers and return a zero/non-zero result.
 (decl emit_fcmp (FloatCC Type FReg FReg) FCmp)

 ;; Direct codegen for unordered comparisons is not that efficient, so invert
 ;; the comparison to get an ordered comparison and generate that. Then invert
 ;; the result to produce the final fcmp result.
 (rule 0 (emit_fcmp cc ty lhs rhs)
   (if-let $true (floatcc_unordered cc))
   (fcmp_invert (emit_fcmp (floatcc_complement cc) ty lhs rhs)))

 ;; Ordered: lhs is not nan && rhs is not nan
 (rule 1 (emit_fcmp (FloatCC.Ordered) ty lhs rhs)
   (FCmp.One (ordered ty lhs rhs)))

 ;; lhs == rhs
 (rule 1 (emit_fcmp (FloatCC.Equal) ty lhs rhs)
   (FCmp.One (rv_feq ty lhs rhs)))

 ;; lhs != rhs
 ;; == !(lhs == rhs)
 (rule 1 (emit_fcmp (FloatCC.NotEqual) ty lhs rhs)
   (FCmp.Zero (rv_feq ty lhs rhs)))

 ;; lhs < rhs || lhs > rhs
 (rule 1 (emit_fcmp (FloatCC.OrderedNotEqual) ty lhs rhs)
   (FCmp.One (rv_or (rv_flt ty lhs rhs) (rv_fgt ty lhs rhs))))

 ;; lhs < rhs
 (rule 1 (emit_fcmp (FloatCC.LessThan) ty lhs rhs)
   (FCmp.One (rv_flt ty lhs rhs)))

 ;; lhs <= rhs
 (rule 1 (emit_fcmp (FloatCC.LessThanOrEqual) ty lhs rhs)
   (FCmp.One (rv_fle ty lhs rhs)))

 ;; lhs > rhs
 (rule 1 (emit_fcmp (FloatCC.GreaterThan) ty lhs rhs)
   (FCmp.One (rv_fgt ty lhs rhs)))

 ;; lhs >= rhs
 (rule 1 (emit_fcmp (FloatCC.GreaterThanOrEqual) ty lhs rhs)
   (FCmp.One (rv_fge ty lhs rhs)))