| //! AArch64 ISA: binary code emission. |
| |
| use cranelift_control::ControlPlane; |
| use regalloc2::Allocation; |
| |
| use crate::binemit::{Reloc, StackMap}; |
| use crate::ir::{self, types::*, LibCall, MemFlags, RelSourceLoc, TrapCode}; |
| use crate::isa::aarch64::inst::*; |
| use crate::machinst::{ty_bits, Reg, RegClass, Writable}; |
| use crate::trace; |
| use core::convert::TryFrom; |
| |
| /// Memory addressing mode finalization: convert "special" modes (e.g., |
| /// generic arbitrary stack offset) into real addressing modes, possibly by |
| /// emitting some helper instructions that come immediately before the use |
| /// of this amode. |
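///
/// For example (a sketch of the common cases; see the match below for the
/// full rules): an `FPOffset` whose adjusted offset fits in a signed 9-bit
/// immediate becomes `AMode::Unscaled` off the frame pointer with no helper
/// instructions, while an offset that fits in neither `SImm9` nor
/// `UImm12Scaled` is materialized into the spill temporary via
/// `load_constant` and accessed through `AMode::RegExtended`.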
| pub fn mem_finalize( |
| sink: Option<&mut MachBuffer<Inst>>, |
| mem: &AMode, |
| state: &EmitState, |
| ) -> (SmallVec<[Inst; 4]>, AMode) { |
| match mem { |
| &AMode::RegOffset { off, ty, .. } |
| | &AMode::SPOffset { off, ty } |
| | &AMode::FPOffset { off, ty } |
| | &AMode::NominalSPOffset { off, ty } => { |
| let basereg = match mem { |
| &AMode::RegOffset { rn, .. } => rn, |
| &AMode::SPOffset { .. } | &AMode::NominalSPOffset { .. } => stack_reg(), |
| &AMode::FPOffset { .. } => fp_reg(), |
| _ => unreachable!(), |
| }; |
| let adj = match mem { |
| &AMode::NominalSPOffset { .. } => { |
| trace!( |
| "mem_finalize: nominal SP offset {} + adj {} -> {}", |
| off, |
| state.virtual_sp_offset, |
| off + state.virtual_sp_offset |
| ); |
| state.virtual_sp_offset |
| } |
| _ => 0, |
| }; |
| let off = off + adj; |
| |
| if let Some(simm9) = SImm9::maybe_from_i64(off) { |
| let mem = AMode::Unscaled { rn: basereg, simm9 }; |
| (smallvec![], mem) |
| } else if let Some(uimm12) = UImm12Scaled::maybe_from_i64(off, ty) { |
| let mem = AMode::UnsignedOffset { |
| rn: basereg, |
| uimm12, |
| }; |
| (smallvec![], mem) |
| } else { |
| let tmp = writable_spilltmp_reg(); |
| ( |
| Inst::load_constant(tmp, off as u64, &mut |_| tmp), |
| AMode::RegExtended { |
| rn: basereg, |
| rm: tmp.to_reg(), |
| extendop: ExtendOp::SXTX, |
| }, |
| ) |
| } |
| } |
| |
| AMode::Const { addr } => { |
| let sink = match sink { |
| Some(sink) => sink, |
| None => return (smallvec![], mem.clone()), |
| }; |
| let label = sink.get_label_for_constant(*addr); |
| let label = MemLabel::Mach(label); |
| (smallvec![], AMode::Label { label }) |
| } |
| |
| _ => (smallvec![], mem.clone()), |
| } |
| } |
| |
| //============================================================================= |
| // Instructions and subcomponents: emission |
| |
| pub(crate) fn machreg_to_gpr(m: Reg) -> u32 { |
| assert_eq!(m.class(), RegClass::Int); |
| u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() |
| } |
| |
| pub(crate) fn machreg_to_vec(m: Reg) -> u32 { |
| assert_eq!(m.class(), RegClass::Float); |
| u32::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap() |
| } |
| |
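/// Like `machreg_to_gpr`/`machreg_to_vec`, but with no register-class
/// assertion: used by the load/store encoders, whose data register may be
/// either an integer or a vector register.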
| fn machreg_to_gpr_or_vec(m: Reg) -> u32 { |
| u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() |
| } |
| |
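/// Bit layout of the standard three-register arithmetic encoding: `rd` in
/// bits 4:0, `rn` in bits 9:5, `rm` in bits 20:16, with the caller-provided
/// opcode fields occupying bits 31:21 and 15:10.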
| pub(crate) fn enc_arith_rrr( |
| bits_31_21: u32, |
| bits_15_10: u32, |
| rd: Writable<Reg>, |
| rn: Reg, |
| rm: Reg, |
| ) -> u32 { |
| (bits_31_21 << 21) |
| | (bits_15_10 << 10) |
| | machreg_to_gpr(rd.to_reg()) |
| | (machreg_to_gpr(rn) << 5) |
| | (machreg_to_gpr(rm) << 16) |
| } |
| |
| fn enc_arith_rr_imm12( |
| bits_31_24: u32, |
| immshift: u32, |
| imm12: u32, |
| rn: Reg, |
| rd: Writable<Reg>, |
| ) -> u32 { |
| (bits_31_24 << 24) |
| | (immshift << 22) |
| | (imm12 << 10) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr(rd.to_reg()) |
| } |
| |
| fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 { |
| (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()) |
| } |
| |
| fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 { |
| (top11 << 21) |
| | (machreg_to_gpr(rm) << 16) |
| | (bit15 << 15) |
| | (machreg_to_gpr(ra) << 10) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr(rd.to_reg()) |
| } |
| |
| fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 { |
| assert!(off_26_0 < (1 << 26)); |
| (op_31_26 << 26) | off_26_0 |
| } |
| |
| fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 { |
| assert!(off_18_0 < (1 << 19)); |
| (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg) |
| } |
| |
| fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 { |
| assert!(off_18_0 < (1 << 19)); |
| assert!(cond < (1 << 4)); |
| (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond |
| } |
| |
| fn enc_conditional_br( |
| taken: BranchTarget, |
| kind: CondBrKind, |
| allocs: &mut AllocationConsumer<'_>, |
| ) -> u32 { |
| match kind { |
| CondBrKind::Zero(reg) => { |
| let reg = allocs.next(reg); |
| enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg) |
| } |
| CondBrKind::NotZero(reg) => { |
| let reg = allocs.next(reg); |
| enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg) |
| } |
| CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()), |
| } |
| } |
| |
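/// MOVN/MOVZ (and MOVK, below) encode a 16-bit immediate plus a 2-bit `hw`
/// field (bits 22:21) that places the immediate at one of the four 16-bit
/// positions of the destination; `imm.shift` is that `hw` field.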
| fn enc_move_wide(op: MoveWideOp, rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 { |
| assert!(imm.shift <= 0b11); |
| let op = match op { |
| MoveWideOp::MovN => 0b00, |
| MoveWideOp::MovZ => 0b10, |
| }; |
| 0x12800000 |
| | size.sf_bit() << 31 |
| | op << 29 |
| | u32::from(imm.shift) << 21 |
| | u32::from(imm.bits) << 5 |
| | machreg_to_gpr(rd.to_reg()) |
| } |
| |
| fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 { |
| assert!(imm.shift <= 0b11); |
| 0x72800000 |
| | size.sf_bit() << 31 |
| | u32::from(imm.shift) << 21 |
| | u32::from(imm.bits) << 5 |
| | machreg_to_gpr(rd.to_reg()) |
| } |
| |
| fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 { |
| (op_31_22 << 22) |
| | (simm7.bits() << 15) |
| | (machreg_to_gpr(rt2) << 10) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr(rt) |
| } |
| |
| fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 { |
| (op_31_22 << 22) |
| | (simm9.bits() << 12) |
| | (op_11_10 << 10) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr_or_vec(rd) |
| } |
| |
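/// Note that this ORs in bit 24, turning the base (unscaled-immediate)
/// opcode in `op_31_22` into the "unsigned offset" load/store form.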
| fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 { |
| (op_31_22 << 22) |
| | (0b1 << 24) |
| | (uimm12.bits() << 10) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr_or_vec(rd) |
| } |
| |
| fn enc_ldst_reg( |
| op_31_22: u32, |
| rn: Reg, |
| rm: Reg, |
| s_bit: bool, |
| extendop: Option<ExtendOp>, |
| rd: Reg, |
| ) -> u32 { |
| let s_bit = if s_bit { 1 } else { 0 }; |
| let extend_bits = match extendop { |
| Some(ExtendOp::UXTW) => 0b010, |
| Some(ExtendOp::SXTW) => 0b110, |
| Some(ExtendOp::SXTX) => 0b111, |
| None => 0b011, // LSL |
| _ => panic!("bad extend mode for ld/st AMode"), |
| }; |
| (op_31_22 << 22) |
| | (1 << 21) |
| | (machreg_to_gpr(rm) << 16) |
| | (extend_bits << 13) |
| | (s_bit << 12) |
| | (0b10 << 10) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr_or_vec(rd) |
| } |
| |
| pub(crate) fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 { |
| (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd) |
| } |
| |
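/// Encodes an `LD1R`-style replicating load: a single element is loaded
/// from `[rn]` and broadcast to every lane of `rt`.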
| fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 { |
| debug_assert_eq!(q & 0b1, q); |
| debug_assert_eq!(size & 0b11, size); |
| 0b0_0_0011010_10_00000_110_0_00_00000_00000 |
| | q << 30 |
| | size << 10 |
| | machreg_to_gpr(rn) << 5 |
| | machreg_to_vec(rt.to_reg()) |
| } |
| |
| fn enc_ldst_vec_pair( |
| opc: u32, |
| amode: u32, |
| is_load: bool, |
| simm7: SImm7Scaled, |
| rn: Reg, |
| rt: Reg, |
| rt2: Reg, |
| ) -> u32 { |
| debug_assert_eq!(opc & 0b11, opc); |
| debug_assert_eq!(amode & 0b11, amode); |
| |
| 0b00_10110_00_0_0000000_00000_00000_00000 |
| | opc << 30 |
| | amode << 23 |
| | (is_load as u32) << 22 |
| | simm7.bits() << 15 |
| | machreg_to_vec(rt2) << 10 |
| | machreg_to_gpr(rn) << 5 |
| | machreg_to_vec(rt) |
| } |
| |
| fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 { |
| (top11 << 21) |
| | (machreg_to_vec(rm) << 16) |
| | (bit15_10 << 10) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_vec(rd.to_reg()) |
| } |
| |
| fn enc_vec_rrr_long( |
| q: u32, |
| u: u32, |
| size: u32, |
| bit14: u32, |
| rm: Reg, |
| rn: Reg, |
| rd: Writable<Reg>, |
| ) -> u32 { |
| debug_assert_eq!(q & 0b1, q); |
| debug_assert_eq!(u & 0b1, u); |
| debug_assert_eq!(size & 0b11, size); |
| debug_assert_eq!(bit14 & 0b1, bit14); |
| |
| 0b0_0_0_01110_00_1_00000_100000_00000_00000 |
| | q << 30 |
| | u << 29 |
| | size << 22 |
| | bit14 << 14 |
| | (machreg_to_vec(rm) << 16) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_vec(rd.to_reg()) |
| } |
| |
| fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 { |
| (0b01011010110 << 21) |
| | size << 31 |
| | opcode2 << 16 |
| | opcode1 << 10 |
| | machreg_to_gpr(rn) << 5 |
| | machreg_to_gpr(rd.to_reg()) |
| } |
| |
| pub(crate) fn enc_br(rn: Reg) -> u32 { |
| 0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5) |
| } |
| |
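/// ADR-family instructions split their 21-bit signed offset into `immlo`
/// (the low two bits, at bits 30:29) and `immhi` (the remaining 19 bits,
/// at bits 23:5).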
| pub(crate) fn enc_adr_inst(opcode: u32, off: i32, rd: Writable<Reg>) -> u32 { |
| let off = u32::try_from(off).unwrap(); |
| let immlo = off & 3; |
| let immhi = (off >> 2) & ((1 << 19) - 1); |
| opcode | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg()) |
| } |
| |
| pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 { |
| let opcode = 0b00010000 << 24; |
| enc_adr_inst(opcode, off, rd) |
| } |
| |
| pub(crate) fn enc_adrp(off: i32, rd: Writable<Reg>) -> u32 { |
| let opcode = 0b10010000 << 24; |
| enc_adr_inst(opcode, off, rd) |
| } |
| |
| fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 { |
| debug_assert_eq!(op & 0b1, op); |
| debug_assert_eq!(o2 & 0b1, o2); |
| 0b100_11010100_00000_0000_00_00000_00000 |
| | (op << 30) |
| | (machreg_to_gpr(rm) << 16) |
| | (cond.bits() << 12) |
| | (o2 << 10) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr(rd.to_reg()) |
| } |
| |
| fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 { |
| 0b000_11110_00_1_00000_0000_11_00000_00000 |
| | (size.ftype() << 22) |
| | (machreg_to_vec(rm) << 16) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_vec(rd.to_reg()) |
| | (cond.bits() << 12) |
| } |
| |
| fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 { |
| 0b0_1_1_11010010_00000_0000_00_00000_0_0000 |
| | size.sf_bit() << 31 |
| | machreg_to_gpr(rm) << 16 |
| | cond.bits() << 12 |
| | machreg_to_gpr(rn) << 5 |
| | nzcv.bits() |
| } |
| |
| fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 { |
| 0b0_1_1_11010010_00000_0000_10_00000_0_0000 |
| | size.sf_bit() << 31 |
| | imm.bits() << 16 |
| | cond.bits() << 12 |
| | machreg_to_gpr(rn) << 5 |
| | nzcv.bits() |
| } |
| |
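/// Bitfield-move family (SBFM/BFM/UBFM, selected by `opc`): `immr` gives
/// the rotate amount and `imms` the end position of the bitfield.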
| fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 { |
| match size { |
| OperandSize::Size64 => { |
| debug_assert!(immr <= 63); |
| debug_assert!(imms <= 63); |
| } |
| OperandSize::Size32 => { |
| debug_assert!(immr <= 31); |
| debug_assert!(imms <= 31); |
| } |
| } |
| debug_assert_eq!(opc & 0b11, opc); |
| let n_bit = size.sf_bit(); |
| 0b0_00_100110_0_000000_000000_00000_00000 |
| | size.sf_bit() << 31 |
| | u32::from(opc) << 29 |
| | n_bit << 22 |
| | u32::from(immr) << 16 |
| | u32::from(imms) << 10 |
| | machreg_to_gpr(rn) << 5 |
| | machreg_to_gpr(rd.to_reg()) |
| } |
| |
| fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 { |
| 0b00001110_101_00000_00011_1_00000_00000 |
| | ((is_16b as u32) << 30) |
| | machreg_to_vec(rd.to_reg()) |
| | (machreg_to_vec(rn) << 16) |
| | (machreg_to_vec(rn) << 5) |
| } |
| |
| fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 { |
| (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) |
| } |
| |
| fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 { |
| (top22 << 10) |
| | (machreg_to_vec(rm) << 16) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_vec(rd.to_reg()) |
| } |
| |
| fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 { |
| (top17 << 15) |
| | (machreg_to_vec(rm) << 16) |
| | (machreg_to_vec(ra) << 10) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_vec(rd.to_reg()) |
| } |
| |
| fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 { |
| 0b000_11110_00_1_00000_00_1000_00000_00000 |
| | (size.ftype() << 22) |
| | (machreg_to_vec(rm) << 16) |
| | (machreg_to_vec(rn) << 5) |
| } |
| |
| fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 { |
| (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg()) |
| } |
| |
| fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 { |
| (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()) |
| } |
| |
| fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 { |
| (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) |
| } |
| |
| fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 { |
| debug_assert_eq!(qu & 0b11, qu); |
| debug_assert_eq!(size & 0b11, size); |
| debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16); |
| let bits = 0b0_00_01110_00_10000_00000_10_00000_00000; |
    bits
        | qu << 29
| | size << 22 |
| | bits_12_16 << 12 |
| | machreg_to_vec(rn) << 5 |
| | machreg_to_vec(rd.to_reg()) |
| } |
| |
| fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 { |
| debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16); |
| |
| 0b010_11110_11_11000_11011_10_00000_00000 |
| | bits_12_16 << 12 |
| | machreg_to_vec(rn) << 5 |
| | machreg_to_vec(rd.to_reg()) |
| } |
| |
| fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 { |
| debug_assert_eq!(u & 0b1, u); |
| debug_assert_eq!(enc_size & 0b1, enc_size); |
| |
| 0b0_1_0_01110_00_10000_00_0_10_10_00000_00000 |
| | u << 29 |
| | enc_size << 22 |
| | machreg_to_vec(rn) << 5 |
| | machreg_to_vec(rd.to_reg()) |
| } |
| |
| fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 { |
| debug_assert_eq!(q & 0b1, q); |
| debug_assert_eq!(u & 0b1, u); |
| debug_assert_eq!(size & 0b11, size); |
| debug_assert_eq!(opcode & 0b11111, opcode); |
| 0b0_0_0_01110_00_11000_0_0000_10_00000_00000 |
| | q << 30 |
| | u << 29 |
| | size << 22 |
| | opcode << 12 |
| | machreg_to_vec(rn) << 5 |
| | machreg_to_vec(rd.to_reg()) |
| } |
| |
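/// TBL/TBX (table lookup, with `is_extension` selecting TBX); `len`
/// encodes the number of table registers minus one.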
| fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 { |
| debug_assert_eq!(len & 0b11, len); |
| 0b0_1_001110_000_00000_0_00_0_00_00000_00000 |
| | (machreg_to_vec(rm) << 16) |
| | len << 13 |
| | (is_extension as u32) << 12 |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_vec(rd.to_reg()) |
| } |
| |
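/// Fixed encoding of `dmb ish` (data memory barrier, inner-shareable
/// domain), used to lower `Inst::Fence`.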
| fn enc_dmb_ish() -> u32 { |
| 0xD5033BBF |
| } |
| |
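/// LSE atomic read-modify-write with acquire-release semantics
/// (`ld<op>al{,b,h}` / `swpal{,b,h}`); `bit15` distinguishes SWP from the
/// LD<op> encodings.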
| fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 { |
| assert!(machreg_to_gpr(rt.to_reg()) != 31); |
| let sz = match ty { |
| I64 => 0b11, |
| I32 => 0b10, |
| I16 => 0b01, |
| I8 => 0b00, |
| _ => unreachable!(), |
| }; |
| let bit15 = match op { |
| AtomicRMWOp::Swp => 0b1, |
| _ => 0b0, |
| }; |
| let op = match op { |
| AtomicRMWOp::Add => 0b000, |
| AtomicRMWOp::Clr => 0b001, |
| AtomicRMWOp::Eor => 0b010, |
| AtomicRMWOp::Set => 0b011, |
| AtomicRMWOp::Smax => 0b100, |
| AtomicRMWOp::Smin => 0b101, |
| AtomicRMWOp::Umax => 0b110, |
| AtomicRMWOp::Umin => 0b111, |
| AtomicRMWOp::Swp => 0b000, |
| }; |
| 0b00_111_000_111_00000_0_000_00_00000_00000 |
| | (sz << 30) |
| | (machreg_to_gpr(rs) << 16) |
| | bit15 << 15 |
| | (op << 12) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr(rt.to_reg()) |
| } |
| |
| fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 { |
| let sz = match ty { |
| I64 => 0b11, |
| I32 => 0b10, |
| I16 => 0b01, |
| I8 => 0b00, |
| _ => unreachable!(), |
| }; |
| 0b00_001000_1_1_0_11111_1_11111_00000_00000 |
| | (sz << 30) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr(rt.to_reg()) |
| } |
| |
| fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 { |
| let sz = match ty { |
| I64 => 0b11, |
| I32 => 0b10, |
| I16 => 0b01, |
| I8 => 0b00, |
| _ => unreachable!(), |
| }; |
| 0b00_001000_100_11111_1_11111_00000_00000 |
| | (sz << 30) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr(rt) |
| } |
| |
| fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 { |
| let sz = match ty { |
| I64 => 0b11, |
| I32 => 0b10, |
| I16 => 0b01, |
| I8 => 0b00, |
| _ => unreachable!(), |
| }; |
| 0b00_001000_0_1_0_11111_1_11111_00000_00000 |
| | (sz << 30) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr(rt.to_reg()) |
| } |
| |
| fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 { |
| let sz = match ty { |
| I64 => 0b11, |
| I32 => 0b10, |
| I16 => 0b01, |
| I8 => 0b00, |
| _ => unreachable!(), |
| }; |
| 0b00_001000_000_00000_1_11111_00000_00000 |
| | (sz << 30) |
| | (machreg_to_gpr(rs.to_reg()) << 16) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr(rt) |
| } |
| |
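/// Compare-and-swap with acquire-release semantics (`casal{,b,h}`): `rs`
/// holds the expected value and receives the old value, `rt` is the
/// replacement value, and `rn` the address.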
| fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 { |
| debug_assert_eq!(size & 0b11, size); |
| |
| 0b00_0010001_1_1_00000_1_11111_00000_00000 |
| | size << 30 |
| | machreg_to_gpr(rs.to_reg()) << 16 |
| | machreg_to_gpr(rn) << 5 |
| | machreg_to_gpr(rt) |
| } |
| |
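/// ASIMD modified-immediate encoding: the 8-bit immediate is split into
/// `abc` (the top three bits, at bits 18:16) and `defgh` (the low five
/// bits, at bits 9:5), with `cmode` selecting how the immediate is
/// expanded.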
| fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 { |
| let abc = (imm >> 5) as u32; |
| let defgh = (imm & 0b11111) as u32; |
| |
| debug_assert_eq!(cmode & 0b1111, cmode); |
| debug_assert_eq!(q_op & 0b11, q_op); |
| |
| 0b0_0_0_0111100000_000_0000_01_00000_00000 |
| | (q_op << 29) |
| | (abc << 16) |
| | (cmode << 12) |
| | (defgh << 5) |
| | machreg_to_vec(rd.to_reg()) |
| } |
| |
| /// State carried between emissions of a sequence of instructions. |
| #[derive(Default, Clone, Debug)] |
| pub struct EmitState { |
| /// Addend to convert nominal-SP offsets to real-SP offsets at the current |
| /// program point. |
| pub(crate) virtual_sp_offset: i64, |
| /// Offset of FP from nominal-SP. |
| pub(crate) nominal_sp_to_fp: i64, |
| /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. |
| stack_map: Option<StackMap>, |
    /// Current source-code location corresponding to the instruction to be
    /// emitted.
| cur_srcloc: RelSourceLoc, |
    /// Only used during fuzz-testing; otherwise this is a zero-sized struct
    /// that is optimized away at compile time. See [cranelift_control].
| ctrl_plane: ControlPlane, |
| } |
| |
| impl MachInstEmitState<Inst> for EmitState { |
| fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self { |
| EmitState { |
| virtual_sp_offset: 0, |
| nominal_sp_to_fp: abi.frame_size() as i64, |
| stack_map: None, |
| cur_srcloc: Default::default(), |
| ctrl_plane, |
| } |
| } |
| |
| fn pre_safepoint(&mut self, stack_map: StackMap) { |
| self.stack_map = Some(stack_map); |
| } |
| |
| fn pre_sourceloc(&mut self, srcloc: RelSourceLoc) { |
| self.cur_srcloc = srcloc; |
| } |
| |
| fn ctrl_plane_mut(&mut self) -> &mut ControlPlane { |
| &mut self.ctrl_plane |
| } |
| |
| fn take_ctrl_plane(self) -> ControlPlane { |
| self.ctrl_plane |
| } |
| } |
| |
| impl EmitState { |
| fn take_stack_map(&mut self) -> Option<StackMap> { |
| self.stack_map.take() |
| } |
| |
| fn clear_post_insn(&mut self) { |
| self.stack_map = None; |
| } |
| |
| fn cur_srcloc(&self) -> RelSourceLoc { |
| self.cur_srcloc |
| } |
| } |
| |
| /// Constant state used during function compilation. |
| pub struct EmitInfo(settings::Flags); |
| |
| impl EmitInfo { |
| /// Create a constant state for emission of instructions. |
| pub fn new(flags: settings::Flags) -> Self { |
| Self(flags) |
| } |
| } |
| |
| impl MachInstEmit for Inst { |
| type State = EmitState; |
| type Info = EmitInfo; |
| |
| fn emit( |
| &self, |
| allocs: &[Allocation], |
| sink: &mut MachBuffer<Inst>, |
| emit_info: &Self::Info, |
| state: &mut EmitState, |
| ) { |
| let mut allocs = AllocationConsumer::new(allocs); |
| |
| // N.B.: we *must* not exceed the "worst-case size" used to compute |
| // where to insert islands, except when islands are explicitly triggered |
| // (with an `EmitIsland`). We check this in debug builds. This is `mut` |
| // to allow disabling the check for `JTSequence`, which is always |
| // emitted following an `EmitIsland`. |
| let mut start_off = sink.cur_offset(); |
| |
| match self { |
| &Inst::AluRRR { |
| alu_op, |
| size, |
| rd, |
| rn, |
| rm, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| |
| debug_assert!(match alu_op { |
| ALUOp::SDiv | ALUOp::UDiv | ALUOp::SMulH | ALUOp::UMulH => |
| size == OperandSize::Size64, |
| _ => true, |
| }); |
| let top11 = match alu_op { |
| ALUOp::Add => 0b00001011_000, |
| ALUOp::Adc => 0b00011010_000, |
| ALUOp::AdcS => 0b00111010_000, |
| ALUOp::Sub => 0b01001011_000, |
| ALUOp::Sbc => 0b01011010_000, |
| ALUOp::SbcS => 0b01111010_000, |
| ALUOp::Orr => 0b00101010_000, |
| ALUOp::And => 0b00001010_000, |
| ALUOp::AndS => 0b01101010_000, |
| ALUOp::Eor => 0b01001010_000, |
| ALUOp::OrrNot => 0b00101010_001, |
| ALUOp::AndNot => 0b00001010_001, |
| ALUOp::EorNot => 0b01001010_001, |
| ALUOp::AddS => 0b00101011_000, |
| ALUOp::SubS => 0b01101011_000, |
| ALUOp::SDiv => 0b10011010_110, |
| ALUOp::UDiv => 0b10011010_110, |
| ALUOp::RotR | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110, |
| ALUOp::SMulH => 0b10011011_010, |
| ALUOp::UMulH => 0b10011011_110, |
| }; |
| let top11 = top11 | size.sf_bit() << 10; |
| let bit15_10 = match alu_op { |
| ALUOp::SDiv => 0b000011, |
| ALUOp::UDiv => 0b000010, |
| ALUOp::RotR => 0b001011, |
| ALUOp::Lsr => 0b001001, |
| ALUOp::Asr => 0b001010, |
| ALUOp::Lsl => 0b001000, |
| ALUOp::SMulH | ALUOp::UMulH => 0b011111, |
| _ => 0b000000, |
| }; |
| debug_assert_ne!(writable_stack_reg(), rd); |
                // In this encoding, register 31 names the zero register rather
                // than the stack pointer, so a stack-pointer operand here is
                // likely a sign that something is wrong.
| debug_assert_ne!(stack_reg(), rn); |
| debug_assert_ne!(stack_reg(), rm); |
| sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm)); |
| } |
| &Inst::AluRRRR { |
| alu_op, |
| size, |
| rd, |
| rm, |
| rn, |
| ra, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let ra = allocs.next(ra); |
| |
| let (top11, bit15) = match alu_op { |
| ALUOp3::MAdd => (0b0_00_11011_000, 0), |
| ALUOp3::MSub => (0b0_00_11011_000, 1), |
| ALUOp3::UMAddL => { |
| debug_assert!(size == OperandSize::Size32); |
| (0b1_00_11011_1_01, 0) |
| } |
| ALUOp3::SMAddL => { |
| debug_assert!(size == OperandSize::Size32); |
| (0b1_00_11011_0_01, 0) |
| } |
| }; |
| let top11 = top11 | size.sf_bit() << 10; |
| sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd)); |
| } |
| &Inst::AluRRImm12 { |
| alu_op, |
| size, |
| rd, |
| rn, |
| ref imm12, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let top8 = match alu_op { |
| ALUOp::Add => 0b000_10001, |
| ALUOp::Sub => 0b010_10001, |
| ALUOp::AddS => 0b001_10001, |
| ALUOp::SubS => 0b011_10001, |
| _ => unimplemented!("{:?}", alu_op), |
| }; |
| let top8 = top8 | size.sf_bit() << 7; |
| sink.put4(enc_arith_rr_imm12( |
| top8, |
| imm12.shift_bits(), |
| imm12.imm_bits(), |
| rn, |
| rd, |
| )); |
| } |
| &Inst::AluRRImmLogic { |
| alu_op, |
| size, |
| rd, |
| rn, |
| ref imml, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (top9, inv) = match alu_op { |
| ALUOp::Orr => (0b001_100100, false), |
| ALUOp::And => (0b000_100100, false), |
| ALUOp::AndS => (0b011_100100, false), |
| ALUOp::Eor => (0b010_100100, false), |
| ALUOp::OrrNot => (0b001_100100, true), |
| ALUOp::AndNot => (0b000_100100, true), |
| ALUOp::EorNot => (0b010_100100, true), |
| _ => unimplemented!("{:?}", alu_op), |
| }; |
| let top9 = top9 | size.sf_bit() << 8; |
| let imml = if inv { imml.invert() } else { imml.clone() }; |
| sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd)); |
| } |
| |
| &Inst::AluRRImmShift { |
| alu_op, |
| size, |
| rd, |
| rn, |
| ref immshift, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let amt = immshift.value(); |
| let (top10, immr, imms) = match alu_op { |
| ALUOp::RotR => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)), |
| ALUOp::Lsr => (0b0101001100, u32::from(amt), 0b011111), |
| ALUOp::Asr => (0b0001001100, u32::from(amt), 0b011111), |
| ALUOp::Lsl => { |
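                        // LSL is an alias of UBFM: `lsl rd, rn, #amt` encodes as
                        // `ubfm rd, rn, #((bits - amt) % bits), #(bits - 1 - amt)`.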
| let bits = if size.is64() { 64 } else { 32 }; |
| ( |
| 0b0101001100, |
| u32::from((bits - amt) % bits), |
| u32::from(bits - 1 - amt), |
| ) |
| } |
| _ => unimplemented!("{:?}", alu_op), |
| }; |
| let top10 = top10 | size.sf_bit() << 9 | size.sf_bit(); |
| let imms = match alu_op { |
| ALUOp::Lsr | ALUOp::Asr => imms | size.sf_bit() << 5, |
| _ => imms, |
| }; |
| sink.put4( |
| (top10 << 22) |
| | (immr << 16) |
| | (imms << 10) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_gpr(rd.to_reg()), |
| ); |
| } |
| |
| &Inst::AluRRRShift { |
| alu_op, |
| size, |
| rd, |
| rn, |
| rm, |
| ref shiftop, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let top11: u32 = match alu_op { |
| ALUOp::Add => 0b000_01011000, |
| ALUOp::AddS => 0b001_01011000, |
| ALUOp::Sub => 0b010_01011000, |
| ALUOp::SubS => 0b011_01011000, |
| ALUOp::Orr => 0b001_01010000, |
| ALUOp::And => 0b000_01010000, |
| ALUOp::AndS => 0b011_01010000, |
| ALUOp::Eor => 0b010_01010000, |
| ALUOp::OrrNot => 0b001_01010001, |
| ALUOp::EorNot => 0b010_01010001, |
| ALUOp::AndNot => 0b000_01010001, |
| _ => unimplemented!("{:?}", alu_op), |
| }; |
| let top11 = top11 | size.sf_bit() << 10; |
| let top11 = top11 | (u32::from(shiftop.op().bits()) << 1); |
| let bits_15_10 = u32::from(shiftop.amt().value()); |
| sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); |
| } |
| |
| &Inst::AluRRRExtend { |
| alu_op, |
| size, |
| rd, |
| rn, |
| rm, |
| extendop, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let top11: u32 = match alu_op { |
| ALUOp::Add => 0b00001011001, |
| ALUOp::Sub => 0b01001011001, |
| ALUOp::AddS => 0b00101011001, |
| ALUOp::SubS => 0b01101011001, |
| _ => unimplemented!("{:?}", alu_op), |
| }; |
| let top11 = top11 | size.sf_bit() << 10; |
| let bits_15_10 = u32::from(extendop.bits()) << 3; |
| sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); |
| } |
| |
| &Inst::BitRR { |
| op, size, rd, rn, .. |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (op1, op2) = match op { |
| BitOp::RBit => (0b00000, 0b000000), |
| BitOp::Clz => (0b00000, 0b000100), |
| BitOp::Cls => (0b00000, 0b000101), |
| BitOp::Rev16 => (0b00000, 0b000001), |
| BitOp::Rev32 => (0b00000, 0b000010), |
| BitOp::Rev64 => (0b00000, 0b000011), |
| }; |
| sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd)) |
| } |
| |
| &Inst::ULoad8 { rd, ref mem, flags } |
| | &Inst::SLoad8 { rd, ref mem, flags } |
| | &Inst::ULoad16 { rd, ref mem, flags } |
| | &Inst::SLoad16 { rd, ref mem, flags } |
| | &Inst::ULoad32 { rd, ref mem, flags } |
| | &Inst::SLoad32 { rd, ref mem, flags } |
| | &Inst::ULoad64 { |
| rd, ref mem, flags, .. |
| } |
| | &Inst::FpuLoad32 { rd, ref mem, flags } |
| | &Inst::FpuLoad64 { rd, ref mem, flags } |
| | &Inst::FpuLoad128 { rd, ref mem, flags } => { |
| let rd = allocs.next_writable(rd); |
| let mem = mem.with_allocs(&mut allocs); |
| let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state); |
| |
| for inst in mem_insts.into_iter() { |
| inst.emit(&[], sink, emit_info, state); |
| } |
| |
| // ldst encoding helpers take Reg, not Writable<Reg>. |
| let rd = rd.to_reg(); |
| |
| // This is the base opcode (top 10 bits) for the "unscaled |
| // immediate" form (Unscaled). Other addressing modes will OR in |
                // other values for bits 24/25 (bits 2/3 of this constant).
| let (op, bits) = match self { |
| &Inst::ULoad8 { .. } => (0b0011100001, 8), |
| &Inst::SLoad8 { .. } => (0b0011100010, 8), |
| &Inst::ULoad16 { .. } => (0b0111100001, 16), |
| &Inst::SLoad16 { .. } => (0b0111100010, 16), |
| &Inst::ULoad32 { .. } => (0b1011100001, 32), |
| &Inst::SLoad32 { .. } => (0b1011100010, 32), |
| &Inst::ULoad64 { .. } => (0b1111100001, 64), |
| &Inst::FpuLoad32 { .. } => (0b1011110001, 32), |
| &Inst::FpuLoad64 { .. } => (0b1111110001, 64), |
| &Inst::FpuLoad128 { .. } => (0b0011110011, 128), |
| _ => unreachable!(), |
| }; |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| // Register the offset at which the actual load instruction starts. |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| match &mem { |
| &AMode::Unscaled { rn, simm9 } => { |
| let reg = allocs.next(rn); |
| sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); |
| } |
| &AMode::UnsignedOffset { rn, uimm12 } => { |
| let reg = allocs.next(rn); |
| if uimm12.value() != 0 { |
| assert_eq!(bits, ty_bits(uimm12.scale_ty())); |
| } |
| sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd)); |
| } |
| &AMode::RegReg { rn, rm } => { |
| let r1 = allocs.next(rn); |
| let r2 = allocs.next(rm); |
| sink.put4(enc_ldst_reg( |
| op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, |
| )); |
| } |
| &AMode::RegScaled { rn, rm, ty } |
| | &AMode::RegScaledExtended { rn, rm, ty, .. } => { |
| let r1 = allocs.next(rn); |
| let r2 = allocs.next(rm); |
| assert_eq!(bits, ty_bits(ty)); |
| let extendop = match &mem { |
| &AMode::RegScaled { .. } => None, |
| &AMode::RegScaledExtended { extendop, .. } => Some(extendop), |
| _ => unreachable!(), |
| }; |
| sink.put4(enc_ldst_reg( |
| op, r1, r2, /* scaled = */ true, extendop, rd, |
| )); |
| } |
| &AMode::RegExtended { rn, rm, extendop } => { |
| let r1 = allocs.next(rn); |
| let r2 = allocs.next(rm); |
| sink.put4(enc_ldst_reg( |
| op, |
| r1, |
| r2, |
| /* scaled = */ false, |
| Some(extendop), |
| rd, |
| )); |
| } |
| &AMode::Label { ref label } => { |
| let offset = match label { |
| // cast i32 to u32 (two's-complement) |
| MemLabel::PCRel(off) => *off as u32, |
| // Emit a relocation into the `MachBuffer` |
| // for the label that's being loaded from and |
| // encode an address of 0 in its place which will |
| // get filled in by relocation resolution later on. |
| MemLabel::Mach(label) => { |
| sink.use_label_at_offset( |
| sink.cur_offset(), |
| *label, |
| LabelUse::Ldr19, |
| ); |
| 0 |
| } |
| } / 4; |
| assert!(offset < (1 << 19)); |
| match self { |
| &Inst::ULoad32 { .. } => { |
| sink.put4(enc_ldst_imm19(0b00011000, offset, rd)); |
| } |
| &Inst::SLoad32 { .. } => { |
| sink.put4(enc_ldst_imm19(0b10011000, offset, rd)); |
| } |
| &Inst::FpuLoad32 { .. } => { |
| sink.put4(enc_ldst_imm19(0b00011100, offset, rd)); |
| } |
| &Inst::ULoad64 { .. } => { |
| sink.put4(enc_ldst_imm19(0b01011000, offset, rd)); |
| } |
| &Inst::FpuLoad64 { .. } => { |
| sink.put4(enc_ldst_imm19(0b01011100, offset, rd)); |
| } |
| &Inst::FpuLoad128 { .. } => { |
| sink.put4(enc_ldst_imm19(0b10011100, offset, rd)); |
| } |
                            _ => panic!("Unsupported size for LDR from constant pool!"),
| } |
| } |
| &AMode::SPPreIndexed { simm9 } => { |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); |
| } |
| &AMode::SPPostIndexed { simm9 } => { |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); |
| } |
| // Eliminated by `mem_finalize()` above. |
| &AMode::SPOffset { .. } |
| | &AMode::FPOffset { .. } |
| | &AMode::NominalSPOffset { .. } |
| | &AMode::Const { .. } |
| | &AMode::RegOffset { .. } => { |
| panic!("Should not see {:?} here!", mem) |
| } |
| } |
| } |
| |
| &Inst::Store8 { rd, ref mem, flags } |
| | &Inst::Store16 { rd, ref mem, flags } |
| | &Inst::Store32 { rd, ref mem, flags } |
| | &Inst::Store64 { rd, ref mem, flags } |
| | &Inst::FpuStore32 { rd, ref mem, flags } |
| | &Inst::FpuStore64 { rd, ref mem, flags } |
| | &Inst::FpuStore128 { rd, ref mem, flags } => { |
| let rd = allocs.next(rd); |
| let mem = mem.with_allocs(&mut allocs); |
| let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state); |
| |
| for inst in mem_insts.into_iter() { |
| inst.emit(&[], sink, emit_info, state); |
| } |
| |
| let (op, bits) = match self { |
| &Inst::Store8 { .. } => (0b0011100000, 8), |
| &Inst::Store16 { .. } => (0b0111100000, 16), |
| &Inst::Store32 { .. } => (0b1011100000, 32), |
| &Inst::Store64 { .. } => (0b1111100000, 64), |
| &Inst::FpuStore32 { .. } => (0b1011110000, 32), |
| &Inst::FpuStore64 { .. } => (0b1111110000, 64), |
| &Inst::FpuStore128 { .. } => (0b0011110010, 128), |
| _ => unreachable!(), |
| }; |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| // Register the offset at which the actual store instruction starts. |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| match &mem { |
| &AMode::Unscaled { rn, simm9 } => { |
| let reg = allocs.next(rn); |
| sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); |
| } |
| &AMode::UnsignedOffset { rn, uimm12 } => { |
| let reg = allocs.next(rn); |
| if uimm12.value() != 0 { |
| assert_eq!(bits, ty_bits(uimm12.scale_ty())); |
| } |
| sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd)); |
| } |
| &AMode::RegReg { rn, rm } => { |
| let r1 = allocs.next(rn); |
| let r2 = allocs.next(rm); |
| sink.put4(enc_ldst_reg( |
| op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, |
| )); |
| } |
| &AMode::RegScaled { rn, rm, .. } | &AMode::RegScaledExtended { rn, rm, .. } => { |
| let r1 = allocs.next(rn); |
| let r2 = allocs.next(rm); |
| let extendop = match &mem { |
| &AMode::RegScaled { .. } => None, |
| &AMode::RegScaledExtended { extendop, .. } => Some(extendop), |
| _ => unreachable!(), |
| }; |
| sink.put4(enc_ldst_reg( |
| op, r1, r2, /* scaled = */ true, extendop, rd, |
| )); |
| } |
| &AMode::RegExtended { rn, rm, extendop } => { |
| let r1 = allocs.next(rn); |
| let r2 = allocs.next(rm); |
| sink.put4(enc_ldst_reg( |
| op, |
| r1, |
| r2, |
| /* scaled = */ false, |
| Some(extendop), |
| rd, |
| )); |
| } |
| &AMode::Label { .. } => { |
| panic!("Store to a MemLabel not implemented!"); |
| } |
| &AMode::SPPreIndexed { simm9 } => { |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); |
| } |
| &AMode::SPPostIndexed { simm9 } => { |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); |
| } |
| // Eliminated by `mem_finalize()` above. |
| &AMode::SPOffset { .. } |
| | &AMode::FPOffset { .. } |
| | &AMode::NominalSPOffset { .. } |
| | &AMode::Const { .. } |
| | &AMode::RegOffset { .. } => { |
| panic!("Should not see {:?} here!", mem) |
| } |
| } |
| } |
| |
| &Inst::StoreP64 { |
| rt, |
| rt2, |
| ref mem, |
| flags, |
| } => { |
| let rt = allocs.next(rt); |
| let rt2 = allocs.next(rt2); |
| let mem = mem.with_allocs(&mut allocs); |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| // Register the offset at which the actual store instruction starts. |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| match &mem { |
| &PairAMode::SignedOffset { reg, simm7 } => { |
| assert_eq!(simm7.scale_ty, I64); |
| let reg = allocs.next(reg); |
| sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2)); |
| } |
| &PairAMode::SPPreIndexed { simm7 } => { |
| assert_eq!(simm7.scale_ty, I64); |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2)); |
| } |
| &PairAMode::SPPostIndexed { simm7 } => { |
| assert_eq!(simm7.scale_ty, I64); |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2)); |
| } |
| } |
| } |
| &Inst::LoadP64 { |
| rt, |
| rt2, |
| ref mem, |
| flags, |
| } => { |
| let rt = allocs.next(rt.to_reg()); |
| let rt2 = allocs.next(rt2.to_reg()); |
| let mem = mem.with_allocs(&mut allocs); |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| // Register the offset at which the actual load instruction starts. |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| match &mem { |
| &PairAMode::SignedOffset { reg, simm7 } => { |
| assert_eq!(simm7.scale_ty, I64); |
| let reg = allocs.next(reg); |
| sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2)); |
| } |
| &PairAMode::SPPreIndexed { simm7 } => { |
| assert_eq!(simm7.scale_ty, I64); |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2)); |
| } |
| &PairAMode::SPPostIndexed { simm7 } => { |
| assert_eq!(simm7.scale_ty, I64); |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2)); |
| } |
| } |
| } |
| &Inst::FpuLoadP64 { |
| rt, |
| rt2, |
| ref mem, |
| flags, |
| } |
| | &Inst::FpuLoadP128 { |
| rt, |
| rt2, |
| ref mem, |
| flags, |
| } => { |
| let rt = allocs.next(rt.to_reg()); |
| let rt2 = allocs.next(rt2.to_reg()); |
| let mem = mem.with_allocs(&mut allocs); |
| let srcloc = state.cur_srcloc(); |
| |
| if !srcloc.is_default() && !flags.notrap() { |
| // Register the offset at which the actual load instruction starts. |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| let opc = match self { |
| &Inst::FpuLoadP64 { .. } => 0b01, |
| &Inst::FpuLoadP128 { .. } => 0b10, |
| _ => unreachable!(), |
| }; |
| |
| match &mem { |
| &PairAMode::SignedOffset { reg, simm7 } => { |
| assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); |
| let reg = allocs.next(reg); |
| sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2)); |
| } |
| &PairAMode::SPPreIndexed { simm7 } => { |
| assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2)); |
| } |
| &PairAMode::SPPostIndexed { simm7 } => { |
| assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2)); |
| } |
| } |
| } |
| &Inst::FpuStoreP64 { |
| rt, |
| rt2, |
| ref mem, |
| flags, |
| } |
| | &Inst::FpuStoreP128 { |
| rt, |
| rt2, |
| ref mem, |
| flags, |
| } => { |
| let rt = allocs.next(rt); |
| let rt2 = allocs.next(rt2); |
| let mem = mem.with_allocs(&mut allocs); |
| let srcloc = state.cur_srcloc(); |
| |
| if !srcloc.is_default() && !flags.notrap() { |
| // Register the offset at which the actual store instruction starts. |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| let opc = match self { |
| &Inst::FpuStoreP64 { .. } => 0b01, |
| &Inst::FpuStoreP128 { .. } => 0b10, |
| _ => unreachable!(), |
| }; |
| |
| match &mem { |
| &PairAMode::SignedOffset { reg, simm7 } => { |
| assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); |
| let reg = allocs.next(reg); |
| sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2)); |
| } |
| &PairAMode::SPPreIndexed { simm7 } => { |
| assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2)); |
| } |
| &PairAMode::SPPostIndexed { simm7 } => { |
| assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); |
| let reg = stack_reg(); |
| sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2)); |
| } |
| } |
| } |
| &Inst::Mov { size, rd, rm } => { |
| let rd = allocs.next_writable(rd); |
| let rm = allocs.next(rm); |
| assert!(rd.to_reg().class() == rm.class()); |
| assert!(rm.class() == RegClass::Int); |
| |
| match size { |
| OperandSize::Size64 => { |
| // MOV to SP is interpreted as MOV to XZR instead. And our codegen |
| // should never MOV to XZR. |
| assert!(rd.to_reg() != stack_reg()); |
| |
| if rm == stack_reg() { |
| // We can't use ORR here, so use an `add rd, sp, #0` instead. |
| let imm12 = Imm12::maybe_from_u64(0).unwrap(); |
| sink.put4(enc_arith_rr_imm12( |
| 0b100_10001, |
| imm12.shift_bits(), |
| imm12.imm_bits(), |
| rm, |
| rd, |
| )); |
| } else { |
| // Encoded as ORR rd, rm, zero. |
| sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm)); |
| } |
| } |
| OperandSize::Size32 => { |
| // MOV to SP is interpreted as MOV to XZR instead. And our codegen |
| // should never MOV to XZR. |
| assert!(machreg_to_gpr(rd.to_reg()) != 31); |
| // Encoded as ORR rd, rm, zero. |
| sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm)); |
| } |
| } |
| } |
| &Inst::MovFromPReg { rd, rm } => { |
| let rd = allocs.next_writable(rd); |
| allocs.next_fixed_nonallocatable(rm); |
| let rm: Reg = rm.into(); |
| debug_assert!([ |
| regs::fp_reg(), |
| regs::stack_reg(), |
| regs::link_reg(), |
| regs::pinned_reg() |
| ] |
| .contains(&rm)); |
| assert!(rm.class() == RegClass::Int); |
| assert!(rd.to_reg().class() == rm.class()); |
| let size = OperandSize::Size64; |
| Inst::Mov { size, rd, rm }.emit(&[], sink, emit_info, state); |
| } |
| &Inst::MovToPReg { rd, rm } => { |
| allocs.next_fixed_nonallocatable(rd); |
| let rd: Writable<Reg> = Writable::from_reg(rd.into()); |
| let rm = allocs.next(rm); |
| debug_assert!([ |
| regs::fp_reg(), |
| regs::stack_reg(), |
| regs::link_reg(), |
| regs::pinned_reg() |
| ] |
| .contains(&rd.to_reg())); |
| assert!(rd.to_reg().class() == RegClass::Int); |
| assert!(rm.class() == rd.to_reg().class()); |
| let size = OperandSize::Size64; |
| Inst::Mov { size, rd, rm }.emit(&[], sink, emit_info, state); |
| } |
| &Inst::MovWide { op, rd, imm, size } => { |
| let rd = allocs.next_writable(rd); |
| sink.put4(enc_move_wide(op, rd, imm, size)); |
| } |
| &Inst::MovK { rd, rn, imm, size } => { |
| let rn = allocs.next(rn); |
| let rd = allocs.next_writable(rd); |
| debug_assert_eq!(rn, rd.to_reg()); |
| sink.put4(enc_movk(rd, imm, size)); |
| } |
| &Inst::CSel { rd, rn, rm, cond } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| sink.put4(enc_csel(rd, rn, rm, cond, 0, 0)); |
| } |
| &Inst::CSNeg { rd, rn, rm, cond } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| sink.put4(enc_csel(rd, rn, rm, cond, 1, 1)); |
| } |
| &Inst::CSet { rd, cond } => { |
| let rd = allocs.next_writable(rd); |
| sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1)); |
| } |
| &Inst::CSetm { rd, cond } => { |
| let rd = allocs.next_writable(rd); |
| sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0)); |
| } |
| &Inst::CCmp { |
| size, |
| rn, |
| rm, |
| nzcv, |
| cond, |
| } => { |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| sink.put4(enc_ccmp(size, rn, rm, nzcv, cond)); |
| } |
| &Inst::CCmpImm { |
| size, |
| rn, |
| imm, |
| nzcv, |
| cond, |
| } => { |
| let rn = allocs.next(rn); |
| sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond)); |
| } |
| &Inst::AtomicRMW { |
| ty, |
| op, |
| rs, |
| rt, |
| rn, |
| flags, |
| } => { |
| let rs = allocs.next(rs); |
| let rt = allocs.next_writable(rt); |
| let rn = allocs.next(rn); |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| sink.put4(enc_acq_rel(ty, op, rs, rt, rn)); |
| } |
| &Inst::AtomicRMWLoop { ty, op, flags, .. } => { |
| /* Emit this: |
| again: |
| ldaxr{,b,h} x/w27, [x25] |
| // maybe sign extend |
| op x28, x27, x26 // op is add,sub,and,orr,eor |
| stlxr{,b,h} w24, x/w28, [x25] |
| cbnz x24, again |
| |
| Operand conventions: |
| IN: x25 (addr), x26 (2nd arg for op) |
| OUT: x27 (old value), x24 (trashed), x28 (trashed) |
| |
| It is unfortunate that, per the ARM documentation, x28 cannot be used for |
| both the store-data and success-flag operands of stlxr. This causes the |
| instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24 |
| instead for the success-flag. |
| */ |
                // TODO: We should not hardcode registers here; a better approach
                // would be to pass scratch registers into the AtomicRMWLoop
                // pseudo-instruction and use those.
| let xzr = zero_reg(); |
| let x24 = xreg(24); |
| let x25 = xreg(25); |
| let x26 = xreg(26); |
| let x27 = xreg(27); |
| let x28 = xreg(28); |
| let x24wr = writable_xreg(24); |
| let x27wr = writable_xreg(27); |
| let x28wr = writable_xreg(28); |
| let again_label = sink.get_label(); |
| |
| // again: |
| sink.bind_label(again_label, &mut state.ctrl_plane); |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25] |
| let size = OperandSize::from_ty(ty); |
| let sign_ext = match op { |
| AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty { |
| I16 => Some((ExtendOp::SXTH, 16)), |
| I8 => Some((ExtendOp::SXTB, 8)), |
| _ => None, |
| }, |
| _ => None, |
| }; |
| |
| // sxt{b|h} the loaded result if necessary. |
                if let Some((_, from_bits)) = sign_ext {
| Inst::Extend { |
| rd: x27wr, |
| rn: x27, |
| signed: true, |
| from_bits, |
| to_bits: size.bits(), |
| } |
| .emit(&[], sink, emit_info, state); |
| } |
| |
| match op { |
| AtomicRMWLoopOp::Xchg => {} // do nothing |
| AtomicRMWLoopOp::Nand => { |
| // and x28, x27, x26 |
| // mvn x28, x28 |
| |
| Inst::AluRRR { |
| alu_op: ALUOp::And, |
| size, |
| rd: x28wr, |
| rn: x27, |
| rm: x26, |
| } |
| .emit(&[], sink, emit_info, state); |
| |
| Inst::AluRRR { |
| alu_op: ALUOp::OrrNot, |
| size, |
| rd: x28wr, |
| rn: xzr, |
| rm: x28, |
| } |
| .emit(&[], sink, emit_info, state); |
| } |
| AtomicRMWLoopOp::Umin |
| | AtomicRMWLoopOp::Umax |
| | AtomicRMWLoopOp::Smin |
| | AtomicRMWLoopOp::Smax => { |
| // cmp x27, x26 {?sxt} |
| // csel.op x28, x27, x26 |
| |
| let cond = match op { |
| AtomicRMWLoopOp::Umin => Cond::Lo, |
| AtomicRMWLoopOp::Umax => Cond::Hi, |
| AtomicRMWLoopOp::Smin => Cond::Lt, |
| AtomicRMWLoopOp::Smax => Cond::Gt, |
| _ => unreachable!(), |
| }; |
| |
                        if let Some((extendop, _)) = sign_ext {
| Inst::AluRRRExtend { |
| alu_op: ALUOp::SubS, |
| size, |
| rd: writable_zero_reg(), |
| rn: x27, |
| rm: x26, |
| extendop, |
| } |
| .emit(&[], sink, emit_info, state); |
| } else { |
| Inst::AluRRR { |
| alu_op: ALUOp::SubS, |
| size, |
| rd: writable_zero_reg(), |
| rn: x27, |
| rm: x26, |
| } |
| .emit(&[], sink, emit_info, state); |
| } |
| |
| Inst::CSel { |
| cond, |
| rd: x28wr, |
| rn: x27, |
| rm: x26, |
| } |
| .emit(&[], sink, emit_info, state); |
| } |
| _ => { |
| // add/sub/and/orr/eor x28, x27, x26 |
| let alu_op = match op { |
| AtomicRMWLoopOp::Add => ALUOp::Add, |
| AtomicRMWLoopOp::Sub => ALUOp::Sub, |
| AtomicRMWLoopOp::And => ALUOp::And, |
| AtomicRMWLoopOp::Orr => ALUOp::Orr, |
| AtomicRMWLoopOp::Eor => ALUOp::Eor, |
| AtomicRMWLoopOp::Nand |
| | AtomicRMWLoopOp::Umin |
| | AtomicRMWLoopOp::Umax |
| | AtomicRMWLoopOp::Smin |
| | AtomicRMWLoopOp::Smax |
| | AtomicRMWLoopOp::Xchg => unreachable!(), |
| }; |
| |
| Inst::AluRRR { |
| alu_op, |
| size, |
| rd: x28wr, |
| rn: x27, |
| rm: x26, |
| } |
| .emit(&[], sink, emit_info, state); |
| } |
| } |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| if op == AtomicRMWLoopOp::Xchg { |
| sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25] |
| } else { |
| sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25] |
| } |
| |
| // cbnz w24, again |
                // Note that we actually test x24, relying on the fact that the
                // 32-bit status result written by `stlxr` zero-extends into the
                // full register.
| let br_offset = sink.cur_offset(); |
| sink.put4(enc_conditional_br( |
| BranchTarget::Label(again_label), |
| CondBrKind::NotZero(x24), |
| &mut AllocationConsumer::default(), |
| )); |
| sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19); |
| } |
| &Inst::AtomicCAS { |
| rd, |
| rs, |
| rt, |
| rn, |
| ty, |
| flags, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rs = allocs.next(rs); |
| debug_assert_eq!(rd.to_reg(), rs); |
| let rt = allocs.next(rt); |
| let rn = allocs.next(rn); |
| let size = match ty { |
| I8 => 0b00, |
| I16 => 0b01, |
| I32 => 0b10, |
| I64 => 0b11, |
| _ => panic!("Unsupported type: {}", ty), |
| }; |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| sink.put4(enc_cas(size, rd, rt, rn)); |
| } |
| &Inst::AtomicCASLoop { ty, flags, .. } => { |
| /* Emit this: |
| again: |
| ldaxr{,b,h} x/w27, [x25] |
| cmp x27, x/w26 uxt{b,h} |
| b.ne out |
| stlxr{,b,h} w24, x/w28, [x25] |
| cbnz x24, again |
| out: |
| |
| Operand conventions: |
| IN: x25 (addr), x26 (expected value), x28 (replacement value) |
| OUT: x27 (old value), x24 (trashed) |
| */ |
| let x24 = xreg(24); |
| let x25 = xreg(25); |
| let x26 = xreg(26); |
| let x27 = xreg(27); |
| let x28 = xreg(28); |
| let xzrwr = writable_zero_reg(); |
| let x24wr = writable_xreg(24); |
| let x27wr = writable_xreg(27); |
| let again_label = sink.get_label(); |
| let out_label = sink.get_label(); |
| |
| // again: |
| sink.bind_label(again_label, &mut state.ctrl_plane); |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| // ldaxr x27, [x25] |
| sink.put4(enc_ldaxr(ty, x27wr, x25)); |
| |
                // The top 32 bits are zeroed by the `ldaxr`, so we don't have
                // to use UXTW; the x-form of the register suffices.
| let (bit21, extend_op) = match ty { |
| I8 => (0b1, 0b000000), |
| I16 => (0b1, 0b001000), |
| _ => (0b0, 0b000000), |
| }; |
| let bits_31_21 = 0b111_01011_000 | bit21; |
| // cmp x27, x26 (== subs xzr, x27, x26) |
| sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26)); |
| |
| // b.ne out |
| let br_out_offset = sink.cur_offset(); |
| sink.put4(enc_conditional_br( |
| BranchTarget::Label(out_label), |
| CondBrKind::Cond(Cond::Ne), |
| &mut AllocationConsumer::default(), |
| )); |
| sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19); |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25] |
| |
| // cbnz w24, again. |
                // Note that we actually test x24, relying on the fact that the
                // 32-bit status result written by `stlxr` zero-extends into the
                // full register.
| let br_again_offset = sink.cur_offset(); |
| sink.put4(enc_conditional_br( |
| BranchTarget::Label(again_label), |
| CondBrKind::NotZero(x24), |
| &mut AllocationConsumer::default(), |
| )); |
| sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19); |
| |
| // out: |
| sink.bind_label(out_label, &mut state.ctrl_plane); |
| } |
| &Inst::LoadAcquire { |
| access_ty, |
| rt, |
| rn, |
| flags, |
| } => { |
| let rn = allocs.next(rn); |
| let rt = allocs.next_writable(rt); |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| sink.put4(enc_ldar(access_ty, rt, rn)); |
| } |
| &Inst::StoreRelease { |
| access_ty, |
| rt, |
| rn, |
| flags, |
| } => { |
| let rn = allocs.next(rn); |
| let rt = allocs.next(rt); |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| sink.put4(enc_stlr(access_ty, rt, rn)); |
| } |
| &Inst::Fence {} => { |
| sink.put4(enc_dmb_ish()); // dmb ish |
| } |
| &Inst::Csdb {} => { |
| sink.put4(0xd503229f); |
| } |
| &Inst::FpuMove64 { rd, rn } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn)); |
| } |
| &Inst::FpuMove128 { rd, rn } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| sink.put4(enc_vecmov(/* 16b = */ true, rd, rn)); |
| } |
| &Inst::FpuMoveFromVec { rd, rn, idx, size } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (imm5, shift, mask) = match size.lane_size() { |
| ScalarSize::Size32 => (0b00100, 3, 0b011), |
| ScalarSize::Size64 => (0b01000, 4, 0b001), |
| _ => unimplemented!(), |
| }; |
| debug_assert_eq!(idx & mask, idx); |
| let imm5 = imm5 | ((idx as u32) << shift); |
| sink.put4( |
| 0b010_11110000_00000_000001_00000_00000 |
| | (imm5 << 16) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_vec(rd.to_reg()), |
| ); |
| } |
| &Inst::FpuExtend { rd, rn, size } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| sink.put4(enc_fpurr( |
| 0b000_11110_00_1_000000_10000 | (size.ftype() << 12), |
| rd, |
| rn, |
| )); |
| } |
| &Inst::FpuRR { |
| fpu_op, |
| size, |
| rd, |
| rn, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let top22 = match fpu_op { |
| FPUOp1::Abs => 0b000_11110_00_1_000001_10000, |
| FPUOp1::Neg => 0b000_11110_00_1_000010_10000, |
| FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000, |
| FPUOp1::Cvt32To64 => { |
| debug_assert_eq!(size, ScalarSize::Size32); |
| 0b000_11110_00_1_000101_10000 |
| } |
| FPUOp1::Cvt64To32 => { |
| debug_assert_eq!(size, ScalarSize::Size64); |
| 0b000_11110_01_1_000100_10000 |
| } |
| }; |
| let top22 = top22 | size.ftype() << 12; |
| sink.put4(enc_fpurr(top22, rd, rn)); |
| } |
| &Inst::FpuRRR { |
| fpu_op, |
| size, |
| rd, |
| rn, |
| rm, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let top22 = match fpu_op { |
| FPUOp2::Add => 0b000_11110_00_1_00000_001010, |
| FPUOp2::Sub => 0b000_11110_00_1_00000_001110, |
| FPUOp2::Mul => 0b000_11110_00_1_00000_000010, |
| FPUOp2::Div => 0b000_11110_00_1_00000_000110, |
| FPUOp2::Max => 0b000_11110_00_1_00000_010010, |
| FPUOp2::Min => 0b000_11110_00_1_00000_010110, |
| }; |
| let top22 = top22 | size.ftype() << 12; |
| sink.put4(enc_fpurrr(top22, rd, rn, rm)); |
| } |
| &Inst::FpuRRI { fpu_op, rd, rn } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| match fpu_op { |
| FPUOpRI::UShr32(imm) => { |
| debug_assert_eq!(32, imm.lane_size_in_bits); |
| sink.put4( |
| 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000 |
| | imm.enc() << 16 |
| | machreg_to_vec(rn) << 5 |
| | machreg_to_vec(rd.to_reg()), |
| ) |
| } |
| FPUOpRI::UShr64(imm) => { |
| debug_assert_eq!(64, imm.lane_size_in_bits); |
| sink.put4( |
| 0b01_1_111110_0000000_00_0_0_0_1_00000_00000 |
| | imm.enc() << 16 |
| | machreg_to_vec(rn) << 5 |
| | machreg_to_vec(rd.to_reg()), |
| ) |
| } |
| } |
| } |
| &Inst::FpuRRIMod { fpu_op, rd, ri, rn } => { |
| let rd = allocs.next_writable(rd); |
| let ri = allocs.next(ri); |
| let rn = allocs.next(rn); |
| debug_assert_eq!(rd.to_reg(), ri); |
| match fpu_op { |
| FPUOpRIMod::Sli64(imm) => { |
| debug_assert_eq!(64, imm.lane_size_in_bits); |
| sink.put4( |
| 0b01_1_111110_0000000_010101_00000_00000 |
| | imm.enc() << 16 |
| | machreg_to_vec(rn) << 5 |
| | machreg_to_vec(rd.to_reg()), |
| ) |
| } |
| FPUOpRIMod::Sli32(imm) => { |
| debug_assert_eq!(32, imm.lane_size_in_bits); |
| sink.put4( |
| 0b0_0_1_011110_0000000_010101_00000_00000 |
| | imm.enc() << 16 |
| | machreg_to_vec(rn) << 5 |
| | machreg_to_vec(rd.to_reg()), |
| ) |
| } |
| } |
| } |
| &Inst::FpuRRRR { |
| fpu_op, |
| size, |
| rd, |
| rn, |
| rm, |
| ra, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let ra = allocs.next(ra); |
| let top17 = match fpu_op { |
| FPUOp3::MAdd => 0b000_11111_00_0_00000_0, |
| }; |
| let top17 = top17 | size.ftype() << 7; |
| sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); |
| } |
| &Inst::VecMisc { op, rd, rn, size } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (q, enc_size) = size.enc_size(); |
| let (u, bits_12_16, size) = match op { |
| VecMisc2::Not => (0b1, 0b00101, 0b00), |
| VecMisc2::Neg => (0b1, 0b01011, enc_size), |
| VecMisc2::Abs => (0b0, 0b01011, enc_size), |
| VecMisc2::Fabs => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b0, 0b01111, enc_size) |
| } |
| VecMisc2::Fneg => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b1, 0b01111, enc_size) |
| } |
| VecMisc2::Fsqrt => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b1, 0b11111, enc_size) |
| } |
| VecMisc2::Rev16 => { |
| debug_assert_eq!(size, VectorSize::Size8x16); |
| (0b0, 0b00001, enc_size) |
| } |
| VecMisc2::Rev32 => { |
| debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8); |
| (0b1, 0b00000, enc_size) |
| } |
| VecMisc2::Rev64 => { |
| debug_assert!( |
| size == VectorSize::Size8x16 |
| || size == VectorSize::Size16x8 |
| || size == VectorSize::Size32x4 |
| ); |
| (0b0, 0b00000, enc_size) |
| } |
| VecMisc2::Fcvtzs => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b0, 0b11011, enc_size) |
| } |
| VecMisc2::Fcvtzu => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b1, 0b11011, enc_size) |
| } |
| VecMisc2::Scvtf => { |
| debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); |
| (0b0, 0b11101, enc_size & 0b1) |
| } |
| VecMisc2::Ucvtf => { |
| debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); |
| (0b1, 0b11101, enc_size & 0b1) |
| } |
| VecMisc2::Frintn => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b0, 0b11000, enc_size & 0b01) |
| } |
| VecMisc2::Frintz => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b0, 0b11001, enc_size) |
| } |
| VecMisc2::Frintm => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b0, 0b11001, enc_size & 0b01) |
| } |
| VecMisc2::Frintp => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b0, 0b11000, enc_size) |
| } |
| VecMisc2::Cnt => { |
| debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16); |
| (0b0, 0b00101, enc_size) |
| } |
| VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size), |
| VecMisc2::Cmge0 => (0b1, 0b01000, enc_size), |
| VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size), |
| VecMisc2::Cmle0 => (0b1, 0b01001, enc_size), |
| VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size), |
| VecMisc2::Fcmeq0 => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b0, 0b01101, enc_size) |
| } |
| VecMisc2::Fcmge0 => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b1, 0b01100, enc_size) |
| } |
| VecMisc2::Fcmgt0 => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b0, 0b01100, enc_size) |
| } |
| VecMisc2::Fcmle0 => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b1, 0b01101, enc_size) |
| } |
| VecMisc2::Fcmlt0 => { |
| debug_assert!( |
| size == VectorSize::Size32x2 |
| || size == VectorSize::Size32x4 |
| || size == VectorSize::Size64x2 |
| ); |
| (0b0, 0b01110, enc_size) |
| } |
| }; |
| sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn)); |
| } |
| &Inst::VecLanes { op, rd, rn, size } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (q, size) = match size { |
| VectorSize::Size8x8 => (0b0, 0b00), |
| VectorSize::Size8x16 => (0b1, 0b00), |
| VectorSize::Size16x4 => (0b0, 0b01), |
| VectorSize::Size16x8 => (0b1, 0b01), |
| VectorSize::Size32x4 => (0b1, 0b10), |
| _ => unreachable!(), |
| }; |
| let (u, opcode) = match op { |
| VecLanesOp::Uminv => (0b1, 0b11010), |
| VecLanesOp::Addv => (0b0, 0b11011), |
| }; |
| sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn)); |
| } |
| &Inst::VecShiftImm { |
| op, |
| rd, |
| rn, |
| size, |
| imm, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (is_shr, mut template) = match op { |
| VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32), |
| VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32), |
| VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32), |
| }; |
| if size.is_128bits() { |
| template |= 0b1 << 30; |
| } |
| let imm = imm as u32; |
| // Deal with the somewhat strange encoding scheme for, and limits on, |
| // the shift amount. |
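// For right shifts, immh:immb holds `2*lane_bits - imm` (e.g. an 8-bit
// lane shifted right by 8 encodes as 0b0001_000); for left shifts it
// holds `lane_bits + imm`. The highest set bit also marks the lane size.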
| let immh_immb = match (size.lane_size(), is_shr) { |
| (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => { |
| 0b_1000_000_u32 | (64 - imm) |
| } |
| (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => { |
| 0b_0100_000_u32 | (32 - imm) |
| } |
| (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => { |
| 0b_0010_000_u32 | (16 - imm) |
| } |
| (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => { |
| 0b_0001_000_u32 | (8 - imm) |
| } |
| (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm, |
| (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm, |
| (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm, |
| (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm, |
| _ => panic!( |
| "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}", |
| op, size, imm |
| ), |
| }; |
| let rn_enc = machreg_to_vec(rn); |
| let rd_enc = machreg_to_vec(rd.to_reg()); |
| sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc); |
| } |
| &Inst::VecShiftImmMod { |
| op, |
| rd, |
| ri, |
| rn, |
| size, |
| imm, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let ri = allocs.next(ri); |
| debug_assert_eq!(rd.to_reg(), ri); |
| let rn = allocs.next(rn); |
| let (is_shr, mut template) = match op { |
| VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32), |
| }; |
| if size.is_128bits() { |
| template |= 0b1 << 30; |
| } |
| let imm = imm as u32; |
| // Deal with the somewhat strange encoding scheme for, and limits on, |
| // the shift amount. |
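// Same immh:immb scheme as `VecShiftImm` above: right shifts encode
// `2*lane_bits - imm`, left shifts `lane_bits + imm`.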
| let immh_immb = match (size.lane_size(), is_shr) { |
| (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => { |
| 0b_1000_000_u32 | (64 - imm) |
| } |
| (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => { |
| 0b_0100_000_u32 | (32 - imm) |
| } |
| (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => { |
| 0b_0010_000_u32 | (16 - imm) |
| } |
| (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => { |
| 0b_0001_000_u32 | (8 - imm) |
| } |
| (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm, |
| (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm, |
| (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm, |
| (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm, |
| _ => panic!( |
| "aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {:?}, {:?}, {:?}", |
| op, size, imm |
| ), |
| }; |
| let rn_enc = machreg_to_vec(rn); |
| let rd_enc = machreg_to_vec(rd.to_reg()); |
| sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc); |
| } |
| &Inst::VecExtract { rd, rn, rm, imm4 } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| if imm4 < 16 { |
| let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32; |
| let rm_enc = machreg_to_vec(rm); |
| let rn_enc = machreg_to_vec(rn); |
| let rd_enc = machreg_to_vec(rd.to_reg()); |
| sink.put4( |
| template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc, |
| ); |
| } else { |
| panic!( |
| "aarch64: Inst::VecExtract: emit: invalid extract index {}", |
| imm4 |
| ); |
| } |
| } |
| &Inst::VecTbl { rd, rn, rm } => { |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let rd = allocs.next_writable(rd); |
| sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm)); |
| } |
| &Inst::VecTblExt { rd, ri, rn, rm } => { |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let rd = allocs.next_writable(rd); |
| let ri = allocs.next(ri); |
| debug_assert_eq!(rd.to_reg(), ri); |
| sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm)); |
| } |
| &Inst::VecTbl2 { rd, rn, rn2, rm } => { |
| let rn = allocs.next(rn); |
| let rn2 = allocs.next(rn2); |
| let rm = allocs.next(rm); |
| let rd = allocs.next_writable(rd); |
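// A two-register TBL requires consecutive table registers (modulo 32).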
| assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32); |
| sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm)); |
| } |
| &Inst::VecTbl2Ext { |
| rd, |
| ri, |
| rn, |
| rn2, |
| rm, |
| } => { |
| let rn = allocs.next(rn); |
| let rn2 = allocs.next(rn2); |
| let rm = allocs.next(rm); |
| let rd = allocs.next_writable(rd); |
| let ri = allocs.next(ri); |
| debug_assert_eq!(rd.to_reg(), ri); |
| assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32); |
| sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm)); |
| } |
| &Inst::FpuCmp { size, rn, rm } => { |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| sink.put4(enc_fcmp(size, rn, rm)); |
| } |
| &Inst::FpuToInt { op, rd, rn } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let top16 = match op { |
| // FCVTZS (32/32-bit) |
| FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000, |
| // FCVTZU (32/32-bit) |
| FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001, |
| // FCVTZS (32/64-bit) |
| FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000, |
| // FCVTZU (32/64-bit) |
| FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001, |
| // FCVTZS (64/32-bit) |
| FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000, |
| // FCVTZU (64/32-bit) |
| FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001, |
| // FCVTZS (64/64-bit) |
| FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000, |
| // FCVTZU (64/64-bit) |
| FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001, |
| }; |
| sink.put4(enc_fputoint(top16, rd, rn)); |
| } |
| &Inst::IntToFpu { op, rd, rn } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let top16 = match op { |
| // SCVTF (32/32-bit) |
| IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010, |
| // UCVTF (32/32-bit) |
| IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011, |
| // SCVTF (64/32-bit) |
| IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010, |
| // UCVTF (64/32-bit) |
| IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011, |
| // SCVTF (32/64-bit) |
| IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010, |
| // UCVTF (32/64-bit) |
| IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011, |
| // SCVTF (64/64-bit) |
| IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010, |
| // UCVTF (64/64-bit) |
| IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011, |
| }; |
| sink.put4(enc_inttofpu(top16, rd, rn)); |
| } |
| &Inst::FpuCSel32 { rd, rn, rm, cond } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32)); |
| } |
| &Inst::FpuCSel64 { rd, rn, rm, cond } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64)); |
| } |
| &Inst::FpuRound { op, rd, rn } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let top22 = match op { |
| FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000, |
| FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000, |
| FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000, |
| FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000, |
| FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000, |
| FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000, |
| FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000, |
| FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000, |
| }; |
| sink.put4(enc_fround(top22, rd, rn)); |
| } |
| &Inst::MovToFpu { rd, rn, size } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let template = match size { |
| ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000, |
| ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000, |
| _ => unreachable!(), |
| }; |
| sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())); |
| } |
| &Inst::FpuMoveFPImm { rd, imm, size } => { |
| let rd = allocs.next_writable(rd); |
| let size_code = match size { |
| ScalarSize::Size32 => 0b00, |
| ScalarSize::Size64 => 0b01, |
| _ => unimplemented!(), |
| }; |
| sink.put4( |
| 0b000_11110_00_1_00_000_000100_00000_00000 |
| | size_code << 22 |
| | ((imm.enc_bits() as u32) << 13) |
| | machreg_to_vec(rd.to_reg()), |
| ); |
| } |
| &Inst::MovToVec { |
| rd, |
| ri, |
| rn, |
| idx, |
| size, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let ri = allocs.next(ri); |
| debug_assert_eq!(rd.to_reg(), ri); |
| let rn = allocs.next(rn); |
| let (imm5, shift) = match size.lane_size() { |
| ScalarSize::Size8 => (0b00001, 1), |
| ScalarSize::Size16 => (0b00010, 2), |
| ScalarSize::Size32 => (0b00100, 3), |
| ScalarSize::Size64 => (0b01000, 4), |
| _ => unreachable!(), |
| }; |
| debug_assert_eq!(idx & (0b11111 >> shift), idx); |
| let imm5 = imm5 | ((idx as u32) << shift); |
| sink.put4( |
| 0b010_01110000_00000_0_0011_1_00000_00000 |
| | (imm5 << 16) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_vec(rd.to_reg()), |
| ); |
| } |
| &Inst::MovFromVec { rd, rn, idx, size } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (q, imm5, shift, mask) = match size { |
| ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111), |
| ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111), |
| ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011), |
| ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001), |
| _ => panic!("Unexpected scalar FP operand size: {:?}", size), |
| }; |
| debug_assert_eq!(idx & mask, idx); |
| let imm5 = imm5 | ((idx as u32) << shift); |
| sink.put4( |
| 0b000_01110000_00000_0_0111_1_00000_00000 |
| | (q << 30) |
| | (imm5 << 16) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_gpr(rd.to_reg()), |
| ); |
| } |
| &Inst::MovFromVecSigned { |
| rd, |
| rn, |
| idx, |
| size, |
| scalar_size, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (imm5, shift, half) = match size { |
| VectorSize::Size8x8 => (0b00001, 1, true), |
| VectorSize::Size8x16 => (0b00001, 1, false), |
| VectorSize::Size16x4 => (0b00010, 2, true), |
| VectorSize::Size16x8 => (0b00010, 2, false), |
| VectorSize::Size32x2 => { |
| debug_assert_ne!(scalar_size, OperandSize::Size32); |
| (0b00100, 3, true) |
| } |
| VectorSize::Size32x4 => { |
| debug_assert_ne!(scalar_size, OperandSize::Size32); |
| (0b00100, 3, false) |
| } |
| _ => panic!("Unexpected vector operand size"), |
| }; |
| debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx); |
| let imm5 = imm5 | ((idx as u32) << shift); |
| sink.put4( |
| 0b000_01110000_00000_0_0101_1_00000_00000 |
| | (scalar_size.is64() as u32) << 30 |
| | (imm5 << 16) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_gpr(rd.to_reg()), |
| ); |
| } |
| &Inst::VecDup { rd, rn, size } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let q = size.is_128bits() as u32; |
| let imm5 = match size.lane_size() { |
| ScalarSize::Size8 => 0b00001, |
| ScalarSize::Size16 => 0b00010, |
| ScalarSize::Size32 => 0b00100, |
| ScalarSize::Size64 => 0b01000, |
| _ => unreachable!(), |
| }; |
| sink.put4( |
| 0b0_0_0_01110000_00000_000011_00000_00000 |
| | (q << 30) |
| | (imm5 << 16) |
| | (machreg_to_gpr(rn) << 5) |
| | machreg_to_vec(rd.to_reg()), |
| ); |
| } |
| &Inst::VecDupFromFpu { rd, rn, size, lane } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let q = size.is_128bits() as u32; |
| let imm5 = match size.lane_size() { |
| ScalarSize::Size8 => { |
| assert!(lane < 16); |
| 0b00001 | (u32::from(lane) << 1) |
| } |
| ScalarSize::Size16 => { |
| assert!(lane < 8); |
| 0b00010 | (u32::from(lane) << 2) |
| } |
| ScalarSize::Size32 => { |
| assert!(lane < 4); |
| 0b00100 | (u32::from(lane) << 3) |
| } |
| ScalarSize::Size64 => { |
| assert!(lane < 2); |
| 0b01000 | (u32::from(lane) << 4) |
| } |
| _ => unimplemented!(), |
| }; |
| sink.put4( |
| 0b000_01110000_00000_000001_00000_00000 |
| | (q << 30) |
| | (imm5 << 16) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_vec(rd.to_reg()), |
| ); |
| } |
| &Inst::VecDupFPImm { rd, imm, size } => { |
| let rd = allocs.next_writable(rd); |
| let imm = imm.enc_bits(); |
| let op = match size.lane_size() { |
| ScalarSize::Size32 => 0, |
| ScalarSize::Size64 => 1, |
| _ => unimplemented!(), |
| }; |
| let q_op = op | ((size.is_128bits() as u32) << 1); |
| |
| sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm)); |
| } |
| &Inst::VecDupImm { |
| rd, |
| imm, |
| invert, |
| size, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let (imm, shift, shift_ones) = imm.value(); |
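// Map (imm, shift, shift_ones) onto the ASIMD modified-immediate op/cmode
// fields: 8- and 64-bit lanes use cmode 0b1110, 16-bit lanes the shifted
// form 0b10x0, 32-bit lanes 0b0xx0, or 0b110x when shifting in ones.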
| let (op, cmode) = match size.lane_size() { |
| ScalarSize::Size8 => { |
| assert!(!invert); |
| assert_eq!(shift, 0); |
| |
| (0, 0b1110) |
| } |
| ScalarSize::Size16 => { |
| let s = shift & 8; |
| |
| assert!(!shift_ones); |
| assert_eq!(s, shift); |
| |
| (invert as u32, 0b1000 | (s >> 2)) |
| } |
| ScalarSize::Size32 => { |
| if shift_ones { |
| assert!(shift == 8 || shift == 16); |
| |
| (invert as u32, 0b1100 | (shift >> 4)) |
| } else { |
| let s = shift & 24; |
| |
| assert_eq!(s, shift); |
| |
| (invert as u32, 0b0000 | (s >> 2)) |
| } |
| } |
| ScalarSize::Size64 => { |
| assert!(!invert); |
| assert_eq!(shift, 0); |
| |
| (1, 0b1110) |
| } |
| _ => unreachable!(), |
| }; |
| let q_op = op | ((size.is_128bits() as u32) << 1); |
| |
| sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm)); |
| } |
| &Inst::VecExtend { |
| t, |
| rd, |
| rn, |
| high_half, |
| lane_size, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
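// SXTL/UXTL are aliases of SSHLL/USHLL with a shift of zero: the set bit
// of `immh` encodes the source element width (half of `lane_size`), and
// `immb` stays clear.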
| let immh = match lane_size { |
| ScalarSize::Size16 => 0b001, |
| ScalarSize::Size32 => 0b010, |
| ScalarSize::Size64 => 0b100, |
| _ => panic!("Unexpected VecExtend to lane size of {:?}", lane_size), |
| }; |
| let u = match t { |
| VecExtendOp::Sxtl => 0b0, |
| VecExtendOp::Uxtl => 0b1, |
| }; |
| sink.put4( |
| 0b000_011110_0000_000_101001_00000_00000 |
| | ((high_half as u32) << 30) |
| | (u << 29) |
| | (immh << 19) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_vec(rd.to_reg()), |
| ); |
| } |
| &Inst::VecRRLong { |
| op, |
| rd, |
| rn, |
| high_half, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (u, size, bits_12_16) = match op { |
| VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111), |
| VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111), |
| VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011), |
| VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011), |
| VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011), |
| }; |
| |
| sink.put4(enc_vec_rr_misc( |
| ((high_half as u32) << 1) | u, |
| size, |
| bits_12_16, |
| rd, |
| rn, |
| )); |
| } |
| &Inst::VecRRNarrowLow { |
| op, |
| rd, |
| rn, |
| lane_size, |
| } |
| | &Inst::VecRRNarrowHigh { |
| op, |
| rd, |
| rn, |
| lane_size, |
| .. |
| } => { |
| let rn = allocs.next(rn); |
| let rd = allocs.next_writable(rd); |
| let high_half = match self { |
| &Inst::VecRRNarrowLow { .. } => false, |
| &Inst::VecRRNarrowHigh { .. } => true, |
| _ => unreachable!(), |
| }; |
| |
| let size = match lane_size { |
| ScalarSize::Size8 => 0b00, |
| ScalarSize::Size16 => 0b01, |
| ScalarSize::Size32 => 0b10, |
| _ => panic!("unsupported size: {:?}", lane_size), |
| }; |
| |
| // Floats use a single bit, to encode either half or single. |
| let size = match op { |
| VecRRNarrowOp::Fcvtn => size >> 1, |
| _ => size, |
| }; |
| |
| let (u, bits_12_16) = match op { |
| VecRRNarrowOp::Xtn => (0b0, 0b10010), |
| VecRRNarrowOp::Sqxtn => (0b0, 0b10100), |
| VecRRNarrowOp::Sqxtun => (0b1, 0b10010), |
| VecRRNarrowOp::Uqxtn => (0b1, 0b10100), |
| VecRRNarrowOp::Fcvtn => (0b0, 0b10110), |
| }; |
| |
| sink.put4(enc_vec_rr_misc( |
| ((high_half as u32) << 1) | u, |
| size, |
| bits_12_16, |
| rd, |
| rn, |
| )); |
| } |
| &Inst::VecMovElement { |
| rd, |
| ri, |
| rn, |
| dest_idx, |
| src_idx, |
| size, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let ri = allocs.next(ri); |
| debug_assert_eq!(rd.to_reg(), ri); |
| let rn = allocs.next(rn); |
| let (imm5, shift) = match size.lane_size() { |
| ScalarSize::Size8 => (0b00001, 1), |
| ScalarSize::Size16 => (0b00010, 2), |
| ScalarSize::Size32 => (0b00100, 3), |
| ScalarSize::Size64 => (0b01000, 4), |
| _ => unreachable!(), |
| }; |
| let mask = 0b11111 >> shift; |
| debug_assert_eq!(dest_idx & mask, dest_idx); |
| debug_assert_eq!(src_idx & mask, src_idx); |
| let imm4 = (src_idx as u32) << (shift - 1); |
| let imm5 = imm5 | ((dest_idx as u32) << shift); |
| sink.put4( |
| 0b011_01110000_00000_0_0000_1_00000_00000 |
| | (imm5 << 16) |
| | (imm4 << 11) |
| | (machreg_to_vec(rn) << 5) |
| | machreg_to_vec(rd.to_reg()), |
| ); |
| } |
| &Inst::VecRRPair { op, rd, rn } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let bits_12_16 = match op { |
| VecPairOp::Addp => 0b11011, |
| }; |
| |
| sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn)); |
| } |
| &Inst::VecRRRLong { |
| rd, |
| rn, |
| rm, |
| alu_op, |
| high_half, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let (u, size, bit14) = match alu_op { |
| VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1), |
| VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1), |
| VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1), |
| VecRRRLongOp::Umull8 => (0b1, 0b00, 0b1), |
| VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1), |
| VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1), |
| }; |
| sink.put4(enc_vec_rrr_long( |
| high_half as u32, |
| u, |
| size, |
| bit14, |
| rm, |
| rn, |
| rd, |
| )); |
| } |
| &Inst::VecRRRLongMod { |
| rd, |
| ri, |
| rn, |
| rm, |
| alu_op, |
| high_half, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let ri = allocs.next(ri); |
| debug_assert_eq!(rd.to_reg(), ri); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let (u, size, bit14) = match alu_op { |
| VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0), |
| VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0), |
| VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0), |
| }; |
| sink.put4(enc_vec_rrr_long( |
| high_half as u32, |
| u, |
| size, |
| bit14, |
| rm, |
| rn, |
| rd, |
| )); |
| } |
| &Inst::VecRRPairLong { op, rd, rn } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (u, size) = match op { |
| VecRRPairLongOp::Saddlp8 => (0b0, 0b0), |
| VecRRPairLongOp::Uaddlp8 => (0b1, 0b0), |
| VecRRPairLongOp::Saddlp16 => (0b0, 0b1), |
| VecRRPairLongOp::Uaddlp16 => (0b1, 0b1), |
| }; |
| |
| sink.put4(enc_vec_rr_pair_long(u, size, rd, rn)); |
| } |
| &Inst::VecRRR { |
| rd, |
| rn, |
| rm, |
| alu_op, |
| size, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let (q, enc_size) = size.enc_size(); |
| let is_float = match alu_op { |
| VecALUOp::Fcmeq |
| | VecALUOp::Fcmgt |
| | VecALUOp::Fcmge |
| | VecALUOp::Fadd |
| | VecALUOp::Fsub |
| | VecALUOp::Fdiv |
| | VecALUOp::Fmax |
| | VecALUOp::Fmin |
| | VecALUOp::Fmul => true, |
| _ => false, |
| }; |
| |
| let (top11, bit15_10) = match alu_op { |
| VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011), |
| VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011), |
| VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011), |
| VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011), |
| VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011), |
| VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111), |
| VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101), |
| VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101), |
| VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111), |
| VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001), |
| VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001), |
| VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001), |
| // The following logical instructions operate on bytes, so are not encoded differently |
| // for the different vector types. |
| VecALUOp::And => (0b000_01110_00_1, 0b000111), |
| VecALUOp::Bic => (0b000_01110_01_1, 0b000111), |
| VecALUOp::Orr => (0b000_01110_10_1, 0b000111), |
| VecALUOp::Eor => (0b001_01110_00_1, 0b000111), |
| VecALUOp::Umaxp => { |
| debug_assert_ne!(size, VectorSize::Size64x2); |
| |
| (0b001_01110_00_1 | enc_size << 1, 0b101001) |
| } |
| VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001), |
| VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001), |
| VecALUOp::Mul => { |
| debug_assert_ne!(size, VectorSize::Size64x2); |
| (0b000_01110_00_1 | enc_size << 1, 0b100111) |
| } |
| VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001), |
| VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001), |
| VecALUOp::Umin => { |
| debug_assert_ne!(size, VectorSize::Size64x2); |
| |
| (0b001_01110_00_1 | enc_size << 1, 0b011011) |
| } |
| VecALUOp::Smin => { |
| debug_assert_ne!(size, VectorSize::Size64x2); |
| |
| (0b000_01110_00_1 | enc_size << 1, 0b011011) |
| } |
| VecALUOp::Umax => { |
| debug_assert_ne!(size, VectorSize::Size64x2); |
| |
| (0b001_01110_00_1 | enc_size << 1, 0b011001) |
| } |
| VecALUOp::Smax => { |
| debug_assert_ne!(size, VectorSize::Size64x2); |
| |
| (0b000_01110_00_1 | enc_size << 1, 0b011001) |
| } |
| VecALUOp::Urhadd => { |
| debug_assert_ne!(size, VectorSize::Size64x2); |
| |
| (0b001_01110_00_1 | enc_size << 1, 0b000101) |
| } |
| VecALUOp::Fadd => (0b000_01110_00_1, 0b110101), |
| VecALUOp::Fsub => (0b000_01110_10_1, 0b110101), |
| VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111), |
| VecALUOp::Fmax => (0b000_01110_00_1, 0b111101), |
| VecALUOp::Fmin => (0b000_01110_10_1, 0b111101), |
| VecALUOp::Fmul => (0b001_01110_00_1, 0b110111), |
| VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111), |
| VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110), |
| VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110), |
| VecALUOp::Sqrdmulh => { |
| debug_assert!( |
| size.lane_size() == ScalarSize::Size16 |
| || size.lane_size() == ScalarSize::Size32 |
| ); |
| |
| (0b001_01110_00_1 | enc_size << 1, 0b101101) |
| } |
| VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110), |
| VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110), |
| VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010), |
| VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010), |
| }; |
| let top11 = if is_float { |
| top11 | size.enc_float_size() << 1 |
| } else { |
| top11 |
| }; |
| sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd)); |
| } |
| &Inst::VecRRRMod { |
| rd, |
| ri, |
| rn, |
| rm, |
| alu_op, |
| size, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let ri = allocs.next(ri); |
| debug_assert_eq!(rd.to_reg(), ri); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let (q, _enc_size) = size.enc_size(); |
| |
| let (top11, bit15_10) = match alu_op { |
| VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111), |
| VecALUModOp::Fmla => { |
| (0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011) |
| } |
| VecALUModOp::Fmls => { |
| (0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011) |
| } |
| }; |
| sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd)); |
| } |
| &Inst::VecFmlaElem { |
| rd, |
| ri, |
| rn, |
| rm, |
| alu_op, |
| size, |
| idx, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let ri = allocs.next(ri); |
| debug_assert_eq!(rd.to_reg(), ri); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| let idx = u32::from(idx); |
| |
| let (q, _size) = size.enc_size(); |
| let o2 = match alu_op { |
| VecALUModOp::Fmla => 0b0, |
| VecALUModOp::Fmls => 0b1, |
| _ => unreachable!(), |
| }; |
| |
| let (h, l) = match size { |
| VectorSize::Size32x4 => { |
| assert!(idx < 4); |
| (idx >> 1, idx & 1) |
| } |
| VectorSize::Size64x2 => { |
| assert!(idx < 2); |
| (idx, 0) |
| } |
| _ => unreachable!(), |
| }; |
| |
| let top11 = 0b000_011111_00 | (q << 9) | (size.enc_float_size() << 1) | l; |
| let bit15_10 = 0b000100 | (o2 << 4) | (h << 1); |
| sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd)); |
| } |
| &Inst::VecLoadReplicate { |
| rd, |
| rn, |
| size, |
| flags, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let (q, size) = size.enc_size(); |
| |
| let srcloc = state.cur_srcloc(); |
| if !srcloc.is_default() && !flags.notrap() { |
| // Register the offset at which the actual load instruction starts. |
| sink.add_trap(TrapCode::HeapOutOfBounds); |
| } |
| |
| sink.put4(enc_ldst_vec(q, size, rn, rd)); |
| } |
| &Inst::VecCSel { rd, rn, rm, cond } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let rm = allocs.next(rm); |
| /* Emit this: |
| b.cond else |
| mov rd, rm |
| b out |
| else: |
| mov rd, rn |
| out: |
| |
| Note, we could do better in the cases where rd == rn or rd == rm. |
| */ |
| let else_label = sink.get_label(); |
| let out_label = sink.get_label(); |
| |
| // b.cond else |
| let br_else_offset = sink.cur_offset(); |
| sink.put4(enc_conditional_br( |
| BranchTarget::Label(else_label), |
| CondBrKind::Cond(cond), |
| &mut AllocationConsumer::default(), |
| )); |
| sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19); |
| |
| // mov rd, rm |
| sink.put4(enc_vecmov(/* 16b = */ true, rd, rm)); |
| |
| // b out |
| let b_out_offset = sink.cur_offset(); |
| sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26); |
| sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label); |
| sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */)); |
| |
| // else: |
| sink.bind_label(else_label, &mut state.ctrl_plane); |
| |
| // mov rd, rn |
| sink.put4(enc_vecmov(/* 16b = */ true, rd, rn)); |
| |
| // out: |
| sink.bind_label(out_label, &mut state.ctrl_plane); |
| } |
| &Inst::MovToNZCV { rn } => { |
| let rn = allocs.next(rn); |
| sink.put4(0xd51b4200 | machreg_to_gpr(rn)); |
| } |
| &Inst::MovFromNZCV { rd } => { |
| let rd = allocs.next_writable(rd); |
| sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg())); |
| } |
| &Inst::Extend { |
| rd, |
| rn, |
| signed: false, |
| from_bits: 1, |
| to_bits, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| assert!(to_bits <= 64); |
| // Reduce zero-extend-from-1-bit to: |
| // - and rd, rn, #1 |
| // Note: This is special cased as UBFX may take more cycles |
| // than AND on smaller cores. |
| let imml = ImmLogic::maybe_from_u64(1, I32).unwrap(); |
| Inst::AluRRImmLogic { |
| alu_op: ALUOp::And, |
| size: OperandSize::Size32, |
| rd, |
| rn, |
| imml, |
| } |
| .emit(&[], sink, emit_info, state); |
| } |
| &Inst::Extend { |
| rd, |
| rn, |
| signed: false, |
| from_bits: 32, |
| to_bits: 64, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
| let mov = Inst::Mov { |
| size: OperandSize::Size32, |
| rd, |
| rm: rn, |
| }; |
| mov.emit(&[], sink, emit_info, state); |
| } |
| &Inst::Extend { |
| rd, |
| rn, |
| signed, |
| from_bits, |
| to_bits, |
| } => { |
| let rd = allocs.next_writable(rd); |
| let rn = allocs.next(rn); |
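// Sign-extension uses SBFM at the destination width; zero-extension can
// always use a 32-bit UBFM, since writing a W register zeroes the upper
// 32 bits.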
| let (opc, size) = if signed { |
| (0b00, OperandSize::from_bits(to_bits)) |
| } else { |
| (0b10, OperandSize::Size32) |
| }; |
| sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1)); |
| } |
| &Inst::Jump { ref dest } => { |
| let off = sink.cur_offset(); |
// If the jump target is a label, register the use so that a fixup can occur later.
| if let Some(l) = dest.as_label() { |
| sink.use_label_at_offset(off, l, LabelUse::Branch26); |
| sink.add_uncond_branch(off, off + 4, l); |
| } |
| // Emit the jump itself. |
| sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero())); |
| } |
| &Inst::Args { .. } | &Inst::Rets { .. } => { |
| // Nothing: this is a pseudoinstruction that serves |
| // only to constrain registers at a certain point. |
| } |
| &Inst::Ret {} => { |
| sink.put4(0xd65f03c0); |
| } |
| &Inst::AuthenticatedRet { key, is_hint } => { |
| let (op2, is_hint) = match key { |
| APIKey::AZ => (0b100, true), |
| APIKey::ASP => (0b101, is_hint), |
| APIKey::BZ => (0b110, true), |
| APIKey::BSP => (0b111, is_hint), |
| }; |
| |
| if is_hint { |
| sink.put4(key.enc_auti_hint()); |
| Inst::Ret {}.emit(&[], sink, emit_info, state); |
| } else { |
| sink.put4(0xd65f0bff | (op2 << 9)); // reta{key} |
| } |
| } |
| &Inst::Call { ref info } => { |
| if let Some(s) = state.take_stack_map() { |
| sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); |
| } |
| sink.add_reloc(Reloc::Arm64Call, &info.dest, 0); |
| sink.put4(enc_jump26(0b100101, 0)); |
| if info.opcode.is_call() { |
| sink.add_call_site(info.opcode); |
| } |
| |
| let callee_pop_size = i64::from(info.callee_pop_size); |
| state.virtual_sp_offset -= callee_pop_size; |
| trace!( |
| "call adjusts virtual sp offset by {callee_pop_size} -> {}", |
| state.virtual_sp_offset |
| ); |
| } |
| &Inst::CallInd { ref info } => { |
| if let Some(s) = state.take_stack_map() { |
| sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); |
| } |
| let rn = allocs.next(info.rn); |
| sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)); |
| if info.opcode.is_call() { |
| sink.add_call_site(info.opcode); |
| } |
| |
| let callee_pop_size = i64::from(info.callee_pop_size); |
| state.virtual_sp_offset -= callee_pop_size; |
| trace!( |
| "call adjusts virtual sp offset by {callee_pop_size} -> {}", |
| state.virtual_sp_offset |
| ); |
| } |
| &Inst::ReturnCall { |
| ref callee, |
| ref info, |
| } => { |
| emit_return_call_common_sequence(&mut allocs, sink, emit_info, state, info); |
| |
| // Note: this is not `Inst::Jump { .. }.emit(..)` because we |
| // have different metadata in this case: we don't have a label |
| // for the target, but rather a function relocation. |
| sink.add_reloc(Reloc::Arm64Call, &**callee, 0); |
| sink.put4(enc_jump26(0b000101, 0)); |
| sink.add_call_site(ir::Opcode::ReturnCall); |
| |
| // `emit_return_call_common_sequence` emits an island if |
| // necessary, so we can safely disable the worst-case-size check |
| // in this case. |
| start_off = sink.cur_offset(); |
| } |
| &Inst::ReturnCallInd { callee, ref info } => { |
| let callee = allocs.next(callee); |
| |
| emit_return_call_common_sequence(&mut allocs, sink, emit_info, state, info); |
| |
| Inst::IndirectBr { |
| rn: callee, |
| targets: vec![], |
| } |
| .emit(&[], sink, emit_info, state); |
| sink.add_call_site(ir::Opcode::ReturnCallIndirect); |
| |
| // `emit_return_call_common_sequence` emits an island if |
| // necessary, so we can safely disable the worst-case-size check |
| // in this case. |
| start_off = sink.cur_offset(); |
| } |
| &Inst::CondBr { |
| taken, |
| not_taken, |
| kind, |
| } => { |
| // Conditional part first. |
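// Record the inverted form of this branch so the MachBuffer's
// branch-folding pass can flip taken/not-taken edges when the taken
// target turns out to be the fallthrough block.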
| let cond_off = sink.cur_offset(); |
| if let Some(l) = taken.as_label() { |
| sink.use_label_at_offset(cond_off, l, LabelUse::Branch19); |
| let mut allocs_inv = allocs.clone(); |
| let inverted = |
| enc_conditional_br(taken, kind.invert(), &mut allocs_inv).to_le_bytes(); |
| sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]); |
| } |
| sink.put4(enc_conditional_br(taken, kind, &mut allocs)); |
| |
| // Unconditional part next. |
| let uncond_off = sink.cur_offset(); |
| if let Some(l) = not_taken.as_label() { |
| sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26); |
| sink.add_uncond_branch(uncond_off, uncond_off + 4, l); |
| } |
| sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero())); |
| } |
| &Inst::TrapIf { kind, trap_code } => { |
| let label = sink.defer_trap(trap_code, state.take_stack_map()); |
| // condbr KIND, LABEL |
| let off = sink.cur_offset(); |
| sink.put4(enc_conditional_br( |
| BranchTarget::Label(label), |
| kind, |
| &mut allocs, |
| )); |
| sink.use_label_at_offset(off, label, LabelUse::Branch19); |
| } |
| &Inst::IndirectBr { rn, .. } => { |
| let rn = allocs.next(rn); |
| sink.put4(enc_br(rn)); |
| } |
| &Inst::Nop0 => {} |
| &Inst::Nop4 => { |
| sink.put4(0xd503201f); |
| } |
| &Inst::Brk => { |
| sink.put4(0xd4200000); |
| } |
| &Inst::Udf { trap_code } => { |
| sink.add_trap(trap_code); |
| if let Some(s) = state.take_stack_map() { |
| sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); |
| } |
| sink.put_data(Inst::TRAP_OPCODE); |
| } |
| &Inst::Adr { rd, off } => { |
| let rd = allocs.next_writable(rd); |
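// The ADR immediate is a signed 21-bit byte offset (+/-1 MiB).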
| assert!(off > -(1 << 20)); |
| assert!(off < (1 << 20)); |
| sink.put4(enc_adr(off, rd)); |
| } |
| &Inst::Adrp { rd, off } => { |
| let rd = allocs.next_writable(rd); |
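// The ADRP immediate is a signed 21-bit offset counted in 4 KiB pages.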
| assert!(off > -(1 << 20)); |
| assert!(off < (1 << 20)); |
| sink.put4(enc_adrp(off, rd)); |
| } |
| &Inst::Word4 { data } => { |
| sink.put4(data); |
| } |
| &Inst::Word8 { data } => { |
| sink.put8(data); |
| } |
| &Inst::JTSequence { |
| ridx, |
| rtmp1, |
| rtmp2, |
| default, |
| ref targets, |
| .. |
| } => { |
| let ridx = allocs.next(ridx); |
| let rtmp1 = allocs.next_writable(rtmp1); |
| let rtmp2 = allocs.next_writable(rtmp2); |
| // This sequence is *one* instruction in the vcode, and is expanded only here at |
| // emission time, because we cannot allow the regalloc to insert spills/reloads in |
| // the middle; we depend on hardcoded PC-rel addressing below. |
| |
| // Branch to default when condition code from prior comparison indicates. |
| let br = enc_conditional_br( |
| BranchTarget::Label(default), |
| CondBrKind::Cond(Cond::Hs), |
| &mut AllocationConsumer::default(), |
| ); |
| |
| // No need to inform the sink's branch folding logic about this branch, because it |
| // will not be merged with any other branch, flipped, or elided (it is not preceded |
| // or succeeded by any other branch). Just emit it with the label use. |
| let default_br_offset = sink.cur_offset(); |
| sink.use_label_at_offset(default_br_offset, default, LabelUse::Branch19); |
| sink.put4(br); |
| |
| // Overwrite the index with a zero when the above |
| // branch misspeculates (Spectre mitigation). Save the |
| // resulting index in rtmp2. |
| let inst = Inst::CSel { |
| rd: rtmp2, |
| cond: Cond::Hs, |
| rn: zero_reg(), |
| rm: ridx, |
| }; |
| inst.emit(&[], sink, emit_info, state); |
| // Prevent any data value speculation. |
| Inst::Csdb.emit(&[], sink, emit_info, state); |
| |
| // Load address of jump table |
| let inst = Inst::Adr { rd: rtmp1, off: 16 }; |
| inst.emit(&[], sink, emit_info, state); |
| // Load value out of jump table |
| let inst = Inst::SLoad32 { |
| rd: rtmp2, |
| mem: AMode::reg_plus_reg_scaled_extended( |
| rtmp1.to_reg(), |
| rtmp2.to_reg(), |
| I32, |
| ExtendOp::UXTW, |
| ), |
| flags: MemFlags::trusted(), |
| }; |
| inst.emit(&[], sink, emit_info, state); |
| // Add base of jump table to jump-table-sourced block offset |
| let inst = Inst::AluRRR { |
| alu_op: ALUOp::Add, |
| size: OperandSize::Size64, |
| rd: rtmp1, |
| rn: rtmp1.to_reg(), |
| rm: rtmp2.to_reg(), |
| }; |
| inst.emit(&[], sink, emit_info, state); |
| // Branch to computed address. (`targets` here is only used for successor queries |
| // and is not needed for emission.) |
| let inst = Inst::IndirectBr { |
| rn: rtmp1.to_reg(), |
| targets: vec![], |
| }; |
| inst.emit(&[], sink, emit_info, state); |
| // Emit jump table (table of 32-bit offsets). |
| let jt_off = sink.cur_offset(); |
| for &target in targets.iter() { |
| let word_off = sink.cur_offset(); |
| // off_into_table is an addend here embedded in the label to be later patched |
| // at the end of codegen. The offset is initially relative to this jump table |
| // entry; with the extra addend, it'll be relative to the jump table's start, |
| // after patching. |
| let off_into_table = word_off - jt_off; |
| sink.use_label_at_offset(word_off, target, LabelUse::PCRel32); |
| sink.put4(off_into_table); |
| } |
| |
| // Lowering produces an EmitIsland before using a JTSequence, so we can safely |
| // disable the worst-case-size check in this case. |
| start_off = sink.cur_offset(); |
| } |
| &Inst::LoadExtName { |
| rd, |
| ref name, |
| offset, |
| } => { |
| let rd = allocs.next_writable(rd); |
| |
| if emit_info.0.is_pic() { |
| // See this CE Example for the variations of this with and without BTI & PAUTH |
| // https://godbolt.org/z/ncqjbbvvn |
| // |
| // Emit the following code: |
| // adrp rd, :got:X |
| // ldr rd, [rd, :got_lo12:X] |
| |
| // adrp rd, symbol |
| sink.add_reloc(Reloc::Aarch64AdrGotPage21, &**name, 0); |
| let inst = Inst::Adrp { rd, off: 0 }; |
| inst.emit(&[], sink, emit_info, state); |
| |
| // ldr rd, [rd, :got_lo12:X] |
| sink.add_reloc(Reloc::Aarch64Ld64GotLo12Nc, &**name, 0); |
| let inst = Inst::ULoad64 { |
| rd, |
| mem: AMode::reg(rd.to_reg()), |
| flags: MemFlags::trusted(), |
| }; |
| inst.emit(&[], sink, emit_info, state); |
| } else { |
| // With absolute offsets we set up a load from a preallocated space, and then jump |
| // over it. |
| // |
| // Emit the following code: |
| // ldr rd, #8 |
| // b #0x10 |
| // <8 byte space> |
| |
| let inst = Inst::ULoad64 { |
| rd, |
| mem: AMode::Label { |
| label: MemLabel::PCRel(8), |
| }, |
| flags: MemFlags::trusted(), |
| }; |
| inst.emit(&[], sink, emit_info, state); |
| let inst = Inst::Jump { |
| dest: BranchTarget::ResolvedOffset(12), |
| }; |
| inst.emit(&[], sink, emit_info, state); |
| sink.add_reloc(Reloc::Abs8, &**name, offset); |
| sink.put8(0); |
| } |
| } |
| &Inst::LoadAddr { rd, ref mem } => { |
| let rd = allocs.next_writable(rd); |
| let mem = mem.with_allocs(&mut allocs); |
| let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state); |
| for inst in mem_insts.into_iter() { |
| inst.emit(&[], sink, emit_info, state); |
| } |
| |
// Note: `mem` was already allocated via `with_allocs` above, and
// `mem_finalize` may have substituted real registers (e.g. the spill
// temporary), so use the registers directly here rather than consuming
// allocations a second time.
let (reg, index_reg, offset) = match mem {
AMode::RegExtended { rn, rm, extendop } => (rn, Some((rm, extendop)), 0),
AMode::Unscaled { rn, simm9 } => (rn, None, simm9.value()),
AMode::UnsignedOffset { rn, uimm12 } => (rn, None, uimm12.value() as i32),
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
};
| let abs_offset = if offset < 0 { |
| -offset as u64 |
| } else { |
| offset as u64 |
| }; |
| let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add }; |
| |
| if let Some((idx, extendop)) = index_reg { |
| let add = Inst::AluRRRExtend { |
| alu_op: ALUOp::Add, |
| size: OperandSize::Size64, |
| rd, |
| rn: reg, |
| rm: idx, |
| extendop, |
| }; |
| |
| add.emit(&[], sink, emit_info, state); |
| } else if offset == 0 { |
| if reg != rd.to_reg() { |
| let mov = Inst::Mov { |
| size: OperandSize::Size64, |
| rd, |
| rm: reg, |
| }; |
| |
| mov.emit(&[], sink, emit_info, state); |
| } |
| } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { |
| let add = Inst::AluRRImm12 { |
| alu_op, |
| size: OperandSize::Size64, |
| rd, |
| rn: reg, |
| imm12, |
| }; |
| add.emit(&[], sink, emit_info, state); |
| } else { |
| // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction |
| // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note |
| // that no other instructions will be inserted here (we're emitting directly), |
| // and a live range of `tmp2` should not span this instruction, so this use |
| // should otherwise be correct. |
| debug_assert!(rd.to_reg() != tmp2_reg()); |
| debug_assert!(reg != tmp2_reg()); |
| let tmp = writable_tmp2_reg(); |
| for insn in Inst::load_constant(tmp, abs_offset, &mut |_| tmp).into_iter() { |
| insn.emit(&[], sink, emit_info, state); |
| } |
| let add = Inst::AluRRR { |
| alu_op, |
| size: OperandSize::Size64, |
| rd, |
| rn: reg, |
| rm: tmp.to_reg(), |
| }; |
| add.emit(&[], sink, emit_info, state); |
| } |
| } |
| &Inst::Paci { key } => { |
| let (crm, op2) = match key { |
| APIKey::AZ => (0b0011, 0b000), |
| APIKey::ASP => (0b0011, 0b001), |
| APIKey::BZ => (0b0011, 0b010), |
| APIKey::BSP => (0b0011, 0b011), |
| }; |
| |
| sink.put4(0xd503211f | (crm << 8) | (op2 << 5)); |
| } |
| &Inst::Xpaclri => sink.put4(0xd50320ff), |
| &Inst::Bti { targets } => { |
| let targets = match targets { |
| BranchTargetType::None => 0b00, |
| BranchTargetType::C => 0b01, |
| BranchTargetType::J => 0b10, |
| BranchTargetType::JC => 0b11, |
| }; |
| |
| sink.put4(0xd503241f | targets << 6); |
| } |
| &Inst::VirtualSPOffsetAdj { offset } => { |
| trace!( |
| "virtual sp offset adjusted by {} -> {}", |
| offset, |
| state.virtual_sp_offset + offset, |
| ); |
| state.virtual_sp_offset += offset; |
| } |
| &Inst::EmitIsland { needed_space } => { |
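// Ask for 4 extra bytes: room for the unconditional branch emitted below
// to jump around the island.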
| if sink.island_needed(needed_space + 4) { |
| let jump_around_label = sink.get_label(); |
| let jmp = Inst::Jump { |
| dest: BranchTarget::Label(jump_around_label), |
| }; |
| jmp.emit(&[], sink, emit_info, state); |
| sink.emit_island(needed_space + 4, &mut state.ctrl_plane); |
| sink.bind_label(jump_around_label, &mut state.ctrl_plane); |
| } |
| } |
| |
| &Inst::ElfTlsGetAddr { ref symbol, rd } => { |
| let rd = allocs.next_writable(rd); |
| assert_eq!(xreg(0), rd.to_reg()); |
| |
// This is the instruction sequence that GCC emits for ELF GD TLS relocations on aarch64.
| // See: https://gcc.godbolt.org/z/KhMh5Gvra |
| |
| // adrp x0, <label> |
| sink.add_reloc(Reloc::Aarch64TlsGdAdrPage21, symbol, 0); |
| let inst = Inst::Adrp { rd, off: 0 }; |
| inst.emit(&[], sink, emit_info, state); |
| |
| // add x0, x0, <label> |
| sink.add_reloc(Reloc::Aarch64TlsGdAddLo12Nc, symbol, 0); |
| sink.put4(0x91000000); |
| |
| // bl __tls_get_addr |
| sink.add_reloc( |
| Reloc::Arm64Call, |
| &ExternalName::LibCall(LibCall::ElfTlsGetAddr), |
| 0, |
| ); |
| sink.put4(0x94000000); |
| |
| // nop |
| sink.put4(0xd503201f); |
| } |
| |
| &Inst::MachOTlsGetAddr { ref symbol, rd } => { |
// Each thread-local variable gets a descriptor. The first xword of the
// descriptor is a pointer to a function that takes the descriptor's
// address in x0; after the function returns, x0 contains the address of
// the thread-local variable.
| // |
| // what we want to emit is basically: |
| // |
| // adrp x0, <label>@TLVPPAGE ; Load the address of the page of the thread local variable pointer (TLVP) |
| // ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x0 |
| // ldr x1, [x0] ; Load the function pointer (the first part of the descriptor) |
| // blr x1 ; Call the function pointer with the descriptor address in x0 |
| // ; x0 now contains the TLV address |
| |
| let rd = allocs.next_writable(rd); |
| assert_eq!(xreg(0), rd.to_reg()); |
| let rtmp = writable_xreg(1); |
| |
| // adrp x0, <label>@TLVPPAGE |
| sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0); |
| sink.put4(0x90000000); |
| |
| // ldr x0, [x0, <label>@TLVPPAGEOFF] |
| sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0); |
| sink.put4(0xf9400000); |
| |
| // load [x0] into temp register |
| Inst::ULoad64 { |
| rd: rtmp, |
| mem: AMode::reg(rd.to_reg()), |
| flags: MemFlags::trusted(), |
| } |
| .emit(&[], sink, emit_info, state); |
| |
| // call function pointer in temp register |
| Inst::CallInd { |
| info: crate::isa::Box::new(CallIndInfo { |
| rn: rtmp.to_reg(), |
| uses: smallvec![], |
| defs: smallvec![], |
| clobbers: PRegSet::empty(), |
| opcode: Opcode::CallIndirect, |
| caller_callconv: CallConv::AppleAarch64, |
| callee_callconv: CallConv::AppleAarch64, |
| callee_pop_size: 0, |
| }), |
| } |
| .emit(&[], sink, emit_info, state); |
| } |
| |
| &Inst::Unwind { ref inst } => { |
| sink.add_unwind(inst.clone()); |
| } |
| |
| &Inst::DummyUse { .. } => {} |
| |
| &Inst::StackProbeLoop { start, end, step } => { |
| assert!(emit_info.0.enable_probestack()); |
| let start = allocs.next_writable(start); |
| let end = allocs.next(end); |
| |
// The loop generated here uses `start` as a counter register to
// count backwards until negating it exceeds `end`. In other
// words, `start` is an offset from `sp` that we are testing, and
// `end` is the maximum size we need to test. The loop looks like:
//
//     loop_start:
//         sub start, start, #step
//         str wzr, [sp, start]
//         cmn start, end
//         b.gt loop_start
//     loop_end:
| // |
| // Note that this loop cannot use the spilltmp and tmp2 |
| // registers as those are currently used as the input to this |
| // loop when generating the instruction. This means that some |
| // more flavorful address modes and lowerings need to be |
| // avoided. |
| // |
| // Perhaps someone more clever than I can figure out how to use |
| // `subs` or the like and skip the `cmn`, but I can't figure it |
| // out at this time. |
| |
| let loop_start = sink.get_label(); |
| sink.bind_label(loop_start, &mut state.ctrl_plane); |
| |
| Inst::AluRRImm12 { |
| alu_op: ALUOp::Sub, |
| size: OperandSize::Size64, |
| rd: start, |
| rn: start.to_reg(), |
| imm12: step, |
| } |
| .emit(&[], sink, emit_info, state); |
| Inst::Store32 { |
| rd: regs::zero_reg(), |
| mem: AMode::RegReg { |
| rn: regs::stack_reg(), |
| rm: start.to_reg(), |
| }, |
| flags: MemFlags::trusted(), |
| } |
| .emit(&[], sink, emit_info, state); |
| Inst::AluRRR { |
| alu_op: ALUOp::AddS, |
| size: OperandSize::Size64, |
| rd: regs::writable_zero_reg(), |
| rn: start.to_reg(), |
| rm: end, |
| } |
| .emit(&[], sink, emit_info, state); |
| |
| let loop_end = sink.get_label(); |
| Inst::CondBr { |
| taken: BranchTarget::Label(loop_start), |
| not_taken: BranchTarget::Label(loop_end), |
| kind: CondBrKind::Cond(Cond::Gt), |
| } |
| .emit(&[], sink, emit_info, state); |
| sink.bind_label(loop_end, &mut state.ctrl_plane); |
| } |
| } |
| |
| let end_off = sink.cur_offset(); |
| debug_assert!( |
| (end_off - start_off) <= Inst::worst_case_size() |
| || matches!(self, Inst::EmitIsland { .. }), |
| "Worst case size exceed for {:?}: {}", |
| self, |
| end_off - start_off |
| ); |
| |
| state.clear_post_insn(); |
| } |
| |
| fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String { |
| let mut allocs = AllocationConsumer::new(allocs); |
| self.print_with_state(state, &mut allocs) |
| } |
| } |
| |
| fn emit_return_call_common_sequence( |
| allocs: &mut AllocationConsumer<'_>, |
| sink: &mut MachBuffer<Inst>, |
| emit_info: &EmitInfo, |
| state: &mut EmitState, |
| info: &ReturnCallInfo, |
| ) { |
| for u in info.uses.iter() { |
| let _ = allocs.next(u.vreg); |
| } |
| |
| // We are emitting a dynamic number of instructions and might need an |
| // island. We emit four instructions regardless of how many stack arguments |
| // we have, and then two instructions per word of stack argument space. |
| let new_stack_words = info.new_stack_arg_size / 8; |
| let insts = 4 + 2 * new_stack_words; |
| let size_of_inst = 4; |
| let space_needed = insts * size_of_inst; |
| if sink.island_needed(space_needed) { |
| let jump_around_label = sink.get_label(); |
| let jmp = Inst::Jump { |
| dest: BranchTarget::Label(jump_around_label), |
| }; |
| jmp.emit(&[], sink, emit_info, state); |
| sink.emit_island(space_needed + 4, &mut state.ctrl_plane); |
| sink.bind_label(jump_around_label, &mut state.ctrl_plane); |
| } |
| |
| // Copy the new frame on top of our current frame. |
| // |
| // The current stack layout is the following: |
| // |
| // | ... | |
| // +---------------------+ |
| // | ... | |
| // | stack arguments | |
| // | ... | |
| // current | return address | |
| // frame | old FP | <-- FP |
| // | ... | |
| // | old stack slots | |
| // | ... | |
| // +---------------------+ |
| // | ... | |
| // new | new stack arguments | |
| // frame | ... | <-- SP |
| // +---------------------+ |
| // |
| // We need to restore the old FP, restore the return address from the stack |
| // to the link register, copy the new stack arguments over the old stack |
| // arguments, adjust SP to point to the new stack arguments, and then jump |
| // to the callee (which will push the old FP and RA again). Note that the |
| // actual jump happens outside this helper function. |
| |
| assert_eq!( |
| info.new_stack_arg_size % 8, |
| 0, |
| "size of new stack arguments must be 8-byte aligned" |
| ); |
| |
| // The delta from our frame pointer to the (eventual) stack pointer value |
| // when we jump to the tail callee. This is the difference in size of stack |
| // arguments as well as accounting for the two words we pushed onto the |
| // stack upon entry to this function (the return address and old frame |
| // pointer). |
| let fp_to_callee_sp = |
| i64::from(info.old_stack_arg_size) - i64::from(info.new_stack_arg_size) + 16; |
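// For example, with 16 bytes of old stack arguments and 32 bytes of new
// ones, fp_to_callee_sp = 16 - 32 + 16 = 0: the callee's SP lands exactly
// on the current FP, and the new arguments overwrite the saved FP/RA pair
// plus the old argument area.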
| |
| let tmp1 = regs::writable_spilltmp_reg(); |
| let tmp2 = regs::writable_tmp2_reg(); |
| |
| // Restore the return address to the link register, and load the old FP into |
| // a temporary register. |
| // |
| // We can't put the old FP into the FP register until after we copy the |
| // stack arguments into place, since that uses address modes that are |
| // relative to our current FP. |
| // |
| // Note that the FP is saved in the function prologue for all non-leaf |
| // functions, even when `preserve_frame_pointers=false`. Note also that |
| // `return_call` instructions make it so that a function is considered |
| // non-leaf. Therefore we always have an FP to restore here. |
| Inst::LoadP64 { |
| rt: tmp1, |
| rt2: writable_link_reg(), |
| mem: PairAMode::SignedOffset { |
| reg: regs::fp_reg(), |
| simm7: SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(), |
| }, |
| flags: MemFlags::trusted(), |
| } |
| .emit(&[], sink, emit_info, state); |
| |
| // Copy the new stack arguments over the old stack arguments. |
| for i in (0..new_stack_words).rev() { |
| // Load the `i`th new stack argument word from the temporary stack |
| // space. |
| Inst::ULoad64 { |
| rd: tmp2, |
| mem: AMode::SPOffset { |
| off: i64::from(i * 8), |
| ty: types::I64, |
| }, |
| flags: ir::MemFlags::trusted(), |
| } |
| .emit(&[], sink, emit_info, state); |
| |
| // Store it to its final destination on the stack, overwriting our |
| // current frame. |
| Inst::Store64 { |
| rd: tmp2.to_reg(), |
| mem: AMode::FPOffset { |
| off: fp_to_callee_sp + i64::from(i * 8), |
| ty: types::I64, |
| }, |
| flags: ir::MemFlags::trusted(), |
| } |
| .emit(&[], sink, emit_info, state); |
| } |
| |
| // Initialize the SP for the tail callee, deallocating the temporary stack |
| // argument space and our current frame at the same time. |
| let (off, alu_op) = if let Ok(off) = u64::try_from(fp_to_callee_sp) { |
| (off, ALUOp::Add) |
| } else { |
| let abs = fp_to_callee_sp.abs(); |
| let off = u64::try_from(abs).unwrap(); |
| (off, ALUOp::Sub) |
| }; |
| Inst::AluRRImm12 { |
| alu_op, |
| size: OperandSize::Size64, |
| rd: regs::writable_stack_reg(), |
| rn: regs::fp_reg(), |
| imm12: Imm12::maybe_from_u64(off).unwrap(), |
| } |
| .emit(&[], sink, emit_info, state); |
| |
| // Move the old FP value from the temporary into the FP register. |
| Inst::Mov { |
| size: OperandSize::Size64, |
| rd: regs::writable_fp_reg(), |
| rm: tmp1.to_reg(), |
| } |
| .emit(&[], sink, emit_info, state); |
| |
| state.virtual_sp_offset -= i64::from(info.new_stack_arg_size); |
| trace!( |
| "return_call[_ind] adjusts virtual sp offset by {} -> {}", |
| info.new_stack_arg_size, |
| state.virtual_sp_offset |
| ); |
| |
| if let Some(key) = info.key { |
| sink.put4(key.enc_auti_hint()); |
| } |
| } |