// blob: facef1b195662214bb53c74a31db9af0f11ed9b2 [file] [log] [blame]
//! LoongArch64 assembly backend
use core::arch::asm;
/// Round constants, one per group of rounds (these are the SHA-1 constants).
///
/// `compress` passes them to the assembly in registers `$a4`..`$a7`:
/// `K[0]` is used by `round0a!`/`round0b!` (rounds 0-19), `K[1]` by `round1!`
/// (rounds 20-39), `K[2]` by `round2!` (rounds 40-59) and `K[3]` by `round3!`
/// (rounds 60-79).
const K: [u32; 4] = [0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6];
/// Glues a whitespace-separated run of literal expressions (string literals,
/// integer literals, or macro calls expanding to them) into a single
/// `&'static str` via `concat!`.
///
/// It exists so that assembly fragments can be listed one after another
/// without writing a comma between every piece.
macro_rules! c {
    ($($piece:expr)*) => {
        concat!($($piece),*)
    };
}
/// Assembly text for one round that takes its message word straight from the
/// input block (used by `compress` for rounds 0-15).
///
/// * `$a`..`$e` - names of the registers currently holding the five working
///   variables; `$b` and `$e` are modified.
/// * `$i` - round index; it selects both the word of the block at `$a1` and
///   the 4-byte slot of the scratch buffer at `$sp`.
///
/// Uses `$t5` as scratch and adds the constant held in `$a4`.
macro_rules! round0a {
($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
c!(
// t5 = 32-bit word `$i` of the current block, as laid out in memory.
"ld.w $t5, $a1, (" $i " * 4);"
// Swap the bytes inside each halfword, then swap the two halfwords:
// together this reverses the byte order of the 32-bit word.
"revb.2h $t5, $t5;"
"rotri.w $t5, $t5, 16;"
// e += w[i]
"add.w " $e ", " $e ", $t5;"
// Keep w[i] in the scratch buffer for later `scheldule!` steps.
"st.w $t5, $sp, (" $i " * 4);"
// t5 = ((c ^ d) & b) ^ d, i.e. per bit: c where b is set, d otherwise.
"xor $t5, " $c "," $d ";"
"and $t5, $t5, " $b ";"
"xor $t5, $t5, " $d ";"
roundtail!($a, $b, $e, $i, "$a4")
)
};
}
/// Assembly text that derives message word `$i` from earlier words and adds it
/// to register `$e`. (The macro name is a misspelling of "schedule"; it is
/// kept because the round macros invoke it under this name.)
///
/// The 64-byte scratch area at `$sp` is treated as a 16-entry ring of 32-bit
/// words indexed modulo 16. The new word is
/// `rotl(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1)` and is written to slot
/// `i & 0xF`, which is the same slot that held `w[i-16]`.
///
/// Overwrites `$t5`..`$t8`; on exit `$t5` holds the new word.
macro_rules! scheldule {
($i:literal, $e:literal) => {
c!(
// Fetch the four earlier words from the ring buffer.
"ld.w $t5, $sp, (((" $i " - 3) & 0xF) * 4);"
"ld.w $t6, $sp, (((" $i " - 8) & 0xF) * 4);"
"ld.w $t7, $sp, (((" $i " - 14) & 0xF) * 4);"
"ld.w $t8, $sp, (((" $i " - 16) & 0xF) * 4);"
// XOR them together.
"xor $t5, $t5, $t6;"
"xor $t5, $t5, $t7;"
"xor $t5, $t5, $t8;"
// Rotate right by 31, which equals a rotate left by 1 on 32 bits.
"rotri.w $t5, $t5, 31;"
// e += w[i]
"add.w " $e "," $e ", $t5;"
// Store w[i] back into the ring buffer.
"st.w $t5, $sp, ((" $i " & 0xF) * 4);"
)
};
}
/// Assembly text for one round that uses the same mixing function and constant
/// (`$a4`) as `round0a!`, but obtains its message word from `scheldule!`
/// instead of the input block (used by `compress` for rounds 16-19).
///
/// `$a`..`$e` name the registers holding the five working variables and `$i`
/// is the round index. Overwrites `$t5`..`$t8`.
macro_rules! round0b {
($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
c!(
scheldule!($i, $e)
// t5 = ((c ^ d) & b) ^ d, i.e. per bit: c where b is set, d otherwise.
"xor $t5," $c "," $d ";"
"and $t5, $t5," $b ";"
"xor $t5, $t5," $d ";"
roundtail!($a, $b, $e, $i, "$a4")
)
};
}
/// Assembly text for one round whose mixing function is `b ^ c ^ d` and whose
/// constant is the one in `$a5` (used by `compress` for rounds 20-39).
///
/// `$a`..`$e` name the registers holding the five working variables and `$i`
/// is the round index. Overwrites `$t5`..`$t8`.
macro_rules! round1 {
($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
c!(
scheldule!($i, $e)
// t5 = b ^ c ^ d
"xor $t5," $b "," $c ";"
"xor $t5, $t5," $d ";"
roundtail!($a, $b, $e, $i, "$a5")
)
};
}
/// Assembly text for one round whose mixing function is the bitwise majority
/// of `b`, `c` and `d`, and whose constant is the one in `$a6` (used by
/// `compress` for rounds 40-59).
///
/// `$a`..`$e` name the registers holding the five working variables and `$i`
/// is the round index. Overwrites `$t5`..`$t8`.
macro_rules! round2 {
($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
c!(
scheldule!($i, $e)
// t5 = ((c | d) & b) | (c & d): a bit is set when at least two of
// b, c, d have it set. $t7 is free again after `scheldule!`.
"or $t5," $c "," $d ";"
"and $t5, $t5, " $b ";"
"and $t7," $c "," $d ";"
"or $t5, $t5, $t7;"
roundtail!($a, $b, $e, $i, "$a6")
)
};
}
/// Assembly text for one round whose mixing function is `b ^ c ^ d` (the same
/// as `round1!`) but whose constant is the one in `$a7` (used by `compress`
/// for rounds 60-79).
///
/// `$a`..`$e` name the registers holding the five working variables and `$i`
/// is the round index. Overwrites `$t5`..`$t8`.
macro_rules! round3 {
($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
c!(
scheldule!($i, $e)
// t5 = b ^ c ^ d
"xor $t5," $b "," $c ";"
"xor $t5, $t5," $d ";"
roundtail!($a, $b, $e, $i, "$a7")
)
};
}
/// Assembly text shared by every round macro: the final accumulation into `e`.
///
/// Expects `$t5` to hold the result of the round's mixing function and the
/// message word to have been added to `$e` already. It then performs
/// `b = rotl(b, 30)` and `e += t5 + k + rotl(a, 5)`.
///
/// * `$a`, `$b`, `$e` - register names of the corresponding working variables.
/// * `$i` - round index; accepted for symmetry with the callers but not used
///   in the emitted text.
/// * `$k` - name of the register holding the round constant.
///
/// Overwrites `$t5`.
macro_rules! roundtail {
($a:literal, $b:literal, $e:literal, $i:literal, $k:literal) => {
c!(
// Rotate right by 2 == rotate left by 30 on 32 bits.
"rotri.w " $b "," $b ", 2;"
// e += f(b, c, d)
"add.w " $e "," $e ", $t5;"
// e += k
"add.w " $e "," $e "," $k ";"
// Rotate right by 27 == rotate left by 5 on 32 bits; e += rotl(a, 5).
"rotri.w $t5," $a ", 27;"
"add.w " $e "," $e ", $t5;"
)
};
}
/// Runs the 80-round SHA-1 block compression over every 64-byte block in
/// `blocks`, updating the five-word `state` in place.
///
/// Returns without touching `state` when `blocks` is empty.
///
/// Register use inside the assembly:
/// * `$a0` - pointer to `state`
/// * `$a1` - pointer to the current block, advanced by 64 after each block
/// * `$a2` - number of blocks still to process
/// * `$a4`..`$a7` - the round constants `K[0]`..`K[3]`
/// * `$t0`..`$t4` - the five working variables
/// * `$t5`..`$t8` - scratch
///
/// 64 bytes are reserved below `$sp` for the 16-word message ring buffer used
/// by the round macros and released again before the block ends.
pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
// The assembly loop checks the block counter only after a block has been
// processed, so it must never be entered with a count of zero.
if blocks.is_empty() {
return;
}
// The assembly reads and writes `state` only at byte offsets 0..=16 (five
// words), reads 64 bytes per block for `blocks.len()` blocks starting at
// `blocks.as_ptr()`, and restores `$sp` to its entry value before finishing.
unsafe {
asm!(
// Allocate scratch stack space
"addi.d $sp, $sp, -64;",
// Load state
"ld.w $t0, $a0, 0",
"ld.w $t1, $a0, 4",
"ld.w $t2, $a0, 8",
"ld.w $t3, $a0, 12",
"ld.w $t4, $a0, 16",
// Start of the per-block loop.
"42:",
// Rather than moving values between registers after each round, the
// register names passed as a..e are rotated by one position per round;
// after every five rounds the assignment is back where it started.
// Rounds 0-15: message words come directly from the block.
round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 0),
round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 1),
round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 2),
round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 3),
round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 4),
round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 5),
round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 6),
round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 7),
round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 8),
round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 9),
round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 10),
round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 11),
round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 12),
round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 13),
round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 14),
round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 15),
// Rounds 16-19: same function and constant, message words now derived
// from the ring buffer.
round0b!("$t4", "$t0", "$t1", "$t2", "$t3", 16),
round0b!("$t3", "$t4", "$t0", "$t1", "$t2", 17),
round0b!("$t2", "$t3", "$t4", "$t0", "$t1", 18),
round0b!("$t1", "$t2", "$t3", "$t4", "$t0", 19),
// Rounds 20-39: b ^ c ^ d with the constant in $a5.
round1!("$t0", "$t1", "$t2", "$t3", "$t4", 20),
round1!("$t4", "$t0", "$t1", "$t2", "$t3", 21),
round1!("$t3", "$t4", "$t0", "$t1", "$t2", 22),
round1!("$t2", "$t3", "$t4", "$t0", "$t1", 23),
round1!("$t1", "$t2", "$t3", "$t4", "$t0", 24),
round1!("$t0", "$t1", "$t2", "$t3", "$t4", 25),
round1!("$t4", "$t0", "$t1", "$t2", "$t3", 26),
round1!("$t3", "$t4", "$t0", "$t1", "$t2", 27),
round1!("$t2", "$t3", "$t4", "$t0", "$t1", 28),
round1!("$t1", "$t2", "$t3", "$t4", "$t0", 29),
round1!("$t0", "$t1", "$t2", "$t3", "$t4", 30),
round1!("$t4", "$t0", "$t1", "$t2", "$t3", 31),
round1!("$t3", "$t4", "$t0", "$t1", "$t2", 32),
round1!("$t2", "$t3", "$t4", "$t0", "$t1", 33),
round1!("$t1", "$t2", "$t3", "$t4", "$t0", 34),
round1!("$t0", "$t1", "$t2", "$t3", "$t4", 35),
round1!("$t4", "$t0", "$t1", "$t2", "$t3", 36),
round1!("$t3", "$t4", "$t0", "$t1", "$t2", 37),
round1!("$t2", "$t3", "$t4", "$t0", "$t1", 38),
round1!("$t1", "$t2", "$t3", "$t4", "$t0", 39),
// Rounds 40-59: majority function with the constant in $a6.
round2!("$t0", "$t1", "$t2", "$t3", "$t4", 40),
round2!("$t4", "$t0", "$t1", "$t2", "$t3", 41),
round2!("$t3", "$t4", "$t0", "$t1", "$t2", 42),
round2!("$t2", "$t3", "$t4", "$t0", "$t1", 43),
round2!("$t1", "$t2", "$t3", "$t4", "$t0", 44),
round2!("$t0", "$t1", "$t2", "$t3", "$t4", 45),
round2!("$t4", "$t0", "$t1", "$t2", "$t3", 46),
round2!("$t3", "$t4", "$t0", "$t1", "$t2", 47),
round2!("$t2", "$t3", "$t4", "$t0", "$t1", 48),
round2!("$t1", "$t2", "$t3", "$t4", "$t0", 49),
round2!("$t0", "$t1", "$t2", "$t3", "$t4", 50),
round2!("$t4", "$t0", "$t1", "$t2", "$t3", 51),
round2!("$t3", "$t4", "$t0", "$t1", "$t2", 52),
round2!("$t2", "$t3", "$t4", "$t0", "$t1", 53),
round2!("$t1", "$t2", "$t3", "$t4", "$t0", 54),
round2!("$t0", "$t1", "$t2", "$t3", "$t4", 55),
round2!("$t4", "$t0", "$t1", "$t2", "$t3", 56),
round2!("$t3", "$t4", "$t0", "$t1", "$t2", 57),
round2!("$t2", "$t3", "$t4", "$t0", "$t1", 58),
round2!("$t1", "$t2", "$t3", "$t4", "$t0", 59),
// Rounds 60-79: b ^ c ^ d with the constant in $a7.
round3!("$t0", "$t1", "$t2", "$t3", "$t4", 60),
round3!("$t4", "$t0", "$t1", "$t2", "$t3", 61),
round3!("$t3", "$t4", "$t0", "$t1", "$t2", 62),
round3!("$t2", "$t3", "$t4", "$t0", "$t1", 63),
round3!("$t1", "$t2", "$t3", "$t4", "$t0", 64),
round3!("$t0", "$t1", "$t2", "$t3", "$t4", 65),
round3!("$t4", "$t0", "$t1", "$t2", "$t3", 66),
round3!("$t3", "$t4", "$t0", "$t1", "$t2", 67),
round3!("$t2", "$t3", "$t4", "$t0", "$t1", 68),
round3!("$t1", "$t2", "$t3", "$t4", "$t0", 69),
round3!("$t0", "$t1", "$t2", "$t3", "$t4", 70),
round3!("$t4", "$t0", "$t1", "$t2", "$t3", 71),
round3!("$t3", "$t4", "$t0", "$t1", "$t2", 72),
round3!("$t2", "$t3", "$t4", "$t0", "$t1", 73),
round3!("$t1", "$t2", "$t3", "$t4", "$t0", 74),
round3!("$t0", "$t1", "$t2", "$t3", "$t4", 75),
round3!("$t4", "$t0", "$t1", "$t2", "$t3", 76),
round3!("$t3", "$t4", "$t0", "$t1", "$t2", 77),
round3!("$t2", "$t3", "$t4", "$t0", "$t1", 78),
round3!("$t1", "$t2", "$t3", "$t4", "$t0", 79),
// Update state registers
// (add the state stored in memory to the working variables; $t5 is reused
// for the fifth word once its first value has been consumed)
"ld.w $t5, $a0, 0", // a
"ld.w $t6, $a0, 4", // b
"ld.w $t7, $a0, 8", // c
"ld.w $t8, $a0, 12", // d
"add.w $t0, $t0, $t5",
"ld.w $t5, $a0, 16", // e
"add.w $t1, $t1, $t6",
"add.w $t2, $t2, $t7",
"add.w $t3, $t3, $t8",
"add.w $t4, $t4, $t5",
// Save updated state
"st.w $t0, $a0, 0",
"st.w $t1, $a0, 4",
"st.w $t2, $a0, 8",
"st.w $t3, $a0, 12",
"st.w $t4, $a0, 16",
// Looping over blocks
// (advance to the next block, decrement the counter, repeat while non-zero)
"addi.d $a1, $a1, 64",
"addi.d $a2, $a2, -1",
"bnez $a2, 42b",
// Restore stack register
"addi.d $sp, $sp, 64",
// $a0 is only read by the assembly; $a1 and $a2 are modified, so they are
// declared as in/out with the final values discarded.
in("$a0") state,
inout("$a1") blocks.as_ptr() => _,
inout("$a2") blocks.len() => _,
in("$a4") K[0],
in("$a5") K[1],
in("$a6") K[2],
in("$a7") K[3],
// Clobbers
out("$t0") _,
out("$t1") _,
out("$t2") _,
out("$t3") _,
out("$t4") _,
out("$t5") _,
out("$t6") _,
out("$t7") _,
out("$t8") _,
options(preserves_flags),
);
}
}