blob: 37c2063aabc6939f4829e8670dca04533ac89834 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0 OR MIT
// Atomic{I,U}128 implementation on s390x.
//
// s390x supports 128-bit atomic load/store/cmpxchg:
// https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc
//
// LLVM's minimal supported architecture level is z10:
// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/SystemZ/SystemZProcessors.td)
// This does not appear to have changed since the current s390x backend was added in LLVM 3.3:
// https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db
//
// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
// this module and use intrinsics.rs instead.
//
// Refs:
// - z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf
// - z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary
// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
//
// Generated asm:
// - s390x https://godbolt.org/z/b11znnEh4
// - s390x (z196) https://godbolt.org/z/s5n9PGcv6
// - s390x (z15) https://godbolt.org/z/Wf49h7bPf
include!("macros.rs");
use core::{arch::asm, sync::atomic::Ordering};
use crate::utils::{Pair, U128};
// Use distinct operands on z196 or later, otherwise split to lgr and $op.
#[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))]
macro_rules! distinct_op {
($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!($op, "k ", $a0, ", ", $a1, ", ", $a2)
};
}
#[cfg(not(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops")))]
macro_rules! distinct_op {
($op:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!("lgr ", $a0, ", ", $a1, "\n", $op, " ", $a0, ", ", $a2)
};
}
// Use selgr$cond on z15 or later, otherwise split to locgr$cond and $op.
#[cfg(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
))]
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
macro_rules! select_op {
($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!("selgr", $cond, " ", $a0, ", ", $a1, ", ", $a2)
};
}
#[cfg(not(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
)))]
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
macro_rules! select_op {
($cond:tt, $a0:tt, $a1:tt, $a2:tt) => {
concat!("lgr ", $a0, ", ", $a2, "\n", "locgr", $cond, " ", $a0, ", ", $a1)
};
}
#[inline]
unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 {
debug_assert!(src as usize % 16 == 0);
// SAFETY: the caller must uphold the safety contract.
unsafe {
// atomic load is always SeqCst.
let (out_hi, out_lo);
asm!(
"lpq %r0, 0({src})",
src = in(reg) ptr_reg!(src),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") out_hi,
out("r1") out_lo,
options(nostack, preserves_flags),
);
U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole
}
}
#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
debug_assert!(dst as usize % 16 == 0);
// SAFETY: the caller must uphold the safety contract.
unsafe {
let val = U128 { whole: val };
macro_rules! atomic_store {
($fence:tt) => {
asm!(
"stpq %r0, 0({dst})",
$fence,
dst = in(reg) ptr_reg!(dst),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
in("r0") val.pair.hi,
in("r1") val.pair.lo,
options(nostack, preserves_flags),
)
};
}
match order {
// Relaxed and Release stores are equivalent.
Ordering::Relaxed | Ordering::Release => atomic_store!(""),
// bcr 14,0 (fast-BCR-serialization) requires z196 or later.
#[cfg(any(
target_feature = "fast-serialization",
portable_atomic_target_feature = "fast-serialization",
))]
Ordering::SeqCst => atomic_store!("bcr 14, 0"),
#[cfg(not(any(
target_feature = "fast-serialization",
portable_atomic_target_feature = "fast-serialization",
)))]
Ordering::SeqCst => atomic_store!("bcr 15, 0"),
_ => unreachable!("{:?}", order),
}
}
}
#[inline]
unsafe fn atomic_compare_exchange(
dst: *mut u128,
old: u128,
new: u128,
_success: Ordering,
_failure: Ordering,
) -> Result<u128, u128> {
debug_assert!(dst as usize % 16 == 0);
// SAFETY: the caller must uphold the safety contract.
let prev = unsafe {
// atomic CAS is always SeqCst.
let old = U128 { whole: old };
let new = U128 { whole: new };
let (prev_hi, prev_lo);
asm!(
"cdsg %r0, %r12, 0({dst})",
dst = in(reg) ptr_reg!(dst),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
inout("r0") old.pair.hi => prev_hi,
inout("r1") old.pair.lo => prev_lo,
in("r12") new.pair.hi,
in("r13") new.pair.lo,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
};
if prev == old {
Ok(prev)
} else {
Err(prev)
}
}
// cdsg is always strong.
use atomic_compare_exchange as atomic_compare_exchange_weak;
#[cfg(not(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
)))]
#[inline(always)]
unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128
where
F: FnMut(u128) -> u128,
{
// SAFETY: the caller must uphold the safety contract.
unsafe {
// This is a private function and all instances of `f` only operate on the value
// loaded, so there is no need to synchronize the first load/failed CAS.
let mut prev = atomic_load(dst, Ordering::Relaxed);
loop {
let next = f(prev);
match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) {
Ok(x) => return x,
Err(x) => prev = x,
}
}
}
}
#[inline]
unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
// SAFETY: the caller must uphold the safety contract.
//
// We could use atomic_update here, but using an inline assembly allows omitting
// the comparison of results and the storing/comparing of condition flags.
//
// Do not use atomic_rmw_cas_3 because it needs extra LGR to implement swap.
unsafe {
// atomic swap is always SeqCst.
let val = U128 { whole: val };
let (mut prev_hi, mut prev_lo);
asm!(
"lpq %r0, 0({dst})",
"2:",
"cdsg %r0, %r12, 0({dst})",
"jl 2b",
dst = in(reg) ptr_reg!(dst),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") prev_hi,
out("r1") prev_lo,
in("r12") val.pair.hi,
in("r13") val.pair.lo,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
}
}
/// Atomic RMW by CAS loop (3 arguments)
/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - val_hi/val_lo pair: val argument (read-only for `$op`)
/// - r0/r1 pair: previous value loaded (read-only for `$op`)
/// - r12/r13 pair: new value that will be stored
// We could use atomic_update here, but using an inline assembly allows omitting
// the comparison of results and the storing/comparing of condition flags.
macro_rules! atomic_rmw_cas_3 {
($name:ident, [$($reg:tt)*], $($op:tt)*) => {
#[inline]
unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
// SAFETY: the caller must uphold the safety contract.
unsafe {
// atomic RMW is always SeqCst.
let val = U128 { whole: val };
let (mut prev_hi, mut prev_lo);
asm!(
"lpq %r0, 0({dst})",
"2:",
$($op)*
"cdsg %r0, %r12, 0({dst})",
"jl 2b",
dst = in(reg) ptr_reg!(dst),
val_hi = in(reg) val.pair.hi,
val_lo = in(reg) val.pair.lo,
$($reg)*
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") prev_hi,
out("r1") prev_lo,
out("r12") _,
out("r13") _,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
}
}
};
}
/// Atomic RMW by CAS loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - r0/r1 pair: previous value loaded (read-only for `$op`)
/// - r12/r13 pair: new value that will be stored
// We could use atomic_update here, but using an inline assembly allows omitting
// the comparison of results and the storing/comparing of condition flags.
macro_rules! atomic_rmw_cas_2 {
($name:ident, $($op:tt)*) => {
#[inline]
unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
// SAFETY: the caller must uphold the safety contract.
unsafe {
// atomic RMW is always SeqCst.
let (mut prev_hi, mut prev_lo);
asm!(
"lpq %r0, 0({dst})",
"2:",
$($op)*
"cdsg %r0, %r12, 0({dst})",
"jl 2b",
dst = in(reg) ptr_reg!(dst),
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
out("r0") prev_hi,
out("r1") prev_lo,
out("r12") _,
out("r13") _,
// Do not use `preserves_flags` because CDSG modifies the condition code.
options(nostack),
);
U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole
}
}
};
}
atomic_rmw_cas_3! {
atomic_add, [],
distinct_op!("algr", "%r13", "%r1", "{val_lo}"),
"lgr %r12, %r0",
"alcgr %r12, {val_hi}",
}
atomic_rmw_cas_3! {
atomic_sub, [],
distinct_op!("slgr", "%r13", "%r1", "{val_lo}"),
"lgr %r12, %r0",
"slbgr %r12, {val_hi}",
}
atomic_rmw_cas_3! {
atomic_and, [],
distinct_op!("ngr", "%r13", "%r1", "{val_lo}"),
distinct_op!("ngr", "%r12", "%r0", "{val_hi}"),
}
// Use nngrk on z15 or later.
#[cfg(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
))]
atomic_rmw_cas_3! {
atomic_nand, [],
"nngrk %r13, %r1, {val_lo}",
"nngrk %r12, %r0, {val_hi}",
}
#[cfg(not(any(
target_feature = "miscellaneous-extensions-3",
portable_atomic_target_feature = "miscellaneous-extensions-3",
)))]
atomic_rmw_cas_3! {
atomic_nand, [],
distinct_op!("ngr", "%r13", "%r1", "{val_lo}"),
"xihf %r13, 4294967295",
"xilf %r13, 4294967295",
distinct_op!("ngr", "%r12", "%r0", "{val_hi}"),
"xihf %r12, 4294967295",
"xilf %r12, 4294967295",
}
atomic_rmw_cas_3! {
atomic_or, [],
distinct_op!("ogr", "%r13", "%r1", "{val_lo}"),
distinct_op!("ogr", "%r12", "%r0", "{val_hi}"),
}
atomic_rmw_cas_3! {
atomic_xor, [],
distinct_op!("xgr", "%r13", "%r1", "{val_lo}"),
distinct_op!("xgr", "%r12", "%r0", "{val_hi}"),
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_max, [],
"clgr %r1, {val_lo}",
select_op!("h", "%r12", "%r1", "{val_lo}"),
"cgr %r0, {val_hi}",
select_op!("h", "%r13", "%r1", "{val_lo}"),
"locgre %r13, %r12",
select_op!("h", "%r12", "%r0", "{val_hi}"),
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_umax, [tmp = out(reg) _,],
"clgr %r1, {val_lo}",
select_op!("h", "{tmp}", "%r1", "{val_lo}"),
"clgr %r0, {val_hi}",
select_op!("h", "%r12", "%r0", "{val_hi}"),
select_op!("h", "%r13", "%r1", "{val_lo}"),
"cgr %r0, {val_hi}",
"locgre %r13, {tmp}",
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_min, [],
"clgr %r1, {val_lo}",
select_op!("l", "%r12", "%r1", "{val_lo}"),
"cgr %r0, {val_hi}",
select_op!("l", "%r13", "%r1", "{val_lo}"),
"locgre %r13, %r12",
select_op!("l", "%r12", "%r0", "{val_hi}"),
}
#[cfg(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
))]
atomic_rmw_cas_3! {
atomic_umin, [tmp = out(reg) _,],
"clgr %r1, {val_lo}",
select_op!("l", "{tmp}", "%r1", "{val_lo}"),
"clgr %r0, {val_hi}",
select_op!("l", "%r12", "%r0", "{val_hi}"),
select_op!("l", "%r13", "%r1", "{val_lo}"),
"cgr %r0, {val_hi}",
"locgre %r13, {tmp}",
}
// We use atomic_update for atomic min/max on pre-z196 because
// z10 doesn't seem to have a good way to implement 128-bit min/max.
// loc{,g}r requires z196 or later.
// https://godbolt.org/z/j8KG9q5oq
#[cfg(not(any(
target_feature = "load-store-on-cond",
portable_atomic_target_feature = "load-store-on-cond",
)))]
atomic_rmw_by_atomic_update!(cmp);
atomic_rmw_cas_2! {
atomic_not,
"lgr %r13, %r1",
"xihf %r13, 4294967295",
"xilf %r13, 4294967295",
"lgr %r12, %r0",
"xihf %r12, 4294967295",
"xilf %r12, 4294967295",
}
atomic_rmw_cas_2! {
atomic_neg,
"lghi %r13, 0",
"slgr %r13, %r1",
"lghi %r12, 0",
"slbgr %r12, %r0",
}
#[inline]
const fn is_lock_free() -> bool {
IS_ALWAYS_LOCK_FREE
}
const IS_ALWAYS_LOCK_FREE: bool = true;
atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);
#[cfg(test)]
mod tests {
use super::*;
test_atomic_int!(i128);
test_atomic_int!(u128);
// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
stress_test!(u128);
}