//! Mask reduction implementations for `aarch64` targets

/// Implements `All`/`Any` for 128-bit wide mask vectors via NEON horizontal
/// min/max reductions.
macro_rules! aarch64_128_neon_impl {
    ($id:ident, $vmin:ident, $vmax:ident) => {
        impl All for $id {
            #[inline]
            #[target_feature(enable = "neon")]
            unsafe fn all(self) -> bool {
                use crate::arch::aarch64::$vmin;
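                // A true mask lane is all-ones and a false lane is
                // all-zeros, so the horizontal minimum is nonzero
                // iff every lane is true.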
                $vmin(crate::mem::transmute(self)) != 0
            }
        }
        impl Any for $id {
            #[inline]
            #[target_feature(enable = "neon")]
            unsafe fn any(self) -> bool {
                use crate::arch::aarch64::$vmax;
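                // The horizontal maximum is nonzero iff at least one
                // lane is true.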
                $vmax(crate::mem::transmute(self)) != 0
            }
        }
    };
}

/// Implements `All`/`Any` for 64-bit wide mask vectors by widening them to
/// the corresponding 128-bit vector and reducing that.
macro_rules! aarch64_64_neon_impl {
    ($id:ident, $vec128:ident) => {
        impl All for $id {
            #[inline]
            #[target_feature(enable = "neon")]
            unsafe fn all(self) -> bool {
                // Duplicates the 64-bit vector into a 128-bit one and
                // calls all on that.
                union U {
                    halves: ($id, $id),
                    vec: $vec128,
                }
                U { halves: (self, self) }.vec.all()
            }
        }
        impl Any for $id {
            #[inline]
            #[target_feature(enable = "neon")]
            unsafe fn any(self) -> bool {
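                // Duplicates the 64-bit vector into a 128-bit one and
                // calls any on that.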
                union U {
                    halves: ($id, $id),
                    vec: $vec128,
                }
                U { halves: (self, self) }.vec.any()
            }
        }
    };
}

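// The union trick above is equivalent to duplicating the half vector into
// both halves of a 128-bit register with `vcombine`. A minimal sketch with
// raw NEON intrinsics (hypothetical helper, not used by the macros; assumes
// the crate's `crate::arch::aarch64` re-export also exposes `vcombine_u8`):
#[cfg(target_arch = "aarch64")]
#[allow(dead_code)]
#[target_feature(enable = "neon")]
unsafe fn all_u8x8_sketch(mask: crate::arch::aarch64::uint8x8_t) -> bool {
    use crate::arch::aarch64::{vcombine_u8, vminvq_u8};
    // Duplicate the 64-bit mask into both halves, then reduce at full width.
    vminvq_u8(vcombine_u8(mask, mask)) != 0
}
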
/// Selects the mask reduction implementation for each mask type on
/// `aarch64` targets.
macro_rules! impl_mask_reductions {
    // 64-bit wide masks
    (m8x8) => {
        aarch64_64_neon_impl!(m8x8, m8x16);
    };
    (m16x4) => {
        aarch64_64_neon_impl!(m16x4, m16x8);
    };
    (m32x2) => {
        aarch64_64_neon_impl!(m32x2, m32x4);
    };
    // 128-bit wide masks
    (m8x16) => {
        aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8);
    };
    (m16x8) => {
        aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16);
    };
    (m32x4) => {
        aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32);
    };
    // Fall back to LLVM's default code generation:
    ($id:ident) => {
        fallback_impl!($id);
    };
}
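
// Example invocations (hypothetical call sites; the macro is expected to be
// invoked once per mask type elsewhere in the crate):
//
//     impl_mask_reductions!(m8x16); // 128-bit: vminvq_u8 / vmaxvq_u8
//     impl_mask_reductions!(m16x4); // 64-bit: widened to m16x8, then reduced
//     impl_mask_reductions!(m64x2); // no NEON arm: LLVM's default codegen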