blob: 0805d9bc3003967602177635f0e49b186f5e2dae [file] [log] [blame]
// Ignore everything except x86 and x86_64
// Any additional targets that are added to CI should be ignored here
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@ignore-target-aarch64
//@ignore-target-arm
//@ignore-target-avr
//@ignore-target-s390x
//@ignore-target-thumbv7em
//@ignore-target-wasm32
// SSSE3 implicitly enables SSE3
//@compile-flags: -C target-feature=+ssse3
use core::mem::transmute;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
/// Entry point: verifies at runtime that the required CPU features are
/// detected, then runs the SSE3 suite followed by the SSSE3 suite.
fn main() {
    // SSSE3 implicitly enables SSE3, still check it to be sure
    assert!(is_x86_feature_detected!("sse3"));
    assert!(is_x86_feature_detected!("ssse3"));

    // SAFETY: the assertion above confirmed that "sse3" is detected.
    unsafe { test_sse3() };
    // SAFETY: the assertion above confirmed that "ssse3" is detected.
    unsafe { test_ssse3() };
}
/// Runs the SSE3 intrinsic checks.
///
/// Every intrinsic gets its own nested function, which is invoked right after
/// its definition, so the checks execute in declaration order.
#[target_feature(enable = "sse3")]
unsafe fn test_sse3() {
    // Mostly copied from library/stdarch/crates/core_arch/src/x86/sse3.rs

    // Even lanes hold `lhs - rhs`, odd lanes hold `lhs + rhs`.
    #[target_feature(enable = "sse3")]
    unsafe fn test_mm_addsub_ps() {
        let lhs = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let rhs = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let actual = _mm_addsub_ps(lhs, rhs);
        let expected = _mm_setr_ps(99.0, 25.0, 0.0, -15.0);
        assert_eq_m128(actual, expected);
    }
    test_mm_addsub_ps();

    // Double-precision variant: lane 0 subtracts, lane 1 adds.
    #[target_feature(enable = "sse3")]
    unsafe fn test_mm_addsub_pd() {
        let lhs = _mm_setr_pd(-1.0, 5.0);
        let rhs = _mm_setr_pd(-100.0, 20.0);
        let actual = _mm_addsub_pd(lhs, rhs);
        let expected = _mm_setr_pd(99.0, 25.0);
        assert_eq_m128d(actual, expected);
    }
    test_mm_addsub_pd();

    // Adjacent pairs are summed: the pairs of `lhs` fill the low half of the
    // result, the pairs of `rhs` fill the high half.
    #[target_feature(enable = "sse3")]
    unsafe fn test_mm_hadd_ps() {
        let lhs = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let rhs = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let actual = _mm_hadd_ps(lhs, rhs);
        let expected = _mm_setr_ps(4.0, -10.0, -80.0, -5.0);
        assert_eq_m128(actual, expected);
    }
    test_mm_hadd_ps();

    // Double-precision horizontal add: one sum per input vector.
    #[target_feature(enable = "sse3")]
    unsafe fn test_mm_hadd_pd() {
        let lhs = _mm_setr_pd(-1.0, 5.0);
        let rhs = _mm_setr_pd(-100.0, 20.0);
        let actual = _mm_hadd_pd(lhs, rhs);
        let expected = _mm_setr_pd(4.0, -80.0);
        assert_eq_m128d(actual, expected);
    }
    test_mm_hadd_pd();

    // Adjacent pairs are subtracted (even lane minus odd lane).
    #[target_feature(enable = "sse3")]
    unsafe fn test_mm_hsub_ps() {
        let lhs = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let rhs = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let actual = _mm_hsub_ps(lhs, rhs);
        let expected = _mm_setr_ps(-6.0, 10.0, -120.0, 5.0);
        assert_eq_m128(actual, expected);
    }
    test_mm_hsub_ps();

    // Double-precision horizontal subtract: one difference per input vector.
    #[target_feature(enable = "sse3")]
    unsafe fn test_mm_hsub_pd() {
        let lhs = _mm_setr_pd(-1.0, 5.0);
        let rhs = _mm_setr_pd(-100.0, 20.0);
        let actual = _mm_hsub_pd(lhs, rhs);
        let expected = _mm_setr_pd(-6.0, -120.0);
        assert_eq_m128d(actual, expected);
    }
    test_mm_hsub_pd();

    // Loading through a pointer to a vector must give back the same bits.
    #[target_feature(enable = "sse3")]
    unsafe fn test_mm_lddqu_si128() {
        let stored = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let loaded = _mm_lddqu_si128(&stored);
        assert_eq_m128i(stored, loaded);
    }
    test_mm_lddqu_si128();
}
/// Runs the SSSE3 intrinsic checks.
///
/// Every intrinsic gets its own nested function, which is invoked right after
/// its definition, so the checks execute in declaration order.
#[target_feature(enable = "ssse3")]
unsafe fn test_ssse3() {
    // Mostly copied from library/stdarch/crates/core_arch/src/x86/ssse3.rs

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let actual = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(actual, _mm_set1_epi8(5));
    }
    test_mm_abs_epi8();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let actual = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(actual, _mm_set1_epi16(5));
    }
    test_mm_abs_epi16();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let actual = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(actual, _mm_set1_epi32(5));
    }
    test_mm_abs_epi32();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        let src = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // The second index has its high bit set, which selects a zero byte.
        let idx = _mm_setr_epi8(4, 128_u8 as i8, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let actual = _mm_shuffle_epi8(src, idx);
        assert_eq_m128i(actual, expected);

        // Test indices greater than 15 wrapping around
        let idx = _mm_add_epi8(idx, _mm_set1_epi8(32));
        let actual = _mm_shuffle_epi8(src, idx);
        assert_eq_m128i(actual, expected);
    }
    test_mm_shuffle_epi8();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let actual = _mm_hadd_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);

        // Test wrapping on overflow
        let lhs = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let rhs = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let actual = _mm_hadd_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_hadd_epi16();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let actual = _mm_hadds_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);

        // Test saturating on overflow
        let lhs = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let rhs = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let actual = _mm_hadds_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_hadds_epi16();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let lhs = _mm_setr_epi32(1, 2, 3, 4);
        let rhs = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let actual = _mm_hadd_epi32(lhs, rhs);
        assert_eq_m128i(actual, expected);

        // Test wrapping on overflow
        let lhs = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let rhs = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let actual = _mm_hadd_epi32(lhs, rhs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_hadd_epi32();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let actual = _mm_hsub_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);

        // Test wrapping on overflow
        let lhs = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let rhs = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let actual = _mm_hsub_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_hsub_epi16();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let actual = _mm_hsubs_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);

        // Test saturating on overflow
        let lhs = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let rhs = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let actual = _mm_hsubs_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_hsubs_epi16();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let lhs = _mm_setr_epi32(1, 2, 3, 4);
        let rhs = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let actual = _mm_hsub_epi32(lhs, rhs);
        assert_eq_m128i(actual, expected);

        // Test wrapping on overflow
        let lhs = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let rhs = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let actual = _mm_hsub_epi32(lhs, rhs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_hsub_epi32();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        let lhs = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let rhs = _mm_setr_epi8(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let actual = _mm_maddubs_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);

        // Test widening and saturation
        let lhs = _mm_setr_epi8(
            u8::MAX as i8,
            u8::MAX as i8,
            u8::MAX as i8,
            u8::MAX as i8,
            u8::MAX as i8,
            u8::MAX as i8,
            100,
            100,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
        );
        let rhs = _mm_setr_epi8(
            i8::MAX,
            i8::MAX,
            i8::MAX,
            i8::MIN,
            i8::MIN,
            i8::MIN,
            50,
            15,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
        );
        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        let actual = _mm_maddubs_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_maddubs_epi16();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let actual = _mm_mulhrs_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);

        // Test extreme values
        let lhs = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let rhs = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        let actual = _mm_mulhrs_epi16(lhs, rhs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_mulhrs_epi16();

    // For the `sign` family the expected vectors encode: a negative selector
    // negates the value, a zero selector zeroes it, a positive one keeps it.
    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        let values = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -14, -15, 16);
        let signs = _mm_setr_epi8(4, 63, -4, 3, 24, 12, -6, -19, 12, 5, -5, 10, 4, 1, -8, 0);
        let expected = _mm_setr_epi8(1, 2, -3, 4, 5, 6, -7, -8, 9, 10, -11, 12, 13, -14, 15, 0);
        let actual = _mm_sign_epi8(values, signs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_sign_epi8();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let values = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let signs = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let actual = _mm_sign_epi16(values, signs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_sign_epi16();

    #[target_feature(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let values = _mm_setr_epi32(-1, 2, 3, 4);
        let signs = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let actual = _mm_sign_epi32(values, signs);
        assert_eq_m128i(actual, expected);
    }
    test_mm_sign_epi32();
}
/// Panics unless all four `f32` lanes of `a` and `b` compare equal.
///
/// The comparison is a lane-wise floating-point equality, so a lane holding
/// NaN never counts as equal.
#[track_caller]
#[target_feature(enable = "sse")]
unsafe fn assert_eq_m128(a: __m128, b: __m128) {
    // One mask bit per lane; all four bits set means every lane matched.
    let lanes_equal = _mm_movemask_ps(_mm_cmpeq_ps(a, b));
    assert!(lanes_equal == 0b1111, "{:?} != {:?}", a, b);
}
/// Panics unless both `f64` lanes of `a` and `b` compare equal.
///
/// The comparison is a lane-wise floating-point equality, so a lane holding
/// NaN never counts as equal.
#[track_caller]
#[target_feature(enable = "sse2")]
unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
    // One mask bit per lane; both bits set means every lane matched.
    let eq_mask = _mm_cmpeq_pd(a, b);
    if _mm_movemask_pd(eq_mask) == 0b11 {
        return;
    }
    panic!("{:?} != {:?}", a, b);
}
/// Panics unless `a` and `b` hold exactly the same 128 bits.
///
/// Both vectors are reinterpreted as `[u64; 2]` and compared with
/// `assert_eq!`, so a failure prints the two halves of each operand.
#[track_caller]
#[target_feature(enable = "sse2")]
pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
    let lhs: [u64; 2] = transmute(a);
    let rhs: [u64; 2] = transmute(b);
    assert_eq!(lhs, rhs)
}