use crate::{
core_arch::{simd::*, x86::*},
intrinsics::simd::*,
};
#[cfg(test)]
use stdarch_test::assert_instr;
/// Computes the absolute value of each of the sixteen packed 8-bit signed
/// integers in `a`.
///
/// Every lane that compares below zero is replaced by its negation; all
/// other lanes are passed through unchanged.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
///
/// # Safety
///
/// The executing CPU must support the `ssse3` target feature.
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
    let lanes = a.as_i8x16();
    // Per-lane mask: set where the lane is strictly negative.
    let is_negative: m8x16 = simd_lt(lanes, i8x16::splat(0));
    let negated = simd_neg(lanes);
    transmute(simd_select(is_negative, negated, lanes))
}
/// Computes the absolute value of each of the eight packed 16-bit signed
/// integers in `a`.
///
/// Every lane that compares below zero is replaced by its negation; all
/// other lanes are passed through unchanged.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
///
/// # Safety
///
/// The executing CPU must support the `ssse3` target feature.
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
    let lanes = a.as_i16x8();
    // Per-lane mask: set where the lane is strictly negative.
    let is_negative: m16x8 = simd_lt(lanes, i16x8::splat(0));
    let negated = simd_neg(lanes);
    transmute(simd_select(is_negative, negated, lanes))
}
/// Computes the absolute value of each of the four packed 32-bit signed
/// integers in `a`.
///
/// Every lane that compares below zero is replaced by its negation; all
/// other lanes are passed through unchanged.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
///
/// # Safety
///
/// The executing CPU must support the `ssse3` target feature.
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
    let lanes = a.as_i32x4();
    // Per-lane mask: set where the lane is strictly negative.
    let is_negative: m32x4 = simd_lt(lanes, i32x4::splat(0));
    let negated = simd_neg(lanes);
    transmute(simd_select(is_negative, negated, lanes))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
}
/// Concatenates the 16-byte vectors `a` (upper half) and `b` (lower half)
/// into a 32-byte intermediate, shifts it right by `IMM8` bytes, and returns
/// the low 16 bytes of the result.
///
/// * `IMM8 <= 16`: output byte `i` is byte `IMM8 + i` of `b` followed by `a`.
/// * `16 < IMM8 <= 32`: `a` is shifted right by `IMM8 - 16` bytes with zeros
///   shifted in.
/// * `IMM8 > 32`: the result is all zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
///
/// # Safety
///
/// The executing CPU must support the `ssse3` target feature.
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
// Compile-time check on the immediate (8-bit unsigned range, per the macro's name).
static_assert_uimm_bits!(IMM8, 8);
// Shifting the 32-byte pair by more than 32 bytes leaves nothing: emit zero.
if IMM8 > 32 {
return _mm_set1_epi8(0);
}
// Shifting by more than one vector but at most two: `b` is shifted out
// entirely, so rewrite the operation as shifting zeros into `a`.
let (a, b) = if IMM8 > 16 {
(_mm_set1_epi8(0), a)
} else {
(a, b)
};
// Computes the shuffle index for output byte `i`. Indices address the
// 32 lanes formed by the first shuffle operand followed by the second.
const fn mask(shift: u32, i: u32) -> u32 {
if shift > 32 {
// This case returned early above, so the index is never observed;
// it is kept as a plain in-range value.
i
} else if shift > 16 {
// Operands were swapped to (zero, a) above; shift by the remainder.
shift - 16 + i
} else {
shift + i
}
}
// The lower operand (`b`) comes first so that index 0 is the lowest byte
// of the concatenated pair.
let r: i8x16 = simd_shuffle!(
b.as_i8x16(),
a.as_i8x16(),
[
mask(IMM8 as u32, 0),
mask(IMM8 as u32, 1),
mask(IMM8 as u32, 2),
mask(IMM8 as u32, 3),
mask(IMM8 as u32, 4),
mask(IMM8 as u32, 5),
mask(IMM8 as u32, 6),
mask(IMM8 as u32, 7),
mask(IMM8 as u32, 8),
mask(IMM8 as u32, 9),
mask(IMM8 as u32, 10),
mask(IMM8 as u32, 11),
mask(IMM8 as u32, 12),
mask(IMM8 as u32, 13),
mask(IMM8 as u32, 14),
mask(IMM8 as u32, 15),
],
);
transmute(r)
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
}
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
}
// Declarations of the LLVM intrinsics that back the SSSE3 wrappers in this
// module. Each `link_name` is the LLVM intrinsic the symbol resolves to.
// NOTE(review): `improper_ctypes` is presumably silenced because the SIMD
// vector types used in these signatures are not recognised as FFI-safe by
// the lint — confirm.
#[allow(improper_ctypes)]
extern "C" {
// Byte shuffle (used by `_mm_shuffle_epi8`).
#[link_name = "llvm.x86.ssse3.pshuf.b.128"]
fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
// Horizontal add of 16-bit lanes (used by `_mm_hadd_epi16`).
#[link_name = "llvm.x86.ssse3.phadd.w.128"]
fn phaddw128(a: i16x8, b: i16x8) -> i16x8;
// Horizontal saturating add of 16-bit lanes (used by `_mm_hadds_epi16`).
#[link_name = "llvm.x86.ssse3.phadd.sw.128"]
fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
// Horizontal add of 32-bit lanes (used by `_mm_hadd_epi32`).
#[link_name = "llvm.x86.ssse3.phadd.d.128"]
fn phaddd128(a: i32x4, b: i32x4) -> i32x4;
// Horizontal subtract of 16-bit lanes (used by `_mm_hsub_epi16`).
#[link_name = "llvm.x86.ssse3.phsub.w.128"]
fn phsubw128(a: i16x8, b: i16x8) -> i16x8;
// Horizontal saturating subtract of 16-bit lanes (used by `_mm_hsubs_epi16`).
#[link_name = "llvm.x86.ssse3.phsub.sw.128"]
fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
// Horizontal subtract of 32-bit lanes (used by `_mm_hsub_epi32`).
#[link_name = "llvm.x86.ssse3.phsub.d.128"]
fn phsubd128(a: i32x4, b: i32x4) -> i32x4;
// Unsigned-by-signed byte multiply with pairwise add (used by `_mm_maddubs_epi16`).
#[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
// Rounded, scaled 16-bit multiply (used by `_mm_mulhrs_epi16`).
#[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
// Sign transfer on 8-bit lanes (used by `_mm_sign_epi8`).
#[link_name = "llvm.x86.ssse3.psign.b.128"]
fn psignb128(a: i8x16, b: i8x16) -> i8x16;
// Sign transfer on 16-bit lanes (used by `_mm_sign_epi16`).
#[link_name = "llvm.x86.ssse3.psign.w.128"]
fn psignw128(a: i16x8, b: i16x8) -> i16x8;
// Sign transfer on 32-bit lanes (used by `_mm_sign_epi32`).
#[link_name = "llvm.x86.ssse3.psign.d.128"]
fn psignd128(a: i32x4, b: i32x4) -> i32x4;
}
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Every lane holding -5 must come back as +5.
    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let input = _mm_set1_epi8(-5);
        let want = _mm_set1_epi8(5);
        assert_eq_m128i(_mm_abs_epi8(input), want);
    }

    // Every lane holding -5 must come back as +5.
    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let input = _mm_set1_epi16(-5);
        let want = _mm_set1_epi16(5);
        assert_eq_m128i(_mm_abs_epi16(input), want);
    }

    // Every lane holding -5 must come back as +5.
    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let input = _mm_set1_epi32(-5);
        let want = _mm_set1_epi32(5);
        assert_eq_m128i(_mm_abs_epi32(input), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let source = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // The second control byte has its top bit set and must yield zero.
        #[rustfmt::skip]
        let control = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let want = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        assert_eq_m128i(_mm_shuffle_epi8(source, control), want);

        // Adding 32 to every control byte must not change the outcome:
        // indices above 15 wrap around.
        let wrapped_control = _mm_add_epi8(control, _mm_set1_epi8(32));
        assert_eq_m128i(_mm_shuffle_epi8(source, wrapped_control), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let upper = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let lower = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );

        // Shift beyond both vectors: everything is shifted out.
        assert_eq_m128i(_mm_alignr_epi8::<33>(upper, lower), _mm_set1_epi8(0));

        // Shift by 17: `lower` is gone and `upper` moves down one byte,
        // with a zero shifted in at the top.
        #[rustfmt::skip]
        let want_17 = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(_mm_alignr_epi8::<17>(upper, lower), want_17);

        // Shift by exactly one vector: the result is `upper`.
        assert_eq_m128i(_mm_alignr_epi8::<16>(upper, lower), upper);

        // Shift by 15: the last byte of `lower` followed by the first
        // fifteen bytes of `upper`.
        #[rustfmt::skip]
        let want_15 = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(_mm_alignr_epi8::<15>(upper, lower), want_15);

        // No shift: the result is `lower`.
        assert_eq_m128i(_mm_alignr_epi8::<0>(upper, lower), lower);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let want = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        assert_eq_m128i(_mm_hadd_epi16(lhs, rhs), want);

        // Overflowing sums must wrap around.
        let near_max = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let near_min = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let want_wrapped = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        assert_eq_m128i(_mm_hadd_epi16(near_max, near_min), want_wrapped);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let want = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        assert_eq_m128i(_mm_hadds_epi16(lhs, rhs), want);

        // Overflowing sums must saturate.
        let near_max = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let near_min = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let want_saturated = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        assert_eq_m128i(_mm_hadds_epi16(near_max, near_min), want_saturated);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let lhs = _mm_setr_epi32(1, 2, 3, 4);
        let rhs = _mm_setr_epi32(4, 128, 4, 3);
        let want = _mm_setr_epi32(3, 7, 132, 7);
        assert_eq_m128i(_mm_hadd_epi32(lhs, rhs), want);

        // Overflowing sums must wrap around.
        let near_max = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let near_min = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let want_wrapped = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        assert_eq_m128i(_mm_hadd_epi32(near_max, near_min), want_wrapped);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let want = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        assert_eq_m128i(_mm_hsub_epi16(lhs, rhs), want);

        // Overflowing differences must wrap around.
        let near_max = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let near_min = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let want_wrapped = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        assert_eq_m128i(_mm_hsub_epi16(near_max, near_min), want_wrapped);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let want = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        assert_eq_m128i(_mm_hsubs_epi16(lhs, rhs), want);

        // Overflowing differences must saturate.
        let near_max = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let near_min = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let want_saturated = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        assert_eq_m128i(_mm_hsubs_epi16(near_max, near_min), want_saturated);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let lhs = _mm_setr_epi32(1, 2, 3, 4);
        let rhs = _mm_setr_epi32(4, 128, 4, 3);
        let want = _mm_setr_epi32(-1, -1, -124, 1);
        assert_eq_m128i(_mm_hsub_epi32(lhs, rhs), want);

        // Overflowing differences must wrap around.
        let near_max = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let near_min = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let want_wrapped = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        assert_eq_m128i(_mm_hsub_epi32(near_max, near_min), want_wrapped);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let unsigned_bytes = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let signed_bytes = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let want = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        assert_eq_m128i(_mm_maddubs_epi16(unsigned_bytes, signed_bytes), want);

        // Extreme operands: the pairwise sums must saturate in both directions.
        #[rustfmt::skip]
        let unsigned_extremes = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let signed_extremes = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let want_extremes = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        assert_eq_m128i(
            _mm_maddubs_epi16(unsigned_extremes, signed_extremes),
            want_extremes,
        );
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let lhs = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let rhs = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let want = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        assert_eq_m128i(_mm_mulhrs_epi16(lhs, rhs), want);

        // Products of the extreme values.
        let lhs_extremes = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let rhs_extremes = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let want_extremes = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        assert_eq_m128i(_mm_mulhrs_epi16(lhs_extremes, rhs_extremes), want_extremes);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let values = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[rustfmt::skip]
        let signs = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let want = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        assert_eq_m128i(_mm_sign_epi8(values, signs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let values = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let signs = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let want = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        assert_eq_m128i(_mm_sign_epi16(values, signs), want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let values = _mm_setr_epi32(-1, 2, 3, 4);
        let signs = _mm_setr_epi32(1, -1, 1, 0);
        let want = _mm_setr_epi32(-1, -2, 3, 0);
        assert_eq_m128i(_mm_sign_epi32(values, signs), want);
    }
}