core/stdarch/crates/core_arch/src/x86/ssse3.rs

//! Supplemental Streaming SIMD Extensions 3 (SSSE3)

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute value of packed 8-bit signed integers in `a` and
/// returns the unsigned results.
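///
/// Picturing `a` as `[i8; 16]`, `_mm_abs_epi8` is logically equivalent to the
/// following scalar sketch (the intrinsic itself operates on `__m128i`):
///
/// ```
/// fn mm_abs_epi8(a: [i8; 16]) -> [u8; 16] {
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // Because the result is unsigned, i8::MIN maps to 128 rather
///         // than overflowing.
///         r[i] = a[i].unsigned_abs();
///     }
///     r
/// }
/// ```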
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_abs_epi8(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i8x16();
        let zero = i8x16::ZERO;
        let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of each of the packed 16-bit signed integers
/// in `a` and returns the unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_abs_epi16(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let zero = i16x8::ZERO;
        let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of each of the packed 32-bit signed integers
/// in `a` and returns the unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_abs_epi32(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i32x4();
        let zero = i32x4::ZERO;
        let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
        transmute(r)
    }
}

/// Shuffles bytes from `a` according to the contents of `b`.
///
/// The low 4 bits of each byte of `b` are used as indices
/// into the 16 bytes of `a`.
///
/// In addition, if the most significant bit of a byte of `b`
/// is set, the corresponding destination byte is set to 0.
///
/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
/// logically equivalent to:
///
/// ```
/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // if the most significant bit of b is set,
///         // then the destination byte is set to 0.
///         if b[i] & 0x80 == 0u8 {
///             r[i] = a[(b[i] % 16) as usize];
///         }
///     }
///     r
/// }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pshufb128(a.as_u8x16(), b.as_u8x16())) }
}

/// Concatenates the 16-byte blocks `a` and `b` into a 32-byte temporary
/// result, shifts the result right by `IMM8` bytes, and returns the low 16
/// bytes.
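///
/// Picturing `a` and `b` as `[u8; 16]`, this is logically equivalent to the
/// following sketch, where `a` forms the high half of the 32-byte
/// intermediate value:
///
/// ```
/// fn mm_alignr_epi8(a: [u8; 16], b: [u8; 16], imm8: usize) -> [u8; 16] {
///     let mut tmp = [0u8; 32];
///     tmp[..16].copy_from_slice(&b);
///     tmp[16..].copy_from_slice(&a);
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // Bytes shifted in from beyond the 32-byte intermediate are zero.
///         r[i] = if imm8 + i < 32 { tmp[imm8 + i] } else { 0 };
///     }
///     r
/// }
/// ```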
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 > 32 {
        return _mm_setzero_si128();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm_setzero_si128(), a)
    } else {
        (a, b)
    };
    const fn mask(shift: u32, i: u32) -> u32 {
        if shift > 32 {
            // Unused, but needs to be a valid index.
            i
        } else if shift > 16 {
            shift - 16 + i
        } else {
            shift + i
        }
    }
    unsafe {
        let r: i8x16 = simd_shuffle!(
            b.as_i8x16(),
            a.as_i8x16(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
            ],
        );
        transmute(r)
    }
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`.
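///
/// Picturing `a` and `b` as `[i16; 8]`, this is logically equivalent to the
/// following sketch; note that the additions wrap on overflow:
///
/// ```
/// fn mm_hadd_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         // The low half of the result comes from `a`, the high half
///         // from `b`.
///         r[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].wrapping_add(b[2 * i + 1]);
///     }
///     r
/// }
/// ```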
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    unsafe {
        let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
        let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
        simd_add(even, odd).as_m128i()
    }
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
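///
/// Picturing `a` and `b` as `[i16; 8]`, this is logically equivalent to:
///
/// ```
/// fn mm_hadds_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         // Unlike `_mm_hadd_epi16`, the sums saturate instead of wrapping.
///         r[i] = a[2 * i].saturating_add(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].saturating_add(b[2 * i + 1]);
///     }
///     r
/// }
/// ```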
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i32x4();
    let b = b.as_i32x4();
    unsafe {
        let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]);
        let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]);
        simd_add(even, odd).as_m128i()
    }
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`.
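///
/// Picturing `a` and `b` as `[i16; 8]`, each output lane is the even-indexed
/// element of a pair minus the odd-indexed element, with wrapping arithmetic;
/// a sketch (the saturating and 32-bit variants follow the same pattern):
///
/// ```
/// fn mm_hsub_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         r[i] = a[2 * i].wrapping_sub(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].wrapping_sub(b[2 * i + 1]);
///     }
///     r
/// }
/// ```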
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    unsafe {
        let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
        let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
        simd_sub(even, odd).as_m128i()
    }
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i32x4();
    let b = b.as_i32x4();
    unsafe {
        let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]);
        let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]);
        simd_sub(even, odd).as_m128i()
    }
}

/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
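///
/// Picturing `a` as `[u8; 16]` and `b` as `[i8; 16]`, this is logically
/// equivalent to the following sketch:
///
/// ```
/// fn mm_maddubs_epi16(a: [u8; 16], b: [i8; 16]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..8 {
///         // Widen to i32 so the intermediate sum cannot overflow, then
///         // saturate to the i16 range.
///         let lo = a[2 * i] as i32 * b[2 * i] as i32;
///         let hi = a[2 * i + 1] as i32 * b[2 * i + 1] as i32;
///         r[i] = (lo + hi).clamp(i16::MIN as i32, i16::MAX as i32) as i16;
///     }
///     r
/// }
/// ```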
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) }
}

/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits `[16:1]` to the destination.
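///
/// Per lane, this amounts to `((a * b >> 14) + 1) >> 1` computed in 32-bit
/// arithmetic and truncated to 16 bits; a sketch:
///
/// ```
/// fn mm_mulhrs_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..8 {
///         // Keep the top 18 bits of the 32-bit product, round, then
///         // take bits [16:1].
///         let t = ((a[i] as i32 * b[i] as i32) >> 14) + 1;
///         r[i] = (t >> 1) as i16;
///     }
///     r
/// }
/// ```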
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
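///
/// Picturing `a` and `b` as `[i8; 16]`, this is logically equivalent to the
/// following sketch (the 16- and 32-bit variants follow the same pattern):
///
/// ```
/// fn mm_sign_epi8(a: [i8; 16], b: [i8; 16]) -> [i8; 16] {
///     let mut r = [0i8; 16];
///     for i in 0..16 {
///         r[i] = if b[i] < 0 {
///             // Negation wraps, so i8::MIN stays i8::MIN.
///             a[i].wrapping_neg()
///         } else if b[i] == 0 {
///             0
///         } else {
///             a[i]
///         };
///     }
///     r
/// }
/// ```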
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psignb128(a.as_i8x16(), b.as_i8x16())) }
}

/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psignw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psignd128(a.as_i32x4(), b.as_i32x4())) }
}

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;
}

#[cfg(test)]
mod tests {
    use crate::core_arch::assert_eq_const as assert_eq;
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    const unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    const unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    const unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);

        // Test indices greater than 15 wrapping around
        let b = _mm_add_epi8(b, _mm_set1_epi8(32));
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    const unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let r = _mm_alignr_epi8::<33>(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        let r = _mm_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8::<16>(a, b);
        assert_eq_m128i(r, a);

        let r = _mm_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8::<0>(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    const unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    const unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    const unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    const unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test widening and saturation
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test extreme values
        let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }
}
677}