core/stdarch/crates/core_arch/src/x86/ssse3.rs

//! Supplemental Streaming SIMD Extensions 3 (SSSE3)

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute value of packed 8-bit signed integers in `a` and
/// returns the unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_abs_epi8(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i8x16();
        let zero = i8x16::ZERO;
        let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of each of the packed 16-bit signed integers in
/// `a` and returns the unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_abs_epi16(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let zero = i16x8::ZERO;
        let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of each of the packed 32-bit signed integers in
/// `a` and returns the unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_abs_epi32(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i32x4();
        let zero = i32x4::ZERO;
        let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
        transmute(r)
    }
}

/// Shuffles bytes from `a` according to the content of `b`.
///
/// The low 4 bits of each byte of `b` are used as indices
/// into the 16 bytes of `a`.
///
/// In addition, if the most significant bit of a byte of `b`
/// is set, the corresponding destination byte is set to 0.
///
/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
/// logically equivalent to:
///
/// ```
/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // if the most significant bit of b is set,
///         // then the destination byte is set to 0.
///         if b[i] & 0x80 == 0u8 {
///             r[i] = a[(b[i] % 16) as usize];
///         }
///     }
///     r
/// }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pshufb128(a.as_u8x16(), b.as_u8x16())) }
}

/// Concatenates 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shifts the result right by `IMM8` bytes, and returns the low 16 bytes.
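///
/// Picturing the concatenation as `[u8; 32]` with `b` in the low half and `a`
/// in the high half, the following is a scalar sketch of the behavior
/// (illustrative only, not the actual implementation):
///
/// ```
/// fn mm_alignr_epi8(a: [u8; 16], b: [u8; 16], imm8: usize) -> [u8; 16] {
///     let mut concat = [0u8; 32];
///     concat[..16].copy_from_slice(&b); // low half: `b`
///     concat[16..].copy_from_slice(&a); // high half: `a`
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // Bytes shifted in from beyond the end of `concat` are zero.
///         r[i] = concat.get(imm8 + i).copied().unwrap_or(0);
///     }
///     r
/// }
/// ```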
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 > 32 {
        return _mm_setzero_si128();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm_setzero_si128(), a)
    } else {
        (a, b)
    };
    const fn mask(shift: u32, i: u32) -> u32 {
        if shift > 32 {
            // Unused, but needs to be a valid index.
            i
        } else if shift > 16 {
            shift - 16 + i
        } else {
            shift + i
        }
    }
    unsafe {
        let r: i8x16 = simd_shuffle!(
            b.as_i8x16(),
            a.as_i8x16(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
            ],
        );
        transmute(r)
    }
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`.
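///
/// A scalar sketch of the lane pairing (illustrative only): the low four
/// output lanes come from `a`, the high four from `b`, and each output lane
/// is the wrapping sum of one adjacent pair.
///
/// ```
/// fn mm_hadd_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         r[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].wrapping_add(b[2 * i + 1]);
///     }
///     r
/// }
/// ```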
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    unsafe {
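        // Shuffle indices 0-7 address lanes of `a` and 8-15 lanes of `b`, so
        // `even` gathers the first member of each adjacent pair and `odd` the
        // second; an element-wise add then produces the horizontal sums.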
        let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
        let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
        simd_add(even, odd).as_m128i()
    }
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    unsafe {
        let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
        let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
        simd_saturating_add(even, odd).as_m128i()
    }
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i32x4();
    let b = b.as_i32x4();
    unsafe {
        let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]);
        let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]);
        simd_add(even, odd).as_m128i()
    }
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`.
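///
/// A scalar sketch of the lane pairing (illustrative only): each output lane
/// is the wrapping difference `first - second` of one adjacent pair, with the
/// low four lanes taken from `a` and the high four from `b`.
///
/// ```
/// fn mm_hsub_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         r[i] = a[2 * i].wrapping_sub(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].wrapping_sub(b[2 * i + 1]);
///     }
///     r
/// }
/// ```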
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    unsafe {
        let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
        let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
        simd_sub(even, odd).as_m128i()
    }
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    unsafe {
        let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
        let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
        simd_saturating_sub(even, odd).as_m128i()
    }
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
    let a = a.as_i32x4();
    let b = b.as_i32x4();
    unsafe {
        let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]);
        let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]);
        simd_sub(even, odd).as_m128i()
    }
}

/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
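///
/// A scalar sketch of the per-pair computation (illustrative only):
///
/// ```
/// fn mm_maddubs_epi16(a: [u8; 16], b: [i8; 16]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..8 {
///         let lo = i32::from(a[2 * i]) * i32::from(b[2 * i]);
///         let hi = i32::from(a[2 * i + 1]) * i32::from(b[2 * i + 1]);
///         // Each pair sum saturates to the i16 range.
///         r[i] = (lo + hi).clamp(i16::MIN as i32, i16::MAX as i32) as i16;
///     }
///     r
/// }
/// ```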
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) }
}

/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// product to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits `[16:1]` to the destination.
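///
/// A scalar sketch of the per-lane computation (illustrative only):
///
/// ```
/// fn mm_mulhrs_epi16(a: i16, b: i16) -> i16 {
///     let product = i32::from(a) * i32::from(b);
///     // Keep the top 18 bits, add 1 to round, then drop the rounding bit.
///     (((product >> 14) + 1) >> 1) as i16
/// }
/// ```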
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in `b`
/// is zero.
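///
/// A scalar sketch of the per-lane behavior (illustrative only):
///
/// ```
/// fn mm_sign_epi8(a: i8, b: i8) -> i8 {
///     if b < 0 {
///         a.wrapping_neg() // negation wraps: i8::MIN stays i8::MIN
///     } else if b == 0 {
///         0
///     } else {
///         a
///     }
/// }
/// ```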
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psignb128(a.as_i8x16(), b.as_i8x16())) }
}

/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in `b`
/// is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psignw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in `b`
/// is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psignd128(a.as_i32x4(), b.as_i32x4())) }
}

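// Bindings to the LLVM x86 SSSE3 intrinsics backing the functions above that
// are not expressed through portable `simd_*` operations.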
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;
}

#[cfg(test)]
mod tests {
    use crate::core_arch::assert_eq_const as assert_eq;
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    const fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    const fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    const fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);

        // Test indices greater than 15 wrapping around
        let b = _mm_add_epi8(b, _mm_set1_epi8(32));
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    const fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let r = _mm_alignr_epi8::<33>(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        let r = _mm_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8::<16>(a, b);
        assert_eq_m128i(r, a);

        let r = _mm_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8::<0>(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    const fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    const fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    const fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    const fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test widening and saturation
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test extreme values
        let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }
}