//! Supplemental Streaming SIMD Extensions 3 (SSSE3)

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute value of packed 8-bit signed integers in `a` and
/// returns the unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi8)
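///
/// Note that the absolute value of `i8::MIN` is not representable as a
/// signed 8-bit integer, so the result lane is `0x80`, i.e. `128` when the
/// lanes are viewed as unsigned. A minimal usage sketch (runtime feature
/// detection, shown for `x86_64` only):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("ssse3") {
///         // Safe to call now that SSSE3 support has been confirmed.
///         let r = unsafe { _mm_abs_epi8(_mm_set1_epi8(i8::MIN)) };
///         let bytes: [u8; 16] = unsafe { core::mem::transmute(r) };
///         assert_eq!(bytes, [128u8; 16]);
///     }
/// }
/// ```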
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i8x16();
        let zero = i8x16::ZERO;
        let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of each of the packed 16-bit signed integers
/// in `a` and returns the unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        let zero = i16x8::ZERO;
        let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of each of the packed 32-bit signed integers
/// in `a` and returns the unsigned results.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i32x4();
        let zero = i32x4::ZERO;
        let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
        transmute(r)
    }
}

/// Shuffles bytes from `a` according to the contents of `b`.
///
/// The low 4 bits of each byte of `b` are used as indices
/// into the 16 bytes of `a`.
///
/// In addition, if the most significant bit of a byte of `b`
/// is set, the corresponding destination byte is set to 0.
///
/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
/// logically equivalent to:
///
/// ```
/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // if the most significant bit of b is set,
///         // then the destination byte is set to 0.
///         if b[i] & 0x80 == 0u8 {
///             r[i] = a[(b[i] % 16) as usize];
///         }
///     }
///     r
/// }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pshufb128(a.as_u8x16(), b.as_u8x16())) }
}

/// Concatenates 16-byte blocks in `a` and `b` into a 32-byte temporary
/// result, shifts the result right by `IMM8` bytes, and returns the low
/// 16 bytes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi8)
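///
/// For example, with `IMM8 = 4` the result is the top 12 bytes of `b`
/// followed by the low 4 bytes of `a`. A minimal usage sketch (runtime
/// feature detection, shown for `x86_64` only):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("ssse3") {
///         unsafe {
///             let a = _mm_set1_epi8(1); // high 16 bytes of the pair
///             let b = _mm_set1_epi8(2); // low 16 bytes of the pair
///             let r = _mm_alignr_epi8::<4>(a, b);
///             let bytes: [i8; 16] = core::mem::transmute(r);
///             assert_eq!(bytes[..12], [2; 12]);
///             assert_eq!(bytes[12..], [1; 4]);
///         }
///     }
/// }
/// ```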
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 > 32 {
        return _mm_setzero_si128();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm_setzero_si128(), a)
    } else {
        (a, b)
    };
    const fn mask(shift: u32, i: u32) -> u32 {
        if shift > 32 {
            // Unused, but needs to be a valid index.
            i
        } else if shift > 16 {
            shift - 16 + i
        } else {
            shift + i
        }
    }
    unsafe {
        let r: i8x16 = simd_shuffle!(
            b.as_i8x16(),
            a.as_i8x16(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
            ],
        );
        transmute(r)
    }
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi16)
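///
/// Picturing `a` and `b` as `[i16; 8]`, `_mm_hadd_epi16` is logically
/// equivalent to the following scalar sketch (sums wrap on overflow):
///
/// ```
/// fn mm_hadd_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         // Pairs from `a` fill the low half, pairs from `b` the high half.
///         r[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].wrapping_add(b[2 * i + 1]);
///     }
///     r
/// }
/// ```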
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadds_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hadd_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(phaddd128(a.as_i32x4(), b.as_i32x4())) }
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(phsubw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_hsub_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) }
}

/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16)
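///
/// Picturing `a` as `[u8; 16]` and `b` as `[i8; 16]`, `_mm_maddubs_epi16`
/// is logically equivalent to the following scalar sketch:
///
/// ```
/// fn mm_maddubs_epi16(a: [u8; 16], b: [i8; 16]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..8 {
///         // Each 8-bit product fits in an i16; only the sum can saturate.
///         let lo = a[2 * i] as i16 * b[2 * i] as i16;
///         let hi = a[2 * i + 1] as i16 * b[2 * i + 1] as i16;
///         r[i] = lo.saturating_add(hi);
///     }
///     r
/// }
/// ```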
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) }
}

/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated values by adding 1, and writes bits `[16:1]` to the destination.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16)
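///
/// Picturing `a` and `b` as `[i16; 8]`, `_mm_mulhrs_epi16` is logically
/// equivalent to the following scalar sketch:
///
/// ```
/// fn mm_mulhrs_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..8 {
///         // Keep the top 18 bits of the 32-bit product, round, drop bit 0.
///         let t = (a[i] as i32 * b[i] as i32) >> 14;
///         r[i] = ((t + 1) >> 1) as i16;
///     }
///     r
/// }
/// ```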
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi8)
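///
/// Picturing `a` and `b` as `[i8; 16]`, `_mm_sign_epi8` is logically
/// equivalent to the following scalar sketch (negation wraps, so `i8::MIN`
/// stays `i8::MIN`):
///
/// ```
/// fn mm_sign_epi8(a: [i8; 16], b: [i8; 16]) -> [i8; 16] {
///     let mut r = [0i8; 16];
///     for i in 0..16 {
///         r[i] = if b[i] < 0 {
///             a[i].wrapping_neg()
///         } else if b[i] == 0 {
///             0
///         } else {
///             a[i]
///         };
///     }
///     r
/// }
/// ```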
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psignb128(a.as_i8x16(), b.as_i8x16())) }
}

/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psignw128(a.as_i16x8(), b.as_i16x8())) }
}

/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sign_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(psignd128(a.as_i32x4(), b.as_i32x4())) }
}

#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;
}

#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);

        // Test indices greater than 15 wrapping around
        let b = _mm_add_epi8(b, _mm_set1_epi8(32));
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let r = _mm_alignr_epi8::<33>(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        let r = _mm_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8::<16>(a, b);
        assert_eq_m128i(r, a);

        let r = _mm_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8::<0>(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test widening and saturation
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test extreme values
        let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }
}