1use crate::{
4 core_arch::{simd::*, x86::*},
5 intrinsics::simd::*,
6};
7
8#[cfg(test)]
9use stdarch_test::assert_instr;
10
11#[inline]
16#[target_feature(enable = "ssse3")]
17#[cfg_attr(test, assert_instr(pabsb))]
18#[stable(feature = "simd_x86", since = "1.27.0")]
19pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
20 unsafe {
21 let a = a.as_i8x16();
22 let zero = i8x16::ZERO;
23 let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
24 transmute(r)
25 }
26}
27
28#[inline]
34#[target_feature(enable = "ssse3")]
35#[cfg_attr(test, assert_instr(pabsw))]
36#[stable(feature = "simd_x86", since = "1.27.0")]
37pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
38 unsafe {
39 let a = a.as_i16x8();
40 let zero = i16x8::ZERO;
41 let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
42 transmute(r)
43 }
44}
45
46#[inline]
52#[target_feature(enable = "ssse3")]
53#[cfg_attr(test, assert_instr(pabsd))]
54#[stable(feature = "simd_x86", since = "1.27.0")]
55pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
56 unsafe {
57 let a = a.as_i32x4();
58 let zero = i32x4::ZERO;
59 let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
60 transmute(r)
61 }
62}
63
64#[inline]
91#[target_feature(enable = "ssse3")]
92#[cfg_attr(test, assert_instr(pshufb))]
93#[stable(feature = "simd_x86", since = "1.27.0")]
94pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
95 unsafe { transmute(pshufb128(a.as_u8x16(), b.as_u8x16())) }
96}
97
98#[inline]
103#[target_feature(enable = "ssse3")]
104#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
105#[rustc_legacy_const_generics(2)]
106#[stable(feature = "simd_x86", since = "1.27.0")]
107pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
108 static_assert_uimm_bits!(IMM8, 8);
109 if IMM8 > 32 {
112 return _mm_setzero_si128();
113 }
114 let (a, b) = if IMM8 > 16 {
117 (_mm_setzero_si128(), a)
118 } else {
119 (a, b)
120 };
121 const fn mask(shift: u32, i: u32) -> u32 {
122 if shift > 32 {
123 i
125 } else if shift > 16 {
126 shift - 16 + i
127 } else {
128 shift + i
129 }
130 }
131 unsafe {
132 let r: i8x16 = simd_shuffle!(
133 b.as_i8x16(),
134 a.as_i8x16(),
135 [
136 mask(IMM8 as u32, 0),
137 mask(IMM8 as u32, 1),
138 mask(IMM8 as u32, 2),
139 mask(IMM8 as u32, 3),
140 mask(IMM8 as u32, 4),
141 mask(IMM8 as u32, 5),
142 mask(IMM8 as u32, 6),
143 mask(IMM8 as u32, 7),
144 mask(IMM8 as u32, 8),
145 mask(IMM8 as u32, 9),
146 mask(IMM8 as u32, 10),
147 mask(IMM8 as u32, 11),
148 mask(IMM8 as u32, 12),
149 mask(IMM8 as u32, 13),
150 mask(IMM8 as u32, 14),
151 mask(IMM8 as u32, 15),
152 ],
153 );
154 transmute(r)
155 }
156}
157
158#[inline]
163#[target_feature(enable = "ssse3")]
164#[cfg_attr(test, assert_instr(phaddw))]
165#[stable(feature = "simd_x86", since = "1.27.0")]
166pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
167 unsafe { transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) }
168}
169
170#[inline]
176#[target_feature(enable = "ssse3")]
177#[cfg_attr(test, assert_instr(phaddsw))]
178#[stable(feature = "simd_x86", since = "1.27.0")]
179pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
180 unsafe { transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) }
181}
182
183#[inline]
188#[target_feature(enable = "ssse3")]
189#[cfg_attr(test, assert_instr(phaddd))]
190#[stable(feature = "simd_x86", since = "1.27.0")]
191pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
192 unsafe { transmute(phaddd128(a.as_i32x4(), b.as_i32x4())) }
193}
194
195#[inline]
200#[target_feature(enable = "ssse3")]
201#[cfg_attr(test, assert_instr(phsubw))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
204 unsafe { transmute(phsubw128(a.as_i16x8(), b.as_i16x8())) }
205}
206
207#[inline]
214#[target_feature(enable = "ssse3")]
215#[cfg_attr(test, assert_instr(phsubsw))]
216#[stable(feature = "simd_x86", since = "1.27.0")]
217pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
218 unsafe { transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) }
219}
220
221#[inline]
226#[target_feature(enable = "ssse3")]
227#[cfg_attr(test, assert_instr(phsubd))]
228#[stable(feature = "simd_x86", since = "1.27.0")]
229pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
230 unsafe { transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) }
231}
232
233#[inline]
241#[target_feature(enable = "ssse3")]
242#[cfg_attr(test, assert_instr(pmaddubsw))]
243#[stable(feature = "simd_x86", since = "1.27.0")]
244pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
245 unsafe { transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) }
246}
247
248#[inline]
254#[target_feature(enable = "ssse3")]
255#[cfg_attr(test, assert_instr(pmulhrsw))]
256#[stable(feature = "simd_x86", since = "1.27.0")]
257pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
258 unsafe { transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) }
259}
260
261#[inline]
268#[target_feature(enable = "ssse3")]
269#[cfg_attr(test, assert_instr(psignb))]
270#[stable(feature = "simd_x86", since = "1.27.0")]
271pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
272 unsafe { transmute(psignb128(a.as_i8x16(), b.as_i8x16())) }
273}
274
275#[inline]
282#[target_feature(enable = "ssse3")]
283#[cfg_attr(test, assert_instr(psignw))]
284#[stable(feature = "simd_x86", since = "1.27.0")]
285pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
286 unsafe { transmute(psignw128(a.as_i16x8(), b.as_i16x8())) }
287}
288
289#[inline]
296#[target_feature(enable = "ssse3")]
297#[cfg_attr(test, assert_instr(psignd))]
298#[stable(feature = "simd_x86", since = "1.27.0")]
299pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
300 unsafe { transmute(psignd128(a.as_i32x4(), b.as_i32x4())) }
301}
302
// Declarations of the LLVM SSSE3 intrinsics that back the public wrappers in
// this file. Each function is bound by `link_name` to an `llvm.x86.ssse3.*`
// intrinsic rather than to an ordinary C symbol.
//
// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD
// vector types used in these signatures trip the FFI-safety lint — confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Byte shuffle (used by `_mm_shuffle_epi8`).
    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    // Horizontal add of 16-bit lanes (used by `_mm_hadd_epi16`).
    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;

    // Horizontal saturating add of 16-bit lanes (used by `_mm_hadds_epi16`).
    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    // Horizontal add of 32-bit lanes (used by `_mm_hadd_epi32`).
    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;

    // Horizontal subtract of 16-bit lanes (used by `_mm_hsub_epi16`).
    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;

    // Horizontal saturating subtract of 16-bit lanes (used by `_mm_hsubs_epi16`).
    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    // Horizontal subtract of 32-bit lanes (used by `_mm_hsub_epi32`).
    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;

    // Unsigned-by-signed byte multiply with pairwise saturating add
    // (used by `_mm_maddubs_epi16`).
    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    // Rounded, scaled high multiply of 16-bit lanes (used by `_mm_mulhrs_epi16`).
    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    // Sign transfer for 8-bit lanes (used by `_mm_sign_epi8`).
    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    // Sign transfer for 16-bit lanes (used by `_mm_sign_epi16`).
    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    // Sign transfer for 32-bit lanes (used by `_mm_sign_epi32`).
    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;
}
341
// Unit tests for the SSSE3 intrinsics defined in this file. Every test is
// annotated with `#[simd_test(enable = "ssse3")]` and compares the intrinsic's
// output against a hand-computed vector with `assert_eq_m128i`.
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        // Every lane is -5, so every lane of the result must be 5.
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        // Every lane is -5, so every lane of the result must be 5.
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        // Every lane is -5, so every lane of the result must be 5.
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // Control bytes: 128 has its top bit set (expects a zero output byte);
        // 24 and 19 exceed 15, so only their low 4 bits (8 and 3) select.
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);

        // Test indices greater than 15 wrapping around: adding 32 changes
        // neither the low 4 bits nor the top bit of any control byte, so the
        // result must be identical.
        let b = _mm_add_epi8(b, _mm_set1_epi8(32));
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        // Shifting by more than 32 bytes leaves only zeros.
        let r = _mm_alignr_epi8::<33>(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        // Shift of 17: `b` is gone entirely and `a` is shifted down by one
        // byte with a zero shifted in at the top.
        let r = _mm_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        // Shift of exactly one vector returns `a` unchanged.
        let r = _mm_alignr_epi8::<16>(a, b);
        assert_eq_m128i(r, a);

        // Shift of 15: the last byte of `b` (0) followed by the first 15
        // bytes of `a`.
        let r = _mm_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        // No shift returns `b` unchanged.
        let r = _mm_alignr_epi8::<0>(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        // Pair sums of `a` land in the low four lanes, those of `b` in the
        // high four lanes.
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        // The last two pairs of `b` overflow and must clamp to
        // 32767 / -32768 instead of wrapping.
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        // Pair sums of `a` land in the low two lanes, those of `b` in the
        // high two lanes.
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        // Each result is (first element of the pair) - (second element).
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        // The last two pairs of `b` overflow and must clamp to
        // 32767 / -32768 instead of wrapping.
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test saturating on overflow
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let expected = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        // Each result is (first element of the pair) - (second element).
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);

        // Test wrapping on overflow
        let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        // Each 16-bit lane is a[2i] * b[2i] + a[2i + 1] * b[2i + 1],
        // e.g. 1 * 4 + 2 * 63 = 130.
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test widening and saturation: `a` is read as unsigned (255), `b` as
        // signed, so 255 * 127 * 2 clamps to i16::MAX and 255 * -128 * 2
        // clamps to i16::MIN.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        // Small products round to 0; 5 * 32767 rounds to 5 and
        // 7 * -32768 yields -7.
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);

        // Test extreme values
        let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        // Negative lanes of `b` negate the matching lane of `a`; the final
        // zero lane forces a zero result.
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        // Covers all three cases: zero in `b` (lane 2), negative in `b`
        // (lanes 5 and 6), and positive in `b` (the remaining lanes).
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        // Positive, negative, positive, and zero lanes in `b`.
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }
}