1use crate::{
4 core_arch::{simd::*, x86::*},
5 intrinsics::simd::*,
6};
7
8#[cfg(test)]
9use stdarch_test::assert_instr;
10
11#[inline]
16#[target_feature(enable = "ssse3")]
17#[cfg_attr(test, assert_instr(pabsb))]
18#[stable(feature = "simd_x86", since = "1.27.0")]
19pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
20 unsafe {
21 let a = a.as_i8x16();
22 let zero = i8x16::ZERO;
23 let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
24 transmute(r)
25 }
26}
27
28#[inline]
34#[target_feature(enable = "ssse3")]
35#[cfg_attr(test, assert_instr(pabsw))]
36#[stable(feature = "simd_x86", since = "1.27.0")]
37pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
38 unsafe {
39 let a = a.as_i16x8();
40 let zero = i16x8::ZERO;
41 let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
42 transmute(r)
43 }
44}
45
46#[inline]
52#[target_feature(enable = "ssse3")]
53#[cfg_attr(test, assert_instr(pabsd))]
54#[stable(feature = "simd_x86", since = "1.27.0")]
55pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
56 unsafe {
57 let a = a.as_i32x4();
58 let zero = i32x4::ZERO;
59 let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
60 transmute(r)
61 }
62}
63
64#[inline]
91#[target_feature(enable = "ssse3")]
92#[cfg_attr(test, assert_instr(pshufb))]
93#[stable(feature = "simd_x86", since = "1.27.0")]
94pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
95 unsafe { transmute(pshufb128(a.as_u8x16(), b.as_u8x16())) }
96}
97
98#[inline]
103#[target_feature(enable = "ssse3")]
104#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
105#[rustc_legacy_const_generics(2)]
106#[stable(feature = "simd_x86", since = "1.27.0")]
107pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
108 static_assert_uimm_bits!(IMM8, 8);
109 if IMM8 > 32 {
112 return _mm_setzero_si128();
113 }
114 let (a, b) = if IMM8 > 16 {
117 (_mm_setzero_si128(), a)
118 } else {
119 (a, b)
120 };
121 const fn mask(shift: u32, i: u32) -> u32 {
122 if shift > 32 {
123 i
125 } else if shift > 16 {
126 shift - 16 + i
127 } else {
128 shift + i
129 }
130 }
131 unsafe {
132 let r: i8x16 = simd_shuffle!(
133 b.as_i8x16(),
134 a.as_i8x16(),
135 [
136 mask(IMM8 as u32, 0),
137 mask(IMM8 as u32, 1),
138 mask(IMM8 as u32, 2),
139 mask(IMM8 as u32, 3),
140 mask(IMM8 as u32, 4),
141 mask(IMM8 as u32, 5),
142 mask(IMM8 as u32, 6),
143 mask(IMM8 as u32, 7),
144 mask(IMM8 as u32, 8),
145 mask(IMM8 as u32, 9),
146 mask(IMM8 as u32, 10),
147 mask(IMM8 as u32, 11),
148 mask(IMM8 as u32, 12),
149 mask(IMM8 as u32, 13),
150 mask(IMM8 as u32, 14),
151 mask(IMM8 as u32, 15),
152 ],
153 );
154 transmute(r)
155 }
156}
157
158#[inline]
163#[target_feature(enable = "ssse3")]
164#[cfg_attr(test, assert_instr(phaddw))]
165#[stable(feature = "simd_x86", since = "1.27.0")]
166pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
167 let a = a.as_i16x8();
168 let b = b.as_i16x8();
169 unsafe {
170 let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
171 let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
172 simd_add(even, odd).as_m128i()
173 }
174}
175
176#[inline]
182#[target_feature(enable = "ssse3")]
183#[cfg_attr(test, assert_instr(phaddsw))]
184#[stable(feature = "simd_x86", since = "1.27.0")]
185pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
186 unsafe { transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) }
187}
188
189#[inline]
194#[target_feature(enable = "ssse3")]
195#[cfg_attr(test, assert_instr(phaddd))]
196#[stable(feature = "simd_x86", since = "1.27.0")]
197pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
198 let a = a.as_i32x4();
199 let b = b.as_i32x4();
200 unsafe {
201 let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]);
202 let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]);
203 simd_add(even, odd).as_m128i()
204 }
205}
206
207#[inline]
212#[target_feature(enable = "ssse3")]
213#[cfg_attr(test, assert_instr(phsubw))]
214#[stable(feature = "simd_x86", since = "1.27.0")]
215pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
216 let a = a.as_i16x8();
217 let b = b.as_i16x8();
218 unsafe {
219 let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
220 let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
221 simd_sub(even, odd).as_m128i()
222 }
223}
224
225#[inline]
232#[target_feature(enable = "ssse3")]
233#[cfg_attr(test, assert_instr(phsubsw))]
234#[stable(feature = "simd_x86", since = "1.27.0")]
235pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
236 unsafe { transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) }
237}
238
239#[inline]
244#[target_feature(enable = "ssse3")]
245#[cfg_attr(test, assert_instr(phsubd))]
246#[stable(feature = "simd_x86", since = "1.27.0")]
247pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
248 let a = a.as_i32x4();
249 let b = b.as_i32x4();
250 unsafe {
251 let even: i32x4 = simd_shuffle!(a, b, [0, 2, 4, 6]);
252 let odd: i32x4 = simd_shuffle!(a, b, [1, 3, 5, 7]);
253 simd_sub(even, odd).as_m128i()
254 }
255}
256
257#[inline]
265#[target_feature(enable = "ssse3")]
266#[cfg_attr(test, assert_instr(pmaddubsw))]
267#[stable(feature = "simd_x86", since = "1.27.0")]
268pub fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
269 unsafe { transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) }
270}
271
272#[inline]
278#[target_feature(enable = "ssse3")]
279#[cfg_attr(test, assert_instr(pmulhrsw))]
280#[stable(feature = "simd_x86", since = "1.27.0")]
281pub fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
282 unsafe { transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) }
283}
284
285#[inline]
292#[target_feature(enable = "ssse3")]
293#[cfg_attr(test, assert_instr(psignb))]
294#[stable(feature = "simd_x86", since = "1.27.0")]
295pub fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
296 unsafe { transmute(psignb128(a.as_i8x16(), b.as_i8x16())) }
297}
298
299#[inline]
306#[target_feature(enable = "ssse3")]
307#[cfg_attr(test, assert_instr(psignw))]
308#[stable(feature = "simd_x86", since = "1.27.0")]
309pub fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
310 unsafe { transmute(psignw128(a.as_i16x8(), b.as_i16x8())) }
311}
312
313#[inline]
320#[target_feature(enable = "ssse3")]
321#[cfg_attr(test, assert_instr(psignd))]
322#[stable(feature = "simd_x86", since = "1.27.0")]
323pub fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
324 unsafe { transmute(psignd128(a.as_i32x4(), b.as_i32x4())) }
325}
326
327#[allow(improper_ctypes)]
328unsafe extern "C" {
329 #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
330 fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
331
332 #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
333 fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
334
335 #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
336 fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
337
338 #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
339 fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
340
341 #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
342 fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
343
344 #[link_name = "llvm.x86.ssse3.psign.b.128"]
345 fn psignb128(a: i8x16, b: i8x16) -> i8x16;
346
347 #[link_name = "llvm.x86.ssse3.psign.w.128"]
348 fn psignw128(a: i16x8, b: i16x8) -> i16x8;
349
350 #[link_name = "llvm.x86.ssse3.psign.d.128"]
351 fn psignd128(a: i32x4, b: i32x4) -> i32x4;
352}
353
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Every lane holds -5, so every lane of the result must hold 5.
    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let got = _mm_abs_epi8(_mm_set1_epi8(-5));
        let want = _mm_set1_epi8(5);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let got = _mm_abs_epi16(_mm_set1_epi16(-5));
        let want = _mm_set1_epi16(5);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let got = _mm_abs_epi32(_mm_set1_epi32(-5));
        let want = _mm_set1_epi32(5);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // The second selector has its top bit set and must yield a zero byte.
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let want = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let got = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(got, want);

        // Indices above 15 wrap around: adding 32 to every selector must not
        // change the outcome.
        let b = _mm_add_epi8(b, _mm_set1_epi8(32));
        let got = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );

        // Shift larger than both vectors combined: everything is zero.
        let got = _mm_alignr_epi8::<33>(a, b);
        assert_eq_m128i(got, _mm_set1_epi8(0));

        // Shift of 17: `b` is gone entirely and one zero byte enters from the
        // top of `a`.
        let got = _mm_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let want = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(got, want);

        // Shift of exactly one vector: the result is `a`.
        let got = _mm_alignr_epi8::<16>(a, b);
        assert_eq_m128i(got, a);

        // Shift of 15: the last byte of `b` followed by the first 15 of `a`.
        let got = _mm_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let want = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(got, want);

        // No shift at all: the result is `b`.
        let got = _mm_alignr_epi8::<0>(a, b);
        assert_eq_m128i(got, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let want = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let got = _mm_hadd_epi16(a, b);
        assert_eq_m128i(got, want);

        // Sums wrap around on overflow.
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let want = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let got = _mm_hadd_epi16(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let want = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let got = _mm_hadds_epi16(a, b);
        assert_eq_m128i(got, want);

        // Sums saturate on overflow.
        let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4);
        let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4);
        let want = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let got = _mm_hadds_epi16(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let want = _mm_setr_epi32(3, 7, 132, 7);
        let got = _mm_hadd_epi32(a, b);
        assert_eq_m128i(got, want);

        // Sums wrap around on overflow.
        let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2);
        let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2);
        let want = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let got = _mm_hadd_epi32(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let want = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let got = _mm_hsub_epi16(a, b);
        assert_eq_m128i(got, want);

        // Differences wrap around on overflow.
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let want = _mm_setr_epi16(
            i16::MIN,
            i16::MIN + 1,
            i16::MIN + 2,
            i16::MIN + 3,
            i16::MAX,
            i16::MAX - 1,
            i16::MAX - 2,
            i16::MAX - 3,
        );
        let got = _mm_hsub_epi16(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let want = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let got = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(got, want);

        // Differences saturate on overflow.
        let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4);
        let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4);
        let want = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MIN,
        );
        let got = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let want = _mm_setr_epi32(-1, -1, -124, 1);
        let got = _mm_hsub_epi32(a, b);
        assert_eq_m128i(got, want);

        // Differences wrap around on overflow.
        let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2);
        let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2);
        let want = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1);
        let got = _mm_hsub_epi32(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let want = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let got = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(got, want);

        // Widening and saturation: `a` is read as unsigned bytes, `b` as
        // signed bytes, and the pair sums clamp to the i16 range.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8,
            100, 100,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            i8::MAX, i8::MAX,
            i8::MAX, i8::MIN,
            i8::MIN, i8::MIN,
            50, 15,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
        );
        let want = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0);
        let got = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let want = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let got = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(got, want);

        // Extreme operand values.
        let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0);
        let want = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0);
        let got = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        // Mix of positive, negative and zero sign sources.
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let want = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let got = _mm_sign_epi8(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let want = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let got = _mm_sign_epi16(a, b);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let want = _mm_setr_epi32(-1, -2, 3, 0);
        let got = _mm_sign_epi32(a, b);
        assert_eq_m128i(got, want);
    }
}