1use crate::core_arch::{simd::*, x86::*};
4use crate::intrinsics::simd::*;
5
6#[cfg(test)]
7use stdarch_test::assert_instr;
8
9#[stable(feature = "simd_x86", since = "1.27.0")]
12pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
13#[stable(feature = "simd_x86", since = "1.27.0")]
15pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
16#[stable(feature = "simd_x86", since = "1.27.0")]
18pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
19#[stable(feature = "simd_x86", since = "1.27.0")]
21pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
22#[stable(feature = "simd_x86", since = "1.27.0")]
24pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
25#[stable(feature = "simd_x86", since = "1.27.0")]
27pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
28#[stable(feature = "simd_x86", since = "1.27.0")]
30pub const _MM_FROUND_NO_EXC: i32 = 0x08;
31#[stable(feature = "simd_x86", since = "1.27.0")]
33pub const _MM_FROUND_NINT: i32 = 0x00;
34#[stable(feature = "simd_x86", since = "1.27.0")]
36pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF;
37#[stable(feature = "simd_x86", since = "1.27.0")]
39pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF;
40#[stable(feature = "simd_x86", since = "1.27.0")]
42pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO;
43#[stable(feature = "simd_x86", since = "1.27.0")]
46pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION;
47#[stable(feature = "simd_x86", since = "1.27.0")]
49pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION;
50
51#[inline]
59#[target_feature(enable = "sse4.1")]
60#[cfg_attr(test, assert_instr(pblendvb))]
61#[stable(feature = "simd_x86", since = "1.27.0")]
62pub fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
63 unsafe {
64 let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO);
65 transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
66 }
67}
68
69#[inline]
77#[target_feature(enable = "sse4.1")]
78#[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))]
79#[rustc_legacy_const_generics(2)]
80#[stable(feature = "simd_x86", since = "1.27.0")]
81pub fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
82 static_assert_uimm_bits!(IMM8, 8);
83 unsafe {
84 transmute::<i16x8, _>(simd_shuffle!(
85 a.as_i16x8(),
86 b.as_i16x8(),
87 [
88 [0, 8][IMM8 as usize & 1],
89 [1, 9][(IMM8 >> 1) as usize & 1],
90 [2, 10][(IMM8 >> 2) as usize & 1],
91 [3, 11][(IMM8 >> 3) as usize & 1],
92 [4, 12][(IMM8 >> 4) as usize & 1],
93 [5, 13][(IMM8 >> 5) as usize & 1],
94 [6, 14][(IMM8 >> 6) as usize & 1],
95 [7, 15][(IMM8 >> 7) as usize & 1],
96 ]
97 ))
98 }
99}
100
101#[inline]
106#[target_feature(enable = "sse4.1")]
107#[cfg_attr(test, assert_instr(blendvpd))]
108#[stable(feature = "simd_x86", since = "1.27.0")]
109pub fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
110 unsafe {
111 let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO);
112 transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
113 }
114}
115
116#[inline]
121#[target_feature(enable = "sse4.1")]
122#[cfg_attr(test, assert_instr(blendvps))]
123#[stable(feature = "simd_x86", since = "1.27.0")]
124pub fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
125 unsafe {
126 let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO);
127 transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
128 }
129}
130
131#[inline]
136#[target_feature(enable = "sse4.1")]
137#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
141#[rustc_legacy_const_generics(2)]
142#[stable(feature = "simd_x86", since = "1.27.0")]
143pub fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
144 static_assert_uimm_bits!(IMM2, 2);
145 unsafe {
146 transmute::<f64x2, _>(simd_shuffle!(
147 a.as_f64x2(),
148 b.as_f64x2(),
149 [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
150 ))
151 }
152}
153
154#[inline]
159#[target_feature(enable = "sse4.1")]
160#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
161#[rustc_legacy_const_generics(2)]
162#[stable(feature = "simd_x86", since = "1.27.0")]
163pub fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
164 static_assert_uimm_bits!(IMM4, 4);
165 unsafe {
166 transmute::<f32x4, _>(simd_shuffle!(
167 a.as_f32x4(),
168 b.as_f32x4(),
169 [
170 [0, 4][IMM4 as usize & 1],
171 [1, 5][(IMM4 >> 1) as usize & 1],
172 [2, 6][(IMM4 >> 2) as usize & 1],
173 [3, 7][(IMM4 >> 3) as usize & 1],
174 ]
175 ))
176 }
177}
178
179#[inline]
206#[target_feature(enable = "sse4.1")]
207#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(extractps, IMM8 = 0))]
208#[rustc_legacy_const_generics(1)]
209#[stable(feature = "simd_x86", since = "1.27.0")]
210pub fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
211 static_assert_uimm_bits!(IMM8, 2);
212 unsafe { simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 }
213}
214
215#[inline]
222#[target_feature(enable = "sse4.1")]
223#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
224#[rustc_legacy_const_generics(1)]
225#[stable(feature = "simd_x86", since = "1.27.0")]
226pub fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
227 static_assert_uimm_bits!(IMM8, 4);
228 unsafe { simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 }
229}
230
231#[inline]
235#[target_feature(enable = "sse4.1")]
236#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(extractps, IMM8 = 1))]
237#[rustc_legacy_const_generics(1)]
238#[stable(feature = "simd_x86", since = "1.27.0")]
239pub fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
240 static_assert_uimm_bits!(IMM8, 2);
241 unsafe { simd_extract!(a.as_i32x4(), IMM8 as u32, i32) }
242}
243
244#[inline]
269#[target_feature(enable = "sse4.1")]
270#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
271#[rustc_legacy_const_generics(2)]
272#[stable(feature = "simd_x86", since = "1.27.0")]
273pub fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
274 static_assert_uimm_bits!(IMM8, 8);
275 unsafe { insertps(a, b, IMM8 as u8) }
276}
277
278#[inline]
283#[target_feature(enable = "sse4.1")]
284#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
285#[rustc_legacy_const_generics(2)]
286#[stable(feature = "simd_x86", since = "1.27.0")]
287pub fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
288 static_assert_uimm_bits!(IMM8, 4);
289 unsafe { transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) }
290}
291
292#[inline]
297#[target_feature(enable = "sse4.1")]
298#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
299#[rustc_legacy_const_generics(2)]
300#[stable(feature = "simd_x86", since = "1.27.0")]
301pub fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
302 static_assert_uimm_bits!(IMM8, 2);
303 unsafe { transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) }
304}
305
306#[inline]
311#[target_feature(enable = "sse4.1")]
312#[cfg_attr(test, assert_instr(pmaxsb))]
313#[stable(feature = "simd_x86", since = "1.27.0")]
314pub fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
315 unsafe {
316 let a = a.as_i8x16();
317 let b = b.as_i8x16();
318 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
319 }
320}
321
322#[inline]
327#[target_feature(enable = "sse4.1")]
328#[cfg_attr(test, assert_instr(pmaxuw))]
329#[stable(feature = "simd_x86", since = "1.27.0")]
330pub fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
331 unsafe {
332 let a = a.as_u16x8();
333 let b = b.as_u16x8();
334 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
335 }
336}
337
338#[inline]
343#[target_feature(enable = "sse4.1")]
344#[cfg_attr(test, assert_instr(pmaxsd))]
345#[stable(feature = "simd_x86", since = "1.27.0")]
346pub fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
347 unsafe {
348 let a = a.as_i32x4();
349 let b = b.as_i32x4();
350 transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
351 }
352}
353
354#[inline]
359#[target_feature(enable = "sse4.1")]
360#[cfg_attr(test, assert_instr(pmaxud))]
361#[stable(feature = "simd_x86", since = "1.27.0")]
362pub fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
363 unsafe {
364 let a = a.as_u32x4();
365 let b = b.as_u32x4();
366 transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
367 }
368}
369
370#[inline]
375#[target_feature(enable = "sse4.1")]
376#[cfg_attr(test, assert_instr(pminsb))]
377#[stable(feature = "simd_x86", since = "1.27.0")]
378pub fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
379 unsafe {
380 let a = a.as_i8x16();
381 let b = b.as_i8x16();
382 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
383 }
384}
385
386#[inline]
391#[target_feature(enable = "sse4.1")]
392#[cfg_attr(test, assert_instr(pminuw))]
393#[stable(feature = "simd_x86", since = "1.27.0")]
394pub fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
395 unsafe {
396 let a = a.as_u16x8();
397 let b = b.as_u16x8();
398 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
399 }
400}
401
402#[inline]
407#[target_feature(enable = "sse4.1")]
408#[cfg_attr(test, assert_instr(pminsd))]
409#[stable(feature = "simd_x86", since = "1.27.0")]
410pub fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
411 unsafe {
412 let a = a.as_i32x4();
413 let b = b.as_i32x4();
414 transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
415 }
416}
417
418#[inline]
423#[target_feature(enable = "sse4.1")]
424#[cfg_attr(test, assert_instr(pminud))]
425#[stable(feature = "simd_x86", since = "1.27.0")]
426pub fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
427 unsafe {
428 let a = a.as_u32x4();
429 let b = b.as_u32x4();
430 transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
431 }
432}
433
434#[inline]
439#[target_feature(enable = "sse4.1")]
440#[cfg_attr(test, assert_instr(packusdw))]
441#[stable(feature = "simd_x86", since = "1.27.0")]
442pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
443 unsafe { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) }
444}
445
446#[inline]
450#[target_feature(enable = "sse4.1")]
451#[cfg_attr(test, assert_instr(pcmpeqq))]
452#[stable(feature = "simd_x86", since = "1.27.0")]
453pub fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
454 unsafe { transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) }
455}
456
457#[inline]
461#[target_feature(enable = "sse4.1")]
462#[cfg_attr(test, assert_instr(pmovsxbw))]
463#[stable(feature = "simd_x86", since = "1.27.0")]
464pub fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
465 unsafe {
466 let a = a.as_i8x16();
467 let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
468 transmute(simd_cast::<_, i16x8>(a))
469 }
470}
471
472#[inline]
476#[target_feature(enable = "sse4.1")]
477#[cfg_attr(test, assert_instr(pmovsxbd))]
478#[stable(feature = "simd_x86", since = "1.27.0")]
479pub fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
480 unsafe {
481 let a = a.as_i8x16();
482 let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
483 transmute(simd_cast::<_, i32x4>(a))
484 }
485}
486
487#[inline]
492#[target_feature(enable = "sse4.1")]
493#[cfg_attr(test, assert_instr(pmovsxbq))]
494#[stable(feature = "simd_x86", since = "1.27.0")]
495pub fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
496 unsafe {
497 let a = a.as_i8x16();
498 let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
499 transmute(simd_cast::<_, i64x2>(a))
500 }
501}
502
503#[inline]
507#[target_feature(enable = "sse4.1")]
508#[cfg_attr(test, assert_instr(pmovsxwd))]
509#[stable(feature = "simd_x86", since = "1.27.0")]
510pub fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
511 unsafe {
512 let a = a.as_i16x8();
513 let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
514 transmute(simd_cast::<_, i32x4>(a))
515 }
516}
517
518#[inline]
522#[target_feature(enable = "sse4.1")]
523#[cfg_attr(test, assert_instr(pmovsxwq))]
524#[stable(feature = "simd_x86", since = "1.27.0")]
525pub fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
526 unsafe {
527 let a = a.as_i16x8();
528 let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
529 transmute(simd_cast::<_, i64x2>(a))
530 }
531}
532
533#[inline]
537#[target_feature(enable = "sse4.1")]
538#[cfg_attr(test, assert_instr(pmovsxdq))]
539#[stable(feature = "simd_x86", since = "1.27.0")]
540pub fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
541 unsafe {
542 let a = a.as_i32x4();
543 let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
544 transmute(simd_cast::<_, i64x2>(a))
545 }
546}
547
548#[inline]
552#[target_feature(enable = "sse4.1")]
553#[cfg_attr(test, assert_instr(pmovzxbw))]
554#[stable(feature = "simd_x86", since = "1.27.0")]
555pub fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
556 unsafe {
557 let a = a.as_u8x16();
558 let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
559 transmute(simd_cast::<_, i16x8>(a))
560 }
561}
562
563#[inline]
567#[target_feature(enable = "sse4.1")]
568#[cfg_attr(test, assert_instr(pmovzxbd))]
569#[stable(feature = "simd_x86", since = "1.27.0")]
570pub fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
571 unsafe {
572 let a = a.as_u8x16();
573 let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
574 transmute(simd_cast::<_, i32x4>(a))
575 }
576}
577
578#[inline]
582#[target_feature(enable = "sse4.1")]
583#[cfg_attr(test, assert_instr(pmovzxbq))]
584#[stable(feature = "simd_x86", since = "1.27.0")]
585pub fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
586 unsafe {
587 let a = a.as_u8x16();
588 let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
589 transmute(simd_cast::<_, i64x2>(a))
590 }
591}
592
593#[inline]
598#[target_feature(enable = "sse4.1")]
599#[cfg_attr(test, assert_instr(pmovzxwd))]
600#[stable(feature = "simd_x86", since = "1.27.0")]
601pub fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
602 unsafe {
603 let a = a.as_u16x8();
604 let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
605 transmute(simd_cast::<_, i32x4>(a))
606 }
607}
608
609#[inline]
614#[target_feature(enable = "sse4.1")]
615#[cfg_attr(test, assert_instr(pmovzxwq))]
616#[stable(feature = "simd_x86", since = "1.27.0")]
617pub fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
618 unsafe {
619 let a = a.as_u16x8();
620 let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
621 transmute(simd_cast::<_, i64x2>(a))
622 }
623}
624
625#[inline]
630#[target_feature(enable = "sse4.1")]
631#[cfg_attr(test, assert_instr(pmovzxdq))]
632#[stable(feature = "simd_x86", since = "1.27.0")]
633pub fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
634 unsafe {
635 let a = a.as_u32x4();
636 let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
637 transmute(simd_cast::<_, i64x2>(a))
638 }
639}
640
641#[inline]
651#[target_feature(enable = "sse4.1")]
652#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
653#[rustc_legacy_const_generics(2)]
654#[stable(feature = "simd_x86", since = "1.27.0")]
655pub fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
656 unsafe {
657 static_assert_uimm_bits!(IMM8, 8);
658 dppd(a, b, IMM8 as u8)
659 }
660}
661
662#[inline]
672#[target_feature(enable = "sse4.1")]
673#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
674#[rustc_legacy_const_generics(2)]
675#[stable(feature = "simd_x86", since = "1.27.0")]
676pub fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
677 static_assert_uimm_bits!(IMM8, 8);
678 unsafe { dpps(a, b, IMM8 as u8) }
679}
680
681#[inline]
687#[target_feature(enable = "sse4.1")]
688#[cfg_attr(test, assert_instr(roundpd))]
689#[stable(feature = "simd_x86", since = "1.27.0")]
690pub fn _mm_floor_pd(a: __m128d) -> __m128d {
691 unsafe { simd_floor(a) }
692}
693
694#[inline]
700#[target_feature(enable = "sse4.1")]
701#[cfg_attr(test, assert_instr(roundps))]
702#[stable(feature = "simd_x86", since = "1.27.0")]
703pub fn _mm_floor_ps(a: __m128) -> __m128 {
704 unsafe { simd_floor(a) }
705}
706
707#[inline]
715#[target_feature(enable = "sse4.1")]
716#[cfg_attr(test, assert_instr(roundsd))]
717#[stable(feature = "simd_x86", since = "1.27.0")]
718pub fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
719 unsafe { roundsd(a, b, _MM_FROUND_FLOOR) }
720}
721
722#[inline]
730#[target_feature(enable = "sse4.1")]
731#[cfg_attr(test, assert_instr(roundss))]
732#[stable(feature = "simd_x86", since = "1.27.0")]
733pub fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
734 unsafe { roundss(a, b, _MM_FROUND_FLOOR) }
735}
736
737#[inline]
743#[target_feature(enable = "sse4.1")]
744#[cfg_attr(test, assert_instr(roundpd))]
745#[stable(feature = "simd_x86", since = "1.27.0")]
746pub fn _mm_ceil_pd(a: __m128d) -> __m128d {
747 unsafe { simd_ceil(a) }
748}
749
750#[inline]
756#[target_feature(enable = "sse4.1")]
757#[cfg_attr(test, assert_instr(roundps))]
758#[stable(feature = "simd_x86", since = "1.27.0")]
759pub fn _mm_ceil_ps(a: __m128) -> __m128 {
760 unsafe { simd_ceil(a) }
761}
762
763#[inline]
771#[target_feature(enable = "sse4.1")]
772#[cfg_attr(test, assert_instr(roundsd))]
773#[stable(feature = "simd_x86", since = "1.27.0")]
774pub fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
775 unsafe { roundsd(a, b, _MM_FROUND_CEIL) }
776}
777
778#[inline]
786#[target_feature(enable = "sse4.1")]
787#[cfg_attr(test, assert_instr(roundss))]
788#[stable(feature = "simd_x86", since = "1.27.0")]
789pub fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
790 unsafe { roundss(a, b, _MM_FROUND_CEIL) }
791}
792
793#[inline]
806#[target_feature(enable = "sse4.1")]
807#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
808#[rustc_legacy_const_generics(1)]
809#[stable(feature = "simd_x86", since = "1.27.0")]
810pub fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
811 static_assert_uimm_bits!(ROUNDING, 4);
812 unsafe { roundpd(a, ROUNDING) }
813}
814
815#[inline]
828#[target_feature(enable = "sse4.1")]
829#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
830#[rustc_legacy_const_generics(1)]
831#[stable(feature = "simd_x86", since = "1.27.0")]
832pub fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
833 static_assert_uimm_bits!(ROUNDING, 4);
834 unsafe { roundps(a, ROUNDING) }
835}
836
837#[inline]
852#[target_feature(enable = "sse4.1")]
853#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
854#[rustc_legacy_const_generics(2)]
855#[stable(feature = "simd_x86", since = "1.27.0")]
856pub fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
857 static_assert_uimm_bits!(ROUNDING, 4);
858 unsafe { roundsd(a, b, ROUNDING) }
859}
860
861#[inline]
876#[target_feature(enable = "sse4.1")]
877#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
878#[rustc_legacy_const_generics(2)]
879#[stable(feature = "simd_x86", since = "1.27.0")]
880pub fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
881 static_assert_uimm_bits!(ROUNDING, 4);
882 unsafe { roundss(a, b, ROUNDING) }
883}
884
885#[inline]
907#[target_feature(enable = "sse4.1")]
908#[cfg_attr(test, assert_instr(phminposuw))]
909#[stable(feature = "simd_x86", since = "1.27.0")]
910pub fn _mm_minpos_epu16(a: __m128i) -> __m128i {
911 unsafe { transmute(phminposuw(a.as_u16x8())) }
912}
913
914#[inline]
919#[target_feature(enable = "sse4.1")]
920#[cfg_attr(test, assert_instr(pmuldq))]
921#[stable(feature = "simd_x86", since = "1.27.0")]
922pub fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
923 unsafe {
924 let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
925 let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
926 transmute(simd_mul(a, b))
927 }
928}
929
930#[inline]
939#[target_feature(enable = "sse4.1")]
940#[cfg_attr(test, assert_instr(pmulld))]
941#[stable(feature = "simd_x86", since = "1.27.0")]
942pub fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
943 unsafe { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) }
944}
945
946#[inline]
980#[target_feature(enable = "sse4.1")]
981#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
982#[rustc_legacy_const_generics(2)]
983#[stable(feature = "simd_x86", since = "1.27.0")]
984pub fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
985 static_assert_uimm_bits!(IMM8, 3);
986 unsafe { transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8)) }
987}
988
989#[inline]
1005#[target_feature(enable = "sse4.1")]
1006#[cfg_attr(test, assert_instr(ptest))]
1007#[stable(feature = "simd_x86", since = "1.27.0")]
1008pub fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
1009 unsafe { ptestz(a.as_i64x2(), mask.as_i64x2()) }
1010}
1011
1012#[inline]
1028#[target_feature(enable = "sse4.1")]
1029#[cfg_attr(test, assert_instr(ptest))]
1030#[stable(feature = "simd_x86", since = "1.27.0")]
1031pub fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
1032 unsafe { ptestc(a.as_i64x2(), mask.as_i64x2()) }
1033}
1034
1035#[inline]
1051#[target_feature(enable = "sse4.1")]
1052#[cfg_attr(test, assert_instr(ptest))]
1053#[stable(feature = "simd_x86", since = "1.27.0")]
1054pub fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
1055 unsafe { ptestnzc(a.as_i64x2(), mask.as_i64x2()) }
1056}
1057
1058#[inline]
1074#[target_feature(enable = "sse4.1")]
1075#[cfg_attr(test, assert_instr(ptest))]
1076#[stable(feature = "simd_x86", since = "1.27.0")]
1077pub fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
1078 _mm_testz_si128(a, mask)
1079}
1080
1081#[inline]
1095#[target_feature(enable = "sse4.1")]
1096#[cfg_attr(test, assert_instr(pcmpeqd))]
1097#[cfg_attr(test, assert_instr(ptest))]
1098#[stable(feature = "simd_x86", since = "1.27.0")]
1099pub fn _mm_test_all_ones(a: __m128i) -> i32 {
1100 _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
1101}
1102
1103#[inline]
1119#[target_feature(enable = "sse4.1")]
1120#[cfg_attr(test, assert_instr(ptest))]
1121#[stable(feature = "simd_x86", since = "1.27.0")]
1122pub fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
1123 _mm_testnzc_si128(a, mask)
1124}
1125
1126#[inline]
1132#[target_feature(enable = "sse4.1")]
1133#[cfg_attr(test, assert_instr(movntdqa))]
1134#[stable(feature = "simd_x86_updates", since = "1.82.0")]
1135pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i {
1136 let dst: __m128i;
1137 crate::arch::asm!(
1138 vpl!("movntdqa {a}"),
1139 a = out(xmm_reg) dst,
1140 p = in(reg) mem_addr,
1141 options(pure, readonly, nostack, preserves_flags),
1142 );
1143 dst
1144}
1145
1146#[allow(improper_ctypes)]
1147unsafe extern "C" {
1148 #[link_name = "llvm.x86.sse41.insertps"]
1149 fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
1150 #[link_name = "llvm.x86.sse41.packusdw"]
1151 fn packusdw(a: i32x4, b: i32x4) -> u16x8;
1152 #[link_name = "llvm.x86.sse41.dppd"]
1153 fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
1154 #[link_name = "llvm.x86.sse41.dpps"]
1155 fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128;
1156 #[link_name = "llvm.x86.sse41.round.pd"]
1157 fn roundpd(a: __m128d, rounding: i32) -> __m128d;
1158 #[link_name = "llvm.x86.sse41.round.ps"]
1159 fn roundps(a: __m128, rounding: i32) -> __m128;
1160 #[link_name = "llvm.x86.sse41.round.sd"]
1161 fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d;
1162 #[link_name = "llvm.x86.sse41.round.ss"]
1163 fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
1164 #[link_name = "llvm.x86.sse41.phminposuw"]
1165 fn phminposuw(a: u16x8) -> u16x8;
1166 #[link_name = "llvm.x86.sse41.mpsadbw"]
1167 fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
1168 #[link_name = "llvm.x86.sse41.ptestz"]
1169 fn ptestz(a: i64x2, mask: i64x2) -> i32;
1170 #[link_name = "llvm.x86.sse41.ptestc"]
1171 fn ptestc(a: i64x2, mask: i64x2) -> i32;
1172 #[link_name = "llvm.x86.sse41.ptestnzc"]
1173 fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
1174}
1175
1176#[cfg(test)]
1177mod tests {
1178 use crate::core_arch::x86::*;
1179 use std::mem;
1180 use stdarch_test::simd_test;
1181
1182 #[simd_test(enable = "sse4.1")]
1183 unsafe fn test_mm_blendv_epi8() {
1184 #[rustfmt::skip]
1185 let a = _mm_setr_epi8(
1186 0, 1, 2, 3, 4, 5, 6, 7,
1187 8, 9, 10, 11, 12, 13, 14, 15,
1188 );
1189 #[rustfmt::skip]
1190 let b = _mm_setr_epi8(
1191 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
1192 );
1193 #[rustfmt::skip]
1194 let mask = _mm_setr_epi8(
1195 0, -1, 0, -1, 0, -1, 0, -1,
1196 0, -1, 0, -1, 0, -1, 0, -1,
1197 );
1198 #[rustfmt::skip]
1199 let e = _mm_setr_epi8(
1200 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
1201 );
1202 assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e);
1203 }
1204
1205 #[simd_test(enable = "sse4.1")]
1206 unsafe fn test_mm_blendv_pd() {
1207 let a = _mm_set1_pd(0.0);
1208 let b = _mm_set1_pd(1.0);
1209 let mask = transmute(_mm_setr_epi64x(0, -1));
1210 let r = _mm_blendv_pd(a, b, mask);
1211 let e = _mm_setr_pd(0.0, 1.0);
1212 assert_eq_m128d(r, e);
1213 }
1214
1215 #[simd_test(enable = "sse4.1")]
1216 unsafe fn test_mm_blendv_ps() {
1217 let a = _mm_set1_ps(0.0);
1218 let b = _mm_set1_ps(1.0);
1219 let mask = transmute(_mm_setr_epi32(0, -1, 0, -1));
1220 let r = _mm_blendv_ps(a, b, mask);
1221 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1222 assert_eq_m128(r, e);
1223 }
1224
1225 #[simd_test(enable = "sse4.1")]
1226 unsafe fn test_mm_blend_pd() {
1227 let a = _mm_set1_pd(0.0);
1228 let b = _mm_set1_pd(1.0);
1229 let r = _mm_blend_pd::<0b10>(a, b);
1230 let e = _mm_setr_pd(0.0, 1.0);
1231 assert_eq_m128d(r, e);
1232 }
1233
1234 #[simd_test(enable = "sse4.1")]
1235 unsafe fn test_mm_blend_ps() {
1236 let a = _mm_set1_ps(0.0);
1237 let b = _mm_set1_ps(1.0);
1238 let r = _mm_blend_ps::<0b1010>(a, b);
1239 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1240 assert_eq_m128(r, e);
1241 }
1242
1243 #[simd_test(enable = "sse4.1")]
1244 unsafe fn test_mm_blend_epi16() {
1245 let a = _mm_set1_epi16(0);
1246 let b = _mm_set1_epi16(1);
1247 let r = _mm_blend_epi16::<0b1010_1100>(a, b);
1248 let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1);
1249 assert_eq_m128i(r, e);
1250 }
1251
1252 #[simd_test(enable = "sse4.1")]
1253 unsafe fn test_mm_extract_ps() {
1254 let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
1255 let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32);
1256 assert_eq!(r, 1.0);
1257 let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32);
1258 assert_eq!(r, 3.0);
1259 }
1260
1261 #[simd_test(enable = "sse4.1")]
1262 unsafe fn test_mm_extract_epi8() {
1263 #[rustfmt::skip]
1264 let a = _mm_setr_epi8(
1265 -1, 1, 2, 3, 4, 5, 6, 7,
1266 8, 9, 10, 11, 12, 13, 14, 15
1267 );
1268 let r1 = _mm_extract_epi8::<0>(a);
1269 let r2 = _mm_extract_epi8::<3>(a);
1270 assert_eq!(r1, 0xFF);
1271 assert_eq!(r2, 3);
1272 }
1273
1274 #[simd_test(enable = "sse4.1")]
1275 unsafe fn test_mm_extract_epi32() {
1276 let a = _mm_setr_epi32(0, 1, 2, 3);
1277 let r = _mm_extract_epi32::<1>(a);
1278 assert_eq!(r, 1);
1279 let r = _mm_extract_epi32::<3>(a);
1280 assert_eq!(r, 3);
1281 }
1282
1283 #[simd_test(enable = "sse4.1")]
1284 unsafe fn test_mm_insert_ps() {
1285 let a = _mm_set1_ps(1.0);
1286 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1287 let r = _mm_insert_ps::<0b11_00_1100>(a, b);
1288 let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
1289 assert_eq_m128(r, e);
1290
1291 let a = _mm_set1_ps(1.0);
1293 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1294 let r = _mm_insert_ps::<0b11_00_0001>(a, b);
1295 let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
1296 assert_eq_m128(r, e);
1297 }
1298
1299 #[simd_test(enable = "sse4.1")]
1300 unsafe fn test_mm_insert_epi8() {
1301 let a = _mm_set1_epi8(0);
1302 let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1303 let r = _mm_insert_epi8::<1>(a, 32);
1304 assert_eq_m128i(r, e);
1305 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0);
1306 let r = _mm_insert_epi8::<14>(a, 32);
1307 assert_eq_m128i(r, e);
1308 }
1309
1310 #[simd_test(enable = "sse4.1")]
1311 unsafe fn test_mm_insert_epi32() {
1312 let a = _mm_set1_epi32(0);
1313 let e = _mm_setr_epi32(0, 32, 0, 0);
1314 let r = _mm_insert_epi32::<1>(a, 32);
1315 assert_eq_m128i(r, e);
1316 let e = _mm_setr_epi32(0, 0, 0, 32);
1317 let r = _mm_insert_epi32::<3>(a, 32);
1318 assert_eq_m128i(r, e);
1319 }
1320
1321 #[simd_test(enable = "sse4.1")]
1322 unsafe fn test_mm_max_epi8() {
1323 #[rustfmt::skip]
1324 let a = _mm_setr_epi8(
1325 1, 4, 5, 8, 9, 12, 13, 16,
1326 17, 20, 21, 24, 25, 28, 29, 32,
1327 );
1328 #[rustfmt::skip]
1329 let b = _mm_setr_epi8(
1330 2, 3, 6, 7, 10, 11, 14, 15,
1331 18, 19, 22, 23, 26, 27, 30, 31,
1332 );
1333 let r = _mm_max_epi8(a, b);
1334 #[rustfmt::skip]
1335 let e = _mm_setr_epi8(
1336 2, 4, 6, 8, 10, 12, 14, 16,
1337 18, 20, 22, 24, 26, 28, 30, 32,
1338 );
1339 assert_eq_m128i(r, e);
1340 }
1341
1342 #[simd_test(enable = "sse4.1")]
1343 unsafe fn test_mm_max_epu16() {
1344 let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
1345 let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
1346 let r = _mm_max_epu16(a, b);
1347 let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16);
1348 assert_eq_m128i(r, e);
1349 }
1350
1351 #[simd_test(enable = "sse4.1")]
1352 unsafe fn test_mm_max_epi32() {
1353 let a = _mm_setr_epi32(1, 4, 5, 8);
1354 let b = _mm_setr_epi32(2, 3, 6, 7);
1355 let r = _mm_max_epi32(a, b);
1356 let e = _mm_setr_epi32(2, 4, 6, 8);
1357 assert_eq_m128i(r, e);
1358 }
1359
1360 #[simd_test(enable = "sse4.1")]
1361 unsafe fn test_mm_max_epu32() {
1362 let a = _mm_setr_epi32(1, 4, 5, 8);
1363 let b = _mm_setr_epi32(2, 3, 6, 7);
1364 let r = _mm_max_epu32(a, b);
1365 let e = _mm_setr_epi32(2, 4, 6, 8);
1366 assert_eq_m128i(r, e);
1367 }
1368
1369 #[simd_test(enable = "sse4.1")]
1370 unsafe fn test_mm_min_epi8() {
1371 #[rustfmt::skip]
1372 let a = _mm_setr_epi8(
1373 1, 4, 5, 8, 9, 12, 13, 16,
1374 17, 20, 21, 24, 25, 28, 29, 32,
1375 );
1376 #[rustfmt::skip]
1377 let b = _mm_setr_epi8(
1378 2, 3, 6, 7, 10, 11, 14, 15,
1379 18, 19, 22, 23, 26, 27, 30, 31,
1380 );
1381 let r = _mm_min_epi8(a, b);
1382 #[rustfmt::skip]
1383 let e = _mm_setr_epi8(
1384 1, 3, 5, 7, 9, 11, 13, 15,
1385 17, 19, 21, 23, 25, 27, 29, 31,
1386 );
1387 assert_eq_m128i(r, e);
1388
1389 #[rustfmt::skip]
1390 let a = _mm_setr_epi8(
1391 1, -4, -5, 8, -9, -12, 13, -16,
1392 17, 20, 21, 24, 25, 28, 29, 32,
1393 );
1394 #[rustfmt::skip]
1395 let b = _mm_setr_epi8(
1396 2, -3, -6, 7, -10, -11, 14, -15,
1397 18, 19, 22, 23, 26, 27, 30, 31,
1398 );
1399 let r = _mm_min_epi8(a, b);
1400 #[rustfmt::skip]
1401 let e = _mm_setr_epi8(
1402 1, -4, -6, 7, -10, -12, 13, -16,
1403 17, 19, 21, 23, 25, 27, 29, 31,
1404 );
1405 assert_eq_m128i(r, e);
1406 }
1407
1408 #[simd_test(enable = "sse4.1")]
1409 unsafe fn test_mm_min_epu16() {
1410 let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
1411 let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
1412 let r = _mm_min_epu16(a, b);
1413 let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15);
1414 assert_eq_m128i(r, e);
1415 }
1416
1417 #[simd_test(enable = "sse4.1")]
1418 unsafe fn test_mm_min_epi32() {
1419 let a = _mm_setr_epi32(1, 4, 5, 8);
1420 let b = _mm_setr_epi32(2, 3, 6, 7);
1421 let r = _mm_min_epi32(a, b);
1422 let e = _mm_setr_epi32(1, 3, 5, 7);
1423 assert_eq_m128i(r, e);
1424
1425 let a = _mm_setr_epi32(-1, 4, 5, -7);
1426 let b = _mm_setr_epi32(-2, 3, -6, 8);
1427 let r = _mm_min_epi32(a, b);
1428 let e = _mm_setr_epi32(-2, 3, -6, -7);
1429 assert_eq_m128i(r, e);
1430 }
1431
1432 #[simd_test(enable = "sse4.1")]
1433 unsafe fn test_mm_min_epu32() {
1434 let a = _mm_setr_epi32(1, 4, 5, 8);
1435 let b = _mm_setr_epi32(2, 3, 6, 7);
1436 let r = _mm_min_epu32(a, b);
1437 let e = _mm_setr_epi32(1, 3, 5, 7);
1438 assert_eq_m128i(r, e);
1439 }
1440
1441 #[simd_test(enable = "sse4.1")]
1442 unsafe fn test_mm_packus_epi32() {
1443 let a = _mm_setr_epi32(1, 2, 3, 4);
1444 let b = _mm_setr_epi32(-1, -2, -3, -4);
1445 let r = _mm_packus_epi32(a, b);
1446 let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
1447 assert_eq_m128i(r, e);
1448 }
1449
1450 #[simd_test(enable = "sse4.1")]
1451 unsafe fn test_mm_cmpeq_epi64() {
1452 let a = _mm_setr_epi64x(0, 1);
1453 let b = _mm_setr_epi64x(0, 0);
1454 let r = _mm_cmpeq_epi64(a, b);
1455 let e = _mm_setr_epi64x(-1, 0);
1456 assert_eq_m128i(r, e);
1457 }
1458
1459 #[simd_test(enable = "sse4.1")]
1460 unsafe fn test_mm_cvtepi8_epi16() {
1461 let a = _mm_set1_epi8(10);
1462 let r = _mm_cvtepi8_epi16(a);
1463 let e = _mm_set1_epi16(10);
1464 assert_eq_m128i(r, e);
1465 let a = _mm_set1_epi8(-10);
1466 let r = _mm_cvtepi8_epi16(a);
1467 let e = _mm_set1_epi16(-10);
1468 assert_eq_m128i(r, e);
1469 }
1470
1471 #[simd_test(enable = "sse4.1")]
1472 unsafe fn test_mm_cvtepi8_epi32() {
1473 let a = _mm_set1_epi8(10);
1474 let r = _mm_cvtepi8_epi32(a);
1475 let e = _mm_set1_epi32(10);
1476 assert_eq_m128i(r, e);
1477 let a = _mm_set1_epi8(-10);
1478 let r = _mm_cvtepi8_epi32(a);
1479 let e = _mm_set1_epi32(-10);
1480 assert_eq_m128i(r, e);
1481 }
1482
1483 #[simd_test(enable = "sse4.1")]
1484 unsafe fn test_mm_cvtepi8_epi64() {
1485 let a = _mm_set1_epi8(10);
1486 let r = _mm_cvtepi8_epi64(a);
1487 let e = _mm_set1_epi64x(10);
1488 assert_eq_m128i(r, e);
1489 let a = _mm_set1_epi8(-10);
1490 let r = _mm_cvtepi8_epi64(a);
1491 let e = _mm_set1_epi64x(-10);
1492 assert_eq_m128i(r, e);
1493 }
1494
1495 #[simd_test(enable = "sse4.1")]
1496 unsafe fn test_mm_cvtepi16_epi32() {
1497 let a = _mm_set1_epi16(10);
1498 let r = _mm_cvtepi16_epi32(a);
1499 let e = _mm_set1_epi32(10);
1500 assert_eq_m128i(r, e);
1501 let a = _mm_set1_epi16(-10);
1502 let r = _mm_cvtepi16_epi32(a);
1503 let e = _mm_set1_epi32(-10);
1504 assert_eq_m128i(r, e);
1505 }
1506
1507 #[simd_test(enable = "sse4.1")]
1508 unsafe fn test_mm_cvtepi16_epi64() {
1509 let a = _mm_set1_epi16(10);
1510 let r = _mm_cvtepi16_epi64(a);
1511 let e = _mm_set1_epi64x(10);
1512 assert_eq_m128i(r, e);
1513 let a = _mm_set1_epi16(-10);
1514 let r = _mm_cvtepi16_epi64(a);
1515 let e = _mm_set1_epi64x(-10);
1516 assert_eq_m128i(r, e);
1517 }
1518
1519 #[simd_test(enable = "sse4.1")]
1520 unsafe fn test_mm_cvtepi32_epi64() {
1521 let a = _mm_set1_epi32(10);
1522 let r = _mm_cvtepi32_epi64(a);
1523 let e = _mm_set1_epi64x(10);
1524 assert_eq_m128i(r, e);
1525 let a = _mm_set1_epi32(-10);
1526 let r = _mm_cvtepi32_epi64(a);
1527 let e = _mm_set1_epi64x(-10);
1528 assert_eq_m128i(r, e);
1529 }
1530
1531 #[simd_test(enable = "sse4.1")]
1532 unsafe fn test_mm_cvtepu8_epi16() {
1533 let a = _mm_set1_epi8(10);
1534 let r = _mm_cvtepu8_epi16(a);
1535 let e = _mm_set1_epi16(10);
1536 assert_eq_m128i(r, e);
1537 }
1538
1539 #[simd_test(enable = "sse4.1")]
1540 unsafe fn test_mm_cvtepu8_epi32() {
1541 let a = _mm_set1_epi8(10);
1542 let r = _mm_cvtepu8_epi32(a);
1543 let e = _mm_set1_epi32(10);
1544 assert_eq_m128i(r, e);
1545 }
1546
1547 #[simd_test(enable = "sse4.1")]
1548 unsafe fn test_mm_cvtepu8_epi64() {
1549 let a = _mm_set1_epi8(10);
1550 let r = _mm_cvtepu8_epi64(a);
1551 let e = _mm_set1_epi64x(10);
1552 assert_eq_m128i(r, e);
1553 }
1554
1555 #[simd_test(enable = "sse4.1")]
1556 unsafe fn test_mm_cvtepu16_epi32() {
1557 let a = _mm_set1_epi16(10);
1558 let r = _mm_cvtepu16_epi32(a);
1559 let e = _mm_set1_epi32(10);
1560 assert_eq_m128i(r, e);
1561 }
1562
1563 #[simd_test(enable = "sse4.1")]
1564 unsafe fn test_mm_cvtepu16_epi64() {
1565 let a = _mm_set1_epi16(10);
1566 let r = _mm_cvtepu16_epi64(a);
1567 let e = _mm_set1_epi64x(10);
1568 assert_eq_m128i(r, e);
1569 }
1570
1571 #[simd_test(enable = "sse4.1")]
1572 unsafe fn test_mm_cvtepu32_epi64() {
1573 let a = _mm_set1_epi32(10);
1574 let r = _mm_cvtepu32_epi64(a);
1575 let e = _mm_set1_epi64x(10);
1576 assert_eq_m128i(r, e);
1577 }
1578
1579 #[simd_test(enable = "sse4.1")]
1580 unsafe fn test_mm_dp_pd() {
1581 let a = _mm_setr_pd(2.0, 3.0);
1582 let b = _mm_setr_pd(1.0, 4.0);
1583 let e = _mm_setr_pd(14.0, 0.0);
1584 assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
1585 }
1586
1587 #[simd_test(enable = "sse4.1")]
1588 unsafe fn test_mm_dp_ps() {
1589 let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
1590 let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
1591 let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
1592 assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
1593 }
1594
1595 #[simd_test(enable = "sse4.1")]
1596 unsafe fn test_mm_floor_pd() {
1597 let a = _mm_setr_pd(2.5, 4.5);
1598 let r = _mm_floor_pd(a);
1599 let e = _mm_setr_pd(2.0, 4.0);
1600 assert_eq_m128d(r, e);
1601 }
1602
1603 #[simd_test(enable = "sse4.1")]
1604 unsafe fn test_mm_floor_ps() {
1605 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1606 let r = _mm_floor_ps(a);
1607 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1608 assert_eq_m128(r, e);
1609 }
1610
1611 #[simd_test(enable = "sse4.1")]
1612 unsafe fn test_mm_floor_sd() {
1613 let a = _mm_setr_pd(2.5, 4.5);
1614 let b = _mm_setr_pd(-1.5, -3.5);
1615 let r = _mm_floor_sd(a, b);
1616 let e = _mm_setr_pd(-2.0, 4.5);
1617 assert_eq_m128d(r, e);
1618 }
1619
1620 #[simd_test(enable = "sse4.1")]
1621 unsafe fn test_mm_floor_ss() {
1622 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1623 let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
1624 let r = _mm_floor_ss(a, b);
1625 let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
1626 assert_eq_m128(r, e);
1627 }
1628
1629 #[simd_test(enable = "sse4.1")]
1630 unsafe fn test_mm_ceil_pd() {
1631 let a = _mm_setr_pd(1.5, 3.5);
1632 let r = _mm_ceil_pd(a);
1633 let e = _mm_setr_pd(2.0, 4.0);
1634 assert_eq_m128d(r, e);
1635 }
1636
1637 #[simd_test(enable = "sse4.1")]
1638 unsafe fn test_mm_ceil_ps() {
1639 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1640 let r = _mm_ceil_ps(a);
1641 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1642 assert_eq_m128(r, e);
1643 }
1644
1645 #[simd_test(enable = "sse4.1")]
1646 unsafe fn test_mm_ceil_sd() {
1647 let a = _mm_setr_pd(1.5, 3.5);
1648 let b = _mm_setr_pd(-2.5, -4.5);
1649 let r = _mm_ceil_sd(a, b);
1650 let e = _mm_setr_pd(-2.0, 3.5);
1651 assert_eq_m128d(r, e);
1652 }
1653
1654 #[simd_test(enable = "sse4.1")]
1655 unsafe fn test_mm_ceil_ss() {
1656 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1657 let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
1658 let r = _mm_ceil_ss(a, b);
1659 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1660 assert_eq_m128(r, e);
1661 }
1662
1663 #[simd_test(enable = "sse4.1")]
1664 unsafe fn test_mm_round_pd() {
1665 let a = _mm_setr_pd(1.25, 3.75);
1666 let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
1667 let e = _mm_setr_pd(1.0, 4.0);
1668 assert_eq_m128d(r, e);
1669 }
1670
1671 #[simd_test(enable = "sse4.1")]
1672 unsafe fn test_mm_round_ps() {
1673 let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
1674 let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
1675 let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
1676 assert_eq_m128(r, e);
1677 }
1678
1679 #[simd_test(enable = "sse4.1")]
1680 unsafe fn test_mm_round_sd() {
1681 let a = _mm_setr_pd(1.5, 3.5);
1682 let b = _mm_setr_pd(-2.5, -4.5);
1683 let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
1684 let e = _mm_setr_pd(-2.0, 3.5);
1685 assert_eq_m128d(r, e);
1686
1687 let a = _mm_setr_pd(1.5, 3.5);
1688 let b = _mm_setr_pd(-2.5, -4.5);
1689 let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
1690 let e = _mm_setr_pd(-3.0, 3.5);
1691 assert_eq_m128d(r, e);
1692
1693 let a = _mm_setr_pd(1.5, 3.5);
1694 let b = _mm_setr_pd(-2.5, -4.5);
1695 let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
1696 let e = _mm_setr_pd(-2.0, 3.5);
1697 assert_eq_m128d(r, e);
1698
1699 let a = _mm_setr_pd(1.5, 3.5);
1700 let b = _mm_setr_pd(-2.5, -4.5);
1701 let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
1702 let e = _mm_setr_pd(-2.0, 3.5);
1703 assert_eq_m128d(r, e);
1704 }
1705
1706 #[simd_test(enable = "sse4.1")]
1707 unsafe fn test_mm_round_ss() {
1708 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1709 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1710 let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
1711 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1712 assert_eq_m128(r, e);
1713
1714 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1715 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1716 let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
1717 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1718 assert_eq_m128(r, e);
1719
1720 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1721 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1722 let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
1723 let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
1724 assert_eq_m128(r, e);
1725
1726 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1727 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1728 let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
1729 let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
1730 assert_eq_m128(r, e);
1731 }
1732
1733 #[simd_test(enable = "sse4.1")]
1734 unsafe fn test_mm_minpos_epu16_1() {
1735 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
1736 let r = _mm_minpos_epu16(a);
1737 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1738 assert_eq_m128i(r, e);
1739 }
1740
1741 #[simd_test(enable = "sse4.1")]
1742 unsafe fn test_mm_minpos_epu16_2() {
1743 let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
1744 let r = _mm_minpos_epu16(a);
1745 let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
1746 assert_eq_m128i(r, e);
1747 }
1748
1749 #[simd_test(enable = "sse4.1")]
1750 unsafe fn test_mm_minpos_epu16_3() {
1751 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
1753 let r = _mm_minpos_epu16(a);
1754 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1755 assert_eq_m128i(r, e);
1756 }
1757
1758 #[simd_test(enable = "sse4.1")]
1759 unsafe fn test_mm_mul_epi32() {
1760 {
1761 let a = _mm_setr_epi32(1, 1, 1, 1);
1762 let b = _mm_setr_epi32(1, 2, 3, 4);
1763 let r = _mm_mul_epi32(a, b);
1764 let e = _mm_setr_epi64x(1, 3);
1765 assert_eq_m128i(r, e);
1766 }
1767 {
1768 let a = _mm_setr_epi32(15, 2 , 1234567, 4 );
1769 let b = _mm_setr_epi32(
1770 -20, -256, 666666, 666666, );
1773 let r = _mm_mul_epi32(a, b);
1774 let e = _mm_setr_epi64x(-300, 823043843622);
1775 assert_eq_m128i(r, e);
1776 }
1777 }
1778
1779 #[simd_test(enable = "sse4.1")]
1780 unsafe fn test_mm_mullo_epi32() {
1781 {
1782 let a = _mm_setr_epi32(1, 1, 1, 1);
1783 let b = _mm_setr_epi32(1, 2, 3, 4);
1784 let r = _mm_mullo_epi32(a, b);
1785 let e = _mm_setr_epi32(1, 2, 3, 4);
1786 assert_eq_m128i(r, e);
1787 }
1788 {
1789 let a = _mm_setr_epi32(15, -2, 1234567, 99999);
1790 let b = _mm_setr_epi32(-20, -256, 666666, -99999);
1791 let r = _mm_mullo_epi32(a, b);
1792 let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409);
1796 assert_eq_m128i(r, e);
1797 }
1798 }
1799
1800 #[simd_test(enable = "sse4.1")]
1801 unsafe fn test_mm_minpos_epu16() {
1802 let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3);
1803 let r = _mm_minpos_epu16(a);
1804 let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0);
1805 assert_eq_m128i(r, e);
1806 }
1807
1808 #[simd_test(enable = "sse4.1")]
1809 unsafe fn test_mm_mpsadbw_epu8() {
1810 #[rustfmt::skip]
1811 let a = _mm_setr_epi8(
1812 0, 1, 2, 3, 4, 5, 6, 7,
1813 8, 9, 10, 11, 12, 13, 14, 15,
1814 );
1815
1816 let r = _mm_mpsadbw_epu8::<0b000>(a, a);
1817 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1818 assert_eq_m128i(r, e);
1819
1820 let r = _mm_mpsadbw_epu8::<0b001>(a, a);
1821 let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
1822 assert_eq_m128i(r, e);
1823
1824 let r = _mm_mpsadbw_epu8::<0b100>(a, a);
1825 let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
1826 assert_eq_m128i(r, e);
1827
1828 let r = _mm_mpsadbw_epu8::<0b101>(a, a);
1829 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1830 assert_eq_m128i(r, e);
1831
1832 let r = _mm_mpsadbw_epu8::<0b111>(a, a);
1833 let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
1834 assert_eq_m128i(r, e);
1835 }
1836
1837 #[simd_test(enable = "sse4.1")]
1838 unsafe fn test_mm_testz_si128() {
1839 let a = _mm_set1_epi8(1);
1840 let mask = _mm_set1_epi8(0);
1841 let r = _mm_testz_si128(a, mask);
1842 assert_eq!(r, 1);
1843 let a = _mm_set1_epi8(0b101);
1844 let mask = _mm_set1_epi8(0b110);
1845 let r = _mm_testz_si128(a, mask);
1846 assert_eq!(r, 0);
1847 let a = _mm_set1_epi8(0b011);
1848 let mask = _mm_set1_epi8(0b100);
1849 let r = _mm_testz_si128(a, mask);
1850 assert_eq!(r, 1);
1851 }
1852
1853 #[simd_test(enable = "sse4.1")]
1854 unsafe fn test_mm_testc_si128() {
1855 let a = _mm_set1_epi8(-1);
1856 let mask = _mm_set1_epi8(0);
1857 let r = _mm_testc_si128(a, mask);
1858 assert_eq!(r, 1);
1859 let a = _mm_set1_epi8(0b101);
1860 let mask = _mm_set1_epi8(0b110);
1861 let r = _mm_testc_si128(a, mask);
1862 assert_eq!(r, 0);
1863 let a = _mm_set1_epi8(0b101);
1864 let mask = _mm_set1_epi8(0b100);
1865 let r = _mm_testc_si128(a, mask);
1866 assert_eq!(r, 1);
1867 }
1868
1869 #[simd_test(enable = "sse4.1")]
1870 unsafe fn test_mm_testnzc_si128() {
1871 let a = _mm_set1_epi8(0);
1872 let mask = _mm_set1_epi8(1);
1873 let r = _mm_testnzc_si128(a, mask);
1874 assert_eq!(r, 0);
1875 let a = _mm_set1_epi8(-1);
1876 let mask = _mm_set1_epi8(0);
1877 let r = _mm_testnzc_si128(a, mask);
1878 assert_eq!(r, 0);
1879 let a = _mm_set1_epi8(0b101);
1880 let mask = _mm_set1_epi8(0b110);
1881 let r = _mm_testnzc_si128(a, mask);
1882 assert_eq!(r, 1);
1883 let a = _mm_set1_epi8(0b101);
1884 let mask = _mm_set1_epi8(0b101);
1885 let r = _mm_testnzc_si128(a, mask);
1886 assert_eq!(r, 0);
1887 }
1888
1889 #[simd_test(enable = "sse4.1")]
1890 unsafe fn test_mm_test_all_zeros() {
1891 let a = _mm_set1_epi8(1);
1892 let mask = _mm_set1_epi8(0);
1893 let r = _mm_test_all_zeros(a, mask);
1894 assert_eq!(r, 1);
1895 let a = _mm_set1_epi8(0b101);
1896 let mask = _mm_set1_epi8(0b110);
1897 let r = _mm_test_all_zeros(a, mask);
1898 assert_eq!(r, 0);
1899 let a = _mm_set1_epi8(0b011);
1900 let mask = _mm_set1_epi8(0b100);
1901 let r = _mm_test_all_zeros(a, mask);
1902 assert_eq!(r, 1);
1903 }
1904
1905 #[simd_test(enable = "sse4.1")]
1906 unsafe fn test_mm_test_all_ones() {
1907 let a = _mm_set1_epi8(-1);
1908 let r = _mm_test_all_ones(a);
1909 assert_eq!(r, 1);
1910 let a = _mm_set1_epi8(0b101);
1911 let r = _mm_test_all_ones(a);
1912 assert_eq!(r, 0);
1913 }
1914
1915 #[simd_test(enable = "sse4.1")]
1916 unsafe fn test_mm_test_mix_ones_zeros() {
1917 let a = _mm_set1_epi8(0);
1918 let mask = _mm_set1_epi8(1);
1919 let r = _mm_test_mix_ones_zeros(a, mask);
1920 assert_eq!(r, 0);
1921 let a = _mm_set1_epi8(-1);
1922 let mask = _mm_set1_epi8(0);
1923 let r = _mm_test_mix_ones_zeros(a, mask);
1924 assert_eq!(r, 0);
1925 let a = _mm_set1_epi8(0b101);
1926 let mask = _mm_set1_epi8(0b110);
1927 let r = _mm_test_mix_ones_zeros(a, mask);
1928 assert_eq!(r, 1);
1929 let a = _mm_set1_epi8(0b101);
1930 let mask = _mm_set1_epi8(0b101);
1931 let r = _mm_test_mix_ones_zeros(a, mask);
1932 assert_eq!(r, 0);
1933 }
1934
1935 #[simd_test(enable = "sse4.1")]
1936 unsafe fn test_mm_stream_load_si128() {
1937 let a = _mm_set_epi64x(5, 6);
1938 let r = _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _);
1939 assert_eq_m128i(a, r);
1940 }
1941}