1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
13#[inline]
20#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21#[stable(feature = "simd_x86", since = "1.27.0")]
22pub fn _mm_pause() {
23 unsafe { pause() }
26}
27
28#[inline]
33#[target_feature(enable = "sse2")]
34#[cfg_attr(test, assert_instr(clflush))]
35#[stable(feature = "simd_x86", since = "1.27.0")]
36pub unsafe fn _mm_clflush(p: *const u8) {
37 clflush(p)
38}
39
40#[inline]
49#[target_feature(enable = "sse2")]
50#[cfg_attr(test, assert_instr(lfence))]
51#[stable(feature = "simd_x86", since = "1.27.0")]
52pub fn _mm_lfence() {
53 unsafe { lfence() }
54}
55
56#[inline]
65#[target_feature(enable = "sse2")]
66#[cfg_attr(test, assert_instr(mfence))]
67#[stable(feature = "simd_x86", since = "1.27.0")]
68pub fn _mm_mfence() {
69 unsafe { mfence() }
70}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80 unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
81}
82
83#[inline]
87#[target_feature(enable = "sse2")]
88#[cfg_attr(test, assert_instr(paddw))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91 unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
92}
93
94#[inline]
98#[target_feature(enable = "sse2")]
99#[cfg_attr(test, assert_instr(paddd))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102 unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
103}
104
105#[inline]
109#[target_feature(enable = "sse2")]
110#[cfg_attr(test, assert_instr(paddq))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113 unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
114}
115
116#[inline]
120#[target_feature(enable = "sse2")]
121#[cfg_attr(test, assert_instr(paddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124 unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
125}
126
127#[inline]
131#[target_feature(enable = "sse2")]
132#[cfg_attr(test, assert_instr(paddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135 unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
136}
137
138#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146 unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
147}
148
149#[inline]
153#[target_feature(enable = "sse2")]
154#[cfg_attr(test, assert_instr(paddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157 unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
158}
159
160#[inline]
164#[target_feature(enable = "sse2")]
165#[cfg_attr(test, assert_instr(pavgb))]
166#[stable(feature = "simd_x86", since = "1.27.0")]
167pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
168 unsafe {
169 let a = simd_cast::<_, u16x16>(a.as_u8x16());
170 let b = simd_cast::<_, u16x16>(b.as_u8x16());
171 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
172 transmute(simd_cast::<_, u8x16>(r))
173 }
174}
175
176#[inline]
180#[target_feature(enable = "sse2")]
181#[cfg_attr(test, assert_instr(pavgw))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
184 unsafe {
185 let a = simd_cast::<_, u32x8>(a.as_u16x8());
186 let b = simd_cast::<_, u32x8>(b.as_u16x8());
187 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
188 transmute(simd_cast::<_, u16x8>(r))
189 }
190}
191
192#[inline]
200#[target_feature(enable = "sse2")]
201#[cfg_attr(test, assert_instr(pmaddwd))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
204 unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
217}
218
219#[inline]
224#[target_feature(enable = "sse2")]
225#[cfg_attr(test, assert_instr(pmaxsw))]
226#[stable(feature = "simd_x86", since = "1.27.0")]
227pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
228 unsafe {
229 let a = a.as_i16x8();
230 let b = b.as_i16x8();
231 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
232 }
233}
234
235#[inline]
240#[target_feature(enable = "sse2")]
241#[cfg_attr(test, assert_instr(pmaxub))]
242#[stable(feature = "simd_x86", since = "1.27.0")]
243pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
244 unsafe {
245 let a = a.as_u8x16();
246 let b = b.as_u8x16();
247 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
248 }
249}
250
251#[inline]
256#[target_feature(enable = "sse2")]
257#[cfg_attr(test, assert_instr(pminsw))]
258#[stable(feature = "simd_x86", since = "1.27.0")]
259pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
260 unsafe {
261 let a = a.as_i16x8();
262 let b = b.as_i16x8();
263 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
264 }
265}
266
267#[inline]
272#[target_feature(enable = "sse2")]
273#[cfg_attr(test, assert_instr(pminub))]
274#[stable(feature = "simd_x86", since = "1.27.0")]
275pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
276 unsafe {
277 let a = a.as_u8x16();
278 let b = b.as_u8x16();
279 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
280 }
281}
282
283#[inline]
290#[target_feature(enable = "sse2")]
291#[cfg_attr(test, assert_instr(pmulhw))]
292#[stable(feature = "simd_x86", since = "1.27.0")]
293pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
294 unsafe {
295 let a = simd_cast::<_, i32x8>(a.as_i16x8());
296 let b = simd_cast::<_, i32x8>(b.as_i16x8());
297 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
298 transmute(simd_cast::<i32x8, i16x8>(r))
299 }
300}
301
302#[inline]
309#[target_feature(enable = "sse2")]
310#[cfg_attr(test, assert_instr(pmulhuw))]
311#[stable(feature = "simd_x86", since = "1.27.0")]
312pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
313 unsafe {
314 let a = simd_cast::<_, u32x8>(a.as_u16x8());
315 let b = simd_cast::<_, u32x8>(b.as_u16x8());
316 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
317 transmute(simd_cast::<u32x8, u16x8>(r))
318 }
319}
320
321#[inline]
328#[target_feature(enable = "sse2")]
329#[cfg_attr(test, assert_instr(pmullw))]
330#[stable(feature = "simd_x86", since = "1.27.0")]
331pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
332 unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
333}
334
335#[inline]
342#[target_feature(enable = "sse2")]
343#[cfg_attr(test, assert_instr(pmuludq))]
344#[stable(feature = "simd_x86", since = "1.27.0")]
345pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
346 unsafe {
347 let a = a.as_u64x2();
348 let b = b.as_u64x2();
349 let mask = u64x2::splat(u32::MAX.into());
350 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
351 }
352}
353
354#[inline]
363#[target_feature(enable = "sse2")]
364#[cfg_attr(test, assert_instr(psadbw))]
365#[stable(feature = "simd_x86", since = "1.27.0")]
366pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
367 unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
368}
369
370#[inline]
374#[target_feature(enable = "sse2")]
375#[cfg_attr(test, assert_instr(psubb))]
376#[stable(feature = "simd_x86", since = "1.27.0")]
377pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
378 unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
379}
380
381#[inline]
385#[target_feature(enable = "sse2")]
386#[cfg_attr(test, assert_instr(psubw))]
387#[stable(feature = "simd_x86", since = "1.27.0")]
388pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
389 unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
390}
391
392#[inline]
396#[target_feature(enable = "sse2")]
397#[cfg_attr(test, assert_instr(psubd))]
398#[stable(feature = "simd_x86", since = "1.27.0")]
399pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
400 unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
401}
402
403#[inline]
407#[target_feature(enable = "sse2")]
408#[cfg_attr(test, assert_instr(psubq))]
409#[stable(feature = "simd_x86", since = "1.27.0")]
410pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
411 unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
412}
413
414#[inline]
419#[target_feature(enable = "sse2")]
420#[cfg_attr(test, assert_instr(psubsb))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
423 unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
424}
425
426#[inline]
431#[target_feature(enable = "sse2")]
432#[cfg_attr(test, assert_instr(psubsw))]
433#[stable(feature = "simd_x86", since = "1.27.0")]
434pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
435 unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
436}
437
438#[inline]
443#[target_feature(enable = "sse2")]
444#[cfg_attr(test, assert_instr(psubusb))]
445#[stable(feature = "simd_x86", since = "1.27.0")]
446pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
447 unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
448}
449
450#[inline]
455#[target_feature(enable = "sse2")]
456#[cfg_attr(test, assert_instr(psubusw))]
457#[stable(feature = "simd_x86", since = "1.27.0")]
458pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
459 unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
460}
461
462#[inline]
466#[target_feature(enable = "sse2")]
467#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
468#[rustc_legacy_const_generics(1)]
469#[stable(feature = "simd_x86", since = "1.27.0")]
470pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
471 static_assert_uimm_bits!(IMM8, 8);
472 unsafe { _mm_slli_si128_impl::<IMM8>(a) }
473}
474
475#[inline]
478#[target_feature(enable = "sse2")]
479unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
480 const fn mask(shift: i32, i: u32) -> u32 {
481 let shift = shift as u32 & 0xff;
482 if shift > 15 { i } else { 16 - shift + i }
483 }
484 transmute::<i8x16, _>(simd_shuffle!(
485 i8x16::ZERO,
486 a.as_i8x16(),
487 [
488 mask(IMM8, 0),
489 mask(IMM8, 1),
490 mask(IMM8, 2),
491 mask(IMM8, 3),
492 mask(IMM8, 4),
493 mask(IMM8, 5),
494 mask(IMM8, 6),
495 mask(IMM8, 7),
496 mask(IMM8, 8),
497 mask(IMM8, 9),
498 mask(IMM8, 10),
499 mask(IMM8, 11),
500 mask(IMM8, 12),
501 mask(IMM8, 13),
502 mask(IMM8, 14),
503 mask(IMM8, 15),
504 ],
505 ))
506}
507
508#[inline]
512#[target_feature(enable = "sse2")]
513#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
514#[rustc_legacy_const_generics(1)]
515#[stable(feature = "simd_x86", since = "1.27.0")]
516pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
517 unsafe {
518 static_assert_uimm_bits!(IMM8, 8);
519 _mm_slli_si128_impl::<IMM8>(a)
520 }
521}
522
523#[inline]
527#[target_feature(enable = "sse2")]
528#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
529#[rustc_legacy_const_generics(1)]
530#[stable(feature = "simd_x86", since = "1.27.0")]
531pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
532 unsafe {
533 static_assert_uimm_bits!(IMM8, 8);
534 _mm_srli_si128_impl::<IMM8>(a)
535 }
536}
537
538#[inline]
542#[target_feature(enable = "sse2")]
543#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
544#[rustc_legacy_const_generics(1)]
545#[stable(feature = "simd_x86", since = "1.27.0")]
546pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
547 static_assert_uimm_bits!(IMM8, 8);
548 unsafe {
549 if IMM8 >= 16 {
550 _mm_setzero_si128()
551 } else {
552 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
553 }
554 }
555}
556
557#[inline]
562#[target_feature(enable = "sse2")]
563#[cfg_attr(test, assert_instr(psllw))]
564#[stable(feature = "simd_x86", since = "1.27.0")]
565pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
566 unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
567}
568
569#[inline]
573#[target_feature(enable = "sse2")]
574#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
575#[rustc_legacy_const_generics(1)]
576#[stable(feature = "simd_x86", since = "1.27.0")]
577pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
578 static_assert_uimm_bits!(IMM8, 8);
579 unsafe {
580 if IMM8 >= 32 {
581 _mm_setzero_si128()
582 } else {
583 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
584 }
585 }
586}
587
588#[inline]
593#[target_feature(enable = "sse2")]
594#[cfg_attr(test, assert_instr(pslld))]
595#[stable(feature = "simd_x86", since = "1.27.0")]
596pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
597 unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
598}
599
600#[inline]
604#[target_feature(enable = "sse2")]
605#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
606#[rustc_legacy_const_generics(1)]
607#[stable(feature = "simd_x86", since = "1.27.0")]
608pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
609 static_assert_uimm_bits!(IMM8, 8);
610 unsafe {
611 if IMM8 >= 64 {
612 _mm_setzero_si128()
613 } else {
614 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
615 }
616 }
617}
618
619#[inline]
624#[target_feature(enable = "sse2")]
625#[cfg_attr(test, assert_instr(psllq))]
626#[stable(feature = "simd_x86", since = "1.27.0")]
627pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
628 unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
629}
630
631#[inline]
636#[target_feature(enable = "sse2")]
637#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
638#[rustc_legacy_const_generics(1)]
639#[stable(feature = "simd_x86", since = "1.27.0")]
640pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
641 static_assert_uimm_bits!(IMM8, 8);
642 unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
643}
644
645#[inline]
650#[target_feature(enable = "sse2")]
651#[cfg_attr(test, assert_instr(psraw))]
652#[stable(feature = "simd_x86", since = "1.27.0")]
653pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
654 unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
655}
656
657#[inline]
662#[target_feature(enable = "sse2")]
663#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
664#[rustc_legacy_const_generics(1)]
665#[stable(feature = "simd_x86", since = "1.27.0")]
666pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
667 static_assert_uimm_bits!(IMM8, 8);
668 unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
669}
670
671#[inline]
676#[target_feature(enable = "sse2")]
677#[cfg_attr(test, assert_instr(psrad))]
678#[stable(feature = "simd_x86", since = "1.27.0")]
679pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
680 unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
681}
682
683#[inline]
687#[target_feature(enable = "sse2")]
688#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
689#[rustc_legacy_const_generics(1)]
690#[stable(feature = "simd_x86", since = "1.27.0")]
691pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
692 static_assert_uimm_bits!(IMM8, 8);
693 unsafe { _mm_srli_si128_impl::<IMM8>(a) }
694}
695
696#[inline]
699#[target_feature(enable = "sse2")]
700unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
701 const fn mask(shift: i32, i: u32) -> u32 {
702 if (shift as u32) > 15 {
703 i + 16
704 } else {
705 i + (shift as u32)
706 }
707 }
708 let x: i8x16 = simd_shuffle!(
709 a.as_i8x16(),
710 i8x16::ZERO,
711 [
712 mask(IMM8, 0),
713 mask(IMM8, 1),
714 mask(IMM8, 2),
715 mask(IMM8, 3),
716 mask(IMM8, 4),
717 mask(IMM8, 5),
718 mask(IMM8, 6),
719 mask(IMM8, 7),
720 mask(IMM8, 8),
721 mask(IMM8, 9),
722 mask(IMM8, 10),
723 mask(IMM8, 11),
724 mask(IMM8, 12),
725 mask(IMM8, 13),
726 mask(IMM8, 14),
727 mask(IMM8, 15),
728 ],
729 );
730 transmute(x)
731}
732
733#[inline]
738#[target_feature(enable = "sse2")]
739#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
740#[rustc_legacy_const_generics(1)]
741#[stable(feature = "simd_x86", since = "1.27.0")]
742pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
743 static_assert_uimm_bits!(IMM8, 8);
744 unsafe {
745 if IMM8 >= 16 {
746 _mm_setzero_si128()
747 } else {
748 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
749 }
750 }
751}
752
753#[inline]
758#[target_feature(enable = "sse2")]
759#[cfg_attr(test, assert_instr(psrlw))]
760#[stable(feature = "simd_x86", since = "1.27.0")]
761pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
762 unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
763}
764
765#[inline]
770#[target_feature(enable = "sse2")]
771#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
772#[rustc_legacy_const_generics(1)]
773#[stable(feature = "simd_x86", since = "1.27.0")]
774pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
775 static_assert_uimm_bits!(IMM8, 8);
776 unsafe {
777 if IMM8 >= 32 {
778 _mm_setzero_si128()
779 } else {
780 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
781 }
782 }
783}
784
785#[inline]
790#[target_feature(enable = "sse2")]
791#[cfg_attr(test, assert_instr(psrld))]
792#[stable(feature = "simd_x86", since = "1.27.0")]
793pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
794 unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
795}
796
797#[inline]
802#[target_feature(enable = "sse2")]
803#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
804#[rustc_legacy_const_generics(1)]
805#[stable(feature = "simd_x86", since = "1.27.0")]
806pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
807 static_assert_uimm_bits!(IMM8, 8);
808 unsafe {
809 if IMM8 >= 64 {
810 _mm_setzero_si128()
811 } else {
812 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
813 }
814 }
815}
816
817#[inline]
822#[target_feature(enable = "sse2")]
823#[cfg_attr(test, assert_instr(psrlq))]
824#[stable(feature = "simd_x86", since = "1.27.0")]
825pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
826 unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
827}
828
829#[inline]
834#[target_feature(enable = "sse2")]
835#[cfg_attr(test, assert_instr(andps))]
836#[stable(feature = "simd_x86", since = "1.27.0")]
837pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
838 unsafe { simd_and(a, b) }
839}
840
841#[inline]
846#[target_feature(enable = "sse2")]
847#[cfg_attr(test, assert_instr(andnps))]
848#[stable(feature = "simd_x86", since = "1.27.0")]
849pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
850 unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
851}
852
853#[inline]
858#[target_feature(enable = "sse2")]
859#[cfg_attr(test, assert_instr(orps))]
860#[stable(feature = "simd_x86", since = "1.27.0")]
861pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
862 unsafe { simd_or(a, b) }
863}
864
865#[inline]
870#[target_feature(enable = "sse2")]
871#[cfg_attr(test, assert_instr(xorps))]
872#[stable(feature = "simd_x86", since = "1.27.0")]
873pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
874 unsafe { simd_xor(a, b) }
875}
876
877#[inline]
881#[target_feature(enable = "sse2")]
882#[cfg_attr(test, assert_instr(pcmpeqb))]
883#[stable(feature = "simd_x86", since = "1.27.0")]
884pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
885 unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
886}
887
888#[inline]
892#[target_feature(enable = "sse2")]
893#[cfg_attr(test, assert_instr(pcmpeqw))]
894#[stable(feature = "simd_x86", since = "1.27.0")]
895pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
896 unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
897}
898
899#[inline]
903#[target_feature(enable = "sse2")]
904#[cfg_attr(test, assert_instr(pcmpeqd))]
905#[stable(feature = "simd_x86", since = "1.27.0")]
906pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
907 unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
908}
909
910#[inline]
914#[target_feature(enable = "sse2")]
915#[cfg_attr(test, assert_instr(pcmpgtb))]
916#[stable(feature = "simd_x86", since = "1.27.0")]
917pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
918 unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
919}
920
921#[inline]
925#[target_feature(enable = "sse2")]
926#[cfg_attr(test, assert_instr(pcmpgtw))]
927#[stable(feature = "simd_x86", since = "1.27.0")]
928pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
929 unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
930}
931
932#[inline]
936#[target_feature(enable = "sse2")]
937#[cfg_attr(test, assert_instr(pcmpgtd))]
938#[stable(feature = "simd_x86", since = "1.27.0")]
939pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
940 unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
941}
942
943#[inline]
947#[target_feature(enable = "sse2")]
948#[cfg_attr(test, assert_instr(pcmpgtb))]
949#[stable(feature = "simd_x86", since = "1.27.0")]
950pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
951 unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
952}
953
954#[inline]
958#[target_feature(enable = "sse2")]
959#[cfg_attr(test, assert_instr(pcmpgtw))]
960#[stable(feature = "simd_x86", since = "1.27.0")]
961pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
962 unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
963}
964
965#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(pcmpgtd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
973 unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
974}
975
976#[inline]
981#[target_feature(enable = "sse2")]
982#[cfg_attr(test, assert_instr(cvtdq2pd))]
983#[stable(feature = "simd_x86", since = "1.27.0")]
984pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
985 unsafe {
986 let a = a.as_i32x4();
987 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
988 }
989}
990
991#[inline]
996#[target_feature(enable = "sse2")]
997#[cfg_attr(test, assert_instr(cvtsi2sd))]
998#[stable(feature = "simd_x86", since = "1.27.0")]
999pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
1000 unsafe { simd_insert!(a, 0, b as f64) }
1001}
1002
1003#[inline]
1008#[target_feature(enable = "sse2")]
1009#[cfg_attr(test, assert_instr(cvtdq2ps))]
1010#[stable(feature = "simd_x86", since = "1.27.0")]
1011pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1012 unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1013}
1014
1015#[inline]
1020#[target_feature(enable = "sse2")]
1021#[cfg_attr(test, assert_instr(cvtps2dq))]
1022#[stable(feature = "simd_x86", since = "1.27.0")]
1023pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1024 unsafe { transmute(cvtps2dq(a)) }
1025}
1026
1027#[inline]
1032#[target_feature(enable = "sse2")]
1033#[stable(feature = "simd_x86", since = "1.27.0")]
1034pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1035 unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1036}
1037
1038#[inline]
1042#[target_feature(enable = "sse2")]
1043#[stable(feature = "simd_x86", since = "1.27.0")]
1044pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1045 unsafe { simd_extract!(a.as_i32x4(), 0) }
1046}
1047
1048#[inline]
1053#[target_feature(enable = "sse2")]
1054#[stable(feature = "simd_x86", since = "1.27.0")]
1056pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1057 unsafe { transmute(i64x2::new(e0, e1)) }
1058}
1059
1060#[inline]
1064#[target_feature(enable = "sse2")]
1065#[stable(feature = "simd_x86", since = "1.27.0")]
1067pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1068 unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1069}
1070
1071#[inline]
1075#[target_feature(enable = "sse2")]
1076#[stable(feature = "simd_x86", since = "1.27.0")]
1078pub fn _mm_set_epi16(
1079 e7: i16,
1080 e6: i16,
1081 e5: i16,
1082 e4: i16,
1083 e3: i16,
1084 e2: i16,
1085 e1: i16,
1086 e0: i16,
1087) -> __m128i {
1088 unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1089}
1090
1091#[inline]
1095#[target_feature(enable = "sse2")]
1096#[stable(feature = "simd_x86", since = "1.27.0")]
1098pub fn _mm_set_epi8(
1099 e15: i8,
1100 e14: i8,
1101 e13: i8,
1102 e12: i8,
1103 e11: i8,
1104 e10: i8,
1105 e9: i8,
1106 e8: i8,
1107 e7: i8,
1108 e6: i8,
1109 e5: i8,
1110 e4: i8,
1111 e3: i8,
1112 e2: i8,
1113 e1: i8,
1114 e0: i8,
1115) -> __m128i {
1116 unsafe {
1117 #[rustfmt::skip]
1118 transmute(i8x16::new(
1119 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1120 ))
1121 }
1122}
1123
1124#[inline]
1128#[target_feature(enable = "sse2")]
1129#[stable(feature = "simd_x86", since = "1.27.0")]
1131pub fn _mm_set1_epi64x(a: i64) -> __m128i {
1132 _mm_set_epi64x(a, a)
1133}
1134
1135#[inline]
1139#[target_feature(enable = "sse2")]
1140#[stable(feature = "simd_x86", since = "1.27.0")]
1142pub fn _mm_set1_epi32(a: i32) -> __m128i {
1143 _mm_set_epi32(a, a, a, a)
1144}
1145
1146#[inline]
1150#[target_feature(enable = "sse2")]
1151#[stable(feature = "simd_x86", since = "1.27.0")]
1153pub fn _mm_set1_epi16(a: i16) -> __m128i {
1154 _mm_set_epi16(a, a, a, a, a, a, a, a)
1155}
1156
1157#[inline]
1161#[target_feature(enable = "sse2")]
1162#[stable(feature = "simd_x86", since = "1.27.0")]
1164pub fn _mm_set1_epi8(a: i8) -> __m128i {
1165 _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
1166}
1167
1168#[inline]
1172#[target_feature(enable = "sse2")]
1173#[stable(feature = "simd_x86", since = "1.27.0")]
1175pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1176 _mm_set_epi32(e0, e1, e2, e3)
1177}
1178
1179#[inline]
1183#[target_feature(enable = "sse2")]
1184#[stable(feature = "simd_x86", since = "1.27.0")]
1186pub fn _mm_setr_epi16(
1187 e7: i16,
1188 e6: i16,
1189 e5: i16,
1190 e4: i16,
1191 e3: i16,
1192 e2: i16,
1193 e1: i16,
1194 e0: i16,
1195) -> __m128i {
1196 _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1197}
1198
1199#[inline]
1203#[target_feature(enable = "sse2")]
1204#[stable(feature = "simd_x86", since = "1.27.0")]
1206pub fn _mm_setr_epi8(
1207 e15: i8,
1208 e14: i8,
1209 e13: i8,
1210 e12: i8,
1211 e11: i8,
1212 e10: i8,
1213 e9: i8,
1214 e8: i8,
1215 e7: i8,
1216 e6: i8,
1217 e5: i8,
1218 e4: i8,
1219 e3: i8,
1220 e2: i8,
1221 e1: i8,
1222 e0: i8,
1223) -> __m128i {
1224 #[rustfmt::skip]
1225 _mm_set_epi8(
1226 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1227 )
1228}
1229
1230#[inline]
1234#[target_feature(enable = "sse2")]
1235#[cfg_attr(test, assert_instr(xorps))]
1236#[stable(feature = "simd_x86", since = "1.27.0")]
1237pub fn _mm_setzero_si128() -> __m128i {
1238 const { unsafe { mem::zeroed() } }
1239}
1240
1241#[inline]
1245#[target_feature(enable = "sse2")]
1246#[stable(feature = "simd_x86", since = "1.27.0")]
1247pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1248 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1249}
1250
1251#[inline]
1257#[target_feature(enable = "sse2")]
1258#[cfg_attr(
1259 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1260 assert_instr(movaps)
1261)]
1262#[stable(feature = "simd_x86", since = "1.27.0")]
1263pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1264 *mem_addr
1265}
1266
1267#[inline]
1273#[target_feature(enable = "sse2")]
1274#[cfg_attr(test, assert_instr(movups))]
1275#[stable(feature = "simd_x86", since = "1.27.0")]
1276pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1277 let mut dst: __m128i = _mm_undefined_si128();
1278 ptr::copy_nonoverlapping(
1279 mem_addr as *const u8,
1280 ptr::addr_of_mut!(dst) as *mut u8,
1281 mem::size_of::<__m128i>(),
1282 );
1283 dst
1284}
1285
1286#[inline]
1306#[target_feature(enable = "sse2")]
1307#[cfg_attr(test, assert_instr(maskmovdqu))]
1308#[stable(feature = "simd_x86", since = "1.27.0")]
1309pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1310 maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1311}
1312
1313#[inline]
1319#[target_feature(enable = "sse2")]
1320#[cfg_attr(
1321 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1322 assert_instr(movaps)
1323)]
1324#[stable(feature = "simd_x86", since = "1.27.0")]
1325pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1326 *mem_addr = a;
1327}
1328
1329#[inline]
1335#[target_feature(enable = "sse2")]
1336#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1338pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1339 mem_addr.write_unaligned(a);
1340}
1341
1342#[inline]
1348#[target_feature(enable = "sse2")]
1349#[stable(feature = "simd_x86", since = "1.27.0")]
1350pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1351 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1352}
1353
1354#[inline]
1369#[target_feature(enable = "sse2")]
1370#[cfg_attr(test, assert_instr(movntdq))]
1371#[stable(feature = "simd_x86", since = "1.27.0")]
1372pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
1373 crate::arch::asm!(
1375 vps!("movntdq", ",{a}"),
1376 p = in(reg) mem_addr,
1377 a = in(xmm_reg) a,
1378 options(nostack, preserves_flags),
1379 );
1380}
1381
1382#[inline]
1397#[target_feature(enable = "sse2")]
1398#[cfg_attr(test, assert_instr(movnti))]
1399#[stable(feature = "simd_x86", since = "1.27.0")]
1400pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
1401 crate::arch::asm!(
1403 vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
1405 a = in(reg) a,
1406 options(nostack, preserves_flags),
1407 );
1408}
1409
1410#[inline]
1415#[target_feature(enable = "sse2")]
1416#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
1418#[stable(feature = "simd_x86", since = "1.27.0")]
1419pub fn _mm_move_epi64(a: __m128i) -> __m128i {
1420 unsafe {
1421 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1422 transmute(r)
1423 }
1424}
1425
1426#[inline]
1431#[target_feature(enable = "sse2")]
1432#[cfg_attr(test, assert_instr(packsswb))]
1433#[stable(feature = "simd_x86", since = "1.27.0")]
1434pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1435 unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1436}
1437
1438#[inline]
1443#[target_feature(enable = "sse2")]
1444#[cfg_attr(test, assert_instr(packssdw))]
1445#[stable(feature = "simd_x86", since = "1.27.0")]
1446pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1447 unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1448}
1449
1450#[inline]
1455#[target_feature(enable = "sse2")]
1456#[cfg_attr(test, assert_instr(packuswb))]
1457#[stable(feature = "simd_x86", since = "1.27.0")]
1458pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1459 unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1460}
1461
1462#[inline]
1466#[target_feature(enable = "sse2")]
1467#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1468#[rustc_legacy_const_generics(1)]
1469#[stable(feature = "simd_x86", since = "1.27.0")]
1470pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1471 static_assert_uimm_bits!(IMM8, 3);
1472 unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1473}
1474
1475#[inline]
1479#[target_feature(enable = "sse2")]
1480#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1481#[rustc_legacy_const_generics(2)]
1482#[stable(feature = "simd_x86", since = "1.27.0")]
1483pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1484 static_assert_uimm_bits!(IMM8, 3);
1485 unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1486}
1487
1488#[inline]
1492#[target_feature(enable = "sse2")]
1493#[cfg_attr(test, assert_instr(pmovmskb))]
1494#[stable(feature = "simd_x86", since = "1.27.0")]
1495pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
1496 unsafe {
1497 let z = i8x16::ZERO;
1498 let m: i8x16 = simd_lt(a.as_i8x16(), z);
1499 simd_bitmask::<_, u16>(m) as u32 as i32
1500 }
1501}
1502
1503#[inline]
1507#[target_feature(enable = "sse2")]
1508#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1509#[rustc_legacy_const_generics(1)]
1510#[stable(feature = "simd_x86", since = "1.27.0")]
1511pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1512 static_assert_uimm_bits!(IMM8, 8);
1513 unsafe {
1514 let a = a.as_i32x4();
1515 let x: i32x4 = simd_shuffle!(
1516 a,
1517 a,
1518 [
1519 IMM8 as u32 & 0b11,
1520 (IMM8 as u32 >> 2) & 0b11,
1521 (IMM8 as u32 >> 4) & 0b11,
1522 (IMM8 as u32 >> 6) & 0b11,
1523 ],
1524 );
1525 transmute(x)
1526 }
1527}
1528
1529#[inline]
1537#[target_feature(enable = "sse2")]
1538#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1539#[rustc_legacy_const_generics(1)]
1540#[stable(feature = "simd_x86", since = "1.27.0")]
1541pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1542 static_assert_uimm_bits!(IMM8, 8);
1543 unsafe {
1544 let a = a.as_i16x8();
1545 let x: i16x8 = simd_shuffle!(
1546 a,
1547 a,
1548 [
1549 0,
1550 1,
1551 2,
1552 3,
1553 (IMM8 as u32 & 0b11) + 4,
1554 ((IMM8 as u32 >> 2) & 0b11) + 4,
1555 ((IMM8 as u32 >> 4) & 0b11) + 4,
1556 ((IMM8 as u32 >> 6) & 0b11) + 4,
1557 ],
1558 );
1559 transmute(x)
1560 }
1561}
1562
1563#[inline]
1571#[target_feature(enable = "sse2")]
1572#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1573#[rustc_legacy_const_generics(1)]
1574#[stable(feature = "simd_x86", since = "1.27.0")]
1575pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1576 static_assert_uimm_bits!(IMM8, 8);
1577 unsafe {
1578 let a = a.as_i16x8();
1579 let x: i16x8 = simd_shuffle!(
1580 a,
1581 a,
1582 [
1583 IMM8 as u32 & 0b11,
1584 (IMM8 as u32 >> 2) & 0b11,
1585 (IMM8 as u32 >> 4) & 0b11,
1586 (IMM8 as u32 >> 6) & 0b11,
1587 4,
1588 5,
1589 6,
1590 7,
1591 ],
1592 );
1593 transmute(x)
1594 }
1595}
1596
1597#[inline]
1601#[target_feature(enable = "sse2")]
1602#[cfg_attr(test, assert_instr(punpckhbw))]
1603#[stable(feature = "simd_x86", since = "1.27.0")]
1604pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1605 unsafe {
1606 transmute::<i8x16, _>(simd_shuffle!(
1607 a.as_i8x16(),
1608 b.as_i8x16(),
1609 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1610 ))
1611 }
1612}
1613
1614#[inline]
1618#[target_feature(enable = "sse2")]
1619#[cfg_attr(test, assert_instr(punpckhwd))]
1620#[stable(feature = "simd_x86", since = "1.27.0")]
1621pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1622 unsafe {
1623 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1624 transmute::<i16x8, _>(x)
1625 }
1626}
1627
1628#[inline]
1632#[target_feature(enable = "sse2")]
1633#[cfg_attr(test, assert_instr(unpckhps))]
1634#[stable(feature = "simd_x86", since = "1.27.0")]
1635pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1636 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1637}
1638
1639#[inline]
1643#[target_feature(enable = "sse2")]
1644#[cfg_attr(test, assert_instr(unpckhpd))]
1645#[stable(feature = "simd_x86", since = "1.27.0")]
1646pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1647 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1648}
1649
1650#[inline]
1654#[target_feature(enable = "sse2")]
1655#[cfg_attr(test, assert_instr(punpcklbw))]
1656#[stable(feature = "simd_x86", since = "1.27.0")]
1657pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1658 unsafe {
1659 transmute::<i8x16, _>(simd_shuffle!(
1660 a.as_i8x16(),
1661 b.as_i8x16(),
1662 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1663 ))
1664 }
1665}
1666
1667#[inline]
1671#[target_feature(enable = "sse2")]
1672#[cfg_attr(test, assert_instr(punpcklwd))]
1673#[stable(feature = "simd_x86", since = "1.27.0")]
1674pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1675 unsafe {
1676 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1677 transmute::<i16x8, _>(x)
1678 }
1679}
1680
1681#[inline]
1685#[target_feature(enable = "sse2")]
1686#[cfg_attr(test, assert_instr(unpcklps))]
1687#[stable(feature = "simd_x86", since = "1.27.0")]
1688pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1689 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1690}
1691
1692#[inline]
1696#[target_feature(enable = "sse2")]
1697#[cfg_attr(test, assert_instr(movlhps))]
1698#[stable(feature = "simd_x86", since = "1.27.0")]
1699pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1700 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1701}
1702
1703#[inline]
1708#[target_feature(enable = "sse2")]
1709#[cfg_attr(test, assert_instr(addsd))]
1710#[stable(feature = "simd_x86", since = "1.27.0")]
1711pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1712 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1713}
1714
1715#[inline]
1720#[target_feature(enable = "sse2")]
1721#[cfg_attr(test, assert_instr(addpd))]
1722#[stable(feature = "simd_x86", since = "1.27.0")]
1723pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1724 unsafe { simd_add(a, b) }
1725}
1726
1727#[inline]
1732#[target_feature(enable = "sse2")]
1733#[cfg_attr(test, assert_instr(divsd))]
1734#[stable(feature = "simd_x86", since = "1.27.0")]
1735pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1736 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1737}
1738
1739#[inline]
1744#[target_feature(enable = "sse2")]
1745#[cfg_attr(test, assert_instr(divpd))]
1746#[stable(feature = "simd_x86", since = "1.27.0")]
1747pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1748 unsafe { simd_div(a, b) }
1749}
1750
1751#[inline]
1756#[target_feature(enable = "sse2")]
1757#[cfg_attr(test, assert_instr(maxsd))]
1758#[stable(feature = "simd_x86", since = "1.27.0")]
1759pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1760 unsafe { maxsd(a, b) }
1761}
1762
1763#[inline]
1768#[target_feature(enable = "sse2")]
1769#[cfg_attr(test, assert_instr(maxpd))]
1770#[stable(feature = "simd_x86", since = "1.27.0")]
1771pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1772 unsafe { maxpd(a, b) }
1773}
1774
1775#[inline]
1780#[target_feature(enable = "sse2")]
1781#[cfg_attr(test, assert_instr(minsd))]
1782#[stable(feature = "simd_x86", since = "1.27.0")]
1783pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1784 unsafe { minsd(a, b) }
1785}
1786
1787#[inline]
1792#[target_feature(enable = "sse2")]
1793#[cfg_attr(test, assert_instr(minpd))]
1794#[stable(feature = "simd_x86", since = "1.27.0")]
1795pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1796 unsafe { minpd(a, b) }
1797}
1798
1799#[inline]
1804#[target_feature(enable = "sse2")]
1805#[cfg_attr(test, assert_instr(mulsd))]
1806#[stable(feature = "simd_x86", since = "1.27.0")]
1807pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1808 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1809}
1810
1811#[inline]
1816#[target_feature(enable = "sse2")]
1817#[cfg_attr(test, assert_instr(mulpd))]
1818#[stable(feature = "simd_x86", since = "1.27.0")]
1819pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1820 unsafe { simd_mul(a, b) }
1821}
1822
1823#[inline]
1828#[target_feature(enable = "sse2")]
1829#[cfg_attr(test, assert_instr(sqrtsd))]
1830#[stable(feature = "simd_x86", since = "1.27.0")]
1831pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1832 unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1833}
1834
1835#[inline]
1839#[target_feature(enable = "sse2")]
1840#[cfg_attr(test, assert_instr(sqrtpd))]
1841#[stable(feature = "simd_x86", since = "1.27.0")]
1842pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1843 unsafe { simd_fsqrt(a) }
1844}
1845
1846#[inline]
1851#[target_feature(enable = "sse2")]
1852#[cfg_attr(test, assert_instr(subsd))]
1853#[stable(feature = "simd_x86", since = "1.27.0")]
1854pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1855 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1856}
1857
1858#[inline]
1863#[target_feature(enable = "sse2")]
1864#[cfg_attr(test, assert_instr(subpd))]
1865#[stable(feature = "simd_x86", since = "1.27.0")]
1866pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1867 unsafe { simd_sub(a, b) }
1868}
1869
1870#[inline]
1875#[target_feature(enable = "sse2")]
1876#[cfg_attr(test, assert_instr(andps))]
1877#[stable(feature = "simd_x86", since = "1.27.0")]
1878pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1879 unsafe {
1880 let a: __m128i = transmute(a);
1881 let b: __m128i = transmute(b);
1882 transmute(_mm_and_si128(a, b))
1883 }
1884}
1885
1886#[inline]
1890#[target_feature(enable = "sse2")]
1891#[cfg_attr(test, assert_instr(andnps))]
1892#[stable(feature = "simd_x86", since = "1.27.0")]
1893pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1894 unsafe {
1895 let a: __m128i = transmute(a);
1896 let b: __m128i = transmute(b);
1897 transmute(_mm_andnot_si128(a, b))
1898 }
1899}
1900
1901#[inline]
1905#[target_feature(enable = "sse2")]
1906#[cfg_attr(test, assert_instr(orps))]
1907#[stable(feature = "simd_x86", since = "1.27.0")]
1908pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1909 unsafe {
1910 let a: __m128i = transmute(a);
1911 let b: __m128i = transmute(b);
1912 transmute(_mm_or_si128(a, b))
1913 }
1914}
1915
1916#[inline]
1920#[target_feature(enable = "sse2")]
1921#[cfg_attr(test, assert_instr(xorps))]
1922#[stable(feature = "simd_x86", since = "1.27.0")]
1923pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1924 unsafe {
1925 let a: __m128i = transmute(a);
1926 let b: __m128i = transmute(b);
1927 transmute(_mm_xor_si128(a, b))
1928 }
1929}
1930
1931#[inline]
1936#[target_feature(enable = "sse2")]
1937#[cfg_attr(test, assert_instr(cmpeqsd))]
1938#[stable(feature = "simd_x86", since = "1.27.0")]
1939pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
1940 unsafe { cmpsd(a, b, 0) }
1941}
1942
1943#[inline]
1948#[target_feature(enable = "sse2")]
1949#[cfg_attr(test, assert_instr(cmpltsd))]
1950#[stable(feature = "simd_x86", since = "1.27.0")]
1951pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
1952 unsafe { cmpsd(a, b, 1) }
1953}
1954
1955#[inline]
1960#[target_feature(enable = "sse2")]
1961#[cfg_attr(test, assert_instr(cmplesd))]
1962#[stable(feature = "simd_x86", since = "1.27.0")]
1963pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
1964 unsafe { cmpsd(a, b, 2) }
1965}
1966
1967#[inline]
1972#[target_feature(enable = "sse2")]
1973#[cfg_attr(test, assert_instr(cmpltsd))]
1974#[stable(feature = "simd_x86", since = "1.27.0")]
1975pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1976 unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1977}
1978
1979#[inline]
1984#[target_feature(enable = "sse2")]
1985#[cfg_attr(test, assert_instr(cmplesd))]
1986#[stable(feature = "simd_x86", since = "1.27.0")]
1987pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1988 unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1989}
1990
1991#[inline]
1998#[target_feature(enable = "sse2")]
1999#[cfg_attr(test, assert_instr(cmpordsd))]
2000#[stable(feature = "simd_x86", since = "1.27.0")]
2001pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
2002 unsafe { cmpsd(a, b, 7) }
2003}
2004
2005#[inline]
2011#[target_feature(enable = "sse2")]
2012#[cfg_attr(test, assert_instr(cmpunordsd))]
2013#[stable(feature = "simd_x86", since = "1.27.0")]
2014pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
2015 unsafe { cmpsd(a, b, 3) }
2016}
2017
2018#[inline]
2023#[target_feature(enable = "sse2")]
2024#[cfg_attr(test, assert_instr(cmpneqsd))]
2025#[stable(feature = "simd_x86", since = "1.27.0")]
2026pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2027 unsafe { cmpsd(a, b, 4) }
2028}
2029
2030#[inline]
2035#[target_feature(enable = "sse2")]
2036#[cfg_attr(test, assert_instr(cmpnltsd))]
2037#[stable(feature = "simd_x86", since = "1.27.0")]
2038pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2039 unsafe { cmpsd(a, b, 5) }
2040}
2041
2042#[inline]
2047#[target_feature(enable = "sse2")]
2048#[cfg_attr(test, assert_instr(cmpnlesd))]
2049#[stable(feature = "simd_x86", since = "1.27.0")]
2050pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2051 unsafe { cmpsd(a, b, 6) }
2052}
2053
2054#[inline]
2059#[target_feature(enable = "sse2")]
2060#[cfg_attr(test, assert_instr(cmpnltsd))]
2061#[stable(feature = "simd_x86", since = "1.27.0")]
2062pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2063 unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2064}
2065
2066#[inline]
2071#[target_feature(enable = "sse2")]
2072#[cfg_attr(test, assert_instr(cmpnlesd))]
2073#[stable(feature = "simd_x86", since = "1.27.0")]
2074pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2075 unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2076}
2077
2078#[inline]
2082#[target_feature(enable = "sse2")]
2083#[cfg_attr(test, assert_instr(cmpeqpd))]
2084#[stable(feature = "simd_x86", since = "1.27.0")]
2085pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2086 unsafe { cmppd(a, b, 0) }
2087}
2088
2089#[inline]
2093#[target_feature(enable = "sse2")]
2094#[cfg_attr(test, assert_instr(cmpltpd))]
2095#[stable(feature = "simd_x86", since = "1.27.0")]
2096pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2097 unsafe { cmppd(a, b, 1) }
2098}
2099
2100#[inline]
2104#[target_feature(enable = "sse2")]
2105#[cfg_attr(test, assert_instr(cmplepd))]
2106#[stable(feature = "simd_x86", since = "1.27.0")]
2107pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2108 unsafe { cmppd(a, b, 2) }
2109}
2110
2111#[inline]
2115#[target_feature(enable = "sse2")]
2116#[cfg_attr(test, assert_instr(cmpltpd))]
2117#[stable(feature = "simd_x86", since = "1.27.0")]
2118pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2119 _mm_cmplt_pd(b, a)
2120}
2121
2122#[inline]
2126#[target_feature(enable = "sse2")]
2127#[cfg_attr(test, assert_instr(cmplepd))]
2128#[stable(feature = "simd_x86", since = "1.27.0")]
2129pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2130 _mm_cmple_pd(b, a)
2131}
2132
2133#[inline]
2137#[target_feature(enable = "sse2")]
2138#[cfg_attr(test, assert_instr(cmpordpd))]
2139#[stable(feature = "simd_x86", since = "1.27.0")]
2140pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2141 unsafe { cmppd(a, b, 7) }
2142}
2143
2144#[inline]
2148#[target_feature(enable = "sse2")]
2149#[cfg_attr(test, assert_instr(cmpunordpd))]
2150#[stable(feature = "simd_x86", since = "1.27.0")]
2151pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2152 unsafe { cmppd(a, b, 3) }
2153}
2154
2155#[inline]
2159#[target_feature(enable = "sse2")]
2160#[cfg_attr(test, assert_instr(cmpneqpd))]
2161#[stable(feature = "simd_x86", since = "1.27.0")]
2162pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2163 unsafe { cmppd(a, b, 4) }
2164}
2165
2166#[inline]
2170#[target_feature(enable = "sse2")]
2171#[cfg_attr(test, assert_instr(cmpnltpd))]
2172#[stable(feature = "simd_x86", since = "1.27.0")]
2173pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2174 unsafe { cmppd(a, b, 5) }
2175}
2176
2177#[inline]
2181#[target_feature(enable = "sse2")]
2182#[cfg_attr(test, assert_instr(cmpnlepd))]
2183#[stable(feature = "simd_x86", since = "1.27.0")]
2184pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2185 unsafe { cmppd(a, b, 6) }
2186}
2187
2188#[inline]
2192#[target_feature(enable = "sse2")]
2193#[cfg_attr(test, assert_instr(cmpnltpd))]
2194#[stable(feature = "simd_x86", since = "1.27.0")]
2195pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2196 _mm_cmpnlt_pd(b, a)
2197}
2198
2199#[inline]
2204#[target_feature(enable = "sse2")]
2205#[cfg_attr(test, assert_instr(cmpnlepd))]
2206#[stable(feature = "simd_x86", since = "1.27.0")]
2207pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2208 _mm_cmpnle_pd(b, a)
2209}
2210
2211#[inline]
2215#[target_feature(enable = "sse2")]
2216#[cfg_attr(test, assert_instr(comisd))]
2217#[stable(feature = "simd_x86", since = "1.27.0")]
2218pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2219 unsafe { comieqsd(a, b) }
2220}
2221
2222#[inline]
2226#[target_feature(enable = "sse2")]
2227#[cfg_attr(test, assert_instr(comisd))]
2228#[stable(feature = "simd_x86", since = "1.27.0")]
2229pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2230 unsafe { comiltsd(a, b) }
2231}
2232
2233#[inline]
2237#[target_feature(enable = "sse2")]
2238#[cfg_attr(test, assert_instr(comisd))]
2239#[stable(feature = "simd_x86", since = "1.27.0")]
2240pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2241 unsafe { comilesd(a, b) }
2242}
2243
2244#[inline]
2248#[target_feature(enable = "sse2")]
2249#[cfg_attr(test, assert_instr(comisd))]
2250#[stable(feature = "simd_x86", since = "1.27.0")]
2251pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2252 unsafe { comigtsd(a, b) }
2253}
2254
2255#[inline]
2259#[target_feature(enable = "sse2")]
2260#[cfg_attr(test, assert_instr(comisd))]
2261#[stable(feature = "simd_x86", since = "1.27.0")]
2262pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2263 unsafe { comigesd(a, b) }
2264}
2265
2266#[inline]
2270#[target_feature(enable = "sse2")]
2271#[cfg_attr(test, assert_instr(comisd))]
2272#[stable(feature = "simd_x86", since = "1.27.0")]
2273pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2274 unsafe { comineqsd(a, b) }
2275}
2276
2277#[inline]
2281#[target_feature(enable = "sse2")]
2282#[cfg_attr(test, assert_instr(ucomisd))]
2283#[stable(feature = "simd_x86", since = "1.27.0")]
2284pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2285 unsafe { ucomieqsd(a, b) }
2286}
2287
2288#[inline]
2292#[target_feature(enable = "sse2")]
2293#[cfg_attr(test, assert_instr(ucomisd))]
2294#[stable(feature = "simd_x86", since = "1.27.0")]
2295pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2296 unsafe { ucomiltsd(a, b) }
2297}
2298
2299#[inline]
2303#[target_feature(enable = "sse2")]
2304#[cfg_attr(test, assert_instr(ucomisd))]
2305#[stable(feature = "simd_x86", since = "1.27.0")]
2306pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2307 unsafe { ucomilesd(a, b) }
2308}
2309
2310#[inline]
2314#[target_feature(enable = "sse2")]
2315#[cfg_attr(test, assert_instr(ucomisd))]
2316#[stable(feature = "simd_x86", since = "1.27.0")]
2317pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2318 unsafe { ucomigtsd(a, b) }
2319}
2320
2321#[inline]
2325#[target_feature(enable = "sse2")]
2326#[cfg_attr(test, assert_instr(ucomisd))]
2327#[stable(feature = "simd_x86", since = "1.27.0")]
2328pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2329 unsafe { ucomigesd(a, b) }
2330}
2331
2332#[inline]
2336#[target_feature(enable = "sse2")]
2337#[cfg_attr(test, assert_instr(ucomisd))]
2338#[stable(feature = "simd_x86", since = "1.27.0")]
2339pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2340 unsafe { ucomineqsd(a, b) }
2341}
2342
2343#[inline]
2348#[target_feature(enable = "sse2")]
2349#[cfg_attr(test, assert_instr(cvtpd2ps))]
2350#[stable(feature = "simd_x86", since = "1.27.0")]
2351pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2352 unsafe {
2353 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2354 let zero = f32x2::ZERO;
2355 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2356 }
2357}
2358
2359#[inline]
2365#[target_feature(enable = "sse2")]
2366#[cfg_attr(test, assert_instr(cvtps2pd))]
2367#[stable(feature = "simd_x86", since = "1.27.0")]
2368pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
2369 unsafe {
2370 let a = a.as_f32x4();
2371 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2372 }
2373}
2374
2375#[inline]
2380#[target_feature(enable = "sse2")]
2381#[cfg_attr(test, assert_instr(cvtpd2dq))]
2382#[stable(feature = "simd_x86", since = "1.27.0")]
2383pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2384 unsafe { transmute(cvtpd2dq(a)) }
2385}
2386
2387#[inline]
2392#[target_feature(enable = "sse2")]
2393#[cfg_attr(test, assert_instr(cvtsd2si))]
2394#[stable(feature = "simd_x86", since = "1.27.0")]
2395pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2396 unsafe { cvtsd2si(a) }
2397}
2398
2399#[inline]
2406#[target_feature(enable = "sse2")]
2407#[cfg_attr(test, assert_instr(cvtsd2ss))]
2408#[stable(feature = "simd_x86", since = "1.27.0")]
2409pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2410 unsafe { cvtsd2ss(a, b) }
2411}
2412
2413#[inline]
2417#[target_feature(enable = "sse2")]
2418#[stable(feature = "simd_x86", since = "1.27.0")]
2419pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2420 unsafe { simd_extract!(a, 0) }
2421}
2422
2423#[inline]
2430#[target_feature(enable = "sse2")]
2431#[cfg_attr(test, assert_instr(cvtss2sd))]
2432#[stable(feature = "simd_x86", since = "1.27.0")]
2433pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2434 unsafe {
2435 let elt: f32 = simd_extract!(b, 0);
2436 simd_insert!(a, 0, elt as f64)
2437 }
2438}
2439
2440#[inline]
2445#[target_feature(enable = "sse2")]
2446#[cfg_attr(test, assert_instr(cvttpd2dq))]
2447#[stable(feature = "simd_x86", since = "1.27.0")]
2448pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2449 unsafe { transmute(cvttpd2dq(a)) }
2450}
2451
2452#[inline]
2457#[target_feature(enable = "sse2")]
2458#[cfg_attr(test, assert_instr(cvttsd2si))]
2459#[stable(feature = "simd_x86", since = "1.27.0")]
2460pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2461 unsafe { cvttsd2si(a) }
2462}
2463
2464#[inline]
2469#[target_feature(enable = "sse2")]
2470#[cfg_attr(test, assert_instr(cvttps2dq))]
2471#[stable(feature = "simd_x86", since = "1.27.0")]
2472pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2473 unsafe { transmute(cvttps2dq(a)) }
2474}
2475
2476#[inline]
2481#[target_feature(enable = "sse2")]
2482#[stable(feature = "simd_x86", since = "1.27.0")]
2483pub fn _mm_set_sd(a: f64) -> __m128d {
2484 _mm_set_pd(0.0, a)
2485}
2486
2487#[inline]
2492#[target_feature(enable = "sse2")]
2493#[stable(feature = "simd_x86", since = "1.27.0")]
2494pub fn _mm_set1_pd(a: f64) -> __m128d {
2495 _mm_set_pd(a, a)
2496}
2497
2498#[inline]
2503#[target_feature(enable = "sse2")]
2504#[stable(feature = "simd_x86", since = "1.27.0")]
2505pub fn _mm_set_pd1(a: f64) -> __m128d {
2506 _mm_set_pd(a, a)
2507}
2508
2509#[inline]
2514#[target_feature(enable = "sse2")]
2515#[stable(feature = "simd_x86", since = "1.27.0")]
2516pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2517 __m128d([b, a])
2518}
2519
2520#[inline]
2525#[target_feature(enable = "sse2")]
2526#[stable(feature = "simd_x86", since = "1.27.0")]
2527pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2528 _mm_set_pd(b, a)
2529}
2530
2531#[inline]
2536#[target_feature(enable = "sse2")]
2537#[cfg_attr(test, assert_instr(xorp))]
2538#[stable(feature = "simd_x86", since = "1.27.0")]
2539pub fn _mm_setzero_pd() -> __m128d {
2540 const { unsafe { mem::zeroed() } }
2541}
2542
2543#[inline]
2550#[target_feature(enable = "sse2")]
2551#[cfg_attr(test, assert_instr(movmskpd))]
2552#[stable(feature = "simd_x86", since = "1.27.0")]
2553pub fn _mm_movemask_pd(a: __m128d) -> i32 {
2554 unsafe {
2557 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2558 simd_bitmask::<i64x2, u8>(mask).into()
2559 }
2560}
2561
2562#[inline]
2569#[target_feature(enable = "sse2")]
2570#[cfg_attr(
2571 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2572 assert_instr(movaps)
2573)]
2574#[stable(feature = "simd_x86", since = "1.27.0")]
2575#[allow(clippy::cast_ptr_alignment)]
2576pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2577 *(mem_addr as *const __m128d)
2578}
2579
2580#[inline]
2585#[target_feature(enable = "sse2")]
2586#[cfg_attr(test, assert_instr(movsd))]
2587#[stable(feature = "simd_x86", since = "1.27.0")]
2588pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2589 _mm_setr_pd(*mem_addr, 0.)
2590}
2591
2592#[inline]
2598#[target_feature(enable = "sse2")]
2599#[cfg_attr(test, assert_instr(movhps))]
2600#[stable(feature = "simd_x86", since = "1.27.0")]
2601pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2602 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2603}
2604
2605#[inline]
2611#[target_feature(enable = "sse2")]
2612#[cfg_attr(test, assert_instr(movlps))]
2613#[stable(feature = "simd_x86", since = "1.27.0")]
2614pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2615 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2616}
2617
/// Stores a 128-bit floating-point vector of `[2 x double]` to a 128-bit
/// aligned memory location using the `movntpd` instruction.
///
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that
/// this intrinsic mutates, a call to `_mm_sfence` must be performed by the
/// thread that used the intrinsic.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    // Emitted as inline assembly: `p` carries the destination address in a
    // general-purpose register and `a` carries the vector in an XMM register.
    // NOTE(review): `vps!` is defined elsewhere; it presumably assembles the
    // instruction text around the `{p}` memory operand. Confirm against its
    // definition.
    crate::arch::asm!(
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2647
2648#[inline]
2653#[target_feature(enable = "sse2")]
2654#[cfg_attr(test, assert_instr(movlps))]
2655#[stable(feature = "simd_x86", since = "1.27.0")]
2656pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2657 *mem_addr = simd_extract!(a, 0)
2658}
2659
2660#[inline]
2666#[target_feature(enable = "sse2")]
2667#[cfg_attr(
2668 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2669 assert_instr(movaps)
2670)]
2671#[stable(feature = "simd_x86", since = "1.27.0")]
2672#[allow(clippy::cast_ptr_alignment)]
2673pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2674 *(mem_addr as *mut __m128d) = a;
2675}
2676
2677#[inline]
2683#[target_feature(enable = "sse2")]
2684#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2686pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2687 mem_addr.cast::<__m128d>().write_unaligned(a);
2688}
2689
2690#[inline]
2696#[target_feature(enable = "sse2")]
2697#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2698pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2699 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2700}
2701
2702#[inline]
2708#[target_feature(enable = "sse2")]
2709#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2710pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2711 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2712}
2713
2714#[inline]
2720#[target_feature(enable = "sse2")]
2721#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2722pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2723 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2724}
2725
2726#[inline]
2732#[target_feature(enable = "sse2")]
2733#[stable(feature = "simd_x86", since = "1.27.0")]
2734#[allow(clippy::cast_ptr_alignment)]
2735pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2736 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2737 *(mem_addr as *mut __m128d) = b;
2738}
2739
2740#[inline]
2746#[target_feature(enable = "sse2")]
2747#[stable(feature = "simd_x86", since = "1.27.0")]
2748#[allow(clippy::cast_ptr_alignment)]
2749pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2750 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2751 *(mem_addr as *mut __m128d) = b;
2752}
2753
2754#[inline]
2761#[target_feature(enable = "sse2")]
2762#[stable(feature = "simd_x86", since = "1.27.0")]
2763#[allow(clippy::cast_ptr_alignment)]
2764pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2765 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2766 *(mem_addr as *mut __m128d) = b;
2767}
2768
2769#[inline]
2774#[target_feature(enable = "sse2")]
2775#[cfg_attr(test, assert_instr(movhps))]
2776#[stable(feature = "simd_x86", since = "1.27.0")]
2777pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2778 *mem_addr = simd_extract!(a, 1);
2779}
2780
2781#[inline]
2786#[target_feature(enable = "sse2")]
2787#[cfg_attr(test, assert_instr(movlps))]
2788#[stable(feature = "simd_x86", since = "1.27.0")]
2789pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2790 *mem_addr = simd_extract!(a, 0);
2791}
2792
2793#[inline]
2798#[target_feature(enable = "sse2")]
2799#[stable(feature = "simd_x86", since = "1.27.0")]
2801pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2802 let d = *mem_addr;
2803 _mm_setr_pd(d, d)
2804}
2805
2806#[inline]
2811#[target_feature(enable = "sse2")]
2812#[stable(feature = "simd_x86", since = "1.27.0")]
2814pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2815 _mm_load1_pd(mem_addr)
2816}
2817
2818#[inline]
2824#[target_feature(enable = "sse2")]
2825#[cfg_attr(
2826 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2827 assert_instr(movaps)
2828)]
2829#[stable(feature = "simd_x86", since = "1.27.0")]
2830pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2831 let a = _mm_load_pd(mem_addr);
2832 simd_shuffle!(a, a, [1, 0])
2833}
2834
2835#[inline]
2841#[target_feature(enable = "sse2")]
2842#[cfg_attr(test, assert_instr(movups))]
2843#[stable(feature = "simd_x86", since = "1.27.0")]
2844pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2845 let mut dst = _mm_undefined_pd();
2846 ptr::copy_nonoverlapping(
2847 mem_addr as *const u8,
2848 ptr::addr_of_mut!(dst) as *mut u8,
2849 mem::size_of::<__m128d>(),
2850 );
2851 dst
2852}
2853
2854#[inline]
2860#[target_feature(enable = "sse2")]
2861#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2862pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2863 transmute(i16x8::new(
2864 ptr::read_unaligned(mem_addr as *const i16),
2865 0,
2866 0,
2867 0,
2868 0,
2869 0,
2870 0,
2871 0,
2872 ))
2873}
2874
2875#[inline]
2881#[target_feature(enable = "sse2")]
2882#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2883pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2884 transmute(i32x4::new(
2885 ptr::read_unaligned(mem_addr as *const i32),
2886 0,
2887 0,
2888 0,
2889 ))
2890}
2891
2892#[inline]
2898#[target_feature(enable = "sse2")]
2899#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
2900pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
2901 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2902}
2903
2904#[inline]
2910#[target_feature(enable = "sse2")]
2911#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
2912#[rustc_legacy_const_generics(2)]
2913#[stable(feature = "simd_x86", since = "1.27.0")]
2914pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
2915 static_assert_uimm_bits!(MASK, 8);
2916 unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
2917}
2918
2919#[inline]
2925#[target_feature(enable = "sse2")]
2926#[cfg_attr(test, assert_instr(movsd))]
2927#[stable(feature = "simd_x86", since = "1.27.0")]
2928pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2929 unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
2930}
2931
2932#[inline]
2937#[target_feature(enable = "sse2")]
2938#[stable(feature = "simd_x86", since = "1.27.0")]
2939pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
2940 unsafe { transmute(a) }
2941}
2942
2943#[inline]
2948#[target_feature(enable = "sse2")]
2949#[stable(feature = "simd_x86", since = "1.27.0")]
2950pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
2951 unsafe { transmute(a) }
2952}
2953
2954#[inline]
2959#[target_feature(enable = "sse2")]
2960#[stable(feature = "simd_x86", since = "1.27.0")]
2961pub fn _mm_castps_pd(a: __m128) -> __m128d {
2962 unsafe { transmute(a) }
2963}
2964
2965#[inline]
2970#[target_feature(enable = "sse2")]
2971#[stable(feature = "simd_x86", since = "1.27.0")]
2972pub fn _mm_castps_si128(a: __m128) -> __m128i {
2973 unsafe { transmute(a) }
2974}
2975
2976#[inline]
2981#[target_feature(enable = "sse2")]
2982#[stable(feature = "simd_x86", since = "1.27.0")]
2983pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2984 unsafe { transmute(a) }
2985}
2986
2987#[inline]
2992#[target_feature(enable = "sse2")]
2993#[stable(feature = "simd_x86", since = "1.27.0")]
2994pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2995 unsafe { transmute(a) }
2996}
2997
2998#[inline]
3005#[target_feature(enable = "sse2")]
3006#[stable(feature = "simd_x86", since = "1.27.0")]
3007pub fn _mm_undefined_pd() -> __m128d {
3008 const { unsafe { mem::zeroed() } }
3009}
3010
3011#[inline]
3018#[target_feature(enable = "sse2")]
3019#[stable(feature = "simd_x86", since = "1.27.0")]
3020pub fn _mm_undefined_si128() -> __m128i {
3021 const { unsafe { mem::zeroed() } }
3022}
3023
3024#[inline]
3032#[target_feature(enable = "sse2")]
3033#[cfg_attr(test, assert_instr(unpckhpd))]
3034#[stable(feature = "simd_x86", since = "1.27.0")]
3035pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
3036 unsafe { simd_shuffle!(a, b, [1, 3]) }
3037}
3038
3039#[inline]
3047#[target_feature(enable = "sse2")]
3048#[cfg_attr(test, assert_instr(movlhps))]
3049#[stable(feature = "simd_x86", since = "1.27.0")]
3050pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
3051 unsafe { simd_shuffle!(a, b, [0, 2]) }
3052}
3053
// Raw declarations of the LLVM x86 SSE2 intrinsics, each bound by its
// `link_name`. The public `_mm_*` wrappers in this file call these (for
// example `_mm_pause` calls `pause`, `_mm_clflush` calls `clflush`).
//
// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD
// vector types used in these signatures are not considered FFI-safe by the
// lint — confirm.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Spin-loop hint, cache-line flush and fences.
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();
    // Integer multiply-add and sum of absolute differences.
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    // Shifts whose count is supplied in a vector.
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    // Masked store through a raw pointer.
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    // Narrowing packs (two inputs of wider lanes into one output of narrower lanes).
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    // Double-precision min/max and comparisons; `imm8` selects the predicate.
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    // Scalar double comparisons returning an integer result.
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    // Floating-point conversions.
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
}
3143
3144#[cfg(test)]
3145mod tests {
3146 use crate::{
3147 core_arch::{simd::*, x86::*},
3148 hint::black_box,
3149 };
3150 use std::{
3151 boxed, f32, f64,
3152 mem::{self, transmute},
3153 ptr,
3154 };
3155 use stdarch_test::simd_test;
3156
    // Module-local shorthand for `f64::NAN`.
    const NAN: f64 = f64::NAN;
3158
    // Smoke test: `_mm_pause` only has to run to completion. A plain `#[test]`
    // is used here rather than `#[simd_test]`.
    #[test]
    fn test_mm_pause() {
        _mm_pause()
    }
3163
3164 #[simd_test(enable = "sse2")]
3165 unsafe fn test_mm_clflush() {
3166 let x = 0_u8;
3167 _mm_clflush(ptr::addr_of!(x));
3168 }
3169
    // Smoke test: the load fence only has to execute without faulting.
    // NOTE(review): ignored under Miri, presumably because Miri does not
    // support this intrinsic — confirm.
    #[simd_test(enable = "sse2")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }
3176
    // Smoke test: the full memory fence only has to execute without faulting.
    // NOTE(review): ignored under Miri, presumably because Miri does not
    // support this intrinsic — confirm.
    #[simd_test(enable = "sse2")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }
3183
3184 #[simd_test(enable = "sse2")]
3185 unsafe fn test_mm_add_epi8() {
3186 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3187 #[rustfmt::skip]
3188 let b = _mm_setr_epi8(
3189 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3190 );
3191 let r = _mm_add_epi8(a, b);
3192 #[rustfmt::skip]
3193 let e = _mm_setr_epi8(
3194 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3195 );
3196 assert_eq_m128i(r, e);
3197 }
3198
3199 #[simd_test(enable = "sse2")]
3200 unsafe fn test_mm_add_epi8_overflow() {
3201 let a = _mm_set1_epi8(0x7F);
3202 let b = _mm_set1_epi8(1);
3203 let r = _mm_add_epi8(a, b);
3204 assert_eq_m128i(r, _mm_set1_epi8(-128));
3205 }
3206
3207 #[simd_test(enable = "sse2")]
3208 unsafe fn test_mm_add_epi16() {
3209 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3210 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3211 let r = _mm_add_epi16(a, b);
3212 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3213 assert_eq_m128i(r, e);
3214 }
3215
3216 #[simd_test(enable = "sse2")]
3217 unsafe fn test_mm_add_epi32() {
3218 let a = _mm_setr_epi32(0, 1, 2, 3);
3219 let b = _mm_setr_epi32(4, 5, 6, 7);
3220 let r = _mm_add_epi32(a, b);
3221 let e = _mm_setr_epi32(4, 6, 8, 10);
3222 assert_eq_m128i(r, e);
3223 }
3224
3225 #[simd_test(enable = "sse2")]
3226 unsafe fn test_mm_add_epi64() {
3227 let a = _mm_setr_epi64x(0, 1);
3228 let b = _mm_setr_epi64x(2, 3);
3229 let r = _mm_add_epi64(a, b);
3230 let e = _mm_setr_epi64x(2, 4);
3231 assert_eq_m128i(r, e);
3232 }
3233
3234 #[simd_test(enable = "sse2")]
3235 unsafe fn test_mm_adds_epi8() {
3236 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3237 #[rustfmt::skip]
3238 let b = _mm_setr_epi8(
3239 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3240 );
3241 let r = _mm_adds_epi8(a, b);
3242 #[rustfmt::skip]
3243 let e = _mm_setr_epi8(
3244 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3245 );
3246 assert_eq_m128i(r, e);
3247 }
3248
3249 #[simd_test(enable = "sse2")]
3250 unsafe fn test_mm_adds_epi8_saturate_positive() {
3251 let a = _mm_set1_epi8(0x7F);
3252 let b = _mm_set1_epi8(1);
3253 let r = _mm_adds_epi8(a, b);
3254 assert_eq_m128i(r, a);
3255 }
3256
3257 #[simd_test(enable = "sse2")]
3258 unsafe fn test_mm_adds_epi8_saturate_negative() {
3259 let a = _mm_set1_epi8(-0x80);
3260 let b = _mm_set1_epi8(-1);
3261 let r = _mm_adds_epi8(a, b);
3262 assert_eq_m128i(r, a);
3263 }
3264
3265 #[simd_test(enable = "sse2")]
3266 unsafe fn test_mm_adds_epi16() {
3267 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3268 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3269 let r = _mm_adds_epi16(a, b);
3270 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3271 assert_eq_m128i(r, e);
3272 }
3273
3274 #[simd_test(enable = "sse2")]
3275 unsafe fn test_mm_adds_epi16_saturate_positive() {
3276 let a = _mm_set1_epi16(0x7FFF);
3277 let b = _mm_set1_epi16(1);
3278 let r = _mm_adds_epi16(a, b);
3279 assert_eq_m128i(r, a);
3280 }
3281
3282 #[simd_test(enable = "sse2")]
3283 unsafe fn test_mm_adds_epi16_saturate_negative() {
3284 let a = _mm_set1_epi16(-0x8000);
3285 let b = _mm_set1_epi16(-1);
3286 let r = _mm_adds_epi16(a, b);
3287 assert_eq_m128i(r, a);
3288 }
3289
3290 #[simd_test(enable = "sse2")]
3291 unsafe fn test_mm_adds_epu8() {
3292 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3293 #[rustfmt::skip]
3294 let b = _mm_setr_epi8(
3295 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3296 );
3297 let r = _mm_adds_epu8(a, b);
3298 #[rustfmt::skip]
3299 let e = _mm_setr_epi8(
3300 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3301 );
3302 assert_eq_m128i(r, e);
3303 }
3304
3305 #[simd_test(enable = "sse2")]
3306 unsafe fn test_mm_adds_epu8_saturate() {
3307 let a = _mm_set1_epi8(!0);
3308 let b = _mm_set1_epi8(1);
3309 let r = _mm_adds_epu8(a, b);
3310 assert_eq_m128i(r, a);
3311 }
3312
3313 #[simd_test(enable = "sse2")]
3314 unsafe fn test_mm_adds_epu16() {
3315 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3316 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3317 let r = _mm_adds_epu16(a, b);
3318 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3319 assert_eq_m128i(r, e);
3320 }
3321
3322 #[simd_test(enable = "sse2")]
3323 unsafe fn test_mm_adds_epu16_saturate() {
3324 let a = _mm_set1_epi16(!0);
3325 let b = _mm_set1_epi16(1);
3326 let r = _mm_adds_epu16(a, b);
3327 assert_eq_m128i(r, a);
3328 }
3329
3330 #[simd_test(enable = "sse2")]
3331 unsafe fn test_mm_avg_epu8() {
3332 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3333 let r = _mm_avg_epu8(a, b);
3334 assert_eq_m128i(r, _mm_set1_epi8(6));
3335 }
3336
3337 #[simd_test(enable = "sse2")]
3338 unsafe fn test_mm_avg_epu16() {
3339 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3340 let r = _mm_avg_epu16(a, b);
3341 assert_eq_m128i(r, _mm_set1_epi16(6));
3342 }
3343
3344 #[simd_test(enable = "sse2")]
3345 unsafe fn test_mm_madd_epi16() {
3346 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3347 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3348 let r = _mm_madd_epi16(a, b);
3349 let e = _mm_setr_epi32(29, 81, 149, 233);
3350 assert_eq_m128i(r, e);
3351
3352 let a = _mm_setr_epi16(
3355 i16::MAX,
3356 i16::MAX,
3357 i16::MIN,
3358 i16::MIN,
3359 i16::MIN,
3360 i16::MAX,
3361 0,
3362 0,
3363 );
3364 let b = _mm_setr_epi16(
3365 i16::MAX,
3366 i16::MAX,
3367 i16::MIN,
3368 i16::MIN,
3369 i16::MAX,
3370 i16::MIN,
3371 0,
3372 0,
3373 );
3374 let r = _mm_madd_epi16(a, b);
3375 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3376 assert_eq_m128i(r, e);
3377 }
3378
3379 #[simd_test(enable = "sse2")]
3380 unsafe fn test_mm_max_epi16() {
3381 let a = _mm_set1_epi16(1);
3382 let b = _mm_set1_epi16(-1);
3383 let r = _mm_max_epi16(a, b);
3384 assert_eq_m128i(r, a);
3385 }
3386
3387 #[simd_test(enable = "sse2")]
3388 unsafe fn test_mm_max_epu8() {
3389 let a = _mm_set1_epi8(1);
3390 let b = _mm_set1_epi8(!0);
3391 let r = _mm_max_epu8(a, b);
3392 assert_eq_m128i(r, b);
3393 }
3394
3395 #[simd_test(enable = "sse2")]
3396 unsafe fn test_mm_min_epi16() {
3397 let a = _mm_set1_epi16(1);
3398 let b = _mm_set1_epi16(-1);
3399 let r = _mm_min_epi16(a, b);
3400 assert_eq_m128i(r, b);
3401 }
3402
3403 #[simd_test(enable = "sse2")]
3404 unsafe fn test_mm_min_epu8() {
3405 let a = _mm_set1_epi8(1);
3406 let b = _mm_set1_epi8(!0);
3407 let r = _mm_min_epu8(a, b);
3408 assert_eq_m128i(r, a);
3409 }
3410
3411 #[simd_test(enable = "sse2")]
3412 unsafe fn test_mm_mulhi_epi16() {
3413 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3414 let r = _mm_mulhi_epi16(a, b);
3415 assert_eq_m128i(r, _mm_set1_epi16(-16));
3416 }
3417
3418 #[simd_test(enable = "sse2")]
3419 unsafe fn test_mm_mulhi_epu16() {
3420 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3421 let r = _mm_mulhi_epu16(a, b);
3422 assert_eq_m128i(r, _mm_set1_epi16(15));
3423 }
3424
3425 #[simd_test(enable = "sse2")]
3426 unsafe fn test_mm_mullo_epi16() {
3427 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3428 let r = _mm_mullo_epi16(a, b);
3429 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3430 }
3431
3432 #[simd_test(enable = "sse2")]
3433 unsafe fn test_mm_mul_epu32() {
3434 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3435 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3436 let r = _mm_mul_epu32(a, b);
3437 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3438 assert_eq_m128i(r, e);
3439 }
3440
3441 #[simd_test(enable = "sse2")]
3442 unsafe fn test_mm_sad_epu8() {
3443 #[rustfmt::skip]
3444 let a = _mm_setr_epi8(
3445 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3446 1, 2, 3, 4,
3447 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3448 1, 2, 3, 4,
3449 );
3450 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3451 let r = _mm_sad_epu8(a, b);
3452 let e = _mm_setr_epi64x(1020, 614);
3453 assert_eq_m128i(r, e);
3454 }
3455
3456 #[simd_test(enable = "sse2")]
3457 unsafe fn test_mm_sub_epi8() {
3458 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3459 let r = _mm_sub_epi8(a, b);
3460 assert_eq_m128i(r, _mm_set1_epi8(-1));
3461 }
3462
3463 #[simd_test(enable = "sse2")]
3464 unsafe fn test_mm_sub_epi16() {
3465 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3466 let r = _mm_sub_epi16(a, b);
3467 assert_eq_m128i(r, _mm_set1_epi16(-1));
3468 }
3469
3470 #[simd_test(enable = "sse2")]
3471 unsafe fn test_mm_sub_epi32() {
3472 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3473 let r = _mm_sub_epi32(a, b);
3474 assert_eq_m128i(r, _mm_set1_epi32(-1));
3475 }
3476
3477 #[simd_test(enable = "sse2")]
3478 unsafe fn test_mm_sub_epi64() {
3479 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3480 let r = _mm_sub_epi64(a, b);
3481 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3482 }
3483
3484 #[simd_test(enable = "sse2")]
3485 unsafe fn test_mm_subs_epi8() {
3486 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3487 let r = _mm_subs_epi8(a, b);
3488 assert_eq_m128i(r, _mm_set1_epi8(3));
3489 }
3490
3491 #[simd_test(enable = "sse2")]
3492 unsafe fn test_mm_subs_epi8_saturate_positive() {
3493 let a = _mm_set1_epi8(0x7F);
3494 let b = _mm_set1_epi8(-1);
3495 let r = _mm_subs_epi8(a, b);
3496 assert_eq_m128i(r, a);
3497 }
3498
3499 #[simd_test(enable = "sse2")]
3500 unsafe fn test_mm_subs_epi8_saturate_negative() {
3501 let a = _mm_set1_epi8(-0x80);
3502 let b = _mm_set1_epi8(1);
3503 let r = _mm_subs_epi8(a, b);
3504 assert_eq_m128i(r, a);
3505 }
3506
3507 #[simd_test(enable = "sse2")]
3508 unsafe fn test_mm_subs_epi16() {
3509 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3510 let r = _mm_subs_epi16(a, b);
3511 assert_eq_m128i(r, _mm_set1_epi16(3));
3512 }
3513
3514 #[simd_test(enable = "sse2")]
3515 unsafe fn test_mm_subs_epi16_saturate_positive() {
3516 let a = _mm_set1_epi16(0x7FFF);
3517 let b = _mm_set1_epi16(-1);
3518 let r = _mm_subs_epi16(a, b);
3519 assert_eq_m128i(r, a);
3520 }
3521
3522 #[simd_test(enable = "sse2")]
3523 unsafe fn test_mm_subs_epi16_saturate_negative() {
3524 let a = _mm_set1_epi16(-0x8000);
3525 let b = _mm_set1_epi16(1);
3526 let r = _mm_subs_epi16(a, b);
3527 assert_eq_m128i(r, a);
3528 }
3529
3530 #[simd_test(enable = "sse2")]
3531 unsafe fn test_mm_subs_epu8() {
3532 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3533 let r = _mm_subs_epu8(a, b);
3534 assert_eq_m128i(r, _mm_set1_epi8(3));
3535 }
3536
3537 #[simd_test(enable = "sse2")]
3538 unsafe fn test_mm_subs_epu8_saturate() {
3539 let a = _mm_set1_epi8(0);
3540 let b = _mm_set1_epi8(1);
3541 let r = _mm_subs_epu8(a, b);
3542 assert_eq_m128i(r, a);
3543 }
3544
3545 #[simd_test(enable = "sse2")]
3546 unsafe fn test_mm_subs_epu16() {
3547 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3548 let r = _mm_subs_epu16(a, b);
3549 assert_eq_m128i(r, _mm_set1_epi16(3));
3550 }
3551
3552 #[simd_test(enable = "sse2")]
3553 unsafe fn test_mm_subs_epu16_saturate() {
3554 let a = _mm_set1_epi16(0);
3555 let b = _mm_set1_epi16(1);
3556 let r = _mm_subs_epu16(a, b);
3557 assert_eq_m128i(r, a);
3558 }
3559
3560 #[simd_test(enable = "sse2")]
3561 unsafe fn test_mm_slli_si128() {
3562 #[rustfmt::skip]
3563 let a = _mm_setr_epi8(
3564 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3565 );
3566 let r = _mm_slli_si128::<1>(a);
3567 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3568 assert_eq_m128i(r, e);
3569
3570 #[rustfmt::skip]
3571 let a = _mm_setr_epi8(
3572 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3573 );
3574 let r = _mm_slli_si128::<15>(a);
3575 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3576 assert_eq_m128i(r, e);
3577
3578 #[rustfmt::skip]
3579 let a = _mm_setr_epi8(
3580 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3581 );
3582 let r = _mm_slli_si128::<16>(a);
3583 assert_eq_m128i(r, _mm_set1_epi8(0));
3584 }
3585
3586 #[simd_test(enable = "sse2")]
3587 unsafe fn test_mm_slli_epi16() {
3588 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3589 let r = _mm_slli_epi16::<4>(a);
3590 assert_eq_m128i(
3591 r,
3592 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3593 );
3594 let r = _mm_slli_epi16::<16>(a);
3595 assert_eq_m128i(r, _mm_set1_epi16(0));
3596 }
3597
3598 #[simd_test(enable = "sse2")]
3599 unsafe fn test_mm_sll_epi16() {
3600 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3601 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3602 assert_eq_m128i(
3603 r,
3604 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3605 );
3606 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3607 assert_eq_m128i(r, a);
3608 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3609 assert_eq_m128i(r, _mm_set1_epi16(0));
3610 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3611 assert_eq_m128i(r, _mm_set1_epi16(0));
3612 }
3613
3614 #[simd_test(enable = "sse2")]
3615 unsafe fn test_mm_slli_epi32() {
3616 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3617 let r = _mm_slli_epi32::<4>(a);
3618 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3619 let r = _mm_slli_epi32::<32>(a);
3620 assert_eq_m128i(r, _mm_set1_epi32(0));
3621 }
3622
3623 #[simd_test(enable = "sse2")]
3624 unsafe fn test_mm_sll_epi32() {
3625 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3626 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3627 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3628 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3629 assert_eq_m128i(r, a);
3630 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3631 assert_eq_m128i(r, _mm_set1_epi32(0));
3632 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3633 assert_eq_m128i(r, _mm_set1_epi32(0));
3634 }
3635
3636 #[simd_test(enable = "sse2")]
3637 unsafe fn test_mm_slli_epi64() {
3638 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3639 let r = _mm_slli_epi64::<4>(a);
3640 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3641 let r = _mm_slli_epi64::<64>(a);
3642 assert_eq_m128i(r, _mm_set1_epi64x(0));
3643 }
3644
3645 #[simd_test(enable = "sse2")]
3646 unsafe fn test_mm_sll_epi64() {
3647 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3648 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3649 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3650 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3651 assert_eq_m128i(r, a);
3652 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3653 assert_eq_m128i(r, _mm_set1_epi64x(0));
3654 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3655 assert_eq_m128i(r, _mm_set1_epi64x(0));
3656 }
3657
3658 #[simd_test(enable = "sse2")]
3659 unsafe fn test_mm_srai_epi16() {
3660 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3661 let r = _mm_srai_epi16::<4>(a);
3662 assert_eq_m128i(
3663 r,
3664 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3665 );
3666 let r = _mm_srai_epi16::<16>(a);
3667 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3668 }
3669
3670 #[simd_test(enable = "sse2")]
3671 unsafe fn test_mm_sra_epi16() {
3672 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3673 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3674 assert_eq_m128i(
3675 r,
3676 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3677 );
3678 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3679 assert_eq_m128i(r, a);
3680 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3681 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3682 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3683 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3684 }
3685
3686 #[simd_test(enable = "sse2")]
3687 unsafe fn test_mm_srai_epi32() {
3688 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3689 let r = _mm_srai_epi32::<4>(a);
3690 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3691 let r = _mm_srai_epi32::<32>(a);
3692 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3693 }
3694
3695 #[simd_test(enable = "sse2")]
3696 unsafe fn test_mm_sra_epi32() {
3697 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3698 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3699 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3700 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3701 assert_eq_m128i(r, a);
3702 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3703 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3704 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3705 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3706 }
3707
3708 #[simd_test(enable = "sse2")]
3709 unsafe fn test_mm_srli_si128() {
3710 #[rustfmt::skip]
3711 let a = _mm_setr_epi8(
3712 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3713 );
3714 let r = _mm_srli_si128::<1>(a);
3715 #[rustfmt::skip]
3716 let e = _mm_setr_epi8(
3717 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3718 );
3719 assert_eq_m128i(r, e);
3720
3721 #[rustfmt::skip]
3722 let a = _mm_setr_epi8(
3723 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3724 );
3725 let r = _mm_srli_si128::<15>(a);
3726 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3727 assert_eq_m128i(r, e);
3728
3729 #[rustfmt::skip]
3730 let a = _mm_setr_epi8(
3731 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3732 );
3733 let r = _mm_srli_si128::<16>(a);
3734 assert_eq_m128i(r, _mm_set1_epi8(0));
3735 }
3736
3737 #[simd_test(enable = "sse2")]
3738 unsafe fn test_mm_srli_epi16() {
3739 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3740 let r = _mm_srli_epi16::<4>(a);
3741 assert_eq_m128i(
3742 r,
3743 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3744 );
3745 let r = _mm_srli_epi16::<16>(a);
3746 assert_eq_m128i(r, _mm_set1_epi16(0));
3747 }
3748
3749 #[simd_test(enable = "sse2")]
3750 unsafe fn test_mm_srl_epi16() {
3751 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3752 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3753 assert_eq_m128i(
3754 r,
3755 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3756 );
3757 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3758 assert_eq_m128i(r, a);
3759 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3760 assert_eq_m128i(r, _mm_set1_epi16(0));
3761 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3762 assert_eq_m128i(r, _mm_set1_epi16(0));
3763 }
3764
3765 #[simd_test(enable = "sse2")]
3766 unsafe fn test_mm_srli_epi32() {
3767 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3768 let r = _mm_srli_epi32::<4>(a);
3769 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3770 let r = _mm_srli_epi32::<32>(a);
3771 assert_eq_m128i(r, _mm_set1_epi32(0));
3772 }
3773
3774 #[simd_test(enable = "sse2")]
3775 unsafe fn test_mm_srl_epi32() {
3776 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3777 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3778 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3779 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3780 assert_eq_m128i(r, a);
3781 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3782 assert_eq_m128i(r, _mm_set1_epi32(0));
3783 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3784 assert_eq_m128i(r, _mm_set1_epi32(0));
3785 }
3786
3787 #[simd_test(enable = "sse2")]
3788 unsafe fn test_mm_srli_epi64() {
3789 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3790 let r = _mm_srli_epi64::<4>(a);
3791 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3792 let r = _mm_srli_epi64::<64>(a);
3793 assert_eq_m128i(r, _mm_set1_epi64x(0));
3794 }
3795
3796 #[simd_test(enable = "sse2")]
3797 unsafe fn test_mm_srl_epi64() {
3798 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3799 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3800 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3801 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3802 assert_eq_m128i(r, a);
3803 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3804 assert_eq_m128i(r, _mm_set1_epi64x(0));
3805 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3806 assert_eq_m128i(r, _mm_set1_epi64x(0));
3807 }
3808
3809 #[simd_test(enable = "sse2")]
3810 unsafe fn test_mm_and_si128() {
3811 let a = _mm_set1_epi8(5);
3812 let b = _mm_set1_epi8(3);
3813 let r = _mm_and_si128(a, b);
3814 assert_eq_m128i(r, _mm_set1_epi8(1));
3815 }
3816
3817 #[simd_test(enable = "sse2")]
3818 unsafe fn test_mm_andnot_si128() {
3819 let a = _mm_set1_epi8(5);
3820 let b = _mm_set1_epi8(3);
3821 let r = _mm_andnot_si128(a, b);
3822 assert_eq_m128i(r, _mm_set1_epi8(2));
3823 }
3824
3825 #[simd_test(enable = "sse2")]
3826 unsafe fn test_mm_or_si128() {
3827 let a = _mm_set1_epi8(5);
3828 let b = _mm_set1_epi8(3);
3829 let r = _mm_or_si128(a, b);
3830 assert_eq_m128i(r, _mm_set1_epi8(7));
3831 }
3832
3833 #[simd_test(enable = "sse2")]
3834 unsafe fn test_mm_xor_si128() {
3835 let a = _mm_set1_epi8(5);
3836 let b = _mm_set1_epi8(3);
3837 let r = _mm_xor_si128(a, b);
3838 assert_eq_m128i(r, _mm_set1_epi8(6));
3839 }
3840
3841 #[simd_test(enable = "sse2")]
3842 unsafe fn test_mm_cmpeq_epi8() {
3843 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3844 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3845 let r = _mm_cmpeq_epi8(a, b);
3846 #[rustfmt::skip]
3847 assert_eq_m128i(
3848 r,
3849 _mm_setr_epi8(
3850 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3851 )
3852 );
3853 }
3854
3855 #[simd_test(enable = "sse2")]
3856 unsafe fn test_mm_cmpeq_epi16() {
3857 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3858 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3859 let r = _mm_cmpeq_epi16(a, b);
3860 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3861 }
3862
3863 #[simd_test(enable = "sse2")]
3864 unsafe fn test_mm_cmpeq_epi32() {
3865 let a = _mm_setr_epi32(0, 1, 2, 3);
3866 let b = _mm_setr_epi32(3, 2, 2, 0);
3867 let r = _mm_cmpeq_epi32(a, b);
3868 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3869 }
3870
3871 #[simd_test(enable = "sse2")]
3872 unsafe fn test_mm_cmpgt_epi8() {
3873 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3874 let b = _mm_set1_epi8(0);
3875 let r = _mm_cmpgt_epi8(a, b);
3876 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3877 assert_eq_m128i(r, e);
3878 }
3879
3880 #[simd_test(enable = "sse2")]
3881 unsafe fn test_mm_cmpgt_epi16() {
3882 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3883 let b = _mm_set1_epi16(0);
3884 let r = _mm_cmpgt_epi16(a, b);
3885 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3886 assert_eq_m128i(r, e);
3887 }
3888
3889 #[simd_test(enable = "sse2")]
3890 unsafe fn test_mm_cmpgt_epi32() {
3891 let a = _mm_set_epi32(5, 0, 0, 0);
3892 let b = _mm_set1_epi32(0);
3893 let r = _mm_cmpgt_epi32(a, b);
3894 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3895 }
3896
3897 #[simd_test(enable = "sse2")]
3898 unsafe fn test_mm_cmplt_epi8() {
3899 let a = _mm_set1_epi8(0);
3900 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3901 let r = _mm_cmplt_epi8(a, b);
3902 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3903 assert_eq_m128i(r, e);
3904 }
3905
3906 #[simd_test(enable = "sse2")]
3907 unsafe fn test_mm_cmplt_epi16() {
3908 let a = _mm_set1_epi16(0);
3909 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3910 let r = _mm_cmplt_epi16(a, b);
3911 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3912 assert_eq_m128i(r, e);
3913 }
3914
3915 #[simd_test(enable = "sse2")]
3916 unsafe fn test_mm_cmplt_epi32() {
3917 let a = _mm_set1_epi32(0);
3918 let b = _mm_set_epi32(5, 0, 0, 0);
3919 let r = _mm_cmplt_epi32(a, b);
3920 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3921 }
3922
3923 #[simd_test(enable = "sse2")]
3924 unsafe fn test_mm_cvtepi32_pd() {
3925 let a = _mm_set_epi32(35, 25, 15, 5);
3926 let r = _mm_cvtepi32_pd(a);
3927 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3928 }
3929
3930 #[simd_test(enable = "sse2")]
3931 unsafe fn test_mm_cvtsi32_sd() {
3932 let a = _mm_set1_pd(3.5);
3933 let r = _mm_cvtsi32_sd(a, 5);
3934 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3935 }
3936
3937 #[simd_test(enable = "sse2")]
3938 unsafe fn test_mm_cvtepi32_ps() {
3939 let a = _mm_setr_epi32(1, 2, 3, 4);
3940 let r = _mm_cvtepi32_ps(a);
3941 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3942 }
3943
3944 #[simd_test(enable = "sse2")]
3945 unsafe fn test_mm_cvtps_epi32() {
3946 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3947 let r = _mm_cvtps_epi32(a);
3948 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3949 }
3950
3951 #[simd_test(enable = "sse2")]
3952 unsafe fn test_mm_cvtsi32_si128() {
3953 let r = _mm_cvtsi32_si128(5);
3954 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3955 }
3956
3957 #[simd_test(enable = "sse2")]
3958 unsafe fn test_mm_cvtsi128_si32() {
3959 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3960 assert_eq!(r, 5);
3961 }
3962
3963 #[simd_test(enable = "sse2")]
3964 unsafe fn test_mm_set_epi64x() {
3965 let r = _mm_set_epi64x(0, 1);
3966 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3967 }
3968
3969 #[simd_test(enable = "sse2")]
3970 unsafe fn test_mm_set_epi32() {
3971 let r = _mm_set_epi32(0, 1, 2, 3);
3972 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3973 }
3974
3975 #[simd_test(enable = "sse2")]
3976 unsafe fn test_mm_set_epi16() {
3977 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3978 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3979 }
3980
3981 #[simd_test(enable = "sse2")]
3982 unsafe fn test_mm_set_epi8() {
3983 #[rustfmt::skip]
3984 let r = _mm_set_epi8(
3985 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3986 );
3987 #[rustfmt::skip]
3988 let e = _mm_setr_epi8(
3989 15, 14, 13, 12, 11, 10, 9, 8,
3990 7, 6, 5, 4, 3, 2, 1, 0,
3991 );
3992 assert_eq_m128i(r, e);
3993 }
3994
3995 #[simd_test(enable = "sse2")]
unsafe fn test_mm_set1_epi64x() {
    // Inspect the raw lanes instead of comparing against another `_mm_set1_epi64x`
    // call, which would pass regardless of what the intrinsic does.
    let lanes: [i64; 2] = transmute(_mm_set1_epi64x(1));
    assert_eq!(lanes, [1; 2]);
}
4000
4001 #[simd_test(enable = "sse2")]
unsafe fn test_mm_set1_epi32() {
    // Inspect the raw lanes instead of comparing against another `_mm_set1_epi32`
    // call, which would pass regardless of what the intrinsic does.
    let lanes: [i32; 4] = transmute(_mm_set1_epi32(1));
    assert_eq!(lanes, [1; 4]);
}
4006
4007 #[simd_test(enable = "sse2")]
unsafe fn test_mm_set1_epi16() {
    // Inspect the raw lanes instead of comparing against another `_mm_set1_epi16`
    // call, which would pass regardless of what the intrinsic does.
    let lanes: [i16; 8] = transmute(_mm_set1_epi16(1));
    assert_eq!(lanes, [1; 8]);
}
4012
4013 #[simd_test(enable = "sse2")]
unsafe fn test_mm_set1_epi8() {
    // Inspect the raw lanes instead of comparing against another `_mm_set1_epi8`
    // call, which would pass regardless of what the intrinsic does.
    let lanes: [i8; 16] = transmute(_mm_set1_epi8(1));
    assert_eq!(lanes, [1; 16]);
}
4018
4019 #[simd_test(enable = "sse2")]
unsafe fn test_mm_setr_epi32() {
    // Inspect the raw lanes instead of comparing against another `_mm_setr_epi32`
    // call, which would pass regardless of what the intrinsic does. The first
    // argument is expected at the lowest address.
    let lanes: [i32; 4] = transmute(_mm_setr_epi32(0, 1, 2, 3));
    assert_eq!(lanes, [0, 1, 2, 3]);
}
4024
4025 #[simd_test(enable = "sse2")]
unsafe fn test_mm_setr_epi16() {
    // Inspect the raw lanes instead of comparing against another `_mm_setr_epi16`
    // call, which would pass regardless of what the intrinsic does. The first
    // argument is expected at the lowest address.
    let lanes: [i16; 8] = transmute(_mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
    assert_eq!(lanes, [0, 1, 2, 3, 4, 5, 6, 7]);
}
4030
4031 #[simd_test(enable = "sse2")]
unsafe fn test_mm_setr_epi8() {
    // Inspect the raw bytes instead of comparing against another `_mm_setr_epi8`
    // call, which would pass regardless of what the intrinsic does. The first
    // argument is expected at the lowest address.
    #[rustfmt::skip]
    let r = _mm_setr_epi8(
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    );
    let bytes: [i8; 16] = transmute(r);
    #[rustfmt::skip]
    let expected: [i8; 16] = [
        0, 1, 2, 3, 4, 5, 6, 7,
        8, 9, 10, 11, 12, 13, 14, 15,
    ];
    assert_eq!(bytes, expected);
}
4044
4045 #[simd_test(enable = "sse2")]
4046 unsafe fn test_mm_setzero_si128() {
4047 let r = _mm_setzero_si128();
4048 assert_eq_m128i(r, _mm_set1_epi64x(0));
4049 }
4050
4051 #[simd_test(enable = "sse2")]
4052 unsafe fn test_mm_loadl_epi64() {
4053 let a = _mm_setr_epi64x(6, 5);
4054 let r = _mm_loadl_epi64(ptr::addr_of!(a));
4055 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4056 }
4057
4058 #[simd_test(enable = "sse2")]
4059 unsafe fn test_mm_load_si128() {
4060 let a = _mm_set_epi64x(5, 6);
4061 let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4062 assert_eq_m128i(a, r);
4063 }
4064
4065 #[simd_test(enable = "sse2")]
4066 unsafe fn test_mm_loadu_si128() {
4067 let a = _mm_set_epi64x(5, 6);
4068 let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4069 assert_eq_m128i(a, r);
4070 }
4071
4072 #[simd_test(enable = "sse2")]
4073 #[cfg_attr(miri, ignore)]
4076 unsafe fn test_mm_maskmoveu_si128() {
4077 let a = _mm_set1_epi8(9);
4078 #[rustfmt::skip]
4079 let mask = _mm_set_epi8(
4080 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4081 0, 0, 0, 0, 0, 0, 0, 0,
4082 );
4083 let mut r = _mm_set1_epi8(0);
4084 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4085 _mm_sfence();
4086 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4087 assert_eq_m128i(r, e);
4088 }
4089
4090 #[simd_test(enable = "sse2")]
4091 unsafe fn test_mm_store_si128() {
4092 let a = _mm_set1_epi8(9);
4093 let mut r = _mm_set1_epi8(0);
4094 _mm_store_si128(&mut r, a);
4095 assert_eq_m128i(r, a);
4096 }
4097
4098 #[simd_test(enable = "sse2")]
4099 unsafe fn test_mm_storeu_si128() {
4100 let a = _mm_set1_epi8(9);
4101 let mut r = _mm_set1_epi8(0);
4102 _mm_storeu_si128(&mut r, a);
4103 assert_eq_m128i(r, a);
4104 }
4105
4106 #[simd_test(enable = "sse2")]
4107 unsafe fn test_mm_storel_epi64() {
4108 let a = _mm_setr_epi64x(2, 9);
4109 let mut r = _mm_set1_epi8(0);
4110 _mm_storel_epi64(&mut r, a);
4111 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4112 }
4113
4114 #[simd_test(enable = "sse2")]
4115 #[cfg_attr(miri, ignore)]
4118 unsafe fn test_mm_stream_si128() {
4119 let a = _mm_setr_epi32(1, 2, 3, 4);
4120 let mut r = _mm_undefined_si128();
4121 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4122 _mm_sfence();
4123 assert_eq_m128i(r, a);
4124 }
4125
4126 #[simd_test(enable = "sse2")]
4127 #[cfg_attr(miri, ignore)]
unsafe fn test_mm_stream_si32() {
    // Stream the scalar into a boxed slot, fence, then check it arrived.
    let value: i32 = 7;
    let mut slot = boxed::Box::<i32>::new(-1);
    _mm_stream_si32(ptr::addr_of_mut!(*slot), value);
    _mm_sfence();
    assert_eq!(value, *slot);
}
4137
4138 #[simd_test(enable = "sse2")]
4139 unsafe fn test_mm_move_epi64() {
4140 let a = _mm_setr_epi64x(5, 6);
4141 let r = _mm_move_epi64(a);
4142 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4143 }
4144
4145 #[simd_test(enable = "sse2")]
4146 unsafe fn test_mm_packs_epi16() {
4147 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4148 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4149 let r = _mm_packs_epi16(a, b);
4150 #[rustfmt::skip]
4151 assert_eq_m128i(
4152 r,
4153 _mm_setr_epi8(
4154 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4155 )
4156 );
4157 }
4158
4159 #[simd_test(enable = "sse2")]
4160 unsafe fn test_mm_packs_epi32() {
4161 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4162 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4163 let r = _mm_packs_epi32(a, b);
4164 assert_eq_m128i(
4165 r,
4166 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4167 );
4168 }
4169
4170 #[simd_test(enable = "sse2")]
4171 unsafe fn test_mm_packus_epi16() {
4172 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4173 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4174 let r = _mm_packus_epi16(a, b);
4175 assert_eq_m128i(
4176 r,
4177 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4178 );
4179 }
4180
4181 #[simd_test(enable = "sse2")]
unsafe fn test_mm_extract_epi16() {
    // Lane 0 holds -1 and is expected back as 0xFFFF, i.e. without sign extension.
    let lanes = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
    let lane0 = _mm_extract_epi16::<0>(lanes);
    let lane3 = _mm_extract_epi16::<3>(lanes);
    assert_eq!(lane0, 0xFFFF);
    assert_eq!(lane3, 3);
}
4189
4190 #[simd_test(enable = "sse2")]
4191 unsafe fn test_mm_insert_epi16() {
4192 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4193 let r = _mm_insert_epi16::<0>(a, 9);
4194 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4195 assert_eq_m128i(r, e);
4196 }
4197
4198 #[simd_test(enable = "sse2")]
unsafe fn test_mm_movemask_epi8() {
    // Bit `i` of the expected mask is the top bit of byte `i` of the input.
    #[rustfmt::skip]
    let bytes = _mm_setr_epi8(
        0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
        0b0101, 0b1111_0000u8 as i8, 0, 0,
        0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
        0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
    );
    let mask = _mm_movemask_epi8(bytes);
    assert_eq!(mask, 0b10100110_00100101);
}
4210
4211 #[simd_test(enable = "sse2")]
4212 unsafe fn test_mm_shuffle_epi32() {
4213 let a = _mm_setr_epi32(5, 10, 15, 20);
4214 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4215 let e = _mm_setr_epi32(20, 10, 10, 5);
4216 assert_eq_m128i(r, e);
4217 }
4218
4219 #[simd_test(enable = "sse2")]
4220 unsafe fn test_mm_shufflehi_epi16() {
4221 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4222 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4223 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4224 assert_eq_m128i(r, e);
4225 }
4226
4227 #[simd_test(enable = "sse2")]
4228 unsafe fn test_mm_shufflelo_epi16() {
4229 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4230 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4231 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4232 assert_eq_m128i(r, e);
4233 }
4234
4235 #[simd_test(enable = "sse2")]
4236 unsafe fn test_mm_unpackhi_epi8() {
4237 #[rustfmt::skip]
4238 let a = _mm_setr_epi8(
4239 0, 1, 2, 3, 4, 5, 6, 7,
4240 8, 9, 10, 11, 12, 13, 14, 15,
4241 );
4242 #[rustfmt::skip]
4243 let b = _mm_setr_epi8(
4244 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4245 );
4246 let r = _mm_unpackhi_epi8(a, b);
4247 #[rustfmt::skip]
4248 let e = _mm_setr_epi8(
4249 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4250 );
4251 assert_eq_m128i(r, e);
4252 }
4253
4254 #[simd_test(enable = "sse2")]
4255 unsafe fn test_mm_unpackhi_epi16() {
4256 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4257 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4258 let r = _mm_unpackhi_epi16(a, b);
4259 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4260 assert_eq_m128i(r, e);
4261 }
4262
4263 #[simd_test(enable = "sse2")]
4264 unsafe fn test_mm_unpackhi_epi32() {
4265 let a = _mm_setr_epi32(0, 1, 2, 3);
4266 let b = _mm_setr_epi32(4, 5, 6, 7);
4267 let r = _mm_unpackhi_epi32(a, b);
4268 let e = _mm_setr_epi32(2, 6, 3, 7);
4269 assert_eq_m128i(r, e);
4270 }
4271
4272 #[simd_test(enable = "sse2")]
4273 unsafe fn test_mm_unpackhi_epi64() {
4274 let a = _mm_setr_epi64x(0, 1);
4275 let b = _mm_setr_epi64x(2, 3);
4276 let r = _mm_unpackhi_epi64(a, b);
4277 let e = _mm_setr_epi64x(1, 3);
4278 assert_eq_m128i(r, e);
4279 }
4280
4281 #[simd_test(enable = "sse2")]
4282 unsafe fn test_mm_unpacklo_epi8() {
4283 #[rustfmt::skip]
4284 let a = _mm_setr_epi8(
4285 0, 1, 2, 3, 4, 5, 6, 7,
4286 8, 9, 10, 11, 12, 13, 14, 15,
4287 );
4288 #[rustfmt::skip]
4289 let b = _mm_setr_epi8(
4290 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4291 );
4292 let r = _mm_unpacklo_epi8(a, b);
4293 #[rustfmt::skip]
4294 let e = _mm_setr_epi8(
4295 0, 16, 1, 17, 2, 18, 3, 19,
4296 4, 20, 5, 21, 6, 22, 7, 23,
4297 );
4298 assert_eq_m128i(r, e);
4299 }
4300
4301 #[simd_test(enable = "sse2")]
4302 unsafe fn test_mm_unpacklo_epi16() {
4303 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4304 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4305 let r = _mm_unpacklo_epi16(a, b);
4306 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4307 assert_eq_m128i(r, e);
4308 }
4309
4310 #[simd_test(enable = "sse2")]
4311 unsafe fn test_mm_unpacklo_epi32() {
4312 let a = _mm_setr_epi32(0, 1, 2, 3);
4313 let b = _mm_setr_epi32(4, 5, 6, 7);
4314 let r = _mm_unpacklo_epi32(a, b);
4315 let e = _mm_setr_epi32(0, 4, 1, 5);
4316 assert_eq_m128i(r, e);
4317 }
4318
4319 #[simd_test(enable = "sse2")]
4320 unsafe fn test_mm_unpacklo_epi64() {
4321 let a = _mm_setr_epi64x(0, 1);
4322 let b = _mm_setr_epi64x(2, 3);
4323 let r = _mm_unpacklo_epi64(a, b);
4324 let e = _mm_setr_epi64x(0, 2);
4325 assert_eq_m128i(r, e);
4326 }
4327
4328 #[simd_test(enable = "sse2")]
4329 unsafe fn test_mm_add_sd() {
4330 let a = _mm_setr_pd(1.0, 2.0);
4331 let b = _mm_setr_pd(5.0, 10.0);
4332 let r = _mm_add_sd(a, b);
4333 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4334 }
4335
4336 #[simd_test(enable = "sse2")]
4337 unsafe fn test_mm_add_pd() {
4338 let a = _mm_setr_pd(1.0, 2.0);
4339 let b = _mm_setr_pd(5.0, 10.0);
4340 let r = _mm_add_pd(a, b);
4341 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4342 }
4343
4344 #[simd_test(enable = "sse2")]
4345 unsafe fn test_mm_div_sd() {
4346 let a = _mm_setr_pd(1.0, 2.0);
4347 let b = _mm_setr_pd(5.0, 10.0);
4348 let r = _mm_div_sd(a, b);
4349 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4350 }
4351
4352 #[simd_test(enable = "sse2")]
4353 unsafe fn test_mm_div_pd() {
4354 let a = _mm_setr_pd(1.0, 2.0);
4355 let b = _mm_setr_pd(5.0, 10.0);
4356 let r = _mm_div_pd(a, b);
4357 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4358 }
4359
4360 #[simd_test(enable = "sse2")]
4361 unsafe fn test_mm_max_sd() {
4362 let a = _mm_setr_pd(1.0, 2.0);
4363 let b = _mm_setr_pd(5.0, 10.0);
4364 let r = _mm_max_sd(a, b);
4365 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4366 }
4367
4368 #[simd_test(enable = "sse2")]
4369 unsafe fn test_mm_max_pd() {
4370 let a = _mm_setr_pd(1.0, 2.0);
4371 let b = _mm_setr_pd(5.0, 10.0);
4372 let r = _mm_max_pd(a, b);
4373 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4374
4375 let a = _mm_setr_pd(-0.0, 0.0);
4377 let b = _mm_setr_pd(0.0, 0.0);
4378 let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4379 let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4380 let a: [u8; 16] = transmute(a);
4381 let b: [u8; 16] = transmute(b);
4382 assert_eq!(r1, b);
4383 assert_eq!(r2, a);
4384 assert_ne!(a, b); }
4386
4387 #[simd_test(enable = "sse2")]
4388 unsafe fn test_mm_min_sd() {
4389 let a = _mm_setr_pd(1.0, 2.0);
4390 let b = _mm_setr_pd(5.0, 10.0);
4391 let r = _mm_min_sd(a, b);
4392 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4393 }
4394
4395 #[simd_test(enable = "sse2")]
4396 unsafe fn test_mm_min_pd() {
4397 let a = _mm_setr_pd(1.0, 2.0);
4398 let b = _mm_setr_pd(5.0, 10.0);
4399 let r = _mm_min_pd(a, b);
4400 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4401
4402 let a = _mm_setr_pd(-0.0, 0.0);
4404 let b = _mm_setr_pd(0.0, 0.0);
4405 let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4406 let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4407 let a: [u8; 16] = transmute(a);
4408 let b: [u8; 16] = transmute(b);
4409 assert_eq!(r1, b);
4410 assert_eq!(r2, a);
4411 assert_ne!(a, b); }
4413
4414 #[simd_test(enable = "sse2")]
4415 unsafe fn test_mm_mul_sd() {
4416 let a = _mm_setr_pd(1.0, 2.0);
4417 let b = _mm_setr_pd(5.0, 10.0);
4418 let r = _mm_mul_sd(a, b);
4419 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4420 }
4421
4422 #[simd_test(enable = "sse2")]
4423 unsafe fn test_mm_mul_pd() {
4424 let a = _mm_setr_pd(1.0, 2.0);
4425 let b = _mm_setr_pd(5.0, 10.0);
4426 let r = _mm_mul_pd(a, b);
4427 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4428 }
4429
4430 #[simd_test(enable = "sse2")]
4431 unsafe fn test_mm_sqrt_sd() {
4432 let a = _mm_setr_pd(1.0, 2.0);
4433 let b = _mm_setr_pd(5.0, 10.0);
4434 let r = _mm_sqrt_sd(a, b);
4435 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4436 }
4437
4438 #[simd_test(enable = "sse2")]
4439 unsafe fn test_mm_sqrt_pd() {
4440 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4441 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4442 }
4443
4444 #[simd_test(enable = "sse2")]
4445 unsafe fn test_mm_sub_sd() {
4446 let a = _mm_setr_pd(1.0, 2.0);
4447 let b = _mm_setr_pd(5.0, 10.0);
4448 let r = _mm_sub_sd(a, b);
4449 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4450 }
4451
4452 #[simd_test(enable = "sse2")]
4453 unsafe fn test_mm_sub_pd() {
4454 let a = _mm_setr_pd(1.0, 2.0);
4455 let b = _mm_setr_pd(5.0, 10.0);
4456 let r = _mm_sub_pd(a, b);
4457 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4458 }
4459
4460 #[simd_test(enable = "sse2")]
4461 unsafe fn test_mm_and_pd() {
4462 let a = transmute(u64x2::splat(5));
4463 let b = transmute(u64x2::splat(3));
4464 let r = _mm_and_pd(a, b);
4465 let e = transmute(u64x2::splat(1));
4466 assert_eq_m128d(r, e);
4467 }
4468
4469 #[simd_test(enable = "sse2")]
4470 unsafe fn test_mm_andnot_pd() {
4471 let a = transmute(u64x2::splat(5));
4472 let b = transmute(u64x2::splat(3));
4473 let r = _mm_andnot_pd(a, b);
4474 let e = transmute(u64x2::splat(2));
4475 assert_eq_m128d(r, e);
4476 }
4477
4478 #[simd_test(enable = "sse2")]
4479 unsafe fn test_mm_or_pd() {
4480 let a = transmute(u64x2::splat(5));
4481 let b = transmute(u64x2::splat(3));
4482 let r = _mm_or_pd(a, b);
4483 let e = transmute(u64x2::splat(7));
4484 assert_eq_m128d(r, e);
4485 }
4486
4487 #[simd_test(enable = "sse2")]
4488 unsafe fn test_mm_xor_pd() {
4489 let a = transmute(u64x2::splat(5));
4490 let b = transmute(u64x2::splat(3));
4491 let r = _mm_xor_pd(a, b);
4492 let e = transmute(u64x2::splat(6));
4493 assert_eq_m128d(r, e);
4494 }
4495
4496 #[simd_test(enable = "sse2")]
4497 unsafe fn test_mm_cmpeq_sd() {
4498 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4499 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4500 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4501 assert_eq_m128i(r, e);
4502 }
4503
4504 #[simd_test(enable = "sse2")]
4505 unsafe fn test_mm_cmplt_sd() {
4506 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4507 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4508 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4509 assert_eq_m128i(r, e);
4510 }
4511
4512 #[simd_test(enable = "sse2")]
4513 unsafe fn test_mm_cmple_sd() {
4514 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4515 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4516 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4517 assert_eq_m128i(r, e);
4518 }
4519
4520 #[simd_test(enable = "sse2")]
4521 unsafe fn test_mm_cmpgt_sd() {
4522 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4523 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4524 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4525 assert_eq_m128i(r, e);
4526 }
4527
4528 #[simd_test(enable = "sse2")]
4529 unsafe fn test_mm_cmpge_sd() {
4530 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4531 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4532 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4533 assert_eq_m128i(r, e);
4534 }
4535
4536 #[simd_test(enable = "sse2")]
4537 unsafe fn test_mm_cmpord_sd() {
4538 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4539 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4540 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4541 assert_eq_m128i(r, e);
4542 }
4543
4544 #[simd_test(enable = "sse2")]
4545 unsafe fn test_mm_cmpunord_sd() {
4546 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4547 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4548 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4549 assert_eq_m128i(r, e);
4550 }
4551
4552 #[simd_test(enable = "sse2")]
4553 unsafe fn test_mm_cmpneq_sd() {
4554 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4555 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4556 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4557 assert_eq_m128i(r, e);
4558 }
4559
4560 #[simd_test(enable = "sse2")]
4561 unsafe fn test_mm_cmpnlt_sd() {
4562 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4563 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4564 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4565 assert_eq_m128i(r, e);
4566 }
4567
4568 #[simd_test(enable = "sse2")]
4569 unsafe fn test_mm_cmpnle_sd() {
4570 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4571 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4572 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4573 assert_eq_m128i(r, e);
4574 }
4575
4576 #[simd_test(enable = "sse2")]
4577 unsafe fn test_mm_cmpngt_sd() {
4578 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4579 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4580 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4581 assert_eq_m128i(r, e);
4582 }
4583
4584 #[simd_test(enable = "sse2")]
4585 unsafe fn test_mm_cmpnge_sd() {
4586 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4587 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4588 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4589 assert_eq_m128i(r, e);
4590 }
4591
4592 #[simd_test(enable = "sse2")]
4593 unsafe fn test_mm_cmpeq_pd() {
4594 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4595 let e = _mm_setr_epi64x(!0, 0);
4596 let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4597 assert_eq_m128i(r, e);
4598 }
4599
4600 #[simd_test(enable = "sse2")]
4601 unsafe fn test_mm_cmplt_pd() {
4602 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4603 let e = _mm_setr_epi64x(0, !0);
4604 let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4605 assert_eq_m128i(r, e);
4606 }
4607
4608 #[simd_test(enable = "sse2")]
4609 unsafe fn test_mm_cmple_pd() {
4610 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4611 let e = _mm_setr_epi64x(!0, !0);
4612 let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4613 assert_eq_m128i(r, e);
4614 }
4615
4616 #[simd_test(enable = "sse2")]
4617 unsafe fn test_mm_cmpgt_pd() {
4618 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4619 let e = _mm_setr_epi64x(0, 0);
4620 let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4621 assert_eq_m128i(r, e);
4622 }
4623
4624 #[simd_test(enable = "sse2")]
4625 unsafe fn test_mm_cmpge_pd() {
4626 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4627 let e = _mm_setr_epi64x(!0, 0);
4628 let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4629 assert_eq_m128i(r, e);
4630 }
4631
4632 #[simd_test(enable = "sse2")]
4633 unsafe fn test_mm_cmpord_pd() {
4634 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4635 let e = _mm_setr_epi64x(0, !0);
4636 let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4637 assert_eq_m128i(r, e);
4638 }
4639
4640 #[simd_test(enable = "sse2")]
4641 unsafe fn test_mm_cmpunord_pd() {
4642 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4643 let e = _mm_setr_epi64x(!0, 0);
4644 let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4645 assert_eq_m128i(r, e);
4646 }
4647
4648 #[simd_test(enable = "sse2")]
4649 unsafe fn test_mm_cmpneq_pd() {
4650 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4651 let e = _mm_setr_epi64x(!0, !0);
4652 let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4653 assert_eq_m128i(r, e);
4654 }
4655
4656 #[simd_test(enable = "sse2")]
4657 unsafe fn test_mm_cmpnlt_pd() {
4658 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4659 let e = _mm_setr_epi64x(0, 0);
4660 let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4661 assert_eq_m128i(r, e);
4662 }
4663
4664 #[simd_test(enable = "sse2")]
4665 unsafe fn test_mm_cmpnle_pd() {
4666 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4667 let e = _mm_setr_epi64x(0, 0);
4668 let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4669 assert_eq_m128i(r, e);
4670 }
4671
4672 #[simd_test(enable = "sse2")]
4673 unsafe fn test_mm_cmpngt_pd() {
4674 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4675 let e = _mm_setr_epi64x(0, !0);
4676 let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4677 assert_eq_m128i(r, e);
4678 }
4679
4680 #[simd_test(enable = "sse2")]
4681 unsafe fn test_mm_cmpnge_pd() {
4682 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4683 let e = _mm_setr_epi64x(0, !0);
4684 let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4685 assert_eq_m128i(r, e);
4686 }
4687
4688 #[simd_test(enable = "sse2")]
unsafe fn test_mm_comieq_sd() {
    // Equal first lanes compare as equal.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_comieq_sd(a, b) != 0);

    // A NaN first lane must not compare as equal; `b` is reused unchanged.
    let a = _mm_setr_pd(NAN, 2.0);
    assert!(_mm_comieq_sd(a, b) == 0);
}
4696
4697 #[simd_test(enable = "sse2")]
unsafe fn test_mm_comilt_sd() {
    // Both first lanes are 1.0, so "less than" does not hold.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_comilt_sd(a, b) == 0);
}
4702
4703 #[simd_test(enable = "sse2")]
unsafe fn test_mm_comile_sd() {
    // Both first lanes are 1.0, so "less or equal" holds.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_comile_sd(a, b) != 0);
}
4708
4709 #[simd_test(enable = "sse2")]
unsafe fn test_mm_comigt_sd() {
    // Both first lanes are 1.0, so "greater than" does not hold.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_comigt_sd(a, b) == 0);
}
4714
4715 #[simd_test(enable = "sse2")]
unsafe fn test_mm_comige_sd() {
    // Both first lanes are 1.0, so "greater or equal" holds.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_comige_sd(a, b) != 0);
}
4720
4721 #[simd_test(enable = "sse2")]
unsafe fn test_mm_comineq_sd() {
    // Both first lanes are 1.0, so "not equal" does not hold.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_comineq_sd(a, b) == 0);
}
4726
4727 #[simd_test(enable = "sse2")]
unsafe fn test_mm_ucomieq_sd() {
    // Equal first lanes compare as equal.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_ucomieq_sd(a, b) != 0);

    // Two NaN first lanes must not compare as equal.
    let a = _mm_setr_pd(NAN, 2.0);
    let b = _mm_setr_pd(NAN, 3.0);
    assert!(_mm_ucomieq_sd(a, b) == 0);
}
4735
4736 #[simd_test(enable = "sse2")]
unsafe fn test_mm_ucomilt_sd() {
    // Both first lanes are 1.0, so "less than" does not hold.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_ucomilt_sd(a, b) == 0);
}
4741
4742 #[simd_test(enable = "sse2")]
unsafe fn test_mm_ucomile_sd() {
    // Both first lanes are 1.0, so "less or equal" holds.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_ucomile_sd(a, b) != 0);
}
4747
4748 #[simd_test(enable = "sse2")]
unsafe fn test_mm_ucomigt_sd() {
    // Both first lanes are 1.0, so "greater than" does not hold.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_ucomigt_sd(a, b) == 0);
}
4753
4754 #[simd_test(enable = "sse2")]
unsafe fn test_mm_ucomige_sd() {
    // Both first lanes are 1.0, so "greater or equal" holds.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_ucomige_sd(a, b) != 0);
}
4759
4760 #[simd_test(enable = "sse2")]
unsafe fn test_mm_ucomineq_sd() {
    // Both first lanes are 1.0, so "not equal" does not hold.
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(1.0, 3.0);
    assert!(_mm_ucomineq_sd(a, b) == 0);
}
4765
4766 #[simd_test(enable = "sse2")]
unsafe fn test_mm_movemask_pd() {
    // One mask bit per lane; in these inputs a bit is set exactly where the lane is negative.
    let first_negative = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
    assert_eq!(first_negative, 0b01);

    let both_negative = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
    assert_eq!(both_negative, 0b11);
}
4774
// 16-byte-aligned storage for four `f64` values, used as the buffer in the
// packed-double load/store tests of this module.
#[repr(align(16))]
struct Memory {
    // Backing array; tests take raw pointers into it.
    data: [f64; 4],
}
4779
4780 #[simd_test(enable = "sse2")]
4781 unsafe fn test_mm_load_pd() {
4782 let mem = Memory {
4783 data: [1.0f64, 2.0, 3.0, 4.0],
4784 };
4785 let vals = &mem.data;
4786 let d = vals.as_ptr();
4787
4788 let r = _mm_load_pd(d);
4789 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4790 }
4791
4792 #[simd_test(enable = "sse2")]
4793 unsafe fn test_mm_load_sd() {
4794 let a = 1.;
4795 let expected = _mm_setr_pd(a, 0.);
4796 let r = _mm_load_sd(&a);
4797 assert_eq_m128d(r, expected);
4798 }
4799
4800 #[simd_test(enable = "sse2")]
4801 unsafe fn test_mm_loadh_pd() {
4802 let a = _mm_setr_pd(1., 2.);
4803 let b = 3.;
4804 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4805 let r = _mm_loadh_pd(a, &b);
4806 assert_eq_m128d(r, expected);
4807 }
4808
4809 #[simd_test(enable = "sse2")]
4810 unsafe fn test_mm_loadl_pd() {
4811 let a = _mm_setr_pd(1., 2.);
4812 let b = 3.;
4813 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4814 let r = _mm_loadl_pd(a, &b);
4815 assert_eq_m128d(r, expected);
4816 }
4817
4818 #[simd_test(enable = "sse2")]
4819 #[cfg_attr(miri, ignore)]
4822 unsafe fn test_mm_stream_pd() {
4823 #[repr(align(128))]
4824 struct Memory {
4825 pub data: [f64; 2],
4826 }
4827 let a = _mm_set1_pd(7.0);
4828 let mut mem = Memory { data: [-1.0; 2] };
4829
4830 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4831 _mm_sfence();
4832 for i in 0..2 {
4833 assert_eq!(mem.data[i], get_m128d(a, i));
4834 }
4835 }
4836
4837 #[simd_test(enable = "sse2")]
unsafe fn test_mm_store_sd() {
    // The stored scalar must equal the vector's first lane as read by `_mm_cvtsd_f64`.
    let value = _mm_setr_pd(1., 2.);
    let mut slot = 0.;
    _mm_store_sd(&mut slot, value);
    assert_eq!(slot, _mm_cvtsd_f64(value));
}
4844
4845 #[simd_test(enable = "sse2")]
4846 unsafe fn test_mm_store_pd() {
4847 let mut mem = Memory { data: [0.0f64; 4] };
4848 let vals = &mut mem.data;
4849 let a = _mm_setr_pd(1.0, 2.0);
4850 let d = vals.as_mut_ptr();
4851
4852 _mm_store_pd(d, *black_box(&a));
4853 assert_eq!(vals[0], 1.0);
4854 assert_eq!(vals[1], 2.0);
4855 }
4856
4857 #[simd_test(enable = "sse2")]
4858 unsafe fn test_mm_storeu_pd() {
4859 let mut mem = Memory { data: [0.0f64; 4] };
4860 let vals = &mut mem.data;
4861 let a = _mm_setr_pd(1.0, 2.0);
4862
4863 let mut ofs = 0;
4864 let mut p = vals.as_mut_ptr();
4865
4866 if (p as usize) & 0xf == 0 {
4868 ofs = 1;
4869 p = p.add(1);
4870 }
4871
4872 _mm_storeu_pd(p, *black_box(&a));
4873
4874 if ofs > 0 {
4875 assert_eq!(vals[ofs - 1], 0.0);
4876 }
4877 assert_eq!(vals[ofs + 0], 1.0);
4878 assert_eq!(vals[ofs + 1], 2.0);
4879 }
4880
4881 #[simd_test(enable = "sse2")]
4882 unsafe fn test_mm_storeu_si16() {
4883 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4884 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
4885 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
4886 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
4887 assert_eq_m128i(r, e);
4888 }
4889
4890 #[simd_test(enable = "sse2")]
4891 unsafe fn test_mm_storeu_si32() {
4892 let a = _mm_setr_epi32(1, 2, 3, 4);
4893 let mut r = _mm_setr_epi32(5, 6, 7, 8);
4894 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
4895 let e = _mm_setr_epi32(1, 6, 7, 8);
4896 assert_eq_m128i(r, e);
4897 }
4898
4899 #[simd_test(enable = "sse2")]
4900 unsafe fn test_mm_storeu_si64() {
4901 let a = _mm_setr_epi64x(1, 2);
4902 let mut r = _mm_setr_epi64x(3, 4);
4903 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
4904 let e = _mm_setr_epi64x(1, 4);
4905 assert_eq_m128i(r, e);
4906 }
4907
4908 #[simd_test(enable = "sse2")]
4909 unsafe fn test_mm_store1_pd() {
4910 let mut mem = Memory { data: [0.0f64; 4] };
4911 let vals = &mut mem.data;
4912 let a = _mm_setr_pd(1.0, 2.0);
4913 let d = vals.as_mut_ptr();
4914
4915 _mm_store1_pd(d, *black_box(&a));
4916 assert_eq!(vals[0], 1.0);
4917 assert_eq!(vals[1], 1.0);
4918 }
4919
4920 #[simd_test(enable = "sse2")]
4921 unsafe fn test_mm_store_pd1() {
4922 let mut mem = Memory { data: [0.0f64; 4] };
4923 let vals = &mut mem.data;
4924 let a = _mm_setr_pd(1.0, 2.0);
4925 let d = vals.as_mut_ptr();
4926
4927 _mm_store_pd1(d, *black_box(&a));
4928 assert_eq!(vals[0], 1.0);
4929 assert_eq!(vals[1], 1.0);
4930 }
4931
4932 #[simd_test(enable = "sse2")]
4933 unsafe fn test_mm_storer_pd() {
4934 let mut mem = Memory { data: [0.0f64; 4] };
4935 let vals = &mut mem.data;
4936 let a = _mm_setr_pd(1.0, 2.0);
4937 let d = vals.as_mut_ptr();
4938
4939 _mm_storer_pd(d, *black_box(&a));
4940 assert_eq!(vals[0], 2.0);
4941 assert_eq!(vals[1], 1.0);
4942 }
4943
4944 #[simd_test(enable = "sse2")]
4945 unsafe fn test_mm_storeh_pd() {
4946 let mut dest = 0.;
4947 let a = _mm_setr_pd(1., 2.);
4948 _mm_storeh_pd(&mut dest, a);
4949 assert_eq!(dest, get_m128d(a, 1));
4950 }
4951
4952 #[simd_test(enable = "sse2")]
unsafe fn test_mm_storel_pd() {
    // The stored scalar must equal the vector's first lane as read by `_mm_cvtsd_f64`.
    let value = _mm_setr_pd(1., 2.);
    let mut slot = 0.;
    _mm_storel_pd(&mut slot, value);
    assert_eq!(slot, _mm_cvtsd_f64(value));
}
4959
4960 #[simd_test(enable = "sse2")]
4961 unsafe fn test_mm_loadr_pd() {
4962 let mut mem = Memory {
4963 data: [1.0f64, 2.0, 3.0, 4.0],
4964 };
4965 let vals = &mut mem.data;
4966 let d = vals.as_ptr();
4967
4968 let r = _mm_loadr_pd(d);
4969 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
4970 }
4971
4972 #[simd_test(enable = "sse2")]
4973 unsafe fn test_mm_loadu_pd() {
4974 let mut mem = Memory {
4975 data: [1.0f64, 2.0, 3.0, 4.0],
4976 };
4977 let vals = &mut mem.data;
4978 let mut d = vals.as_ptr();
4979
4980 let mut offset = 0;
4982 if (d as usize) & 0xf == 0 {
4983 offset = 1;
4984 d = d.add(offset);
4985 }
4986
4987 let r = _mm_loadu_pd(d);
4988 let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
4989 assert_eq_m128d(r, e);
4990 }
4991
4992 #[simd_test(enable = "sse2")]
4993 unsafe fn test_mm_loadu_si16() {
4994 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4995 let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
4996 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
4997 }
4998
4999 #[simd_test(enable = "sse2")]
5000 unsafe fn test_mm_loadu_si32() {
5001 let a = _mm_setr_epi32(1, 2, 3, 4);
5002 let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
5003 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
5004 }
5005
5006 #[simd_test(enable = "sse2")]
5007 unsafe fn test_mm_loadu_si64() {
5008 let a = _mm_setr_epi64x(5, 6);
5009 let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
5010 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
5011 }
5012
5013 #[simd_test(enable = "sse2")]
5014 unsafe fn test_mm_cvtpd_ps() {
5015 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
5016 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
5017
5018 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
5019 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
5020
5021 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
5022 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
5023
5024 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
5025 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
5026 }
5027
5028 #[simd_test(enable = "sse2")]
5029 unsafe fn test_mm_cvtps_pd() {
5030 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
5031 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
5032
5033 let r = _mm_cvtps_pd(_mm_setr_ps(
5034 f32::MAX,
5035 f32::INFINITY,
5036 f32::NEG_INFINITY,
5037 f32::MIN,
5038 ));
5039 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
5040 }
5041
5042 #[simd_test(enable = "sse2")]
5043 unsafe fn test_mm_cvtpd_epi32() {
5044 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5045 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5046
5047 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5048 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5049
5050 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5051 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5052
5053 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5054 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5055
5056 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5057 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5058 }
5059
5060 #[simd_test(enable = "sse2")]
5061 unsafe fn test_mm_cvtsd_si32() {
5062 let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5063 assert_eq!(r, -2);
5064
5065 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5066 assert_eq!(r, i32::MIN);
5067
5068 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5069 assert_eq!(r, i32::MIN);
5070 }
5071
5072 #[simd_test(enable = "sse2")]
5073 unsafe fn test_mm_cvtsd_ss() {
5074 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5075 let b = _mm_setr_pd(2.0, -5.0);
5076
5077 let r = _mm_cvtsd_ss(a, b);
5078
5079 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5080
5081 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5082 let b = _mm_setr_pd(f64::INFINITY, -5.0);
5083
5084 let r = _mm_cvtsd_ss(a, b);
5085
5086 assert_eq_m128(
5087 r,
5088 _mm_setr_ps(
5089 f32::INFINITY,
5090 f32::NEG_INFINITY,
5091 f32::MAX,
5092 f32::NEG_INFINITY,
5093 ),
5094 );
5095 }
5096
5097 #[simd_test(enable = "sse2")]
5098 unsafe fn test_mm_cvtsd_f64() {
5099 let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5100 assert_eq!(r, -1.1);
5101 }
5102
5103 #[simd_test(enable = "sse2")]
5104 unsafe fn test_mm_cvtss_sd() {
5105 let a = _mm_setr_pd(-1.1, 2.2);
5106 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5107
5108 let r = _mm_cvtss_sd(a, b);
5109 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5110
5111 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5112 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5113
5114 let r = _mm_cvtss_sd(a, b);
5115 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5116 }
5117
5118 #[simd_test(enable = "sse2")]
5119 unsafe fn test_mm_cvttpd_epi32() {
5120 let a = _mm_setr_pd(-1.1, 2.2);
5121 let r = _mm_cvttpd_epi32(a);
5122 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5123
5124 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5125 let r = _mm_cvttpd_epi32(a);
5126 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5127 }
5128
5129 #[simd_test(enable = "sse2")]
5130 unsafe fn test_mm_cvttsd_si32() {
5131 let a = _mm_setr_pd(-1.1, 2.2);
5132 let r = _mm_cvttsd_si32(a);
5133 assert_eq!(r, -1);
5134
5135 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5136 let r = _mm_cvttsd_si32(a);
5137 assert_eq!(r, i32::MIN);
5138 }
5139
5140 #[simd_test(enable = "sse2")]
5141 unsafe fn test_mm_cvttps_epi32() {
5142 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5143 let r = _mm_cvttps_epi32(a);
5144 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5145
5146 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5147 let r = _mm_cvttps_epi32(a);
5148 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5149 }
5150
5151 #[simd_test(enable = "sse2")]
5152 unsafe fn test_mm_set_sd() {
5153 let r = _mm_set_sd(-1.0_f64);
5154 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5155 }
5156
5157 #[simd_test(enable = "sse2")]
5158 unsafe fn test_mm_set1_pd() {
5159 let r = _mm_set1_pd(-1.0_f64);
5160 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5161 }
5162
5163 #[simd_test(enable = "sse2")]
5164 unsafe fn test_mm_set_pd1() {
5165 let r = _mm_set_pd1(-2.0_f64);
5166 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5167 }
5168
5169 #[simd_test(enable = "sse2")]
5170 unsafe fn test_mm_set_pd() {
5171 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5172 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5173 }
5174
5175 #[simd_test(enable = "sse2")]
5176 unsafe fn test_mm_setr_pd() {
5177 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5178 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5179 }
5180
5181 #[simd_test(enable = "sse2")]
5182 unsafe fn test_mm_setzero_pd() {
5183 let r = _mm_setzero_pd();
5184 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5185 }
5186
5187 #[simd_test(enable = "sse2")]
5188 unsafe fn test_mm_load1_pd() {
5189 let d = -5.0;
5190 let r = _mm_load1_pd(&d);
5191 assert_eq_m128d(r, _mm_setr_pd(d, d));
5192 }
5193
5194 #[simd_test(enable = "sse2")]
5195 unsafe fn test_mm_load_pd1() {
5196 let d = -5.0;
5197 let r = _mm_load_pd1(&d);
5198 assert_eq_m128d(r, _mm_setr_pd(d, d));
5199 }
5200
5201 #[simd_test(enable = "sse2")]
5202 unsafe fn test_mm_unpackhi_pd() {
5203 let a = _mm_setr_pd(1.0, 2.0);
5204 let b = _mm_setr_pd(3.0, 4.0);
5205 let r = _mm_unpackhi_pd(a, b);
5206 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5207 }
5208
5209 #[simd_test(enable = "sse2")]
5210 unsafe fn test_mm_unpacklo_pd() {
5211 let a = _mm_setr_pd(1.0, 2.0);
5212 let b = _mm_setr_pd(3.0, 4.0);
5213 let r = _mm_unpacklo_pd(a, b);
5214 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5215 }
5216
5217 #[simd_test(enable = "sse2")]
5218 unsafe fn test_mm_shuffle_pd() {
5219 let a = _mm_setr_pd(1., 2.);
5220 let b = _mm_setr_pd(3., 4.);
5221 let expected = _mm_setr_pd(1., 3.);
5222 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5223 assert_eq_m128d(r, expected);
5224 }
5225
5226 #[simd_test(enable = "sse2")]
5227 unsafe fn test_mm_move_sd() {
5228 let a = _mm_setr_pd(1., 2.);
5229 let b = _mm_setr_pd(3., 4.);
5230 let expected = _mm_setr_pd(3., 2.);
5231 let r = _mm_move_sd(a, b);
5232 assert_eq_m128d(r, expected);
5233 }
5234
5235 #[simd_test(enable = "sse2")]
5236 unsafe fn test_mm_castpd_ps() {
5237 let a = _mm_set1_pd(0.);
5238 let expected = _mm_set1_ps(0.);
5239 let r = _mm_castpd_ps(a);
5240 assert_eq_m128(r, expected);
5241 }
5242
5243 #[simd_test(enable = "sse2")]
5244 unsafe fn test_mm_castpd_si128() {
5245 let a = _mm_set1_pd(0.);
5246 let expected = _mm_set1_epi64x(0);
5247 let r = _mm_castpd_si128(a);
5248 assert_eq_m128i(r, expected);
5249 }
5250
5251 #[simd_test(enable = "sse2")]
5252 unsafe fn test_mm_castps_pd() {
5253 let a = _mm_set1_ps(0.);
5254 let expected = _mm_set1_pd(0.);
5255 let r = _mm_castps_pd(a);
5256 assert_eq_m128d(r, expected);
5257 }
5258
5259 #[simd_test(enable = "sse2")]
5260 unsafe fn test_mm_castps_si128() {
5261 let a = _mm_set1_ps(0.);
5262 let expected = _mm_set1_epi32(0);
5263 let r = _mm_castps_si128(a);
5264 assert_eq_m128i(r, expected);
5265 }
5266
5267 #[simd_test(enable = "sse2")]
5268 unsafe fn test_mm_castsi128_pd() {
5269 let a = _mm_set1_epi64x(0);
5270 let expected = _mm_set1_pd(0.);
5271 let r = _mm_castsi128_pd(a);
5272 assert_eq_m128d(r, expected);
5273 }
5274
5275 #[simd_test(enable = "sse2")]
5276 unsafe fn test_mm_castsi128_ps() {
5277 let a = _mm_set1_epi32(0);
5278 let expected = _mm_set1_ps(0.);
5279 let r = _mm_castsi128_ps(a);
5280 assert_eq_m128(r, expected);
5281 }
5282}