1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
13#[inline]
20#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21#[stable(feature = "simd_x86", since = "1.27.0")]
22pub unsafe fn _mm_pause() {
23 pause()
26}
27
28#[inline]
33#[target_feature(enable = "sse2")]
34#[cfg_attr(test, assert_instr(clflush))]
35#[stable(feature = "simd_x86", since = "1.27.0")]
36pub unsafe fn _mm_clflush(p: *const u8) {
37 clflush(p)
38}
39
40#[inline]
49#[target_feature(enable = "sse2")]
50#[cfg_attr(test, assert_instr(lfence))]
51#[stable(feature = "simd_x86", since = "1.27.0")]
52pub unsafe fn _mm_lfence() {
53 lfence()
54}
55
56#[inline]
65#[target_feature(enable = "sse2")]
66#[cfg_attr(test, assert_instr(mfence))]
67#[stable(feature = "simd_x86", since = "1.27.0")]
68pub unsafe fn _mm_mfence() {
69 mfence()
70}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80 transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
81}
82
83#[inline]
87#[target_feature(enable = "sse2")]
88#[cfg_attr(test, assert_instr(paddw))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91 transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
92}
93
94#[inline]
98#[target_feature(enable = "sse2")]
99#[cfg_attr(test, assert_instr(paddd))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102 transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
103}
104
105#[inline]
109#[target_feature(enable = "sse2")]
110#[cfg_attr(test, assert_instr(paddq))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113 transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
114}
115
116#[inline]
120#[target_feature(enable = "sse2")]
121#[cfg_attr(test, assert_instr(paddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124 transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
125}
126
127#[inline]
131#[target_feature(enable = "sse2")]
132#[cfg_attr(test, assert_instr(paddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135 transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
136}
137
138#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146 transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
147}
148
149#[inline]
153#[target_feature(enable = "sse2")]
154#[cfg_attr(test, assert_instr(paddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157 transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
158}
159
160#[inline]
164#[target_feature(enable = "sse2")]
165#[cfg_attr(test, assert_instr(pavgb))]
166#[stable(feature = "simd_x86", since = "1.27.0")]
167pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
168 let a = simd_cast::<_, u16x16>(a.as_u8x16());
169 let b = simd_cast::<_, u16x16>(b.as_u8x16());
170 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
171 transmute(simd_cast::<_, u8x16>(r))
172}
173
174#[inline]
178#[target_feature(enable = "sse2")]
179#[cfg_attr(test, assert_instr(pavgw))]
180#[stable(feature = "simd_x86", since = "1.27.0")]
181pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
182 let a = simd_cast::<_, u32x8>(a.as_u16x8());
183 let b = simd_cast::<_, u32x8>(b.as_u16x8());
184 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
185 transmute(simd_cast::<_, u16x8>(r))
186}
187
188#[inline]
196#[target_feature(enable = "sse2")]
197#[cfg_attr(test, assert_instr(pmaddwd))]
198#[stable(feature = "simd_x86", since = "1.27.0")]
199pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
200 transmute(pmaddwd(a.as_i16x8(), b.as_i16x8()))
201}
202
203#[inline]
208#[target_feature(enable = "sse2")]
209#[cfg_attr(test, assert_instr(pmaxsw))]
210#[stable(feature = "simd_x86", since = "1.27.0")]
211pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
212 let a = a.as_i16x8();
213 let b = b.as_i16x8();
214 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
215}
216
217#[inline]
222#[target_feature(enable = "sse2")]
223#[cfg_attr(test, assert_instr(pmaxub))]
224#[stable(feature = "simd_x86", since = "1.27.0")]
225pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
226 let a = a.as_u8x16();
227 let b = b.as_u8x16();
228 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
229}
230
231#[inline]
236#[target_feature(enable = "sse2")]
237#[cfg_attr(test, assert_instr(pminsw))]
238#[stable(feature = "simd_x86", since = "1.27.0")]
239pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
240 let a = a.as_i16x8();
241 let b = b.as_i16x8();
242 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
243}
244
245#[inline]
250#[target_feature(enable = "sse2")]
251#[cfg_attr(test, assert_instr(pminub))]
252#[stable(feature = "simd_x86", since = "1.27.0")]
253pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
254 let a = a.as_u8x16();
255 let b = b.as_u8x16();
256 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
257}
258
259#[inline]
266#[target_feature(enable = "sse2")]
267#[cfg_attr(test, assert_instr(pmulhw))]
268#[stable(feature = "simd_x86", since = "1.27.0")]
269pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
270 let a = simd_cast::<_, i32x8>(a.as_i16x8());
271 let b = simd_cast::<_, i32x8>(b.as_i16x8());
272 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
273 transmute(simd_cast::<i32x8, i16x8>(r))
274}
275
276#[inline]
283#[target_feature(enable = "sse2")]
284#[cfg_attr(test, assert_instr(pmulhuw))]
285#[stable(feature = "simd_x86", since = "1.27.0")]
286pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
287 let a = simd_cast::<_, u32x8>(a.as_u16x8());
288 let b = simd_cast::<_, u32x8>(b.as_u16x8());
289 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
290 transmute(simd_cast::<u32x8, u16x8>(r))
291}
292
293#[inline]
300#[target_feature(enable = "sse2")]
301#[cfg_attr(test, assert_instr(pmullw))]
302#[stable(feature = "simd_x86", since = "1.27.0")]
303pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
304 transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
305}
306
307#[inline]
314#[target_feature(enable = "sse2")]
315#[cfg_attr(test, assert_instr(pmuludq))]
316#[stable(feature = "simd_x86", since = "1.27.0")]
317pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
318 let a = a.as_u64x2();
319 let b = b.as_u64x2();
320 let mask = u64x2::splat(u32::MAX.into());
321 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
322}
323
324#[inline]
333#[target_feature(enable = "sse2")]
334#[cfg_attr(test, assert_instr(psadbw))]
335#[stable(feature = "simd_x86", since = "1.27.0")]
336pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
337 transmute(psadbw(a.as_u8x16(), b.as_u8x16()))
338}
339
340#[inline]
344#[target_feature(enable = "sse2")]
345#[cfg_attr(test, assert_instr(psubb))]
346#[stable(feature = "simd_x86", since = "1.27.0")]
347pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
348 transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
349}
350
351#[inline]
355#[target_feature(enable = "sse2")]
356#[cfg_attr(test, assert_instr(psubw))]
357#[stable(feature = "simd_x86", since = "1.27.0")]
358pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
359 transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
360}
361
362#[inline]
366#[target_feature(enable = "sse2")]
367#[cfg_attr(test, assert_instr(psubd))]
368#[stable(feature = "simd_x86", since = "1.27.0")]
369pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
370 transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
371}
372
373#[inline]
377#[target_feature(enable = "sse2")]
378#[cfg_attr(test, assert_instr(psubq))]
379#[stable(feature = "simd_x86", since = "1.27.0")]
380pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
381 transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
382}
383
384#[inline]
389#[target_feature(enable = "sse2")]
390#[cfg_attr(test, assert_instr(psubsb))]
391#[stable(feature = "simd_x86", since = "1.27.0")]
392pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
393 transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
394}
395
396#[inline]
401#[target_feature(enable = "sse2")]
402#[cfg_attr(test, assert_instr(psubsw))]
403#[stable(feature = "simd_x86", since = "1.27.0")]
404pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
405 transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
406}
407
408#[inline]
413#[target_feature(enable = "sse2")]
414#[cfg_attr(test, assert_instr(psubusb))]
415#[stable(feature = "simd_x86", since = "1.27.0")]
416pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
417 transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
418}
419
420#[inline]
425#[target_feature(enable = "sse2")]
426#[cfg_attr(test, assert_instr(psubusw))]
427#[stable(feature = "simd_x86", since = "1.27.0")]
428pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
429 transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
430}
431
432#[inline]
436#[target_feature(enable = "sse2")]
437#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
438#[rustc_legacy_const_generics(1)]
439#[stable(feature = "simd_x86", since = "1.27.0")]
440pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
441 static_assert_uimm_bits!(IMM8, 8);
442 _mm_slli_si128_impl::<IMM8>(a)
443}
444
445#[inline]
448#[target_feature(enable = "sse2")]
449unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
450 const fn mask(shift: i32, i: u32) -> u32 {
451 let shift = shift as u32 & 0xff;
452 if shift > 15 {
453 i
454 } else {
455 16 - shift + i
456 }
457 }
458 transmute::<i8x16, _>(simd_shuffle!(
459 i8x16::ZERO,
460 a.as_i8x16(),
461 [
462 mask(IMM8, 0),
463 mask(IMM8, 1),
464 mask(IMM8, 2),
465 mask(IMM8, 3),
466 mask(IMM8, 4),
467 mask(IMM8, 5),
468 mask(IMM8, 6),
469 mask(IMM8, 7),
470 mask(IMM8, 8),
471 mask(IMM8, 9),
472 mask(IMM8, 10),
473 mask(IMM8, 11),
474 mask(IMM8, 12),
475 mask(IMM8, 13),
476 mask(IMM8, 14),
477 mask(IMM8, 15),
478 ],
479 ))
480}
481
482#[inline]
486#[target_feature(enable = "sse2")]
487#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
488#[rustc_legacy_const_generics(1)]
489#[stable(feature = "simd_x86", since = "1.27.0")]
490pub unsafe fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
491 static_assert_uimm_bits!(IMM8, 8);
492 _mm_slli_si128_impl::<IMM8>(a)
493}
494
495#[inline]
499#[target_feature(enable = "sse2")]
500#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
501#[rustc_legacy_const_generics(1)]
502#[stable(feature = "simd_x86", since = "1.27.0")]
503pub unsafe fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
504 static_assert_uimm_bits!(IMM8, 8);
505 _mm_srli_si128_impl::<IMM8>(a)
506}
507
508#[inline]
512#[target_feature(enable = "sse2")]
513#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
514#[rustc_legacy_const_generics(1)]
515#[stable(feature = "simd_x86", since = "1.27.0")]
516pub unsafe fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
517 static_assert_uimm_bits!(IMM8, 8);
518 if IMM8 >= 16 {
519 _mm_setzero_si128()
520 } else {
521 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
522 }
523}
524
525#[inline]
530#[target_feature(enable = "sse2")]
531#[cfg_attr(test, assert_instr(psllw))]
532#[stable(feature = "simd_x86", since = "1.27.0")]
533pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
534 transmute(psllw(a.as_i16x8(), count.as_i16x8()))
535}
536
537#[inline]
541#[target_feature(enable = "sse2")]
542#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
543#[rustc_legacy_const_generics(1)]
544#[stable(feature = "simd_x86", since = "1.27.0")]
545pub unsafe fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
546 static_assert_uimm_bits!(IMM8, 8);
547 if IMM8 >= 32 {
548 _mm_setzero_si128()
549 } else {
550 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
551 }
552}
553
554#[inline]
559#[target_feature(enable = "sse2")]
560#[cfg_attr(test, assert_instr(pslld))]
561#[stable(feature = "simd_x86", since = "1.27.0")]
562pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
563 transmute(pslld(a.as_i32x4(), count.as_i32x4()))
564}
565
566#[inline]
570#[target_feature(enable = "sse2")]
571#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
572#[rustc_legacy_const_generics(1)]
573#[stable(feature = "simd_x86", since = "1.27.0")]
574pub unsafe fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
575 static_assert_uimm_bits!(IMM8, 8);
576 if IMM8 >= 64 {
577 _mm_setzero_si128()
578 } else {
579 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
580 }
581}
582
583#[inline]
588#[target_feature(enable = "sse2")]
589#[cfg_attr(test, assert_instr(psllq))]
590#[stable(feature = "simd_x86", since = "1.27.0")]
591pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
592 transmute(psllq(a.as_i64x2(), count.as_i64x2()))
593}
594
595#[inline]
600#[target_feature(enable = "sse2")]
601#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
602#[rustc_legacy_const_generics(1)]
603#[stable(feature = "simd_x86", since = "1.27.0")]
604pub unsafe fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
605 static_assert_uimm_bits!(IMM8, 8);
606 transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16)))
607}
608
609#[inline]
614#[target_feature(enable = "sse2")]
615#[cfg_attr(test, assert_instr(psraw))]
616#[stable(feature = "simd_x86", since = "1.27.0")]
617pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
618 transmute(psraw(a.as_i16x8(), count.as_i16x8()))
619}
620
621#[inline]
626#[target_feature(enable = "sse2")]
627#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
628#[rustc_legacy_const_generics(1)]
629#[stable(feature = "simd_x86", since = "1.27.0")]
630pub unsafe fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
631 static_assert_uimm_bits!(IMM8, 8);
632 transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31))))
633}
634
635#[inline]
640#[target_feature(enable = "sse2")]
641#[cfg_attr(test, assert_instr(psrad))]
642#[stable(feature = "simd_x86", since = "1.27.0")]
643pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
644 transmute(psrad(a.as_i32x4(), count.as_i32x4()))
645}
646
647#[inline]
651#[target_feature(enable = "sse2")]
652#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
653#[rustc_legacy_const_generics(1)]
654#[stable(feature = "simd_x86", since = "1.27.0")]
655pub unsafe fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
656 static_assert_uimm_bits!(IMM8, 8);
657 _mm_srli_si128_impl::<IMM8>(a)
658}
659
660#[inline]
663#[target_feature(enable = "sse2")]
664unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
665 const fn mask(shift: i32, i: u32) -> u32 {
666 if (shift as u32) > 15 {
667 i + 16
668 } else {
669 i + (shift as u32)
670 }
671 }
672 let x: i8x16 = simd_shuffle!(
673 a.as_i8x16(),
674 i8x16::ZERO,
675 [
676 mask(IMM8, 0),
677 mask(IMM8, 1),
678 mask(IMM8, 2),
679 mask(IMM8, 3),
680 mask(IMM8, 4),
681 mask(IMM8, 5),
682 mask(IMM8, 6),
683 mask(IMM8, 7),
684 mask(IMM8, 8),
685 mask(IMM8, 9),
686 mask(IMM8, 10),
687 mask(IMM8, 11),
688 mask(IMM8, 12),
689 mask(IMM8, 13),
690 mask(IMM8, 14),
691 mask(IMM8, 15),
692 ],
693 );
694 transmute(x)
695}
696
697#[inline]
702#[target_feature(enable = "sse2")]
703#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
704#[rustc_legacy_const_generics(1)]
705#[stable(feature = "simd_x86", since = "1.27.0")]
706pub unsafe fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
707 static_assert_uimm_bits!(IMM8, 8);
708 if IMM8 >= 16 {
709 _mm_setzero_si128()
710 } else {
711 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
712 }
713}
714
715#[inline]
720#[target_feature(enable = "sse2")]
721#[cfg_attr(test, assert_instr(psrlw))]
722#[stable(feature = "simd_x86", since = "1.27.0")]
723pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
724 transmute(psrlw(a.as_i16x8(), count.as_i16x8()))
725}
726
727#[inline]
732#[target_feature(enable = "sse2")]
733#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
734#[rustc_legacy_const_generics(1)]
735#[stable(feature = "simd_x86", since = "1.27.0")]
736pub unsafe fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
737 static_assert_uimm_bits!(IMM8, 8);
738 if IMM8 >= 32 {
739 _mm_setzero_si128()
740 } else {
741 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
742 }
743}
744
745#[inline]
750#[target_feature(enable = "sse2")]
751#[cfg_attr(test, assert_instr(psrld))]
752#[stable(feature = "simd_x86", since = "1.27.0")]
753pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
754 transmute(psrld(a.as_i32x4(), count.as_i32x4()))
755}
756
757#[inline]
762#[target_feature(enable = "sse2")]
763#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
764#[rustc_legacy_const_generics(1)]
765#[stable(feature = "simd_x86", since = "1.27.0")]
766pub unsafe fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
767 static_assert_uimm_bits!(IMM8, 8);
768 if IMM8 >= 64 {
769 _mm_setzero_si128()
770 } else {
771 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
772 }
773}
774
775#[inline]
780#[target_feature(enable = "sse2")]
781#[cfg_attr(test, assert_instr(psrlq))]
782#[stable(feature = "simd_x86", since = "1.27.0")]
783pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
784 transmute(psrlq(a.as_i64x2(), count.as_i64x2()))
785}
786
787#[inline]
792#[target_feature(enable = "sse2")]
793#[cfg_attr(test, assert_instr(andps))]
794#[stable(feature = "simd_x86", since = "1.27.0")]
795pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
796 simd_and(a, b)
797}
798
799#[inline]
804#[target_feature(enable = "sse2")]
805#[cfg_attr(test, assert_instr(andnps))]
806#[stable(feature = "simd_x86", since = "1.27.0")]
807pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
808 simd_and(simd_xor(_mm_set1_epi8(-1), a), b)
809}
810
811#[inline]
816#[target_feature(enable = "sse2")]
817#[cfg_attr(test, assert_instr(orps))]
818#[stable(feature = "simd_x86", since = "1.27.0")]
819pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
820 simd_or(a, b)
821}
822
823#[inline]
828#[target_feature(enable = "sse2")]
829#[cfg_attr(test, assert_instr(xorps))]
830#[stable(feature = "simd_x86", since = "1.27.0")]
831pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
832 simd_xor(a, b)
833}
834
835#[inline]
839#[target_feature(enable = "sse2")]
840#[cfg_attr(test, assert_instr(pcmpeqb))]
841#[stable(feature = "simd_x86", since = "1.27.0")]
842pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
843 transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
844}
845
846#[inline]
850#[target_feature(enable = "sse2")]
851#[cfg_attr(test, assert_instr(pcmpeqw))]
852#[stable(feature = "simd_x86", since = "1.27.0")]
853pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
854 transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
855}
856
857#[inline]
861#[target_feature(enable = "sse2")]
862#[cfg_attr(test, assert_instr(pcmpeqd))]
863#[stable(feature = "simd_x86", since = "1.27.0")]
864pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
865 transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
866}
867
868#[inline]
872#[target_feature(enable = "sse2")]
873#[cfg_attr(test, assert_instr(pcmpgtb))]
874#[stable(feature = "simd_x86", since = "1.27.0")]
875pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
876 transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
877}
878
879#[inline]
883#[target_feature(enable = "sse2")]
884#[cfg_attr(test, assert_instr(pcmpgtw))]
885#[stable(feature = "simd_x86", since = "1.27.0")]
886pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
887 transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
888}
889
890#[inline]
894#[target_feature(enable = "sse2")]
895#[cfg_attr(test, assert_instr(pcmpgtd))]
896#[stable(feature = "simd_x86", since = "1.27.0")]
897pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
898 transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
899}
900
901#[inline]
905#[target_feature(enable = "sse2")]
906#[cfg_attr(test, assert_instr(pcmpgtb))]
907#[stable(feature = "simd_x86", since = "1.27.0")]
908pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
909 transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
910}
911
912#[inline]
916#[target_feature(enable = "sse2")]
917#[cfg_attr(test, assert_instr(pcmpgtw))]
918#[stable(feature = "simd_x86", since = "1.27.0")]
919pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
920 transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
921}
922
923#[inline]
927#[target_feature(enable = "sse2")]
928#[cfg_attr(test, assert_instr(pcmpgtd))]
929#[stable(feature = "simd_x86", since = "1.27.0")]
930pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
931 transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
932}
933
934#[inline]
939#[target_feature(enable = "sse2")]
940#[cfg_attr(test, assert_instr(cvtdq2pd))]
941#[stable(feature = "simd_x86", since = "1.27.0")]
942pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
943 let a = a.as_i32x4();
944 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
945}
946
947#[inline]
952#[target_feature(enable = "sse2")]
953#[cfg_attr(test, assert_instr(cvtsi2sd))]
954#[stable(feature = "simd_x86", since = "1.27.0")]
955pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
956 simd_insert!(a, 0, b as f64)
957}
958
959#[inline]
964#[target_feature(enable = "sse2")]
965#[cfg_attr(test, assert_instr(cvtdq2ps))]
966#[stable(feature = "simd_x86", since = "1.27.0")]
967pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
968 transmute(simd_cast::<_, f32x4>(a.as_i32x4()))
969}
970
971#[inline]
976#[target_feature(enable = "sse2")]
977#[cfg_attr(test, assert_instr(cvtps2dq))]
978#[stable(feature = "simd_x86", since = "1.27.0")]
979pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i {
980 transmute(cvtps2dq(a))
981}
982
983#[inline]
988#[target_feature(enable = "sse2")]
989#[stable(feature = "simd_x86", since = "1.27.0")]
990pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
991 transmute(i32x4::new(a, 0, 0, 0))
992}
993
994#[inline]
998#[target_feature(enable = "sse2")]
999#[stable(feature = "simd_x86", since = "1.27.0")]
1000pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1001 simd_extract!(a.as_i32x4(), 0)
1002}
1003
1004#[inline]
1009#[target_feature(enable = "sse2")]
1010#[stable(feature = "simd_x86", since = "1.27.0")]
1012pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1013 transmute(i64x2::new(e0, e1))
1014}
1015
1016#[inline]
1020#[target_feature(enable = "sse2")]
1021#[stable(feature = "simd_x86", since = "1.27.0")]
1023pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1024 transmute(i32x4::new(e0, e1, e2, e3))
1025}
1026
1027#[inline]
1031#[target_feature(enable = "sse2")]
1032#[stable(feature = "simd_x86", since = "1.27.0")]
1034pub unsafe fn _mm_set_epi16(
1035 e7: i16,
1036 e6: i16,
1037 e5: i16,
1038 e4: i16,
1039 e3: i16,
1040 e2: i16,
1041 e1: i16,
1042 e0: i16,
1043) -> __m128i {
1044 transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
1045}
1046
1047#[inline]
1051#[target_feature(enable = "sse2")]
1052#[stable(feature = "simd_x86", since = "1.27.0")]
1054pub unsafe fn _mm_set_epi8(
1055 e15: i8,
1056 e14: i8,
1057 e13: i8,
1058 e12: i8,
1059 e11: i8,
1060 e10: i8,
1061 e9: i8,
1062 e8: i8,
1063 e7: i8,
1064 e6: i8,
1065 e5: i8,
1066 e4: i8,
1067 e3: i8,
1068 e2: i8,
1069 e1: i8,
1070 e0: i8,
1071) -> __m128i {
1072 #[rustfmt::skip]
1073 transmute(i8x16::new(
1074 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1075 ))
1076}
1077
1078#[inline]
1082#[target_feature(enable = "sse2")]
1083#[stable(feature = "simd_x86", since = "1.27.0")]
1085pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
1086 _mm_set_epi64x(a, a)
1087}
1088
1089#[inline]
1093#[target_feature(enable = "sse2")]
1094#[stable(feature = "simd_x86", since = "1.27.0")]
1096pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
1097 _mm_set_epi32(a, a, a, a)
1098}
1099
1100#[inline]
1104#[target_feature(enable = "sse2")]
1105#[stable(feature = "simd_x86", since = "1.27.0")]
1107pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
1108 _mm_set_epi16(a, a, a, a, a, a, a, a)
1109}
1110
1111#[inline]
1115#[target_feature(enable = "sse2")]
1116#[stable(feature = "simd_x86", since = "1.27.0")]
1118pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
1119 _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
1120}
1121
1122#[inline]
1126#[target_feature(enable = "sse2")]
1127#[stable(feature = "simd_x86", since = "1.27.0")]
1129pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1130 _mm_set_epi32(e0, e1, e2, e3)
1131}
1132
1133#[inline]
1137#[target_feature(enable = "sse2")]
1138#[stable(feature = "simd_x86", since = "1.27.0")]
1140pub unsafe fn _mm_setr_epi16(
1141 e7: i16,
1142 e6: i16,
1143 e5: i16,
1144 e4: i16,
1145 e3: i16,
1146 e2: i16,
1147 e1: i16,
1148 e0: i16,
1149) -> __m128i {
1150 _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1151}
1152
1153#[inline]
1157#[target_feature(enable = "sse2")]
1158#[stable(feature = "simd_x86", since = "1.27.0")]
1160pub unsafe fn _mm_setr_epi8(
1161 e15: i8,
1162 e14: i8,
1163 e13: i8,
1164 e12: i8,
1165 e11: i8,
1166 e10: i8,
1167 e9: i8,
1168 e8: i8,
1169 e7: i8,
1170 e6: i8,
1171 e5: i8,
1172 e4: i8,
1173 e3: i8,
1174 e2: i8,
1175 e1: i8,
1176 e0: i8,
1177) -> __m128i {
1178 #[rustfmt::skip]
1179 _mm_set_epi8(
1180 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1181 )
1182}
1183
1184#[inline]
1188#[target_feature(enable = "sse2")]
1189#[cfg_attr(test, assert_instr(xorps))]
1190#[stable(feature = "simd_x86", since = "1.27.0")]
1191pub unsafe fn _mm_setzero_si128() -> __m128i {
1192 const { mem::zeroed() }
1193}
1194
1195#[inline]
1199#[target_feature(enable = "sse2")]
1200#[stable(feature = "simd_x86", since = "1.27.0")]
1201pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1202 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1203}
1204
1205#[inline]
1211#[target_feature(enable = "sse2")]
1212#[cfg_attr(test, assert_instr(movaps))]
1213#[stable(feature = "simd_x86", since = "1.27.0")]
1214pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1215 *mem_addr
1216}
1217
1218#[inline]
1224#[target_feature(enable = "sse2")]
1225#[cfg_attr(test, assert_instr(movups))]
1226#[stable(feature = "simd_x86", since = "1.27.0")]
1227pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1228 let mut dst: __m128i = _mm_undefined_si128();
1229 ptr::copy_nonoverlapping(
1230 mem_addr as *const u8,
1231 ptr::addr_of_mut!(dst) as *mut u8,
1232 mem::size_of::<__m128i>(),
1233 );
1234 dst
1235}
1236
1237#[inline]
1248#[target_feature(enable = "sse2")]
1249#[cfg_attr(test, assert_instr(maskmovdqu))]
1250#[stable(feature = "simd_x86", since = "1.27.0")]
1251pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1252 maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1253}
1254
1255#[inline]
1261#[target_feature(enable = "sse2")]
1262#[cfg_attr(test, assert_instr(movaps))]
1263#[stable(feature = "simd_x86", since = "1.27.0")]
1264pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1265 *mem_addr = a;
1266}
1267
1268#[inline]
1274#[target_feature(enable = "sse2")]
1275#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1277pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1278 mem_addr.write_unaligned(a);
1279}
1280
1281#[inline]
1287#[target_feature(enable = "sse2")]
1288#[stable(feature = "simd_x86", since = "1.27.0")]
1289pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1290 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1291}
1292
1293#[inline]
1308#[target_feature(enable = "sse2")]
1309#[cfg_attr(test, assert_instr(movntdq))]
1310#[stable(feature = "simd_x86", since = "1.27.0")]
1311pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
1312 crate::arch::asm!(
1313 vps!("movntdq", ",{a}"),
1314 p = in(reg) mem_addr,
1315 a = in(xmm_reg) a,
1316 options(nostack, preserves_flags),
1317 );
1318}
1319
1320#[inline]
1335#[target_feature(enable = "sse2")]
1336#[cfg_attr(test, assert_instr(movnti))]
1337#[stable(feature = "simd_x86", since = "1.27.0")]
1338pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
1339 crate::arch::asm!(
1340 vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
1342 a = in(reg) a,
1343 options(nostack, preserves_flags),
1344 );
1345}
1346
1347#[inline]
1352#[target_feature(enable = "sse2")]
1353#[cfg_attr(
1355 all(test, not(target_env = "msvc"), target_arch = "x86_64"),
1356 assert_instr(movq)
1357)]
1358#[stable(feature = "simd_x86", since = "1.27.0")]
1359pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
1360 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1361 transmute(r)
1362}
1363
1364#[inline]
1369#[target_feature(enable = "sse2")]
1370#[cfg_attr(test, assert_instr(packsswb))]
1371#[stable(feature = "simd_x86", since = "1.27.0")]
1372pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1373 transmute(packsswb(a.as_i16x8(), b.as_i16x8()))
1374}
1375
1376#[inline]
1381#[target_feature(enable = "sse2")]
1382#[cfg_attr(test, assert_instr(packssdw))]
1383#[stable(feature = "simd_x86", since = "1.27.0")]
1384pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1385 transmute(packssdw(a.as_i32x4(), b.as_i32x4()))
1386}
1387
1388#[inline]
1393#[target_feature(enable = "sse2")]
1394#[cfg_attr(test, assert_instr(packuswb))]
1395#[stable(feature = "simd_x86", since = "1.27.0")]
1396pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1397 transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
1398}
1399
1400#[inline]
1404#[target_feature(enable = "sse2")]
1405#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1406#[rustc_legacy_const_generics(1)]
1407#[stable(feature = "simd_x86", since = "1.27.0")]
1408pub unsafe fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1409 static_assert_uimm_bits!(IMM8, 3);
1410 simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32
1411}
1412
1413#[inline]
1417#[target_feature(enable = "sse2")]
1418#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1419#[rustc_legacy_const_generics(2)]
1420#[stable(feature = "simd_x86", since = "1.27.0")]
1421pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1422 static_assert_uimm_bits!(IMM8, 3);
1423 transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16))
1424}
1425
1426#[inline]
1430#[target_feature(enable = "sse2")]
1431#[cfg_attr(test, assert_instr(pmovmskb))]
1432#[stable(feature = "simd_x86", since = "1.27.0")]
1433pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
1434 let z = i8x16::ZERO;
1435 let m: i8x16 = simd_lt(a.as_i8x16(), z);
1436 simd_bitmask::<_, u16>(m) as u32 as i32
1437}
1438
1439#[inline]
1443#[target_feature(enable = "sse2")]
1444#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1445#[rustc_legacy_const_generics(1)]
1446#[stable(feature = "simd_x86", since = "1.27.0")]
1447pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1448 static_assert_uimm_bits!(IMM8, 8);
1449 let a = a.as_i32x4();
1450 let x: i32x4 = simd_shuffle!(
1451 a,
1452 a,
1453 [
1454 IMM8 as u32 & 0b11,
1455 (IMM8 as u32 >> 2) & 0b11,
1456 (IMM8 as u32 >> 4) & 0b11,
1457 (IMM8 as u32 >> 6) & 0b11,
1458 ],
1459 );
1460 transmute(x)
1461}
1462
1463#[inline]
1471#[target_feature(enable = "sse2")]
1472#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1473#[rustc_legacy_const_generics(1)]
1474#[stable(feature = "simd_x86", since = "1.27.0")]
1475pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1476 static_assert_uimm_bits!(IMM8, 8);
1477 let a = a.as_i16x8();
1478 let x: i16x8 = simd_shuffle!(
1479 a,
1480 a,
1481 [
1482 0,
1483 1,
1484 2,
1485 3,
1486 (IMM8 as u32 & 0b11) + 4,
1487 ((IMM8 as u32 >> 2) & 0b11) + 4,
1488 ((IMM8 as u32 >> 4) & 0b11) + 4,
1489 ((IMM8 as u32 >> 6) & 0b11) + 4,
1490 ],
1491 );
1492 transmute(x)
1493}
1494
1495#[inline]
1503#[target_feature(enable = "sse2")]
1504#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1505#[rustc_legacy_const_generics(1)]
1506#[stable(feature = "simd_x86", since = "1.27.0")]
1507pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1508 static_assert_uimm_bits!(IMM8, 8);
1509 let a = a.as_i16x8();
1510 let x: i16x8 = simd_shuffle!(
1511 a,
1512 a,
1513 [
1514 IMM8 as u32 & 0b11,
1515 (IMM8 as u32 >> 2) & 0b11,
1516 (IMM8 as u32 >> 4) & 0b11,
1517 (IMM8 as u32 >> 6) & 0b11,
1518 4,
1519 5,
1520 6,
1521 7,
1522 ],
1523 );
1524 transmute(x)
1525}
1526
1527#[inline]
1531#[target_feature(enable = "sse2")]
1532#[cfg_attr(test, assert_instr(punpckhbw))]
1533#[stable(feature = "simd_x86", since = "1.27.0")]
1534pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1535 transmute::<i8x16, _>(simd_shuffle!(
1536 a.as_i8x16(),
1537 b.as_i8x16(),
1538 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1539 ))
1540}
1541
1542#[inline]
1546#[target_feature(enable = "sse2")]
1547#[cfg_attr(test, assert_instr(punpckhwd))]
1548#[stable(feature = "simd_x86", since = "1.27.0")]
1549pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1550 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1551 transmute::<i16x8, _>(x)
1552}
1553
1554#[inline]
1558#[target_feature(enable = "sse2")]
1559#[cfg_attr(test, assert_instr(unpckhps))]
1560#[stable(feature = "simd_x86", since = "1.27.0")]
1561pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1562 transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
1563}
1564
1565#[inline]
1569#[target_feature(enable = "sse2")]
1570#[cfg_attr(test, assert_instr(unpckhpd))]
1571#[stable(feature = "simd_x86", since = "1.27.0")]
1572pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1573 transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3]))
1574}
1575
1576#[inline]
1580#[target_feature(enable = "sse2")]
1581#[cfg_attr(test, assert_instr(punpcklbw))]
1582#[stable(feature = "simd_x86", since = "1.27.0")]
1583pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1584 transmute::<i8x16, _>(simd_shuffle!(
1585 a.as_i8x16(),
1586 b.as_i8x16(),
1587 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1588 ))
1589}
1590
1591#[inline]
1595#[target_feature(enable = "sse2")]
1596#[cfg_attr(test, assert_instr(punpcklwd))]
1597#[stable(feature = "simd_x86", since = "1.27.0")]
1598pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1599 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1600 transmute::<i16x8, _>(x)
1601}
1602
1603#[inline]
1607#[target_feature(enable = "sse2")]
1608#[cfg_attr(test, assert_instr(unpcklps))]
1609#[stable(feature = "simd_x86", since = "1.27.0")]
1610pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1611 transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
1612}
1613
1614#[inline]
1618#[target_feature(enable = "sse2")]
1619#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
1620#[stable(feature = "simd_x86", since = "1.27.0")]
1621pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1622 transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2]))
1623}
1624
1625#[inline]
1630#[target_feature(enable = "sse2")]
1631#[cfg_attr(test, assert_instr(addsd))]
1632#[stable(feature = "simd_x86", since = "1.27.0")]
1633pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1634 simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b))
1635}
1636
/// Adds the lanes of `a` and `b` element-wise via `simd_add`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_add(a, b)
}
1648
1649#[inline]
1654#[target_feature(enable = "sse2")]
1655#[cfg_attr(test, assert_instr(divsd))]
1656#[stable(feature = "simd_x86", since = "1.27.0")]
1657pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1658 simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b))
1659}
1660
/// Divides the lanes of `a` by the corresponding lanes of `b` via `simd_div`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_div(a, b)
}
1672
/// Forwards `a` and `b` to the `maxsd` intrinsic and returns its result.
///
/// NOTE(review): presumably lane 0 is the maximum of the two low lanes and
/// lane 1 is copied from `a`, per Intel's `_mm_max_sd`; `maxsd` is declared
/// outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    maxsd(a, b)
}
1684
/// Forwards `a` and `b` to the `maxpd` intrinsic and returns its result.
///
/// NOTE(review): presumably the lane-wise maximum, per Intel's `_mm_max_pd`;
/// `maxpd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    maxpd(a, b)
}
1696
/// Forwards `a` and `b` to the `minsd` intrinsic and returns its result.
///
/// NOTE(review): presumably lane 0 is the minimum of the two low lanes and
/// lane 1 is copied from `a`, per Intel's `_mm_min_sd`; `minsd` is declared
/// outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    minsd(a, b)
}
1708
/// Forwards `a` and `b` to the `minpd` intrinsic and returns its result.
///
/// NOTE(review): presumably the lane-wise minimum, per Intel's `_mm_min_pd`;
/// `minpd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    minpd(a, b)
}
1720
1721#[inline]
1726#[target_feature(enable = "sse2")]
1727#[cfg_attr(test, assert_instr(mulsd))]
1728#[stable(feature = "simd_x86", since = "1.27.0")]
1729pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1730 simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b))
1731}
1732
/// Multiplies the lanes of `a` and `b` element-wise via `simd_mul`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_mul(a, b)
}
1744
1745#[inline]
1750#[target_feature(enable = "sse2")]
1751#[cfg_attr(test, assert_instr(sqrtsd))]
1752#[stable(feature = "simd_x86", since = "1.27.0")]
1753pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1754 simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b)))
1755}
1756
/// Takes the square root of each lane of `a` via `simd_fsqrt`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    simd_fsqrt(a)
}
1767
1768#[inline]
1773#[target_feature(enable = "sse2")]
1774#[cfg_attr(test, assert_instr(subsd))]
1775#[stable(feature = "simd_x86", since = "1.27.0")]
1776pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1777 simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b))
1778}
1779
/// Subtracts the lanes of `b` from the corresponding lanes of `a` via
/// `simd_sub`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_sub(a, b)
}
1791
1792#[inline]
1797#[target_feature(enable = "sse2")]
1798#[cfg_attr(test, assert_instr(andps))]
1799#[stable(feature = "simd_x86", since = "1.27.0")]
1800pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1801 let a: __m128i = transmute(a);
1802 let b: __m128i = transmute(b);
1803 transmute(_mm_and_si128(a, b))
1804}
1805
1806#[inline]
1810#[target_feature(enable = "sse2")]
1811#[cfg_attr(test, assert_instr(andnps))]
1812#[stable(feature = "simd_x86", since = "1.27.0")]
1813pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1814 let a: __m128i = transmute(a);
1815 let b: __m128i = transmute(b);
1816 transmute(_mm_andnot_si128(a, b))
1817}
1818
1819#[inline]
1823#[target_feature(enable = "sse2")]
1824#[cfg_attr(test, assert_instr(orps))]
1825#[stable(feature = "simd_x86", since = "1.27.0")]
1826pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1827 let a: __m128i = transmute(a);
1828 let b: __m128i = transmute(b);
1829 transmute(_mm_or_si128(a, b))
1830}
1831
1832#[inline]
1836#[target_feature(enable = "sse2")]
1837#[cfg_attr(test, assert_instr(xorps))]
1838#[stable(feature = "simd_x86", since = "1.27.0")]
1839pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1840 let a: __m128i = transmute(a);
1841 let b: __m128i = transmute(b);
1842 transmute(_mm_xor_si128(a, b))
1843}
1844
/// Forwards `a` and `b` to `cmpsd` with predicate immediate `0`.
///
/// NOTE(review): presumably the "equal" predicate, writing an all-ones or
/// all-zeros mask to lane 0 and copying lane 1 from `a`, per Intel's
/// `_mm_cmpeq_sd`; `cmpsd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 0)
}
1856
/// Forwards `a` and `b` to `cmpsd` with predicate immediate `1`.
///
/// NOTE(review): presumably the "less than" predicate, writing an all-ones or
/// all-zeros mask to lane 0 and copying lane 1 from `a`, per Intel's
/// `_mm_cmplt_sd`; `cmpsd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 1)
}
1868
/// Forwards `a` and `b` to `cmpsd` with predicate immediate `2`.
///
/// NOTE(review): presumably the "less than or equal" predicate, writing an
/// all-ones or all-zeros mask to lane 0 and copying lane 1 from `a`, per
/// Intel's `_mm_cmple_sd`; `cmpsd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 2)
}
1880
1881#[inline]
1886#[target_feature(enable = "sse2")]
1887#[cfg_attr(test, assert_instr(cmpltsd))]
1888#[stable(feature = "simd_x86", since = "1.27.0")]
1889pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1890 simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64))
1891}
1892
1893#[inline]
1898#[target_feature(enable = "sse2")]
1899#[cfg_attr(test, assert_instr(cmplesd))]
1900#[stable(feature = "simd_x86", since = "1.27.0")]
1901pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1902 simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64))
1903}
1904
/// Forwards `a` and `b` to `cmpsd` with predicate immediate `7`.
///
/// NOTE(review): presumably the "ordered" predicate (neither low lane is NaN),
/// writing an all-ones or all-zeros mask to lane 0 and copying lane 1 from
/// `a`, per Intel's `_mm_cmpord_sd`; `cmpsd` is declared outside this view, so
/// confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 7)
}
1918
/// Forwards `a` and `b` to `cmpsd` with predicate immediate `3`.
///
/// NOTE(review): presumably the "unordered" predicate (either low lane is
/// NaN), writing an all-ones or all-zeros mask to lane 0 and copying lane 1
/// from `a`, per Intel's `_mm_cmpunord_sd`; `cmpsd` is declared outside this
/// view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 3)
}
1931
/// Forwards `a` and `b` to `cmpsd` with predicate immediate `4`.
///
/// NOTE(review): presumably the "not equal" predicate, writing an all-ones or
/// all-zeros mask to lane 0 and copying lane 1 from `a`, per Intel's
/// `_mm_cmpneq_sd`; `cmpsd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 4)
}
1943
/// Forwards `a` and `b` to `cmpsd` with predicate immediate `5`.
///
/// NOTE(review): presumably the "not less than" predicate, writing an all-ones
/// or all-zeros mask to lane 0 and copying lane 1 from `a`, per Intel's
/// `_mm_cmpnlt_sd`; `cmpsd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 5)
}
1955
/// Forwards `a` and `b` to `cmpsd` with predicate immediate `6`.
///
/// NOTE(review): presumably the "not less than or equal" predicate, writing an
/// all-ones or all-zeros mask to lane 0 and copying lane 1 from `a`, per
/// Intel's `_mm_cmpnle_sd`; `cmpsd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 6)
}
1967
1968#[inline]
1973#[target_feature(enable = "sse2")]
1974#[cfg_attr(test, assert_instr(cmpnltsd))]
1975#[stable(feature = "simd_x86", since = "1.27.0")]
1976pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
1977 simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64))
1978}
1979
1980#[inline]
1985#[target_feature(enable = "sse2")]
1986#[cfg_attr(test, assert_instr(cmpnlesd))]
1987#[stable(feature = "simd_x86", since = "1.27.0")]
1988pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
1989 simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64))
1990}
1991
/// Forwards `a` and `b` to `cmppd` with predicate immediate `0`.
///
/// NOTE(review): presumably the lane-wise "equal" predicate producing an
/// all-ones or all-zeros mask per lane, per Intel's `_mm_cmpeq_pd`; `cmppd`
/// is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 0)
}
2002
/// Forwards `a` and `b` to `cmppd` with predicate immediate `1`.
///
/// NOTE(review): presumably the lane-wise "less than" predicate producing an
/// all-ones or all-zeros mask per lane, per Intel's `_mm_cmplt_pd`; `cmppd`
/// is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 1)
}
2013
/// Forwards `a` and `b` to `cmppd` with predicate immediate `2`.
///
/// NOTE(review): presumably the lane-wise "less than or equal" predicate
/// producing an all-ones or all-zeros mask per lane, per Intel's
/// `_mm_cmple_pd`; `cmppd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 2)
}
2024
/// Lane-wise "greater than", expressed as `_mm_cmplt_pd` with the operands
/// swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmplt_pd(b, a)
}
2035
/// Lane-wise "greater than or equal", expressed as `_mm_cmple_pd` with the
/// operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmple_pd(b, a)
}
2046
/// Forwards `a` and `b` to `cmppd` with predicate immediate `7`.
///
/// NOTE(review): presumably the lane-wise "ordered" predicate (neither lane is
/// NaN) producing an all-ones or all-zeros mask per lane, per Intel's
/// `_mm_cmpord_pd`; `cmppd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 7)
}
2057
/// Forwards `a` and `b` to `cmppd` with predicate immediate `3`.
///
/// NOTE(review): presumably the lane-wise "unordered" predicate (either lane
/// is NaN) producing an all-ones or all-zeros mask per lane, per Intel's
/// `_mm_cmpunord_pd`; `cmppd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 3)
}
2068
/// Forwards `a` and `b` to `cmppd` with predicate immediate `4`.
///
/// NOTE(review): presumably the lane-wise "not equal" predicate producing an
/// all-ones or all-zeros mask per lane, per Intel's `_mm_cmpneq_pd`; `cmppd`
/// is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 4)
}
2079
/// Forwards `a` and `b` to `cmppd` with predicate immediate `5`.
///
/// NOTE(review): presumably the lane-wise "not less than" predicate producing
/// an all-ones or all-zeros mask per lane, per Intel's `_mm_cmpnlt_pd`;
/// `cmppd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 5)
}
2090
/// Forwards `a` and `b` to `cmppd` with predicate immediate `6`.
///
/// NOTE(review): presumably the lane-wise "not less than or equal" predicate
/// producing an all-ones or all-zeros mask per lane, per Intel's
/// `_mm_cmpnle_pd`; `cmppd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 6)
}
2101
/// Lane-wise "not greater than", expressed as `_mm_cmpnlt_pd` with the
/// operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnlt_pd(b, a)
}
2112
/// Lane-wise "not greater than or equal", expressed as `_mm_cmpnle_pd` with
/// the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnle_pd(b, a)
}
2124
/// Forwards `a` and `b` to the `comieqsd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lanes compare equal and 0
/// otherwise, per Intel's `_mm_comieq_sd`; `comieqsd` is declared outside this
/// view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    comieqsd(a, b)
}
2135
/// Forwards `a` and `b` to the `comiltsd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lane of `a` is less than that of
/// `b` and 0 otherwise, per Intel's `_mm_comilt_sd`; `comiltsd` is declared
/// outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    comiltsd(a, b)
}
2146
/// Forwards `a` and `b` to the `comilesd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lane of `a` is less than or equal
/// to that of `b` and 0 otherwise, per Intel's `_mm_comile_sd`; `comilesd` is
/// declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    comilesd(a, b)
}
2157
/// Forwards `a` and `b` to the `comigtsd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lane of `a` is greater than that of
/// `b` and 0 otherwise, per Intel's `_mm_comigt_sd`; `comigtsd` is declared
/// outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    comigtsd(a, b)
}
2168
/// Forwards `a` and `b` to the `comigesd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lane of `a` is greater than or
/// equal to that of `b` and 0 otherwise, per Intel's `_mm_comige_sd`;
/// `comigesd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    comigesd(a, b)
}
2179
/// Forwards `a` and `b` to the `comineqsd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lanes compare not equal and 0
/// otherwise, per Intel's `_mm_comineq_sd`; `comineqsd` is declared outside
/// this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    comineqsd(a, b)
}
2190
/// Forwards `a` and `b` to the `ucomieqsd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lanes compare equal and 0
/// otherwise, differing from `_mm_comieq_sd` only in how NaN operands raise
/// exceptions; `ucomieqsd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    ucomieqsd(a, b)
}
2201
/// Forwards `a` and `b` to the `ucomiltsd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lane of `a` is less than that of
/// `b` and 0 otherwise, differing from `_mm_comilt_sd` only in how NaN
/// operands raise exceptions; `ucomiltsd` is declared outside this view, so
/// confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    ucomiltsd(a, b)
}
2212
/// Forwards `a` and `b` to the `ucomilesd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lane of `a` is less than or equal
/// to that of `b` and 0 otherwise, differing from `_mm_comile_sd` only in how
/// NaN operands raise exceptions; `ucomilesd` is declared outside this view,
/// so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    ucomilesd(a, b)
}
2223
/// Forwards `a` and `b` to the `ucomigtsd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lane of `a` is greater than that of
/// `b` and 0 otherwise, differing from `_mm_comigt_sd` only in how NaN
/// operands raise exceptions; `ucomigtsd` is declared outside this view, so
/// confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    ucomigtsd(a, b)
}
2234
/// Forwards `a` and `b` to the `ucomigesd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lane of `a` is greater than or
/// equal to that of `b` and 0 otherwise, differing from `_mm_comige_sd` only
/// in how NaN operands raise exceptions; `ucomigesd` is declared outside this
/// view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    ucomigesd(a, b)
}
2245
/// Forwards `a` and `b` to the `ucomineqsd` intrinsic and returns its `i32`
/// result.
///
/// NOTE(review): presumably 1 when the low lanes compare not equal and 0
/// otherwise, differing from `_mm_comineq_sd` only in how NaN operands raise
/// exceptions; `ucomineqsd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    ucomineqsd(a, b)
}
2256
2257#[inline]
2262#[target_feature(enable = "sse2")]
2263#[cfg_attr(test, assert_instr(cvtpd2ps))]
2264#[stable(feature = "simd_x86", since = "1.27.0")]
2265pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2266 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2267 let zero = f32x2::ZERO;
2268 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2269}
2270
2271#[inline]
2277#[target_feature(enable = "sse2")]
2278#[cfg_attr(test, assert_instr(cvtps2pd))]
2279#[stable(feature = "simd_x86", since = "1.27.0")]
2280pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d {
2281 let a = a.as_f32x4();
2282 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2283}
2284
/// Passes `a` to the `cvtpd2dq` intrinsic and reinterprets the result as
/// `__m128i`.
///
/// NOTE(review): presumably converts both `f64` lanes to 32-bit integers using
/// the current rounding mode, per Intel's `_mm_cvtpd_epi32`; `cvtpd2dq` is
/// declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
    transmute(cvtpd2dq(a))
}
2296
/// Passes `a` to the `cvtsd2si` intrinsic and returns its `i32` result.
///
/// NOTE(review): presumably converts the low `f64` lane to a 32-bit integer
/// using the current rounding mode, per Intel's `_mm_cvtsd_si32`; `cvtsd2si`
/// is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    cvtsd2si(a)
}
2308
/// Forwards `a` and `b` to the `cvtsd2ss` intrinsic and returns its result.
///
/// NOTE(review): presumably converts the low `f64` lane of `b` to `f32`,
/// stores it in lane 0, and copies the remaining lanes from `a`, per Intel's
/// `_mm_cvtsd_ss`; `cvtsd2ss` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    cvtsd2ss(a, b)
}
2322
/// Returns lane 0 of `a` as an `f64`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    simd_extract!(a, 0)
}
2332
/// Forwards `a` and `b` to the `cvtss2sd` intrinsic and returns its result.
///
/// NOTE(review): presumably converts the low `f32` lane of `b` to `f64`,
/// stores it in lane 0, and copies lane 1 from `a`, per Intel's
/// `_mm_cvtss_sd`; `cvtss2sd` is declared outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
    cvtss2sd(a, b)
}
2346
/// Passes `a` to the `cvttpd2dq` intrinsic and reinterprets the result as
/// `__m128i`.
///
/// NOTE(review): presumably converts both `f64` lanes to 32-bit integers with
/// truncation, per Intel's `_mm_cvttpd_epi32`; `cvttpd2dq` is declared outside
/// this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
    transmute(cvttpd2dq(a))
}
2358
/// Passes `a` to the `cvttsd2si` intrinsic and returns its `i32` result.
///
/// NOTE(review): presumably converts the low `f64` lane to a 32-bit integer
/// with truncation, per Intel's `_mm_cvttsd_si32`; `cvttsd2si` is declared
/// outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    cvttsd2si(a)
}
2370
/// Passes `a` to the `cvttps2dq` intrinsic and reinterprets the result as
/// `__m128i`.
///
/// NOTE(review): presumably converts the four `f32` lanes to 32-bit integers
/// with truncation, per Intel's `_mm_cvttps_epi32`; `cvttps2dq` is declared
/// outside this view, so confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i {
    transmute(cvttps2dq(a))
}
2382
/// Builds a vector with `a` in lane 0 and `0.0` in lane 1.
///
/// Implemented as `_mm_set_pd(0.0, a)`, whose first argument is the upper
/// lane.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_sd(a: f64) -> __m128d {
    _mm_set_pd(0.0, a)
}
2393
/// Builds a vector with `a` in both lanes.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_pd(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}
2404
/// Builds a vector with `a` in both lanes.
///
/// The body is identical to that of `_mm_set1_pd`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_pd1(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}
2415
/// Builds a vector from two `f64` values, highest lane first.
///
/// `b` is stored at index 0 and `a` at index 1.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d {
    __m128d([b, a])
}
2426
/// Builds a vector from two `f64` values, lowest lane first.
///
/// Delegates to `_mm_set_pd` with the arguments swapped, so `a` ends up at
/// index 0 and `b` at index 1.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
    _mm_set_pd(b, a)
}
2437
/// Returns a `__m128d` whose bytes are all zero.
///
/// The value comes from `mem::zeroed()` evaluated inside a `const` block.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_pd() -> __m128d {
    const { mem::zeroed() }
}
2449
2450#[inline]
2457#[target_feature(enable = "sse2")]
2458#[cfg_attr(test, assert_instr(movmskpd))]
2459#[stable(feature = "simd_x86", since = "1.27.0")]
2460pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 {
2461 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2464 simd_bitmask::<i64x2, u8>(mask).into()
2465}
2466
/// Loads a `__m128d` from `mem_addr`.
///
/// # Safety
///
/// `mem_addr` is cast to `*const __m128d` and dereferenced, so it must be
/// valid for reading a whole `__m128d` and aligned for that type.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    *(mem_addr as *const __m128d)
}
2481
2482#[inline]
2487#[target_feature(enable = "sse2")]
2488#[cfg_attr(test, assert_instr(movsd))]
2489#[stable(feature = "simd_x86", since = "1.27.0")]
2490pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2491 _mm_setr_pd(*mem_addr, 0.)
2492}
2493
2494#[inline]
2500#[target_feature(enable = "sse2")]
2501#[cfg_attr(test, assert_instr(movhps))]
2502#[stable(feature = "simd_x86", since = "1.27.0")]
2503pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2504 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2505}
2506
2507#[inline]
2513#[target_feature(enable = "sse2")]
2514#[cfg_attr(test, assert_instr(movlps))]
2515#[stable(feature = "simd_x86", since = "1.27.0")]
2516pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2517 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2518}
2519
/// Stores `a` to `mem_addr` with a `movntpd` instruction issued through inline
/// assembly.
///
/// `mem_addr` is passed in a general-purpose register and `a` in an XMM
/// register. The asm block is declared `nostack` and `preserves_flags`.
///
/// NOTE(review): `vps!` is defined outside this view and presumably expands to
/// the instruction text with a `[{p}]` memory operand. `movntpd` is presumably
/// a non-temporal store that needs a 16-byte-aligned address and a later
/// `sfence` before other threads rely on the data; confirm against the Intel
/// documentation.
///
/// # Safety
///
/// `mem_addr` must be valid for writing a whole `__m128d`; the write happens
/// inside the asm block, where the compiler cannot check it.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    crate::arch::asm!(
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2548
2549#[inline]
2554#[target_feature(enable = "sse2")]
2555#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlps))]
2556#[stable(feature = "simd_x86", since = "1.27.0")]
2557pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2558 *mem_addr = simd_extract!(a, 0)
2559}
2560
/// Stores all of `a` to `mem_addr`.
///
/// # Safety
///
/// `mem_addr` is cast to `*mut __m128d` and written through, so it must be
/// valid for writing a whole `__m128d` and aligned for that type.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
    *(mem_addr as *mut __m128d) = a;
}
2574
2575#[inline]
2581#[target_feature(enable = "sse2")]
2582#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2584pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2585 mem_addr.cast::<__m128d>().write_unaligned(a);
2586}
2587
2588#[inline]
2594#[target_feature(enable = "sse2")]
2595#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2596pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2597 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2598}
2599
2600#[inline]
2606#[target_feature(enable = "sse2")]
2607#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2608pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2609 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2610}
2611
2612#[inline]
2618#[target_feature(enable = "sse2")]
2619#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2620pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2621 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2622}
2623
2624#[inline]
2630#[target_feature(enable = "sse2")]
2631#[stable(feature = "simd_x86", since = "1.27.0")]
2632#[allow(clippy::cast_ptr_alignment)]
2633pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2634 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2635 *(mem_addr as *mut __m128d) = b;
2636}
2637
2638#[inline]
2644#[target_feature(enable = "sse2")]
2645#[stable(feature = "simd_x86", since = "1.27.0")]
2646#[allow(clippy::cast_ptr_alignment)]
2647pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2648 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2649 *(mem_addr as *mut __m128d) = b;
2650}
2651
2652#[inline]
2659#[target_feature(enable = "sse2")]
2660#[stable(feature = "simd_x86", since = "1.27.0")]
2661#[allow(clippy::cast_ptr_alignment)]
2662pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2663 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2664 *(mem_addr as *mut __m128d) = b;
2665}
2666
2667#[inline]
2672#[target_feature(enable = "sse2")]
2673#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movhps))]
2674#[stable(feature = "simd_x86", since = "1.27.0")]
2675pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2676 *mem_addr = simd_extract!(a, 1);
2677}
2678
2679#[inline]
2684#[target_feature(enable = "sse2")]
2685#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlps))]
2686#[stable(feature = "simd_x86", since = "1.27.0")]
2687pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2688 *mem_addr = simd_extract!(a, 0);
2689}
2690
2691#[inline]
2696#[target_feature(enable = "sse2")]
2697#[stable(feature = "simd_x86", since = "1.27.0")]
2699pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2700 let d = *mem_addr;
2701 _mm_setr_pd(d, d)
2702}
2703
/// Reads one `f64` from `mem_addr` and returns it in both lanes.
///
/// Delegates directly to `_mm_load1_pd`.
///
/// # Safety
///
/// Same requirements as `_mm_load1_pd`, which dereferences `mem_addr` as an
/// `f64`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}
2715
2716#[inline]
2722#[target_feature(enable = "sse2")]
2723#[cfg_attr(test, assert_instr(movaps))]
2724#[stable(feature = "simd_x86", since = "1.27.0")]
2725pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2726 let a = _mm_load_pd(mem_addr);
2727 simd_shuffle!(a, a, [1, 0])
2728}
2729
2730#[inline]
2736#[target_feature(enable = "sse2")]
2737#[cfg_attr(test, assert_instr(movups))]
2738#[stable(feature = "simd_x86", since = "1.27.0")]
2739pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2740 let mut dst = _mm_undefined_pd();
2741 ptr::copy_nonoverlapping(
2742 mem_addr as *const u8,
2743 ptr::addr_of_mut!(dst) as *mut u8,
2744 mem::size_of::<__m128d>(),
2745 );
2746 dst
2747}
2748
2749#[inline]
2755#[target_feature(enable = "sse2")]
2756#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2757pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2758 transmute(i16x8::new(
2759 ptr::read_unaligned(mem_addr as *const i16),
2760 0,
2761 0,
2762 0,
2763 0,
2764 0,
2765 0,
2766 0,
2767 ))
2768}
2769
2770#[inline]
2776#[target_feature(enable = "sse2")]
2777#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2778pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2779 transmute(i32x4::new(
2780 ptr::read_unaligned(mem_addr as *const i32),
2781 0,
2782 0,
2783 0,
2784 ))
2785}
2786
2787#[inline]
2793#[target_feature(enable = "sse2")]
2794#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
2795pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
2796 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2797}
2798
/// Builds a vector from one lane of `a` and one lane of `b`, chosen by `MASK`.
///
/// Bit 0 of `MASK` selects which lane of `a` becomes lane 0 of the result.
/// Bit 1 selects which lane of `b` becomes lane 1 (the `+ 2` moves the index
/// into `b`'s half of the shuffle). `MASK` is statically checked by
/// `static_assert_uimm_bits!(MASK, 8)`; bits 2 to 7 are not used.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
}
2813
2814#[inline]
2820#[target_feature(enable = "sse2")]
2821#[cfg_attr(test, assert_instr(movsd))]
2822#[stable(feature = "simd_x86", since = "1.27.0")]
2823pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2824 _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1))
2825}
2826
/// Reinterprets the bits of a `__m128d` as a `__m128`.
///
/// This is a plain `transmute`; no lane values are converted.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 {
    transmute(a)
}
2837
/// Reinterprets the bits of a `__m128d` as a `__m128i`.
///
/// This is a plain `transmute`; no lane values are converted.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i {
    transmute(a)
}
2848
/// Reinterprets the bits of a `__m128` as a `__m128d`.
///
/// This is a plain `transmute`; no lane values are converted.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d {
    transmute(a)
}
2859
/// Reinterprets the bits of a `__m128` as a `__m128i`.
///
/// This is a plain `transmute`; no lane values are converted.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i {
    transmute(a)
}
2870
/// Reinterprets the bits of a `__m128i` as a `__m128d`.
///
/// This is a plain `transmute`; no lane values are converted.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d {
    transmute(a)
}
2881
/// Reinterprets the bits of a `__m128i` as a `__m128`.
///
/// This is a plain `transmute`; no lane values are converted.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 {
    transmute(a)
}
2892
/// Returns a placeholder `__m128d`.
///
/// Despite the name, this implementation always returns an all-zero value
/// (`mem::zeroed()` evaluated inside a `const` block).
///
/// NOTE(review): callers presumably should not rely on the zero contents,
/// since Intel documents the result as undefined; confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_pd() -> __m128d {
    const { mem::zeroed() }
}
2904
/// Returns a placeholder `__m128i`.
///
/// Despite the name, this implementation always returns an all-zero value
/// (`mem::zeroed()` evaluated inside a `const` block).
///
/// NOTE(review): callers presumably should not rely on the zero contents,
/// since Intel documents the result as undefined; confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_si128() -> __m128i {
    const { mem::zeroed() }
}
2916
/// Combines the upper lanes of `a` and `b`.
///
/// Lane 0 of the result is lane 1 of `a`; lane 1 of the result is lane 1 of
/// `b` (shuffle index 3 refers to the second lane of `b`).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_shuffle!(a, b, [1, 3])
}
2931
2932#[inline]
2940#[target_feature(enable = "sse2")]
2941#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(movlhps))]
2942#[stable(feature = "simd_x86", since = "1.27.0")]
2943pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
2944 simd_shuffle!(a, b, [0, 2])
2945}
2946
2947#[allow(improper_ctypes)]
2948extern "C" {
2949 #[link_name = "llvm.x86.sse2.pause"]
2950 fn pause();
2951 #[link_name = "llvm.x86.sse2.clflush"]
2952 fn clflush(p: *const u8);
2953 #[link_name = "llvm.x86.sse2.lfence"]
2954 fn lfence();
2955 #[link_name = "llvm.x86.sse2.mfence"]
2956 fn mfence();
2957 #[link_name = "llvm.x86.sse2.pmadd.wd"]
2958 fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
2959 #[link_name = "llvm.x86.sse2.psad.bw"]
2960 fn psadbw(a: u8x16, b: u8x16) -> u64x2;
2961 #[link_name = "llvm.x86.sse2.psll.w"]
2962 fn psllw(a: i16x8, count: i16x8) -> i16x8;
2963 #[link_name = "llvm.x86.sse2.psll.d"]
2964 fn pslld(a: i32x4, count: i32x4) -> i32x4;
2965 #[link_name = "llvm.x86.sse2.psll.q"]
2966 fn psllq(a: i64x2, count: i64x2) -> i64x2;
2967 #[link_name = "llvm.x86.sse2.psra.w"]
2968 fn psraw(a: i16x8, count: i16x8) -> i16x8;
2969 #[link_name = "llvm.x86.sse2.psra.d"]
2970 fn psrad(a: i32x4, count: i32x4) -> i32x4;
2971 #[link_name = "llvm.x86.sse2.psrl.w"]
2972 fn psrlw(a: i16x8, count: i16x8) -> i16x8;
2973 #[link_name = "llvm.x86.sse2.psrl.d"]
2974 fn psrld(a: i32x4, count: i32x4) -> i32x4;
2975 #[link_name = "llvm.x86.sse2.psrl.q"]
2976 fn psrlq(a: i64x2, count: i64x2) -> i64x2;
2977 #[link_name = "llvm.x86.sse2.cvtps2dq"]
2978 fn cvtps2dq(a: __m128) -> i32x4;
2979 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
2980 fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
2981 #[link_name = "llvm.x86.sse2.packsswb.128"]
2982 fn packsswb(a: i16x8, b: i16x8) -> i8x16;
2983 #[link_name = "llvm.x86.sse2.packssdw.128"]
2984 fn packssdw(a: i32x4, b: i32x4) -> i16x8;
2985 #[link_name = "llvm.x86.sse2.packuswb.128"]
2986 fn packuswb(a: i16x8, b: i16x8) -> u8x16;
2987 #[link_name = "llvm.x86.sse2.max.sd"]
2988 fn maxsd(a: __m128d, b: __m128d) -> __m128d;
2989 #[link_name = "llvm.x86.sse2.max.pd"]
2990 fn maxpd(a: __m128d, b: __m128d) -> __m128d;
2991 #[link_name = "llvm.x86.sse2.min.sd"]
2992 fn minsd(a: __m128d, b: __m128d) -> __m128d;
2993 #[link_name = "llvm.x86.sse2.min.pd"]
2994 fn minpd(a: __m128d, b: __m128d) -> __m128d;
2995 #[link_name = "llvm.x86.sse2.cmp.sd"]
2996 fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2997 #[link_name = "llvm.x86.sse2.cmp.pd"]
2998 fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2999 #[link_name = "llvm.x86.sse2.comieq.sd"]
3000 fn comieqsd(a: __m128d, b: __m128d) -> i32;
3001 #[link_name = "llvm.x86.sse2.comilt.sd"]
3002 fn comiltsd(a: __m128d, b: __m128d) -> i32;
3003 #[link_name = "llvm.x86.sse2.comile.sd"]
3004 fn comilesd(a: __m128d, b: __m128d) -> i32;
3005 #[link_name = "llvm.x86.sse2.comigt.sd"]
3006 fn comigtsd(a: __m128d, b: __m128d) -> i32;
3007 #[link_name = "llvm.x86.sse2.comige.sd"]
3008 fn comigesd(a: __m128d, b: __m128d) -> i32;
3009 #[link_name = "llvm.x86.sse2.comineq.sd"]
3010 fn comineqsd(a: __m128d, b: __m128d) -> i32;
3011 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
3012 fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
3013 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
3014 fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
3015 #[link_name = "llvm.x86.sse2.ucomile.sd"]
3016 fn ucomilesd(a: __m128d, b: __m128d) -> i32;
3017 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
3018 fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
3019 #[link_name = "llvm.x86.sse2.ucomige.sd"]
3020 fn ucomigesd(a: __m128d, b: __m128d) -> i32;
3021 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
3022 fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
3023 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
3024 fn cvtpd2dq(a: __m128d) -> i32x4;
3025 #[link_name = "llvm.x86.sse2.cvtsd2si"]
3026 fn cvtsd2si(a: __m128d) -> i32;
3027 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
3028 fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
3029 #[link_name = "llvm.x86.sse2.cvtss2sd"]
3030 fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
3031 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
3032 fn cvttpd2dq(a: __m128d) -> i32x4;
3033 #[link_name = "llvm.x86.sse2.cvttsd2si"]
3034 fn cvttsd2si(a: __m128d) -> i32;
3035 #[link_name = "llvm.x86.sse2.cvttps2dq"]
3036 fn cvttps2dq(a: __m128) -> i32x4;
3037}
3038
3039#[cfg(test)]
3040mod tests {
3041 use crate::{
3042 core_arch::{simd::*, x86::*},
3043 hint::black_box,
3044 };
3045 use std::{
3046 boxed, f32, f64,
3047 mem::{self, transmute},
3048 ptr,
3049 };
3050 use stdarch_test::simd_test;
3051
    // Local shorthand for `f64::NAN`.
    // NOTE(review): presumably used by floating-point tests further down the
    // module (not visible in this chunk) — confirm.
    const NAN: f64 = f64::NAN;
3053
3054 #[test]
3055 fn test_mm_pause() {
3056 unsafe { _mm_pause() }
3057 }
3058
3059 #[simd_test(enable = "sse2")]
3060 unsafe fn test_mm_clflush() {
3061 let x = 0_u8;
3062 _mm_clflush(ptr::addr_of!(x));
3063 }
3064
3065 #[simd_test(enable = "sse2")]
3066 #[cfg_attr(miri, ignore)]
3068 unsafe fn test_mm_lfence() {
3069 _mm_lfence();
3070 }
3071
3072 #[simd_test(enable = "sse2")]
3073 #[cfg_attr(miri, ignore)]
3075 unsafe fn test_mm_mfence() {
3076 _mm_mfence();
3077 }
3078
3079 #[simd_test(enable = "sse2")]
3080 unsafe fn test_mm_add_epi8() {
3081 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3082 #[rustfmt::skip]
3083 let b = _mm_setr_epi8(
3084 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3085 );
3086 let r = _mm_add_epi8(a, b);
3087 #[rustfmt::skip]
3088 let e = _mm_setr_epi8(
3089 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3090 );
3091 assert_eq_m128i(r, e);
3092 }
3093
3094 #[simd_test(enable = "sse2")]
3095 unsafe fn test_mm_add_epi8_overflow() {
3096 let a = _mm_set1_epi8(0x7F);
3097 let b = _mm_set1_epi8(1);
3098 let r = _mm_add_epi8(a, b);
3099 assert_eq_m128i(r, _mm_set1_epi8(-128));
3100 }
3101
3102 #[simd_test(enable = "sse2")]
3103 unsafe fn test_mm_add_epi16() {
3104 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3105 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3106 let r = _mm_add_epi16(a, b);
3107 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3108 assert_eq_m128i(r, e);
3109 }
3110
3111 #[simd_test(enable = "sse2")]
3112 unsafe fn test_mm_add_epi32() {
3113 let a = _mm_setr_epi32(0, 1, 2, 3);
3114 let b = _mm_setr_epi32(4, 5, 6, 7);
3115 let r = _mm_add_epi32(a, b);
3116 let e = _mm_setr_epi32(4, 6, 8, 10);
3117 assert_eq_m128i(r, e);
3118 }
3119
3120 #[simd_test(enable = "sse2")]
3121 unsafe fn test_mm_add_epi64() {
3122 let a = _mm_setr_epi64x(0, 1);
3123 let b = _mm_setr_epi64x(2, 3);
3124 let r = _mm_add_epi64(a, b);
3125 let e = _mm_setr_epi64x(2, 4);
3126 assert_eq_m128i(r, e);
3127 }
3128
3129 #[simd_test(enable = "sse2")]
3130 unsafe fn test_mm_adds_epi8() {
3131 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3132 #[rustfmt::skip]
3133 let b = _mm_setr_epi8(
3134 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3135 );
3136 let r = _mm_adds_epi8(a, b);
3137 #[rustfmt::skip]
3138 let e = _mm_setr_epi8(
3139 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3140 );
3141 assert_eq_m128i(r, e);
3142 }
3143
3144 #[simd_test(enable = "sse2")]
3145 unsafe fn test_mm_adds_epi8_saturate_positive() {
3146 let a = _mm_set1_epi8(0x7F);
3147 let b = _mm_set1_epi8(1);
3148 let r = _mm_adds_epi8(a, b);
3149 assert_eq_m128i(r, a);
3150 }
3151
3152 #[simd_test(enable = "sse2")]
3153 unsafe fn test_mm_adds_epi8_saturate_negative() {
3154 let a = _mm_set1_epi8(-0x80);
3155 let b = _mm_set1_epi8(-1);
3156 let r = _mm_adds_epi8(a, b);
3157 assert_eq_m128i(r, a);
3158 }
3159
3160 #[simd_test(enable = "sse2")]
3161 unsafe fn test_mm_adds_epi16() {
3162 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3163 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3164 let r = _mm_adds_epi16(a, b);
3165 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3166 assert_eq_m128i(r, e);
3167 }
3168
3169 #[simd_test(enable = "sse2")]
3170 unsafe fn test_mm_adds_epi16_saturate_positive() {
3171 let a = _mm_set1_epi16(0x7FFF);
3172 let b = _mm_set1_epi16(1);
3173 let r = _mm_adds_epi16(a, b);
3174 assert_eq_m128i(r, a);
3175 }
3176
3177 #[simd_test(enable = "sse2")]
3178 unsafe fn test_mm_adds_epi16_saturate_negative() {
3179 let a = _mm_set1_epi16(-0x8000);
3180 let b = _mm_set1_epi16(-1);
3181 let r = _mm_adds_epi16(a, b);
3182 assert_eq_m128i(r, a);
3183 }
3184
3185 #[simd_test(enable = "sse2")]
3186 unsafe fn test_mm_adds_epu8() {
3187 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3188 #[rustfmt::skip]
3189 let b = _mm_setr_epi8(
3190 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3191 );
3192 let r = _mm_adds_epu8(a, b);
3193 #[rustfmt::skip]
3194 let e = _mm_setr_epi8(
3195 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3196 );
3197 assert_eq_m128i(r, e);
3198 }
3199
3200 #[simd_test(enable = "sse2")]
3201 unsafe fn test_mm_adds_epu8_saturate() {
3202 let a = _mm_set1_epi8(!0);
3203 let b = _mm_set1_epi8(1);
3204 let r = _mm_adds_epu8(a, b);
3205 assert_eq_m128i(r, a);
3206 }
3207
3208 #[simd_test(enable = "sse2")]
3209 unsafe fn test_mm_adds_epu16() {
3210 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3211 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3212 let r = _mm_adds_epu16(a, b);
3213 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3214 assert_eq_m128i(r, e);
3215 }
3216
3217 #[simd_test(enable = "sse2")]
3218 unsafe fn test_mm_adds_epu16_saturate() {
3219 let a = _mm_set1_epi16(!0);
3220 let b = _mm_set1_epi16(1);
3221 let r = _mm_adds_epu16(a, b);
3222 assert_eq_m128i(r, a);
3223 }
3224
3225 #[simd_test(enable = "sse2")]
3226 unsafe fn test_mm_avg_epu8() {
3227 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3228 let r = _mm_avg_epu8(a, b);
3229 assert_eq_m128i(r, _mm_set1_epi8(6));
3230 }
3231
3232 #[simd_test(enable = "sse2")]
3233 unsafe fn test_mm_avg_epu16() {
3234 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3235 let r = _mm_avg_epu16(a, b);
3236 assert_eq_m128i(r, _mm_set1_epi16(6));
3237 }
3238
3239 #[simd_test(enable = "sse2")]
3240 unsafe fn test_mm_madd_epi16() {
3241 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3242 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3243 let r = _mm_madd_epi16(a, b);
3244 let e = _mm_setr_epi32(29, 81, 149, 233);
3245 assert_eq_m128i(r, e);
3246
3247 let a = _mm_setr_epi16(
3250 i16::MAX,
3251 i16::MAX,
3252 i16::MIN,
3253 i16::MIN,
3254 i16::MIN,
3255 i16::MAX,
3256 0,
3257 0,
3258 );
3259 let b = _mm_setr_epi16(
3260 i16::MAX,
3261 i16::MAX,
3262 i16::MIN,
3263 i16::MIN,
3264 i16::MAX,
3265 i16::MIN,
3266 0,
3267 0,
3268 );
3269 let r = _mm_madd_epi16(a, b);
3270 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3271 assert_eq_m128i(r, e);
3272 }
3273
3274 #[simd_test(enable = "sse2")]
3275 unsafe fn test_mm_max_epi16() {
3276 let a = _mm_set1_epi16(1);
3277 let b = _mm_set1_epi16(-1);
3278 let r = _mm_max_epi16(a, b);
3279 assert_eq_m128i(r, a);
3280 }
3281
3282 #[simd_test(enable = "sse2")]
3283 unsafe fn test_mm_max_epu8() {
3284 let a = _mm_set1_epi8(1);
3285 let b = _mm_set1_epi8(!0);
3286 let r = _mm_max_epu8(a, b);
3287 assert_eq_m128i(r, b);
3288 }
3289
3290 #[simd_test(enable = "sse2")]
3291 unsafe fn test_mm_min_epi16() {
3292 let a = _mm_set1_epi16(1);
3293 let b = _mm_set1_epi16(-1);
3294 let r = _mm_min_epi16(a, b);
3295 assert_eq_m128i(r, b);
3296 }
3297
3298 #[simd_test(enable = "sse2")]
3299 unsafe fn test_mm_min_epu8() {
3300 let a = _mm_set1_epi8(1);
3301 let b = _mm_set1_epi8(!0);
3302 let r = _mm_min_epu8(a, b);
3303 assert_eq_m128i(r, a);
3304 }
3305
3306 #[simd_test(enable = "sse2")]
3307 unsafe fn test_mm_mulhi_epi16() {
3308 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3309 let r = _mm_mulhi_epi16(a, b);
3310 assert_eq_m128i(r, _mm_set1_epi16(-16));
3311 }
3312
3313 #[simd_test(enable = "sse2")]
3314 unsafe fn test_mm_mulhi_epu16() {
3315 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3316 let r = _mm_mulhi_epu16(a, b);
3317 assert_eq_m128i(r, _mm_set1_epi16(15));
3318 }
3319
3320 #[simd_test(enable = "sse2")]
3321 unsafe fn test_mm_mullo_epi16() {
3322 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3323 let r = _mm_mullo_epi16(a, b);
3324 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3325 }
3326
3327 #[simd_test(enable = "sse2")]
3328 unsafe fn test_mm_mul_epu32() {
3329 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3330 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3331 let r = _mm_mul_epu32(a, b);
3332 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3333 assert_eq_m128i(r, e);
3334 }
3335
3336 #[simd_test(enable = "sse2")]
3337 unsafe fn test_mm_sad_epu8() {
3338 #[rustfmt::skip]
3339 let a = _mm_setr_epi8(
3340 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3341 1, 2, 3, 4,
3342 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3343 1, 2, 3, 4,
3344 );
3345 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3346 let r = _mm_sad_epu8(a, b);
3347 let e = _mm_setr_epi64x(1020, 614);
3348 assert_eq_m128i(r, e);
3349 }
3350
3351 #[simd_test(enable = "sse2")]
3352 unsafe fn test_mm_sub_epi8() {
3353 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3354 let r = _mm_sub_epi8(a, b);
3355 assert_eq_m128i(r, _mm_set1_epi8(-1));
3356 }
3357
3358 #[simd_test(enable = "sse2")]
3359 unsafe fn test_mm_sub_epi16() {
3360 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3361 let r = _mm_sub_epi16(a, b);
3362 assert_eq_m128i(r, _mm_set1_epi16(-1));
3363 }
3364
3365 #[simd_test(enable = "sse2")]
3366 unsafe fn test_mm_sub_epi32() {
3367 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3368 let r = _mm_sub_epi32(a, b);
3369 assert_eq_m128i(r, _mm_set1_epi32(-1));
3370 }
3371
3372 #[simd_test(enable = "sse2")]
3373 unsafe fn test_mm_sub_epi64() {
3374 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3375 let r = _mm_sub_epi64(a, b);
3376 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3377 }
3378
3379 #[simd_test(enable = "sse2")]
3380 unsafe fn test_mm_subs_epi8() {
3381 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3382 let r = _mm_subs_epi8(a, b);
3383 assert_eq_m128i(r, _mm_set1_epi8(3));
3384 }
3385
3386 #[simd_test(enable = "sse2")]
3387 unsafe fn test_mm_subs_epi8_saturate_positive() {
3388 let a = _mm_set1_epi8(0x7F);
3389 let b = _mm_set1_epi8(-1);
3390 let r = _mm_subs_epi8(a, b);
3391 assert_eq_m128i(r, a);
3392 }
3393
3394 #[simd_test(enable = "sse2")]
3395 unsafe fn test_mm_subs_epi8_saturate_negative() {
3396 let a = _mm_set1_epi8(-0x80);
3397 let b = _mm_set1_epi8(1);
3398 let r = _mm_subs_epi8(a, b);
3399 assert_eq_m128i(r, a);
3400 }
3401
3402 #[simd_test(enable = "sse2")]
3403 unsafe fn test_mm_subs_epi16() {
3404 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3405 let r = _mm_subs_epi16(a, b);
3406 assert_eq_m128i(r, _mm_set1_epi16(3));
3407 }
3408
3409 #[simd_test(enable = "sse2")]
3410 unsafe fn test_mm_subs_epi16_saturate_positive() {
3411 let a = _mm_set1_epi16(0x7FFF);
3412 let b = _mm_set1_epi16(-1);
3413 let r = _mm_subs_epi16(a, b);
3414 assert_eq_m128i(r, a);
3415 }
3416
3417 #[simd_test(enable = "sse2")]
3418 unsafe fn test_mm_subs_epi16_saturate_negative() {
3419 let a = _mm_set1_epi16(-0x8000);
3420 let b = _mm_set1_epi16(1);
3421 let r = _mm_subs_epi16(a, b);
3422 assert_eq_m128i(r, a);
3423 }
3424
3425 #[simd_test(enable = "sse2")]
3426 unsafe fn test_mm_subs_epu8() {
3427 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3428 let r = _mm_subs_epu8(a, b);
3429 assert_eq_m128i(r, _mm_set1_epi8(3));
3430 }
3431
3432 #[simd_test(enable = "sse2")]
3433 unsafe fn test_mm_subs_epu8_saturate() {
3434 let a = _mm_set1_epi8(0);
3435 let b = _mm_set1_epi8(1);
3436 let r = _mm_subs_epu8(a, b);
3437 assert_eq_m128i(r, a);
3438 }
3439
3440 #[simd_test(enable = "sse2")]
3441 unsafe fn test_mm_subs_epu16() {
3442 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3443 let r = _mm_subs_epu16(a, b);
3444 assert_eq_m128i(r, _mm_set1_epi16(3));
3445 }
3446
3447 #[simd_test(enable = "sse2")]
3448 unsafe fn test_mm_subs_epu16_saturate() {
3449 let a = _mm_set1_epi16(0);
3450 let b = _mm_set1_epi16(1);
3451 let r = _mm_subs_epu16(a, b);
3452 assert_eq_m128i(r, a);
3453 }
3454
3455 #[simd_test(enable = "sse2")]
3456 unsafe fn test_mm_slli_si128() {
3457 #[rustfmt::skip]
3458 let a = _mm_setr_epi8(
3459 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3460 );
3461 let r = _mm_slli_si128::<1>(a);
3462 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3463 assert_eq_m128i(r, e);
3464
3465 #[rustfmt::skip]
3466 let a = _mm_setr_epi8(
3467 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3468 );
3469 let r = _mm_slli_si128::<15>(a);
3470 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3471 assert_eq_m128i(r, e);
3472
3473 #[rustfmt::skip]
3474 let a = _mm_setr_epi8(
3475 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3476 );
3477 let r = _mm_slli_si128::<16>(a);
3478 assert_eq_m128i(r, _mm_set1_epi8(0));
3479 }
3480
3481 #[simd_test(enable = "sse2")]
3482 unsafe fn test_mm_slli_epi16() {
3483 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3484 let r = _mm_slli_epi16::<4>(a);
3485 assert_eq_m128i(
3486 r,
3487 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3488 );
3489 let r = _mm_slli_epi16::<16>(a);
3490 assert_eq_m128i(r, _mm_set1_epi16(0));
3491 }
3492
3493 #[simd_test(enable = "sse2")]
3494 unsafe fn test_mm_sll_epi16() {
3495 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3496 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3497 assert_eq_m128i(
3498 r,
3499 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3500 );
3501 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3502 assert_eq_m128i(r, a);
3503 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3504 assert_eq_m128i(r, _mm_set1_epi16(0));
3505 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3506 assert_eq_m128i(r, _mm_set1_epi16(0));
3507 }
3508
3509 #[simd_test(enable = "sse2")]
3510 unsafe fn test_mm_slli_epi32() {
3511 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3512 let r = _mm_slli_epi32::<4>(a);
3513 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3514 let r = _mm_slli_epi32::<32>(a);
3515 assert_eq_m128i(r, _mm_set1_epi32(0));
3516 }
3517
3518 #[simd_test(enable = "sse2")]
3519 unsafe fn test_mm_sll_epi32() {
3520 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3521 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3522 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3523 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3524 assert_eq_m128i(r, a);
3525 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3526 assert_eq_m128i(r, _mm_set1_epi32(0));
3527 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3528 assert_eq_m128i(r, _mm_set1_epi32(0));
3529 }
3530
3531 #[simd_test(enable = "sse2")]
3532 unsafe fn test_mm_slli_epi64() {
3533 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3534 let r = _mm_slli_epi64::<4>(a);
3535 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3536 let r = _mm_slli_epi64::<64>(a);
3537 assert_eq_m128i(r, _mm_set1_epi64x(0));
3538 }
3539
3540 #[simd_test(enable = "sse2")]
3541 unsafe fn test_mm_sll_epi64() {
3542 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3543 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3544 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3545 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3546 assert_eq_m128i(r, a);
3547 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3548 assert_eq_m128i(r, _mm_set1_epi64x(0));
3549 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3550 assert_eq_m128i(r, _mm_set1_epi64x(0));
3551 }
3552
3553 #[simd_test(enable = "sse2")]
3554 unsafe fn test_mm_srai_epi16() {
3555 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3556 let r = _mm_srai_epi16::<4>(a);
3557 assert_eq_m128i(
3558 r,
3559 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3560 );
3561 let r = _mm_srai_epi16::<16>(a);
3562 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3563 }
3564
3565 #[simd_test(enable = "sse2")]
3566 unsafe fn test_mm_sra_epi16() {
3567 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3568 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3569 assert_eq_m128i(
3570 r,
3571 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3572 );
3573 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3574 assert_eq_m128i(r, a);
3575 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3576 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3577 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3578 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3579 }
3580
3581 #[simd_test(enable = "sse2")]
3582 unsafe fn test_mm_srai_epi32() {
3583 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3584 let r = _mm_srai_epi32::<4>(a);
3585 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3586 let r = _mm_srai_epi32::<32>(a);
3587 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3588 }
3589
3590 #[simd_test(enable = "sse2")]
3591 unsafe fn test_mm_sra_epi32() {
3592 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3593 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3594 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3595 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3596 assert_eq_m128i(r, a);
3597 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3598 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3599 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3600 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3601 }
3602
3603 #[simd_test(enable = "sse2")]
3604 unsafe fn test_mm_srli_si128() {
3605 #[rustfmt::skip]
3606 let a = _mm_setr_epi8(
3607 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3608 );
3609 let r = _mm_srli_si128::<1>(a);
3610 #[rustfmt::skip]
3611 let e = _mm_setr_epi8(
3612 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3613 );
3614 assert_eq_m128i(r, e);
3615
3616 #[rustfmt::skip]
3617 let a = _mm_setr_epi8(
3618 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3619 );
3620 let r = _mm_srli_si128::<15>(a);
3621 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3622 assert_eq_m128i(r, e);
3623
3624 #[rustfmt::skip]
3625 let a = _mm_setr_epi8(
3626 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3627 );
3628 let r = _mm_srli_si128::<16>(a);
3629 assert_eq_m128i(r, _mm_set1_epi8(0));
3630 }
3631
3632 #[simd_test(enable = "sse2")]
3633 unsafe fn test_mm_srli_epi16() {
3634 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3635 let r = _mm_srli_epi16::<4>(a);
3636 assert_eq_m128i(
3637 r,
3638 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3639 );
3640 let r = _mm_srli_epi16::<16>(a);
3641 assert_eq_m128i(r, _mm_set1_epi16(0));
3642 }
3643
3644 #[simd_test(enable = "sse2")]
3645 unsafe fn test_mm_srl_epi16() {
3646 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3647 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3648 assert_eq_m128i(
3649 r,
3650 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3651 );
3652 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3653 assert_eq_m128i(r, a);
3654 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3655 assert_eq_m128i(r, _mm_set1_epi16(0));
3656 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3657 assert_eq_m128i(r, _mm_set1_epi16(0));
3658 }
3659
3660 #[simd_test(enable = "sse2")]
3661 unsafe fn test_mm_srli_epi32() {
3662 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3663 let r = _mm_srli_epi32::<4>(a);
3664 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3665 let r = _mm_srli_epi32::<32>(a);
3666 assert_eq_m128i(r, _mm_set1_epi32(0));
3667 }
3668
3669 #[simd_test(enable = "sse2")]
3670 unsafe fn test_mm_srl_epi32() {
3671 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3672 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3673 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3674 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3675 assert_eq_m128i(r, a);
3676 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3677 assert_eq_m128i(r, _mm_set1_epi32(0));
3678 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3679 assert_eq_m128i(r, _mm_set1_epi32(0));
3680 }
3681
3682 #[simd_test(enable = "sse2")]
3683 unsafe fn test_mm_srli_epi64() {
3684 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3685 let r = _mm_srli_epi64::<4>(a);
3686 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3687 let r = _mm_srli_epi64::<64>(a);
3688 assert_eq_m128i(r, _mm_set1_epi64x(0));
3689 }
3690
3691 #[simd_test(enable = "sse2")]
3692 unsafe fn test_mm_srl_epi64() {
3693 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3694 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3695 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3696 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3697 assert_eq_m128i(r, a);
3698 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3699 assert_eq_m128i(r, _mm_set1_epi64x(0));
3700 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3701 assert_eq_m128i(r, _mm_set1_epi64x(0));
3702 }
3703
3704 #[simd_test(enable = "sse2")]
3705 unsafe fn test_mm_and_si128() {
3706 let a = _mm_set1_epi8(5);
3707 let b = _mm_set1_epi8(3);
3708 let r = _mm_and_si128(a, b);
3709 assert_eq_m128i(r, _mm_set1_epi8(1));
3710 }
3711
3712 #[simd_test(enable = "sse2")]
3713 unsafe fn test_mm_andnot_si128() {
3714 let a = _mm_set1_epi8(5);
3715 let b = _mm_set1_epi8(3);
3716 let r = _mm_andnot_si128(a, b);
3717 assert_eq_m128i(r, _mm_set1_epi8(2));
3718 }
3719
3720 #[simd_test(enable = "sse2")]
3721 unsafe fn test_mm_or_si128() {
3722 let a = _mm_set1_epi8(5);
3723 let b = _mm_set1_epi8(3);
3724 let r = _mm_or_si128(a, b);
3725 assert_eq_m128i(r, _mm_set1_epi8(7));
3726 }
3727
3728 #[simd_test(enable = "sse2")]
3729 unsafe fn test_mm_xor_si128() {
3730 let a = _mm_set1_epi8(5);
3731 let b = _mm_set1_epi8(3);
3732 let r = _mm_xor_si128(a, b);
3733 assert_eq_m128i(r, _mm_set1_epi8(6));
3734 }
3735
3736 #[simd_test(enable = "sse2")]
3737 unsafe fn test_mm_cmpeq_epi8() {
3738 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3739 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3740 let r = _mm_cmpeq_epi8(a, b);
3741 #[rustfmt::skip]
3742 assert_eq_m128i(
3743 r,
3744 _mm_setr_epi8(
3745 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3746 )
3747 );
3748 }
3749
3750 #[simd_test(enable = "sse2")]
3751 unsafe fn test_mm_cmpeq_epi16() {
3752 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3753 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3754 let r = _mm_cmpeq_epi16(a, b);
3755 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3756 }
3757
3758 #[simd_test(enable = "sse2")]
3759 unsafe fn test_mm_cmpeq_epi32() {
3760 let a = _mm_setr_epi32(0, 1, 2, 3);
3761 let b = _mm_setr_epi32(3, 2, 2, 0);
3762 let r = _mm_cmpeq_epi32(a, b);
3763 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3764 }
3765
3766 #[simd_test(enable = "sse2")]
3767 unsafe fn test_mm_cmpgt_epi8() {
3768 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3769 let b = _mm_set1_epi8(0);
3770 let r = _mm_cmpgt_epi8(a, b);
3771 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3772 assert_eq_m128i(r, e);
3773 }
3774
3775 #[simd_test(enable = "sse2")]
3776 unsafe fn test_mm_cmpgt_epi16() {
3777 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3778 let b = _mm_set1_epi16(0);
3779 let r = _mm_cmpgt_epi16(a, b);
3780 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3781 assert_eq_m128i(r, e);
3782 }
3783
3784 #[simd_test(enable = "sse2")]
3785 unsafe fn test_mm_cmpgt_epi32() {
3786 let a = _mm_set_epi32(5, 0, 0, 0);
3787 let b = _mm_set1_epi32(0);
3788 let r = _mm_cmpgt_epi32(a, b);
3789 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3790 }
3791
3792 #[simd_test(enable = "sse2")]
3793 unsafe fn test_mm_cmplt_epi8() {
3794 let a = _mm_set1_epi8(0);
3795 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3796 let r = _mm_cmplt_epi8(a, b);
3797 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3798 assert_eq_m128i(r, e);
3799 }
3800
3801 #[simd_test(enable = "sse2")]
3802 unsafe fn test_mm_cmplt_epi16() {
3803 let a = _mm_set1_epi16(0);
3804 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3805 let r = _mm_cmplt_epi16(a, b);
3806 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3807 assert_eq_m128i(r, e);
3808 }
3809
3810 #[simd_test(enable = "sse2")]
3811 unsafe fn test_mm_cmplt_epi32() {
3812 let a = _mm_set1_epi32(0);
3813 let b = _mm_set_epi32(5, 0, 0, 0);
3814 let r = _mm_cmplt_epi32(a, b);
3815 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3816 }
3817
3818 #[simd_test(enable = "sse2")]
3819 unsafe fn test_mm_cvtepi32_pd() {
3820 let a = _mm_set_epi32(35, 25, 15, 5);
3821 let r = _mm_cvtepi32_pd(a);
3822 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3823 }
3824
3825 #[simd_test(enable = "sse2")]
3826 unsafe fn test_mm_cvtsi32_sd() {
3827 let a = _mm_set1_pd(3.5);
3828 let r = _mm_cvtsi32_sd(a, 5);
3829 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3830 }
3831
3832 #[simd_test(enable = "sse2")]
3833 unsafe fn test_mm_cvtepi32_ps() {
3834 let a = _mm_setr_epi32(1, 2, 3, 4);
3835 let r = _mm_cvtepi32_ps(a);
3836 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3837 }
3838
3839 #[simd_test(enable = "sse2")]
3840 unsafe fn test_mm_cvtps_epi32() {
3841 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3842 let r = _mm_cvtps_epi32(a);
3843 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3844 }
3845
3846 #[simd_test(enable = "sse2")]
3847 unsafe fn test_mm_cvtsi32_si128() {
3848 let r = _mm_cvtsi32_si128(5);
3849 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3850 }
3851
3852 #[simd_test(enable = "sse2")]
3853 unsafe fn test_mm_cvtsi128_si32() {
3854 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3855 assert_eq!(r, 5);
3856 }
3857
3858 #[simd_test(enable = "sse2")]
3859 unsafe fn test_mm_set_epi64x() {
3860 let r = _mm_set_epi64x(0, 1);
3861 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3862 }
3863
3864 #[simd_test(enable = "sse2")]
3865 unsafe fn test_mm_set_epi32() {
3866 let r = _mm_set_epi32(0, 1, 2, 3);
3867 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3868 }
3869
3870 #[simd_test(enable = "sse2")]
3871 unsafe fn test_mm_set_epi16() {
3872 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3873 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3874 }
3875
3876 #[simd_test(enable = "sse2")]
3877 unsafe fn test_mm_set_epi8() {
3878 #[rustfmt::skip]
3879 let r = _mm_set_epi8(
3880 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3881 );
3882 #[rustfmt::skip]
3883 let e = _mm_setr_epi8(
3884 15, 14, 13, 12, 11, 10, 9, 8,
3885 7, 6, 5, 4, 3, 2, 1, 0,
3886 );
3887 assert_eq_m128i(r, e);
3888 }
3889
3890 #[simd_test(enable = "sse2")]
3891 unsafe fn test_mm_set1_epi64x() {
3892 let r = _mm_set1_epi64x(1);
3893 assert_eq_m128i(r, _mm_set1_epi64x(1));
3894 }
3895
3896 #[simd_test(enable = "sse2")]
3897 unsafe fn test_mm_set1_epi32() {
3898 let r = _mm_set1_epi32(1);
3899 assert_eq_m128i(r, _mm_set1_epi32(1));
3900 }
3901
3902 #[simd_test(enable = "sse2")]
3903 unsafe fn test_mm_set1_epi16() {
3904 let r = _mm_set1_epi16(1);
3905 assert_eq_m128i(r, _mm_set1_epi16(1));
3906 }
3907
3908 #[simd_test(enable = "sse2")]
3909 unsafe fn test_mm_set1_epi8() {
3910 let r = _mm_set1_epi8(1);
3911 assert_eq_m128i(r, _mm_set1_epi8(1));
3912 }
3913
3914 #[simd_test(enable = "sse2")]
3915 unsafe fn test_mm_setr_epi32() {
3916 let r = _mm_setr_epi32(0, 1, 2, 3);
3917 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
3918 }
3919
3920 #[simd_test(enable = "sse2")]
3921 unsafe fn test_mm_setr_epi16() {
3922 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3923 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
3924 }
3925
3926 #[simd_test(enable = "sse2")]
3927 unsafe fn test_mm_setr_epi8() {
3928 #[rustfmt::skip]
3929 let r = _mm_setr_epi8(
3930 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3931 );
3932 #[rustfmt::skip]
3933 let e = _mm_setr_epi8(
3934 0, 1, 2, 3, 4, 5, 6, 7,
3935 8, 9, 10, 11, 12, 13, 14, 15,
3936 );
3937 assert_eq_m128i(r, e);
3938 }
3939
3940 #[simd_test(enable = "sse2")]
3941 unsafe fn test_mm_setzero_si128() {
3942 let r = _mm_setzero_si128();
3943 assert_eq_m128i(r, _mm_set1_epi64x(0));
3944 }
3945
3946 #[simd_test(enable = "sse2")]
3947 unsafe fn test_mm_loadl_epi64() {
3948 let a = _mm_setr_epi64x(6, 5);
3949 let r = _mm_loadl_epi64(ptr::addr_of!(a));
3950 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
3951 }
3952
3953 #[simd_test(enable = "sse2")]
3954 unsafe fn test_mm_load_si128() {
3955 let a = _mm_set_epi64x(5, 6);
3956 let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
3957 assert_eq_m128i(a, r);
3958 }
3959
3960 #[simd_test(enable = "sse2")]
3961 unsafe fn test_mm_loadu_si128() {
3962 let a = _mm_set_epi64x(5, 6);
3963 let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
3964 assert_eq_m128i(a, r);
3965 }
3966
3967 #[simd_test(enable = "sse2")]
3968 #[cfg_attr(miri, ignore)]
3971 unsafe fn test_mm_maskmoveu_si128() {
3972 let a = _mm_set1_epi8(9);
3973 #[rustfmt::skip]
3974 let mask = _mm_set_epi8(
3975 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
3976 0, 0, 0, 0, 0, 0, 0, 0,
3977 );
3978 let mut r = _mm_set1_epi8(0);
3979 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
3980 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3981 assert_eq_m128i(r, e);
3982 }
3983
3984 #[simd_test(enable = "sse2")]
3985 unsafe fn test_mm_store_si128() {
3986 let a = _mm_set1_epi8(9);
3987 let mut r = _mm_set1_epi8(0);
3988 _mm_store_si128(&mut r, a);
3989 assert_eq_m128i(r, a);
3990 }
3991
3992 #[simd_test(enable = "sse2")]
3993 unsafe fn test_mm_storeu_si128() {
3994 let a = _mm_set1_epi8(9);
3995 let mut r = _mm_set1_epi8(0);
3996 _mm_storeu_si128(&mut r, a);
3997 assert_eq_m128i(r, a);
3998 }
3999
4000 #[simd_test(enable = "sse2")]
4001 unsafe fn test_mm_storel_epi64() {
4002 let a = _mm_setr_epi64x(2, 9);
4003 let mut r = _mm_set1_epi8(0);
4004 _mm_storel_epi64(&mut r, a);
4005 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4006 }
4007
4008 #[simd_test(enable = "sse2")]
4009 #[cfg_attr(miri, ignore)]
4012 unsafe fn test_mm_stream_si128() {
4013 let a = _mm_setr_epi32(1, 2, 3, 4);
4014 let mut r = _mm_undefined_si128();
4015 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4016 assert_eq_m128i(r, a);
4017 }
4018
4019 #[simd_test(enable = "sse2")]
4020 #[cfg_attr(miri, ignore)]
4023 unsafe fn test_mm_stream_si32() {
4024 let a: i32 = 7;
4025 let mut mem = boxed::Box::<i32>::new(-1);
4026 _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4027 assert_eq!(a, *mem);
4028 }
4029
4030 #[simd_test(enable = "sse2")]
4031 unsafe fn test_mm_move_epi64() {
4032 let a = _mm_setr_epi64x(5, 6);
4033 let r = _mm_move_epi64(a);
4034 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4035 }
4036
4037 #[simd_test(enable = "sse2")]
4038 unsafe fn test_mm_packs_epi16() {
4039 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4040 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4041 let r = _mm_packs_epi16(a, b);
4042 #[rustfmt::skip]
4043 assert_eq_m128i(
4044 r,
4045 _mm_setr_epi8(
4046 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4047 )
4048 );
4049 }
4050
4051 #[simd_test(enable = "sse2")]
4052 unsafe fn test_mm_packs_epi32() {
4053 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4054 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4055 let r = _mm_packs_epi32(a, b);
4056 assert_eq_m128i(
4057 r,
4058 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4059 );
4060 }
4061
4062 #[simd_test(enable = "sse2")]
4063 unsafe fn test_mm_packus_epi16() {
4064 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4065 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4066 let r = _mm_packus_epi16(a, b);
4067 assert_eq_m128i(
4068 r,
4069 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4070 );
4071 }
4072
4073 #[simd_test(enable = "sse2")]
4074 unsafe fn test_mm_extract_epi16() {
4075 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4076 let r1 = _mm_extract_epi16::<0>(a);
4077 let r2 = _mm_extract_epi16::<3>(a);
4078 assert_eq!(r1, 0xFFFF);
4079 assert_eq!(r2, 3);
4080 }
4081
4082 #[simd_test(enable = "sse2")]
4083 unsafe fn test_mm_insert_epi16() {
4084 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4085 let r = _mm_insert_epi16::<0>(a, 9);
4086 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4087 assert_eq_m128i(r, e);
4088 }
4089
4090 #[simd_test(enable = "sse2")]
4091 unsafe fn test_mm_movemask_epi8() {
4092 #[rustfmt::skip]
4093 let a = _mm_setr_epi8(
4094 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4095 0b0101, 0b1111_0000u8 as i8, 0, 0,
4096 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4097 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4098 );
4099 let r = _mm_movemask_epi8(a);
4100 assert_eq!(r, 0b10100110_00100101);
4101 }
4102
4103 #[simd_test(enable = "sse2")]
4104 unsafe fn test_mm_shuffle_epi32() {
4105 let a = _mm_setr_epi32(5, 10, 15, 20);
4106 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4107 let e = _mm_setr_epi32(20, 10, 10, 5);
4108 assert_eq_m128i(r, e);
4109 }
4110
4111 #[simd_test(enable = "sse2")]
4112 unsafe fn test_mm_shufflehi_epi16() {
4113 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4114 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4115 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4116 assert_eq_m128i(r, e);
4117 }
4118
4119 #[simd_test(enable = "sse2")]
4120 unsafe fn test_mm_shufflelo_epi16() {
4121 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4122 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4123 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4124 assert_eq_m128i(r, e);
4125 }
4126
4127 #[simd_test(enable = "sse2")]
4128 unsafe fn test_mm_unpackhi_epi8() {
4129 #[rustfmt::skip]
4130 let a = _mm_setr_epi8(
4131 0, 1, 2, 3, 4, 5, 6, 7,
4132 8, 9, 10, 11, 12, 13, 14, 15,
4133 );
4134 #[rustfmt::skip]
4135 let b = _mm_setr_epi8(
4136 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4137 );
4138 let r = _mm_unpackhi_epi8(a, b);
4139 #[rustfmt::skip]
4140 let e = _mm_setr_epi8(
4141 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4142 );
4143 assert_eq_m128i(r, e);
4144 }
4145
4146 #[simd_test(enable = "sse2")]
4147 unsafe fn test_mm_unpackhi_epi16() {
4148 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4149 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4150 let r = _mm_unpackhi_epi16(a, b);
4151 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4152 assert_eq_m128i(r, e);
4153 }
4154
4155 #[simd_test(enable = "sse2")]
4156 unsafe fn test_mm_unpackhi_epi32() {
4157 let a = _mm_setr_epi32(0, 1, 2, 3);
4158 let b = _mm_setr_epi32(4, 5, 6, 7);
4159 let r = _mm_unpackhi_epi32(a, b);
4160 let e = _mm_setr_epi32(2, 6, 3, 7);
4161 assert_eq_m128i(r, e);
4162 }
4163
4164 #[simd_test(enable = "sse2")]
4165 unsafe fn test_mm_unpackhi_epi64() {
4166 let a = _mm_setr_epi64x(0, 1);
4167 let b = _mm_setr_epi64x(2, 3);
4168 let r = _mm_unpackhi_epi64(a, b);
4169 let e = _mm_setr_epi64x(1, 3);
4170 assert_eq_m128i(r, e);
4171 }
4172
4173 #[simd_test(enable = "sse2")]
4174 unsafe fn test_mm_unpacklo_epi8() {
4175 #[rustfmt::skip]
4176 let a = _mm_setr_epi8(
4177 0, 1, 2, 3, 4, 5, 6, 7,
4178 8, 9, 10, 11, 12, 13, 14, 15,
4179 );
4180 #[rustfmt::skip]
4181 let b = _mm_setr_epi8(
4182 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4183 );
4184 let r = _mm_unpacklo_epi8(a, b);
4185 #[rustfmt::skip]
4186 let e = _mm_setr_epi8(
4187 0, 16, 1, 17, 2, 18, 3, 19,
4188 4, 20, 5, 21, 6, 22, 7, 23,
4189 );
4190 assert_eq_m128i(r, e);
4191 }
4192
4193 #[simd_test(enable = "sse2")]
4194 unsafe fn test_mm_unpacklo_epi16() {
4195 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4196 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4197 let r = _mm_unpacklo_epi16(a, b);
4198 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4199 assert_eq_m128i(r, e);
4200 }
4201
4202 #[simd_test(enable = "sse2")]
4203 unsafe fn test_mm_unpacklo_epi32() {
4204 let a = _mm_setr_epi32(0, 1, 2, 3);
4205 let b = _mm_setr_epi32(4, 5, 6, 7);
4206 let r = _mm_unpacklo_epi32(a, b);
4207 let e = _mm_setr_epi32(0, 4, 1, 5);
4208 assert_eq_m128i(r, e);
4209 }
4210
4211 #[simd_test(enable = "sse2")]
4212 unsafe fn test_mm_unpacklo_epi64() {
4213 let a = _mm_setr_epi64x(0, 1);
4214 let b = _mm_setr_epi64x(2, 3);
4215 let r = _mm_unpacklo_epi64(a, b);
4216 let e = _mm_setr_epi64x(0, 2);
4217 assert_eq_m128i(r, e);
4218 }
4219
4220 #[simd_test(enable = "sse2")]
4221 unsafe fn test_mm_add_sd() {
4222 let a = _mm_setr_pd(1.0, 2.0);
4223 let b = _mm_setr_pd(5.0, 10.0);
4224 let r = _mm_add_sd(a, b);
4225 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4226 }
4227
4228 #[simd_test(enable = "sse2")]
4229 unsafe fn test_mm_add_pd() {
4230 let a = _mm_setr_pd(1.0, 2.0);
4231 let b = _mm_setr_pd(5.0, 10.0);
4232 let r = _mm_add_pd(a, b);
4233 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4234 }
4235
4236 #[simd_test(enable = "sse2")]
4237 unsafe fn test_mm_div_sd() {
4238 let a = _mm_setr_pd(1.0, 2.0);
4239 let b = _mm_setr_pd(5.0, 10.0);
4240 let r = _mm_div_sd(a, b);
4241 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4242 }
4243
4244 #[simd_test(enable = "sse2")]
4245 unsafe fn test_mm_div_pd() {
4246 let a = _mm_setr_pd(1.0, 2.0);
4247 let b = _mm_setr_pd(5.0, 10.0);
4248 let r = _mm_div_pd(a, b);
4249 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4250 }
4251
4252 #[simd_test(enable = "sse2")]
4253 unsafe fn test_mm_max_sd() {
4254 let a = _mm_setr_pd(1.0, 2.0);
4255 let b = _mm_setr_pd(5.0, 10.0);
4256 let r = _mm_max_sd(a, b);
4257 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4258 }
4259
4260 #[simd_test(enable = "sse2")]
4261 unsafe fn test_mm_max_pd() {
4262 let a = _mm_setr_pd(1.0, 2.0);
4263 let b = _mm_setr_pd(5.0, 10.0);
4264 let r = _mm_max_pd(a, b);
4265 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4266
4267 let a = _mm_setr_pd(-0.0, 0.0);
4269 let b = _mm_setr_pd(0.0, 0.0);
4270 let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4271 let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4272 let a: [u8; 16] = transmute(a);
4273 let b: [u8; 16] = transmute(b);
4274 assert_eq!(r1, b);
4275 assert_eq!(r2, a);
4276 assert_ne!(a, b); }
4278
4279 #[simd_test(enable = "sse2")]
4280 unsafe fn test_mm_min_sd() {
4281 let a = _mm_setr_pd(1.0, 2.0);
4282 let b = _mm_setr_pd(5.0, 10.0);
4283 let r = _mm_min_sd(a, b);
4284 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4285 }
4286
4287 #[simd_test(enable = "sse2")]
4288 unsafe fn test_mm_min_pd() {
4289 let a = _mm_setr_pd(1.0, 2.0);
4290 let b = _mm_setr_pd(5.0, 10.0);
4291 let r = _mm_min_pd(a, b);
4292 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4293
4294 let a = _mm_setr_pd(-0.0, 0.0);
4296 let b = _mm_setr_pd(0.0, 0.0);
4297 let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4298 let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4299 let a: [u8; 16] = transmute(a);
4300 let b: [u8; 16] = transmute(b);
4301 assert_eq!(r1, b);
4302 assert_eq!(r2, a);
4303 assert_ne!(a, b); }
4305
4306 #[simd_test(enable = "sse2")]
4307 unsafe fn test_mm_mul_sd() {
4308 let a = _mm_setr_pd(1.0, 2.0);
4309 let b = _mm_setr_pd(5.0, 10.0);
4310 let r = _mm_mul_sd(a, b);
4311 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4312 }
4313
4314 #[simd_test(enable = "sse2")]
4315 unsafe fn test_mm_mul_pd() {
4316 let a = _mm_setr_pd(1.0, 2.0);
4317 let b = _mm_setr_pd(5.0, 10.0);
4318 let r = _mm_mul_pd(a, b);
4319 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4320 }
4321
4322 #[simd_test(enable = "sse2")]
4323 unsafe fn test_mm_sqrt_sd() {
4324 let a = _mm_setr_pd(1.0, 2.0);
4325 let b = _mm_setr_pd(5.0, 10.0);
4326 let r = _mm_sqrt_sd(a, b);
4327 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4328 }
4329
4330 #[simd_test(enable = "sse2")]
4331 unsafe fn test_mm_sqrt_pd() {
4332 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4333 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4334 }
4335
4336 #[simd_test(enable = "sse2")]
4337 unsafe fn test_mm_sub_sd() {
4338 let a = _mm_setr_pd(1.0, 2.0);
4339 let b = _mm_setr_pd(5.0, 10.0);
4340 let r = _mm_sub_sd(a, b);
4341 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4342 }
4343
4344 #[simd_test(enable = "sse2")]
4345 unsafe fn test_mm_sub_pd() {
4346 let a = _mm_setr_pd(1.0, 2.0);
4347 let b = _mm_setr_pd(5.0, 10.0);
4348 let r = _mm_sub_pd(a, b);
4349 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4350 }
4351
4352 #[simd_test(enable = "sse2")]
4353 unsafe fn test_mm_and_pd() {
4354 let a = transmute(u64x2::splat(5));
4355 let b = transmute(u64x2::splat(3));
4356 let r = _mm_and_pd(a, b);
4357 let e = transmute(u64x2::splat(1));
4358 assert_eq_m128d(r, e);
4359 }
4360
4361 #[simd_test(enable = "sse2")]
4362 unsafe fn test_mm_andnot_pd() {
4363 let a = transmute(u64x2::splat(5));
4364 let b = transmute(u64x2::splat(3));
4365 let r = _mm_andnot_pd(a, b);
4366 let e = transmute(u64x2::splat(2));
4367 assert_eq_m128d(r, e);
4368 }
4369
4370 #[simd_test(enable = "sse2")]
4371 unsafe fn test_mm_or_pd() {
4372 let a = transmute(u64x2::splat(5));
4373 let b = transmute(u64x2::splat(3));
4374 let r = _mm_or_pd(a, b);
4375 let e = transmute(u64x2::splat(7));
4376 assert_eq_m128d(r, e);
4377 }
4378
4379 #[simd_test(enable = "sse2")]
4380 unsafe fn test_mm_xor_pd() {
4381 let a = transmute(u64x2::splat(5));
4382 let b = transmute(u64x2::splat(3));
4383 let r = _mm_xor_pd(a, b);
4384 let e = transmute(u64x2::splat(6));
4385 assert_eq_m128d(r, e);
4386 }
4387
4388 #[simd_test(enable = "sse2")]
4389 unsafe fn test_mm_cmpeq_sd() {
4390 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4391 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4392 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4393 assert_eq_m128i(r, e);
4394 }
4395
4396 #[simd_test(enable = "sse2")]
4397 unsafe fn test_mm_cmplt_sd() {
4398 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4399 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4400 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4401 assert_eq_m128i(r, e);
4402 }
4403
4404 #[simd_test(enable = "sse2")]
4405 unsafe fn test_mm_cmple_sd() {
4406 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4407 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4408 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4409 assert_eq_m128i(r, e);
4410 }
4411
4412 #[simd_test(enable = "sse2")]
4413 unsafe fn test_mm_cmpgt_sd() {
4414 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4415 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4416 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4417 assert_eq_m128i(r, e);
4418 }
4419
4420 #[simd_test(enable = "sse2")]
4421 unsafe fn test_mm_cmpge_sd() {
4422 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4423 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4424 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4425 assert_eq_m128i(r, e);
4426 }
4427
4428 #[simd_test(enable = "sse2")]
4429 unsafe fn test_mm_cmpord_sd() {
4430 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4431 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4432 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4433 assert_eq_m128i(r, e);
4434 }
4435
4436 #[simd_test(enable = "sse2")]
4437 unsafe fn test_mm_cmpunord_sd() {
4438 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4439 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4440 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4441 assert_eq_m128i(r, e);
4442 }
4443
4444 #[simd_test(enable = "sse2")]
4445 unsafe fn test_mm_cmpneq_sd() {
4446 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4447 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4448 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4449 assert_eq_m128i(r, e);
4450 }
4451
4452 #[simd_test(enable = "sse2")]
4453 unsafe fn test_mm_cmpnlt_sd() {
4454 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4455 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4456 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4457 assert_eq_m128i(r, e);
4458 }
4459
4460 #[simd_test(enable = "sse2")]
4461 unsafe fn test_mm_cmpnle_sd() {
4462 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4463 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4464 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4465 assert_eq_m128i(r, e);
4466 }
4467
4468 #[simd_test(enable = "sse2")]
4469 unsafe fn test_mm_cmpngt_sd() {
4470 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4471 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4472 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4473 assert_eq_m128i(r, e);
4474 }
4475
4476 #[simd_test(enable = "sse2")]
4477 unsafe fn test_mm_cmpnge_sd() {
4478 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4479 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4480 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4481 assert_eq_m128i(r, e);
4482 }
4483
4484 #[simd_test(enable = "sse2")]
4485 unsafe fn test_mm_cmpeq_pd() {
4486 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4487 let e = _mm_setr_epi64x(!0, 0);
4488 let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4489 assert_eq_m128i(r, e);
4490 }
4491
4492 #[simd_test(enable = "sse2")]
4493 unsafe fn test_mm_cmplt_pd() {
4494 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4495 let e = _mm_setr_epi64x(0, !0);
4496 let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4497 assert_eq_m128i(r, e);
4498 }
4499
4500 #[simd_test(enable = "sse2")]
4501 unsafe fn test_mm_cmple_pd() {
4502 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4503 let e = _mm_setr_epi64x(!0, !0);
4504 let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4505 assert_eq_m128i(r, e);
4506 }
4507
4508 #[simd_test(enable = "sse2")]
4509 unsafe fn test_mm_cmpgt_pd() {
4510 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4511 let e = _mm_setr_epi64x(0, 0);
4512 let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4513 assert_eq_m128i(r, e);
4514 }
4515
4516 #[simd_test(enable = "sse2")]
4517 unsafe fn test_mm_cmpge_pd() {
4518 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4519 let e = _mm_setr_epi64x(!0, 0);
4520 let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4521 assert_eq_m128i(r, e);
4522 }
4523
4524 #[simd_test(enable = "sse2")]
4525 unsafe fn test_mm_cmpord_pd() {
4526 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4527 let e = _mm_setr_epi64x(0, !0);
4528 let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4529 assert_eq_m128i(r, e);
4530 }
4531
4532 #[simd_test(enable = "sse2")]
4533 unsafe fn test_mm_cmpunord_pd() {
4534 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4535 let e = _mm_setr_epi64x(!0, 0);
4536 let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4537 assert_eq_m128i(r, e);
4538 }
4539
4540 #[simd_test(enable = "sse2")]
4541 unsafe fn test_mm_cmpneq_pd() {
4542 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4543 let e = _mm_setr_epi64x(!0, !0);
4544 let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4545 assert_eq_m128i(r, e);
4546 }
4547
4548 #[simd_test(enable = "sse2")]
4549 unsafe fn test_mm_cmpnlt_pd() {
4550 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4551 let e = _mm_setr_epi64x(0, 0);
4552 let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4553 assert_eq_m128i(r, e);
4554 }
4555
4556 #[simd_test(enable = "sse2")]
4557 unsafe fn test_mm_cmpnle_pd() {
4558 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4559 let e = _mm_setr_epi64x(0, 0);
4560 let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4561 assert_eq_m128i(r, e);
4562 }
4563
4564 #[simd_test(enable = "sse2")]
4565 unsafe fn test_mm_cmpngt_pd() {
4566 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4567 let e = _mm_setr_epi64x(0, !0);
4568 let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4569 assert_eq_m128i(r, e);
4570 }
4571
4572 #[simd_test(enable = "sse2")]
4573 unsafe fn test_mm_cmpnge_pd() {
4574 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4575 let e = _mm_setr_epi64x(0, !0);
4576 let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4577 assert_eq_m128i(r, e);
4578 }
4579
4580 #[simd_test(enable = "sse2")]
4581 unsafe fn test_mm_comieq_sd() {
4582 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4583 assert!(_mm_comieq_sd(a, b) != 0);
4584
4585 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4586 assert!(_mm_comieq_sd(a, b) == 0);
4587 }
4588
4589 #[simd_test(enable = "sse2")]
4590 unsafe fn test_mm_comilt_sd() {
4591 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4592 assert!(_mm_comilt_sd(a, b) == 0);
4593 }
4594
4595 #[simd_test(enable = "sse2")]
4596 unsafe fn test_mm_comile_sd() {
4597 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4598 assert!(_mm_comile_sd(a, b) != 0);
4599 }
4600
4601 #[simd_test(enable = "sse2")]
4602 unsafe fn test_mm_comigt_sd() {
4603 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4604 assert!(_mm_comigt_sd(a, b) == 0);
4605 }
4606
4607 #[simd_test(enable = "sse2")]
4608 unsafe fn test_mm_comige_sd() {
4609 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4610 assert!(_mm_comige_sd(a, b) != 0);
4611 }
4612
4613 #[simd_test(enable = "sse2")]
4614 unsafe fn test_mm_comineq_sd() {
4615 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4616 assert!(_mm_comineq_sd(a, b) == 0);
4617 }
4618
4619 #[simd_test(enable = "sse2")]
4620 unsafe fn test_mm_ucomieq_sd() {
4621 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4622 assert!(_mm_ucomieq_sd(a, b) != 0);
4623
4624 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4625 assert!(_mm_ucomieq_sd(a, b) == 0);
4626 }
4627
4628 #[simd_test(enable = "sse2")]
4629 unsafe fn test_mm_ucomilt_sd() {
4630 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4631 assert!(_mm_ucomilt_sd(a, b) == 0);
4632 }
4633
4634 #[simd_test(enable = "sse2")]
4635 unsafe fn test_mm_ucomile_sd() {
4636 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4637 assert!(_mm_ucomile_sd(a, b) != 0);
4638 }
4639
4640 #[simd_test(enable = "sse2")]
4641 unsafe fn test_mm_ucomigt_sd() {
4642 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4643 assert!(_mm_ucomigt_sd(a, b) == 0);
4644 }
4645
4646 #[simd_test(enable = "sse2")]
4647 unsafe fn test_mm_ucomige_sd() {
4648 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4649 assert!(_mm_ucomige_sd(a, b) != 0);
4650 }
4651
4652 #[simd_test(enable = "sse2")]
4653 unsafe fn test_mm_ucomineq_sd() {
4654 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4655 assert!(_mm_ucomineq_sd(a, b) == 0);
4656 }
4657
4658 #[simd_test(enable = "sse2")]
4659 unsafe fn test_mm_movemask_pd() {
4660 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4661 assert_eq!(r, 0b01);
4662
4663 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4664 assert_eq!(r, 0b11);
4665 }
4666
4667 #[repr(align(16))]
4668 struct Memory {
4669 data: [f64; 4],
4670 }
4671
4672 #[simd_test(enable = "sse2")]
4673 unsafe fn test_mm_load_pd() {
4674 let mem = Memory {
4675 data: [1.0f64, 2.0, 3.0, 4.0],
4676 };
4677 let vals = &mem.data;
4678 let d = vals.as_ptr();
4679
4680 let r = _mm_load_pd(d);
4681 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4682 }
4683
4684 #[simd_test(enable = "sse2")]
4685 unsafe fn test_mm_load_sd() {
4686 let a = 1.;
4687 let expected = _mm_setr_pd(a, 0.);
4688 let r = _mm_load_sd(&a);
4689 assert_eq_m128d(r, expected);
4690 }
4691
4692 #[simd_test(enable = "sse2")]
4693 unsafe fn test_mm_loadh_pd() {
4694 let a = _mm_setr_pd(1., 2.);
4695 let b = 3.;
4696 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4697 let r = _mm_loadh_pd(a, &b);
4698 assert_eq_m128d(r, expected);
4699 }
4700
4701 #[simd_test(enable = "sse2")]
4702 unsafe fn test_mm_loadl_pd() {
4703 let a = _mm_setr_pd(1., 2.);
4704 let b = 3.;
4705 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4706 let r = _mm_loadl_pd(a, &b);
4707 assert_eq_m128d(r, expected);
4708 }
4709
4710 #[simd_test(enable = "sse2")]
4711 #[cfg_attr(miri, ignore)]
4714 unsafe fn test_mm_stream_pd() {
4715 #[repr(align(128))]
4716 struct Memory {
4717 pub data: [f64; 2],
4718 }
4719 let a = _mm_set1_pd(7.0);
4720 let mut mem = Memory { data: [-1.0; 2] };
4721
4722 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4723 for i in 0..2 {
4724 assert_eq!(mem.data[i], get_m128d(a, i));
4725 }
4726 }
4727
4728 #[simd_test(enable = "sse2")]
4729 unsafe fn test_mm_store_sd() {
4730 let mut dest = 0.;
4731 let a = _mm_setr_pd(1., 2.);
4732 _mm_store_sd(&mut dest, a);
4733 assert_eq!(dest, _mm_cvtsd_f64(a));
4734 }
4735
4736 #[simd_test(enable = "sse2")]
4737 unsafe fn test_mm_store_pd() {
4738 let mut mem = Memory { data: [0.0f64; 4] };
4739 let vals = &mut mem.data;
4740 let a = _mm_setr_pd(1.0, 2.0);
4741 let d = vals.as_mut_ptr();
4742
4743 _mm_store_pd(d, *black_box(&a));
4744 assert_eq!(vals[0], 1.0);
4745 assert_eq!(vals[1], 2.0);
4746 }
4747
4748 #[simd_test(enable = "sse2")]
4749 unsafe fn test_mm_storeu_pd() {
4750 let mut mem = Memory { data: [0.0f64; 4] };
4751 let vals = &mut mem.data;
4752 let a = _mm_setr_pd(1.0, 2.0);
4753
4754 let mut ofs = 0;
4755 let mut p = vals.as_mut_ptr();
4756
4757 if (p as usize) & 0xf == 0 {
4759 ofs = 1;
4760 p = p.add(1);
4761 }
4762
4763 _mm_storeu_pd(p, *black_box(&a));
4764
4765 if ofs > 0 {
4766 assert_eq!(vals[ofs - 1], 0.0);
4767 }
4768 assert_eq!(vals[ofs + 0], 1.0);
4769 assert_eq!(vals[ofs + 1], 2.0);
4770 }
4771
4772 #[simd_test(enable = "sse2")]
4773 unsafe fn test_mm_storeu_si16() {
4774 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4775 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
4776 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
4777 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
4778 assert_eq_m128i(r, e);
4779 }
4780
4781 #[simd_test(enable = "sse2")]
4782 unsafe fn test_mm_storeu_si32() {
4783 let a = _mm_setr_epi32(1, 2, 3, 4);
4784 let mut r = _mm_setr_epi32(5, 6, 7, 8);
4785 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
4786 let e = _mm_setr_epi32(1, 6, 7, 8);
4787 assert_eq_m128i(r, e);
4788 }
4789
4790 #[simd_test(enable = "sse2")]
4791 unsafe fn test_mm_storeu_si64() {
4792 let a = _mm_setr_epi64x(1, 2);
4793 let mut r = _mm_setr_epi64x(3, 4);
4794 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
4795 let e = _mm_setr_epi64x(1, 4);
4796 assert_eq_m128i(r, e);
4797 }
4798
4799 #[simd_test(enable = "sse2")]
4800 unsafe fn test_mm_store1_pd() {
4801 let mut mem = Memory { data: [0.0f64; 4] };
4802 let vals = &mut mem.data;
4803 let a = _mm_setr_pd(1.0, 2.0);
4804 let d = vals.as_mut_ptr();
4805
4806 _mm_store1_pd(d, *black_box(&a));
4807 assert_eq!(vals[0], 1.0);
4808 assert_eq!(vals[1], 1.0);
4809 }
4810
4811 #[simd_test(enable = "sse2")]
4812 unsafe fn test_mm_store_pd1() {
4813 let mut mem = Memory { data: [0.0f64; 4] };
4814 let vals = &mut mem.data;
4815 let a = _mm_setr_pd(1.0, 2.0);
4816 let d = vals.as_mut_ptr();
4817
4818 _mm_store_pd1(d, *black_box(&a));
4819 assert_eq!(vals[0], 1.0);
4820 assert_eq!(vals[1], 1.0);
4821 }
4822
4823 #[simd_test(enable = "sse2")]
4824 unsafe fn test_mm_storer_pd() {
4825 let mut mem = Memory { data: [0.0f64; 4] };
4826 let vals = &mut mem.data;
4827 let a = _mm_setr_pd(1.0, 2.0);
4828 let d = vals.as_mut_ptr();
4829
4830 _mm_storer_pd(d, *black_box(&a));
4831 assert_eq!(vals[0], 2.0);
4832 assert_eq!(vals[1], 1.0);
4833 }
4834
4835 #[simd_test(enable = "sse2")]
4836 unsafe fn test_mm_storeh_pd() {
4837 let mut dest = 0.;
4838 let a = _mm_setr_pd(1., 2.);
4839 _mm_storeh_pd(&mut dest, a);
4840 assert_eq!(dest, get_m128d(a, 1));
4841 }
4842
4843 #[simd_test(enable = "sse2")]
4844 unsafe fn test_mm_storel_pd() {
4845 let mut dest = 0.;
4846 let a = _mm_setr_pd(1., 2.);
4847 _mm_storel_pd(&mut dest, a);
4848 assert_eq!(dest, _mm_cvtsd_f64(a));
4849 }
4850
4851 #[simd_test(enable = "sse2")]
4852 unsafe fn test_mm_loadr_pd() {
4853 let mut mem = Memory {
4854 data: [1.0f64, 2.0, 3.0, 4.0],
4855 };
4856 let vals = &mut mem.data;
4857 let d = vals.as_ptr();
4858
4859 let r = _mm_loadr_pd(d);
4860 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
4861 }
4862
4863 #[simd_test(enable = "sse2")]
4864 unsafe fn test_mm_loadu_pd() {
4865 let mut mem = Memory {
4866 data: [1.0f64, 2.0, 3.0, 4.0],
4867 };
4868 let vals = &mut mem.data;
4869 let mut d = vals.as_ptr();
4870
4871 let mut offset = 0;
4873 if (d as usize) & 0xf == 0 {
4874 offset = 1;
4875 d = d.add(offset);
4876 }
4877
4878 let r = _mm_loadu_pd(d);
4879 let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
4880 assert_eq_m128d(r, e);
4881 }
4882
4883 #[simd_test(enable = "sse2")]
4884 unsafe fn test_mm_loadu_si16() {
4885 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4886 let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
4887 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
4888 }
4889
4890 #[simd_test(enable = "sse2")]
4891 unsafe fn test_mm_loadu_si32() {
4892 let a = _mm_setr_epi32(1, 2, 3, 4);
4893 let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
4894 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
4895 }
4896
4897 #[simd_test(enable = "sse2")]
4898 unsafe fn test_mm_loadu_si64() {
4899 let a = _mm_setr_epi64x(5, 6);
4900 let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
4901 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4902 }
4903
4904 #[simd_test(enable = "sse2")]
4905 unsafe fn test_mm_cvtpd_ps() {
4906 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
4907 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
4908
4909 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
4910 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
4911
4912 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
4913 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
4914
4915 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
4916 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
4917 }
4918
4919 #[simd_test(enable = "sse2")]
4920 unsafe fn test_mm_cvtps_pd() {
4921 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
4922 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
4923
4924 let r = _mm_cvtps_pd(_mm_setr_ps(
4925 f32::MAX,
4926 f32::INFINITY,
4927 f32::NEG_INFINITY,
4928 f32::MIN,
4929 ));
4930 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
4931 }
4932
4933 #[simd_test(enable = "sse2")]
4934 unsafe fn test_mm_cvtpd_epi32() {
4935 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
4936 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
4937
4938 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
4939 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
4940
4941 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
4942 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
4943
4944 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
4945 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
4946
4947 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
4948 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
4949 }
4950
4951 #[simd_test(enable = "sse2")]
4952 unsafe fn test_mm_cvtsd_si32() {
4953 let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
4954 assert_eq!(r, -2);
4955
4956 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
4957 assert_eq!(r, i32::MIN);
4958
4959 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
4960 assert_eq!(r, i32::MIN);
4961 }
4962
4963 #[simd_test(enable = "sse2")]
4964 unsafe fn test_mm_cvtsd_ss() {
4965 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
4966 let b = _mm_setr_pd(2.0, -5.0);
4967
4968 let r = _mm_cvtsd_ss(a, b);
4969
4970 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
4971
4972 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
4973 let b = _mm_setr_pd(f64::INFINITY, -5.0);
4974
4975 let r = _mm_cvtsd_ss(a, b);
4976
4977 assert_eq_m128(
4978 r,
4979 _mm_setr_ps(
4980 f32::INFINITY,
4981 f32::NEG_INFINITY,
4982 f32::MAX,
4983 f32::NEG_INFINITY,
4984 ),
4985 );
4986 }
4987
4988 #[simd_test(enable = "sse2")]
4989 unsafe fn test_mm_cvtsd_f64() {
4990 let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
4991 assert_eq!(r, -1.1);
4992 }
4993
4994 #[simd_test(enable = "sse2")]
4995 unsafe fn test_mm_cvtss_sd() {
4996 let a = _mm_setr_pd(-1.1, 2.2);
4997 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
4998
4999 let r = _mm_cvtss_sd(a, b);
5000 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5001
5002 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5003 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5004
5005 let r = _mm_cvtss_sd(a, b);
5006 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5007 }
5008
5009 #[simd_test(enable = "sse2")]
5010 unsafe fn test_mm_cvttpd_epi32() {
5011 let a = _mm_setr_pd(-1.1, 2.2);
5012 let r = _mm_cvttpd_epi32(a);
5013 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5014
5015 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5016 let r = _mm_cvttpd_epi32(a);
5017 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5018 }
5019
5020 #[simd_test(enable = "sse2")]
5021 unsafe fn test_mm_cvttsd_si32() {
5022 let a = _mm_setr_pd(-1.1, 2.2);
5023 let r = _mm_cvttsd_si32(a);
5024 assert_eq!(r, -1);
5025
5026 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5027 let r = _mm_cvttsd_si32(a);
5028 assert_eq!(r, i32::MIN);
5029 }
5030
5031 #[simd_test(enable = "sse2")]
5032 unsafe fn test_mm_cvttps_epi32() {
5033 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5034 let r = _mm_cvttps_epi32(a);
5035 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5036
5037 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5038 let r = _mm_cvttps_epi32(a);
5039 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5040 }
5041
5042 #[simd_test(enable = "sse2")]
5043 unsafe fn test_mm_set_sd() {
5044 let r = _mm_set_sd(-1.0_f64);
5045 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5046 }
5047
5048 #[simd_test(enable = "sse2")]
5049 unsafe fn test_mm_set1_pd() {
5050 let r = _mm_set1_pd(-1.0_f64);
5051 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5052 }
5053
5054 #[simd_test(enable = "sse2")]
5055 unsafe fn test_mm_set_pd1() {
5056 let r = _mm_set_pd1(-2.0_f64);
5057 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5058 }
5059
5060 #[simd_test(enable = "sse2")]
5061 unsafe fn test_mm_set_pd() {
5062 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5063 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5064 }
5065
5066 #[simd_test(enable = "sse2")]
5067 unsafe fn test_mm_setr_pd() {
5068 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5069 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5070 }
5071
5072 #[simd_test(enable = "sse2")]
5073 unsafe fn test_mm_setzero_pd() {
5074 let r = _mm_setzero_pd();
5075 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5076 }
5077
5078 #[simd_test(enable = "sse2")]
5079 unsafe fn test_mm_load1_pd() {
5080 let d = -5.0;
5081 let r = _mm_load1_pd(&d);
5082 assert_eq_m128d(r, _mm_setr_pd(d, d));
5083 }
5084
5085 #[simd_test(enable = "sse2")]
5086 unsafe fn test_mm_load_pd1() {
5087 let d = -5.0;
5088 let r = _mm_load_pd1(&d);
5089 assert_eq_m128d(r, _mm_setr_pd(d, d));
5090 }
5091
5092 #[simd_test(enable = "sse2")]
5093 unsafe fn test_mm_unpackhi_pd() {
5094 let a = _mm_setr_pd(1.0, 2.0);
5095 let b = _mm_setr_pd(3.0, 4.0);
5096 let r = _mm_unpackhi_pd(a, b);
5097 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5098 }
5099
5100 #[simd_test(enable = "sse2")]
5101 unsafe fn test_mm_unpacklo_pd() {
5102 let a = _mm_setr_pd(1.0, 2.0);
5103 let b = _mm_setr_pd(3.0, 4.0);
5104 let r = _mm_unpacklo_pd(a, b);
5105 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5106 }
5107
5108 #[simd_test(enable = "sse2")]
5109 unsafe fn test_mm_shuffle_pd() {
5110 let a = _mm_setr_pd(1., 2.);
5111 let b = _mm_setr_pd(3., 4.);
5112 let expected = _mm_setr_pd(1., 3.);
5113 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5114 assert_eq_m128d(r, expected);
5115 }
5116
5117 #[simd_test(enable = "sse2")]
5118 unsafe fn test_mm_move_sd() {
5119 let a = _mm_setr_pd(1., 2.);
5120 let b = _mm_setr_pd(3., 4.);
5121 let expected = _mm_setr_pd(3., 2.);
5122 let r = _mm_move_sd(a, b);
5123 assert_eq_m128d(r, expected);
5124 }
5125
5126 #[simd_test(enable = "sse2")]
5127 unsafe fn test_mm_castpd_ps() {
5128 let a = _mm_set1_pd(0.);
5129 let expected = _mm_set1_ps(0.);
5130 let r = _mm_castpd_ps(a);
5131 assert_eq_m128(r, expected);
5132 }
5133
5134 #[simd_test(enable = "sse2")]
5135 unsafe fn test_mm_castpd_si128() {
5136 let a = _mm_set1_pd(0.);
5137 let expected = _mm_set1_epi64x(0);
5138 let r = _mm_castpd_si128(a);
5139 assert_eq_m128i(r, expected);
5140 }
5141
5142 #[simd_test(enable = "sse2")]
5143 unsafe fn test_mm_castps_pd() {
5144 let a = _mm_set1_ps(0.);
5145 let expected = _mm_set1_pd(0.);
5146 let r = _mm_castps_pd(a);
5147 assert_eq_m128d(r, expected);
5148 }
5149
5150 #[simd_test(enable = "sse2")]
5151 unsafe fn test_mm_castps_si128() {
5152 let a = _mm_set1_ps(0.);
5153 let expected = _mm_set1_epi32(0);
5154 let r = _mm_castps_si128(a);
5155 assert_eq_m128i(r, expected);
5156 }
5157
5158 #[simd_test(enable = "sse2")]
5159 unsafe fn test_mm_castsi128_pd() {
5160 let a = _mm_set1_epi64x(0);
5161 let expected = _mm_set1_pd(0.);
5162 let r = _mm_castsi128_pd(a);
5163 assert_eq_m128d(r, expected);
5164 }
5165
5166 #[simd_test(enable = "sse2")]
5167 unsafe fn test_mm_castsi128_ps() {
5168 let a = _mm_set1_epi32(0);
5169 let expected = _mm_set1_ps(0.);
5170 let r = _mm_castsi128_ps(a);
5171 assert_eq_m128(r, expected);
5172 }
5173}