#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::sqrtf64,
    mem, ptr,
};

13#[inline]
20#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21#[stable(feature = "simd_x86", since = "1.27.0")]
22pub unsafe fn _mm_pause() {
23 pause()
26}
27
28#[inline]
33#[target_feature(enable = "sse2")]
34#[cfg_attr(test, assert_instr(clflush))]
35#[stable(feature = "simd_x86", since = "1.27.0")]
36pub unsafe fn _mm_clflush(p: *const u8) {
37 clflush(p)
38}
39
40#[inline]
49#[target_feature(enable = "sse2")]
50#[cfg_attr(test, assert_instr(lfence))]
51#[stable(feature = "simd_x86", since = "1.27.0")]
52pub unsafe fn _mm_lfence() {
53 lfence()
54}
55
56#[inline]
65#[target_feature(enable = "sse2")]
66#[cfg_attr(test, assert_instr(mfence))]
67#[stable(feature = "simd_x86", since = "1.27.0")]
68pub unsafe fn _mm_mfence() {
69 mfence()
70}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80 unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
81}
82
83#[inline]
87#[target_feature(enable = "sse2")]
88#[cfg_attr(test, assert_instr(paddw))]
89#[stable(feature = "simd_x86", since = "1.27.0")]
90pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91 unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
92}
93
94#[inline]
98#[target_feature(enable = "sse2")]
99#[cfg_attr(test, assert_instr(paddd))]
100#[stable(feature = "simd_x86", since = "1.27.0")]
101pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102 unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
103}
104
105#[inline]
109#[target_feature(enable = "sse2")]
110#[cfg_attr(test, assert_instr(paddq))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113 unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
114}
115
116#[inline]
120#[target_feature(enable = "sse2")]
121#[cfg_attr(test, assert_instr(paddsb))]
122#[stable(feature = "simd_x86", since = "1.27.0")]
123pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124 unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
125}
126
127#[inline]
131#[target_feature(enable = "sse2")]
132#[cfg_attr(test, assert_instr(paddsw))]
133#[stable(feature = "simd_x86", since = "1.27.0")]
134pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135 unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
136}
137
138#[inline]
142#[target_feature(enable = "sse2")]
143#[cfg_attr(test, assert_instr(paddusb))]
144#[stable(feature = "simd_x86", since = "1.27.0")]
145pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146 unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
147}
148
149#[inline]
153#[target_feature(enable = "sse2")]
154#[cfg_attr(test, assert_instr(paddusw))]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157 unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
158}
159
160#[inline]
164#[target_feature(enable = "sse2")]
165#[cfg_attr(test, assert_instr(pavgb))]
166#[stable(feature = "simd_x86", since = "1.27.0")]
167pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
168 unsafe {
169 let a = simd_cast::<_, u16x16>(a.as_u8x16());
170 let b = simd_cast::<_, u16x16>(b.as_u8x16());
171 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
172 transmute(simd_cast::<_, u8x16>(r))
173 }
174}
175
176#[inline]
180#[target_feature(enable = "sse2")]
181#[cfg_attr(test, assert_instr(pavgw))]
182#[stable(feature = "simd_x86", since = "1.27.0")]
183pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
184 unsafe {
185 let a = simd_cast::<_, u32x8>(a.as_u16x8());
186 let b = simd_cast::<_, u32x8>(b.as_u16x8());
187 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
188 transmute(simd_cast::<_, u16x8>(r))
189 }
190}
191
192#[inline]
200#[target_feature(enable = "sse2")]
201#[cfg_attr(test, assert_instr(pmaddwd))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
204 unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
205}
206
207#[inline]
212#[target_feature(enable = "sse2")]
213#[cfg_attr(test, assert_instr(pmaxsw))]
214#[stable(feature = "simd_x86", since = "1.27.0")]
215pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
216 unsafe {
217 let a = a.as_i16x8();
218 let b = b.as_i16x8();
219 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
220 }
221}
222
223#[inline]
228#[target_feature(enable = "sse2")]
229#[cfg_attr(test, assert_instr(pmaxub))]
230#[stable(feature = "simd_x86", since = "1.27.0")]
231pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
232 unsafe {
233 let a = a.as_u8x16();
234 let b = b.as_u8x16();
235 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
236 }
237}
238
239#[inline]
244#[target_feature(enable = "sse2")]
245#[cfg_attr(test, assert_instr(pminsw))]
246#[stable(feature = "simd_x86", since = "1.27.0")]
247pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
248 unsafe {
249 let a = a.as_i16x8();
250 let b = b.as_i16x8();
251 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
252 }
253}
254
255#[inline]
260#[target_feature(enable = "sse2")]
261#[cfg_attr(test, assert_instr(pminub))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
264 unsafe {
265 let a = a.as_u8x16();
266 let b = b.as_u8x16();
267 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
268 }
269}
270
271#[inline]
278#[target_feature(enable = "sse2")]
279#[cfg_attr(test, assert_instr(pmulhw))]
280#[stable(feature = "simd_x86", since = "1.27.0")]
281pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
282 unsafe {
283 let a = simd_cast::<_, i32x8>(a.as_i16x8());
284 let b = simd_cast::<_, i32x8>(b.as_i16x8());
285 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
286 transmute(simd_cast::<i32x8, i16x8>(r))
287 }
288}
289
290#[inline]
297#[target_feature(enable = "sse2")]
298#[cfg_attr(test, assert_instr(pmulhuw))]
299#[stable(feature = "simd_x86", since = "1.27.0")]
300pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
301 unsafe {
302 let a = simd_cast::<_, u32x8>(a.as_u16x8());
303 let b = simd_cast::<_, u32x8>(b.as_u16x8());
304 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
305 transmute(simd_cast::<u32x8, u16x8>(r))
306 }
307}
308
309#[inline]
316#[target_feature(enable = "sse2")]
317#[cfg_attr(test, assert_instr(pmullw))]
318#[stable(feature = "simd_x86", since = "1.27.0")]
319pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
320 unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
321}
322
323#[inline]
330#[target_feature(enable = "sse2")]
331#[cfg_attr(test, assert_instr(pmuludq))]
332#[stable(feature = "simd_x86", since = "1.27.0")]
333pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
334 unsafe {
335 let a = a.as_u64x2();
336 let b = b.as_u64x2();
337 let mask = u64x2::splat(u32::MAX.into());
338 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
339 }
340}
341
342#[inline]
351#[target_feature(enable = "sse2")]
352#[cfg_attr(test, assert_instr(psadbw))]
353#[stable(feature = "simd_x86", since = "1.27.0")]
354pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
355 unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
356}
357
358#[inline]
362#[target_feature(enable = "sse2")]
363#[cfg_attr(test, assert_instr(psubb))]
364#[stable(feature = "simd_x86", since = "1.27.0")]
365pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
366 unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
367}
368
369#[inline]
373#[target_feature(enable = "sse2")]
374#[cfg_attr(test, assert_instr(psubw))]
375#[stable(feature = "simd_x86", since = "1.27.0")]
376pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
377 unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
378}
379
380#[inline]
384#[target_feature(enable = "sse2")]
385#[cfg_attr(test, assert_instr(psubd))]
386#[stable(feature = "simd_x86", since = "1.27.0")]
387pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
388 unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
389}
390
391#[inline]
395#[target_feature(enable = "sse2")]
396#[cfg_attr(test, assert_instr(psubq))]
397#[stable(feature = "simd_x86", since = "1.27.0")]
398pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
399 unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
400}
401
402#[inline]
407#[target_feature(enable = "sse2")]
408#[cfg_attr(test, assert_instr(psubsb))]
409#[stable(feature = "simd_x86", since = "1.27.0")]
410pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
411 unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
412}
413
414#[inline]
419#[target_feature(enable = "sse2")]
420#[cfg_attr(test, assert_instr(psubsw))]
421#[stable(feature = "simd_x86", since = "1.27.0")]
422pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
423 unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
424}
425
426#[inline]
431#[target_feature(enable = "sse2")]
432#[cfg_attr(test, assert_instr(psubusb))]
433#[stable(feature = "simd_x86", since = "1.27.0")]
434pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
435 unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
436}
437
438#[inline]
443#[target_feature(enable = "sse2")]
444#[cfg_attr(test, assert_instr(psubusw))]
445#[stable(feature = "simd_x86", since = "1.27.0")]
446pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
447 unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
448}
449
450#[inline]
454#[target_feature(enable = "sse2")]
455#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
456#[rustc_legacy_const_generics(1)]
457#[stable(feature = "simd_x86", since = "1.27.0")]
458pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
459 static_assert_uimm_bits!(IMM8, 8);
460 unsafe { _mm_slli_si128_impl::<IMM8>(a) }
461}
462
463#[inline]
466#[target_feature(enable = "sse2")]
467unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
468 const fn mask(shift: i32, i: u32) -> u32 {
469 let shift = shift as u32 & 0xff;
470 if shift > 15 { i } else { 16 - shift + i }
471 }
472 transmute::<i8x16, _>(simd_shuffle!(
473 i8x16::ZERO,
474 a.as_i8x16(),
475 [
476 mask(IMM8, 0),
477 mask(IMM8, 1),
478 mask(IMM8, 2),
479 mask(IMM8, 3),
480 mask(IMM8, 4),
481 mask(IMM8, 5),
482 mask(IMM8, 6),
483 mask(IMM8, 7),
484 mask(IMM8, 8),
485 mask(IMM8, 9),
486 mask(IMM8, 10),
487 mask(IMM8, 11),
488 mask(IMM8, 12),
489 mask(IMM8, 13),
490 mask(IMM8, 14),
491 mask(IMM8, 15),
492 ],
493 ))
494}
495
496#[inline]
500#[target_feature(enable = "sse2")]
501#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
502#[rustc_legacy_const_generics(1)]
503#[stable(feature = "simd_x86", since = "1.27.0")]
504pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
505 unsafe {
506 static_assert_uimm_bits!(IMM8, 8);
507 _mm_slli_si128_impl::<IMM8>(a)
508 }
509}
510
511#[inline]
515#[target_feature(enable = "sse2")]
516#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
517#[rustc_legacy_const_generics(1)]
518#[stable(feature = "simd_x86", since = "1.27.0")]
519pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
520 unsafe {
521 static_assert_uimm_bits!(IMM8, 8);
522 _mm_srli_si128_impl::<IMM8>(a)
523 }
524}
525
526#[inline]
530#[target_feature(enable = "sse2")]
531#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
532#[rustc_legacy_const_generics(1)]
533#[stable(feature = "simd_x86", since = "1.27.0")]
534pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
535 static_assert_uimm_bits!(IMM8, 8);
536 unsafe {
537 if IMM8 >= 16 {
538 _mm_setzero_si128()
539 } else {
540 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
541 }
542 }
543}
544
545#[inline]
550#[target_feature(enable = "sse2")]
551#[cfg_attr(test, assert_instr(psllw))]
552#[stable(feature = "simd_x86", since = "1.27.0")]
553pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
554 unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
555}
556
557#[inline]
561#[target_feature(enable = "sse2")]
562#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
563#[rustc_legacy_const_generics(1)]
564#[stable(feature = "simd_x86", since = "1.27.0")]
565pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
566 static_assert_uimm_bits!(IMM8, 8);
567 unsafe {
568 if IMM8 >= 32 {
569 _mm_setzero_si128()
570 } else {
571 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
572 }
573 }
574}
575
576#[inline]
581#[target_feature(enable = "sse2")]
582#[cfg_attr(test, assert_instr(pslld))]
583#[stable(feature = "simd_x86", since = "1.27.0")]
584pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
585 unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
586}
587
588#[inline]
592#[target_feature(enable = "sse2")]
593#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
594#[rustc_legacy_const_generics(1)]
595#[stable(feature = "simd_x86", since = "1.27.0")]
596pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
597 static_assert_uimm_bits!(IMM8, 8);
598 unsafe {
599 if IMM8 >= 64 {
600 _mm_setzero_si128()
601 } else {
602 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
603 }
604 }
605}
606
607#[inline]
612#[target_feature(enable = "sse2")]
613#[cfg_attr(test, assert_instr(psllq))]
614#[stable(feature = "simd_x86", since = "1.27.0")]
615pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
616 unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
617}
618
619#[inline]
624#[target_feature(enable = "sse2")]
625#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
626#[rustc_legacy_const_generics(1)]
627#[stable(feature = "simd_x86", since = "1.27.0")]
628pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
629 static_assert_uimm_bits!(IMM8, 8);
630 unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
631}
632
633#[inline]
638#[target_feature(enable = "sse2")]
639#[cfg_attr(test, assert_instr(psraw))]
640#[stable(feature = "simd_x86", since = "1.27.0")]
641pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
642 unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
643}
644
645#[inline]
650#[target_feature(enable = "sse2")]
651#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
652#[rustc_legacy_const_generics(1)]
653#[stable(feature = "simd_x86", since = "1.27.0")]
654pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
655 static_assert_uimm_bits!(IMM8, 8);
656 unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
657}
658
659#[inline]
664#[target_feature(enable = "sse2")]
665#[cfg_attr(test, assert_instr(psrad))]
666#[stable(feature = "simd_x86", since = "1.27.0")]
667pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
668 unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
669}
670
671#[inline]
675#[target_feature(enable = "sse2")]
676#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
677#[rustc_legacy_const_generics(1)]
678#[stable(feature = "simd_x86", since = "1.27.0")]
679pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
680 static_assert_uimm_bits!(IMM8, 8);
681 unsafe { _mm_srli_si128_impl::<IMM8>(a) }
682}
683
684#[inline]
687#[target_feature(enable = "sse2")]
688unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
689 const fn mask(shift: i32, i: u32) -> u32 {
690 if (shift as u32) > 15 {
691 i + 16
692 } else {
693 i + (shift as u32)
694 }
695 }
696 let x: i8x16 = simd_shuffle!(
697 a.as_i8x16(),
698 i8x16::ZERO,
699 [
700 mask(IMM8, 0),
701 mask(IMM8, 1),
702 mask(IMM8, 2),
703 mask(IMM8, 3),
704 mask(IMM8, 4),
705 mask(IMM8, 5),
706 mask(IMM8, 6),
707 mask(IMM8, 7),
708 mask(IMM8, 8),
709 mask(IMM8, 9),
710 mask(IMM8, 10),
711 mask(IMM8, 11),
712 mask(IMM8, 12),
713 mask(IMM8, 13),
714 mask(IMM8, 14),
715 mask(IMM8, 15),
716 ],
717 );
718 transmute(x)
719}
720
721#[inline]
726#[target_feature(enable = "sse2")]
727#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
728#[rustc_legacy_const_generics(1)]
729#[stable(feature = "simd_x86", since = "1.27.0")]
730pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
731 static_assert_uimm_bits!(IMM8, 8);
732 unsafe {
733 if IMM8 >= 16 {
734 _mm_setzero_si128()
735 } else {
736 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
737 }
738 }
739}
740
741#[inline]
746#[target_feature(enable = "sse2")]
747#[cfg_attr(test, assert_instr(psrlw))]
748#[stable(feature = "simd_x86", since = "1.27.0")]
749pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
750 unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
751}
752
753#[inline]
758#[target_feature(enable = "sse2")]
759#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
760#[rustc_legacy_const_generics(1)]
761#[stable(feature = "simd_x86", since = "1.27.0")]
762pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
763 static_assert_uimm_bits!(IMM8, 8);
764 unsafe {
765 if IMM8 >= 32 {
766 _mm_setzero_si128()
767 } else {
768 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
769 }
770 }
771}
772
773#[inline]
778#[target_feature(enable = "sse2")]
779#[cfg_attr(test, assert_instr(psrld))]
780#[stable(feature = "simd_x86", since = "1.27.0")]
781pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
782 unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
783}
784
785#[inline]
790#[target_feature(enable = "sse2")]
791#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
792#[rustc_legacy_const_generics(1)]
793#[stable(feature = "simd_x86", since = "1.27.0")]
794pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
795 static_assert_uimm_bits!(IMM8, 8);
796 unsafe {
797 if IMM8 >= 64 {
798 _mm_setzero_si128()
799 } else {
800 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
801 }
802 }
803}
804
805#[inline]
810#[target_feature(enable = "sse2")]
811#[cfg_attr(test, assert_instr(psrlq))]
812#[stable(feature = "simd_x86", since = "1.27.0")]
813pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
814 unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
815}
816
817#[inline]
822#[target_feature(enable = "sse2")]
823#[cfg_attr(test, assert_instr(andps))]
824#[stable(feature = "simd_x86", since = "1.27.0")]
825pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
826 unsafe { simd_and(a, b) }
827}
828
829#[inline]
834#[target_feature(enable = "sse2")]
835#[cfg_attr(test, assert_instr(andnps))]
836#[stable(feature = "simd_x86", since = "1.27.0")]
837pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
838 unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
839}
840
841#[inline]
846#[target_feature(enable = "sse2")]
847#[cfg_attr(test, assert_instr(orps))]
848#[stable(feature = "simd_x86", since = "1.27.0")]
849pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
850 unsafe { simd_or(a, b) }
851}
852
853#[inline]
858#[target_feature(enable = "sse2")]
859#[cfg_attr(test, assert_instr(xorps))]
860#[stable(feature = "simd_x86", since = "1.27.0")]
861pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
862 unsafe { simd_xor(a, b) }
863}
864
865#[inline]
869#[target_feature(enable = "sse2")]
870#[cfg_attr(test, assert_instr(pcmpeqb))]
871#[stable(feature = "simd_x86", since = "1.27.0")]
872pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
873 unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
874}
875
876#[inline]
880#[target_feature(enable = "sse2")]
881#[cfg_attr(test, assert_instr(pcmpeqw))]
882#[stable(feature = "simd_x86", since = "1.27.0")]
883pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
884 unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
885}
886
887#[inline]
891#[target_feature(enable = "sse2")]
892#[cfg_attr(test, assert_instr(pcmpeqd))]
893#[stable(feature = "simd_x86", since = "1.27.0")]
894pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
895 unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
896}
897
898#[inline]
902#[target_feature(enable = "sse2")]
903#[cfg_attr(test, assert_instr(pcmpgtb))]
904#[stable(feature = "simd_x86", since = "1.27.0")]
905pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
906 unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
907}
908
909#[inline]
913#[target_feature(enable = "sse2")]
914#[cfg_attr(test, assert_instr(pcmpgtw))]
915#[stable(feature = "simd_x86", since = "1.27.0")]
916pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
917 unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
918}
919
920#[inline]
924#[target_feature(enable = "sse2")]
925#[cfg_attr(test, assert_instr(pcmpgtd))]
926#[stable(feature = "simd_x86", since = "1.27.0")]
927pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
928 unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
929}
930
931#[inline]
935#[target_feature(enable = "sse2")]
936#[cfg_attr(test, assert_instr(pcmpgtb))]
937#[stable(feature = "simd_x86", since = "1.27.0")]
938pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
939 unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
940}
941
942#[inline]
946#[target_feature(enable = "sse2")]
947#[cfg_attr(test, assert_instr(pcmpgtw))]
948#[stable(feature = "simd_x86", since = "1.27.0")]
949pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
950 unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
951}
952
953#[inline]
957#[target_feature(enable = "sse2")]
958#[cfg_attr(test, assert_instr(pcmpgtd))]
959#[stable(feature = "simd_x86", since = "1.27.0")]
960pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
961 unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
962}
963
964#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(cvtdq2pd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
973 unsafe {
974 let a = a.as_i32x4();
975 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
976 }
977}
978
979#[inline]
984#[target_feature(enable = "sse2")]
985#[cfg_attr(test, assert_instr(cvtsi2sd))]
986#[stable(feature = "simd_x86", since = "1.27.0")]
987pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
988 unsafe { simd_insert!(a, 0, b as f64) }
989}
990
991#[inline]
996#[target_feature(enable = "sse2")]
997#[cfg_attr(test, assert_instr(cvtdq2ps))]
998#[stable(feature = "simd_x86", since = "1.27.0")]
999pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1000 unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1001}
1002
1003#[inline]
1008#[target_feature(enable = "sse2")]
1009#[cfg_attr(test, assert_instr(cvtps2dq))]
1010#[stable(feature = "simd_x86", since = "1.27.0")]
1011pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1012 unsafe { transmute(cvtps2dq(a)) }
1013}
1014
1015#[inline]
1020#[target_feature(enable = "sse2")]
1021#[stable(feature = "simd_x86", since = "1.27.0")]
1022pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1023 unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1024}
1025
1026#[inline]
1030#[target_feature(enable = "sse2")]
1031#[stable(feature = "simd_x86", since = "1.27.0")]
1032pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1033 unsafe { simd_extract!(a.as_i32x4(), 0) }
1034}
1035
1036#[inline]
1041#[target_feature(enable = "sse2")]
1042#[stable(feature = "simd_x86", since = "1.27.0")]
1044pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1045 unsafe { transmute(i64x2::new(e0, e1)) }
1046}
1047
1048#[inline]
1052#[target_feature(enable = "sse2")]
1053#[stable(feature = "simd_x86", since = "1.27.0")]
1055pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1056 unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1057}
1058
1059#[inline]
1063#[target_feature(enable = "sse2")]
1064#[stable(feature = "simd_x86", since = "1.27.0")]
1066pub fn _mm_set_epi16(
1067 e7: i16,
1068 e6: i16,
1069 e5: i16,
1070 e4: i16,
1071 e3: i16,
1072 e2: i16,
1073 e1: i16,
1074 e0: i16,
1075) -> __m128i {
1076 unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1077}
1078
1079#[inline]
1083#[target_feature(enable = "sse2")]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1086pub fn _mm_set_epi8(
1087 e15: i8,
1088 e14: i8,
1089 e13: i8,
1090 e12: i8,
1091 e11: i8,
1092 e10: i8,
1093 e9: i8,
1094 e8: i8,
1095 e7: i8,
1096 e6: i8,
1097 e5: i8,
1098 e4: i8,
1099 e3: i8,
1100 e2: i8,
1101 e1: i8,
1102 e0: i8,
1103) -> __m128i {
1104 unsafe {
1105 #[rustfmt::skip]
1106 transmute(i8x16::new(
1107 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1108 ))
1109 }
1110}
1111
1112#[inline]
1116#[target_feature(enable = "sse2")]
1117#[stable(feature = "simd_x86", since = "1.27.0")]
1119pub fn _mm_set1_epi64x(a: i64) -> __m128i {
1120 _mm_set_epi64x(a, a)
1121}
1122
1123#[inline]
1127#[target_feature(enable = "sse2")]
1128#[stable(feature = "simd_x86", since = "1.27.0")]
1130pub fn _mm_set1_epi32(a: i32) -> __m128i {
1131 _mm_set_epi32(a, a, a, a)
1132}
1133
1134#[inline]
1138#[target_feature(enable = "sse2")]
1139#[stable(feature = "simd_x86", since = "1.27.0")]
1141pub fn _mm_set1_epi16(a: i16) -> __m128i {
1142 _mm_set_epi16(a, a, a, a, a, a, a, a)
1143}
1144
1145#[inline]
1149#[target_feature(enable = "sse2")]
1150#[stable(feature = "simd_x86", since = "1.27.0")]
1152pub fn _mm_set1_epi8(a: i8) -> __m128i {
1153 _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
1154}
1155
1156#[inline]
1160#[target_feature(enable = "sse2")]
1161#[stable(feature = "simd_x86", since = "1.27.0")]
1163pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1164 _mm_set_epi32(e0, e1, e2, e3)
1165}
1166
1167#[inline]
1171#[target_feature(enable = "sse2")]
1172#[stable(feature = "simd_x86", since = "1.27.0")]
1174pub fn _mm_setr_epi16(
1175 e7: i16,
1176 e6: i16,
1177 e5: i16,
1178 e4: i16,
1179 e3: i16,
1180 e2: i16,
1181 e1: i16,
1182 e0: i16,
1183) -> __m128i {
1184 _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1185}
1186
1187#[inline]
1191#[target_feature(enable = "sse2")]
1192#[stable(feature = "simd_x86", since = "1.27.0")]
1194pub fn _mm_setr_epi8(
1195 e15: i8,
1196 e14: i8,
1197 e13: i8,
1198 e12: i8,
1199 e11: i8,
1200 e10: i8,
1201 e9: i8,
1202 e8: i8,
1203 e7: i8,
1204 e6: i8,
1205 e5: i8,
1206 e4: i8,
1207 e3: i8,
1208 e2: i8,
1209 e1: i8,
1210 e0: i8,
1211) -> __m128i {
1212 #[rustfmt::skip]
1213 _mm_set_epi8(
1214 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1215 )
1216}
1217
1218#[inline]
1222#[target_feature(enable = "sse2")]
1223#[cfg_attr(test, assert_instr(xorps))]
1224#[stable(feature = "simd_x86", since = "1.27.0")]
1225pub fn _mm_setzero_si128() -> __m128i {
1226 const { unsafe { mem::zeroed() } }
1227}
1228
1229#[inline]
1233#[target_feature(enable = "sse2")]
1234#[stable(feature = "simd_x86", since = "1.27.0")]
1235pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1236 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1237}
1238
1239#[inline]
1245#[target_feature(enable = "sse2")]
1246#[cfg_attr(
1247 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1248 assert_instr(movaps)
1249)]
1250#[stable(feature = "simd_x86", since = "1.27.0")]
1251pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1252 *mem_addr
1253}
1254
1255#[inline]
1261#[target_feature(enable = "sse2")]
1262#[cfg_attr(test, assert_instr(movups))]
1263#[stable(feature = "simd_x86", since = "1.27.0")]
1264pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1265 let mut dst: __m128i = _mm_undefined_si128();
1266 ptr::copy_nonoverlapping(
1267 mem_addr as *const u8,
1268 ptr::addr_of_mut!(dst) as *mut u8,
1269 mem::size_of::<__m128i>(),
1270 );
1271 dst
1272}
1273
1274#[inline]
1285#[target_feature(enable = "sse2")]
1286#[cfg_attr(test, assert_instr(maskmovdqu))]
1287#[stable(feature = "simd_x86", since = "1.27.0")]
1288pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1289 maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1290}
1291
1292#[inline]
1298#[target_feature(enable = "sse2")]
1299#[cfg_attr(
1300 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1301 assert_instr(movaps)
1302)]
1303#[stable(feature = "simd_x86", since = "1.27.0")]
1304pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1305 *mem_addr = a;
1306}
1307
1308#[inline]
1314#[target_feature(enable = "sse2")]
1315#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1317pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1318 mem_addr.write_unaligned(a);
1319}
1320
1321#[inline]
1327#[target_feature(enable = "sse2")]
1328#[stable(feature = "simd_x86", since = "1.27.0")]
1329pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1330 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1331}
1332
1333#[inline]
1348#[target_feature(enable = "sse2")]
1349#[cfg_attr(test, assert_instr(movntdq))]
1350#[stable(feature = "simd_x86", since = "1.27.0")]
1351pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
1352 crate::arch::asm!(
1353 vps!("movntdq", ",{a}"),
1354 p = in(reg) mem_addr,
1355 a = in(xmm_reg) a,
1356 options(nostack, preserves_flags),
1357 );
1358}
1359
1360#[inline]
1375#[target_feature(enable = "sse2")]
1376#[cfg_attr(test, assert_instr(movnti))]
1377#[stable(feature = "simd_x86", since = "1.27.0")]
1378pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
1379 crate::arch::asm!(
1380 vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
1382 a = in(reg) a,
1383 options(nostack, preserves_flags),
1384 );
1385}
1386
1387#[inline]
1392#[target_feature(enable = "sse2")]
1393#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
1395#[stable(feature = "simd_x86", since = "1.27.0")]
1396pub fn _mm_move_epi64(a: __m128i) -> __m128i {
1397 unsafe {
1398 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1399 transmute(r)
1400 }
1401}
1402
1403#[inline]
1408#[target_feature(enable = "sse2")]
1409#[cfg_attr(test, assert_instr(packsswb))]
1410#[stable(feature = "simd_x86", since = "1.27.0")]
1411pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1412 unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1413}
1414
1415#[inline]
1420#[target_feature(enable = "sse2")]
1421#[cfg_attr(test, assert_instr(packssdw))]
1422#[stable(feature = "simd_x86", since = "1.27.0")]
1423pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1424 unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1425}
1426
1427#[inline]
1432#[target_feature(enable = "sse2")]
1433#[cfg_attr(test, assert_instr(packuswb))]
1434#[stable(feature = "simd_x86", since = "1.27.0")]
1435pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1436 unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1437}
1438
1439#[inline]
1443#[target_feature(enable = "sse2")]
1444#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1445#[rustc_legacy_const_generics(1)]
1446#[stable(feature = "simd_x86", since = "1.27.0")]
1447pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1448 static_assert_uimm_bits!(IMM8, 3);
1449 unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1450}
1451
1452#[inline]
1456#[target_feature(enable = "sse2")]
1457#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1458#[rustc_legacy_const_generics(2)]
1459#[stable(feature = "simd_x86", since = "1.27.0")]
1460pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1461 static_assert_uimm_bits!(IMM8, 3);
1462 unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1463}
1464
1465#[inline]
1469#[target_feature(enable = "sse2")]
1470#[cfg_attr(test, assert_instr(pmovmskb))]
1471#[stable(feature = "simd_x86", since = "1.27.0")]
1472pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
1473 unsafe {
1474 let z = i8x16::ZERO;
1475 let m: i8x16 = simd_lt(a.as_i8x16(), z);
1476 simd_bitmask::<_, u16>(m) as u32 as i32
1477 }
1478}
1479
1480#[inline]
1484#[target_feature(enable = "sse2")]
1485#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1486#[rustc_legacy_const_generics(1)]
1487#[stable(feature = "simd_x86", since = "1.27.0")]
1488pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1489 static_assert_uimm_bits!(IMM8, 8);
1490 unsafe {
1491 let a = a.as_i32x4();
1492 let x: i32x4 = simd_shuffle!(
1493 a,
1494 a,
1495 [
1496 IMM8 as u32 & 0b11,
1497 (IMM8 as u32 >> 2) & 0b11,
1498 (IMM8 as u32 >> 4) & 0b11,
1499 (IMM8 as u32 >> 6) & 0b11,
1500 ],
1501 );
1502 transmute(x)
1503 }
1504}
1505
1506#[inline]
1514#[target_feature(enable = "sse2")]
1515#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1516#[rustc_legacy_const_generics(1)]
1517#[stable(feature = "simd_x86", since = "1.27.0")]
1518pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1519 static_assert_uimm_bits!(IMM8, 8);
1520 unsafe {
1521 let a = a.as_i16x8();
1522 let x: i16x8 = simd_shuffle!(
1523 a,
1524 a,
1525 [
1526 0,
1527 1,
1528 2,
1529 3,
1530 (IMM8 as u32 & 0b11) + 4,
1531 ((IMM8 as u32 >> 2) & 0b11) + 4,
1532 ((IMM8 as u32 >> 4) & 0b11) + 4,
1533 ((IMM8 as u32 >> 6) & 0b11) + 4,
1534 ],
1535 );
1536 transmute(x)
1537 }
1538}
1539
1540#[inline]
1548#[target_feature(enable = "sse2")]
1549#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1550#[rustc_legacy_const_generics(1)]
1551#[stable(feature = "simd_x86", since = "1.27.0")]
1552pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1553 static_assert_uimm_bits!(IMM8, 8);
1554 unsafe {
1555 let a = a.as_i16x8();
1556 let x: i16x8 = simd_shuffle!(
1557 a,
1558 a,
1559 [
1560 IMM8 as u32 & 0b11,
1561 (IMM8 as u32 >> 2) & 0b11,
1562 (IMM8 as u32 >> 4) & 0b11,
1563 (IMM8 as u32 >> 6) & 0b11,
1564 4,
1565 5,
1566 6,
1567 7,
1568 ],
1569 );
1570 transmute(x)
1571 }
1572}
1573
1574#[inline]
1578#[target_feature(enable = "sse2")]
1579#[cfg_attr(test, assert_instr(punpckhbw))]
1580#[stable(feature = "simd_x86", since = "1.27.0")]
1581pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1582 unsafe {
1583 transmute::<i8x16, _>(simd_shuffle!(
1584 a.as_i8x16(),
1585 b.as_i8x16(),
1586 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1587 ))
1588 }
1589}
1590
1591#[inline]
1595#[target_feature(enable = "sse2")]
1596#[cfg_attr(test, assert_instr(punpckhwd))]
1597#[stable(feature = "simd_x86", since = "1.27.0")]
1598pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1599 unsafe {
1600 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1601 transmute::<i16x8, _>(x)
1602 }
1603}
1604
1605#[inline]
1609#[target_feature(enable = "sse2")]
1610#[cfg_attr(test, assert_instr(unpckhps))]
1611#[stable(feature = "simd_x86", since = "1.27.0")]
1612pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1613 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1614}
1615
1616#[inline]
1620#[target_feature(enable = "sse2")]
1621#[cfg_attr(test, assert_instr(unpckhpd))]
1622#[stable(feature = "simd_x86", since = "1.27.0")]
1623pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1624 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1625}
1626
1627#[inline]
1631#[target_feature(enable = "sse2")]
1632#[cfg_attr(test, assert_instr(punpcklbw))]
1633#[stable(feature = "simd_x86", since = "1.27.0")]
1634pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1635 unsafe {
1636 transmute::<i8x16, _>(simd_shuffle!(
1637 a.as_i8x16(),
1638 b.as_i8x16(),
1639 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1640 ))
1641 }
1642}
1643
1644#[inline]
1648#[target_feature(enable = "sse2")]
1649#[cfg_attr(test, assert_instr(punpcklwd))]
1650#[stable(feature = "simd_x86", since = "1.27.0")]
1651pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1652 unsafe {
1653 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1654 transmute::<i16x8, _>(x)
1655 }
1656}
1657
1658#[inline]
1662#[target_feature(enable = "sse2")]
1663#[cfg_attr(test, assert_instr(unpcklps))]
1664#[stable(feature = "simd_x86", since = "1.27.0")]
1665pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1666 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1667}
1668
1669#[inline]
1673#[target_feature(enable = "sse2")]
1674#[cfg_attr(test, assert_instr(movlhps))]
1675#[stable(feature = "simd_x86", since = "1.27.0")]
1676pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1677 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1678}
1679
1680#[inline]
1685#[target_feature(enable = "sse2")]
1686#[cfg_attr(test, assert_instr(addsd))]
1687#[stable(feature = "simd_x86", since = "1.27.0")]
1688pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1689 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1690}
1691
/// Adds the two double-precision lanes of `a` and `b` lane by lane.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    // SAFETY: both operands share the same vector type.
    unsafe { simd_add(a, b) }
}
1703
1704#[inline]
1709#[target_feature(enable = "sse2")]
1710#[cfg_attr(test, assert_instr(divsd))]
1711#[stable(feature = "simd_x86", since = "1.27.0")]
1712pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1713 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1714}
1715
/// Divides each double-precision lane of `a` by the corresponding lane of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    // SAFETY: both operands share the same vector type.
    unsafe { simd_div(a, b) }
}
1727
/// Forwards `a` and `b` to the `maxsd` intrinsic.
///
/// Per Intel's definition of this intrinsic, the low lane of the result is
/// the maximum of the two low lanes and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { maxsd(a, b) }
}
1739
/// Forwards `a` and `b` to the `maxpd` intrinsic, which per Intel's definition
/// yields the lane-wise maximum of the two vectors.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { maxpd(a, b) }
}
1751
/// Forwards `a` and `b` to the `minsd` intrinsic.
///
/// Per Intel's definition of this intrinsic, the low lane of the result is
/// the minimum of the two low lanes and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { minsd(a, b) }
}
1763
/// Forwards `a` and `b` to the `minpd` intrinsic, which per Intel's definition
/// yields the lane-wise minimum of the two vectors.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { minpd(a, b) }
}
1775
1776#[inline]
1781#[target_feature(enable = "sse2")]
1782#[cfg_attr(test, assert_instr(mulsd))]
1783#[stable(feature = "simd_x86", since = "1.27.0")]
1784pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1785 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1786}
1787
/// Multiplies the two double-precision lanes of `a` and `b` lane by lane.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    // SAFETY: both operands share the same vector type.
    unsafe { simd_mul(a, b) }
}
1799
1800#[inline]
1805#[target_feature(enable = "sse2")]
1806#[cfg_attr(test, assert_instr(sqrtsd))]
1807#[stable(feature = "simd_x86", since = "1.27.0")]
1808pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1809 unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1810}
1811
/// Computes the square root of each double-precision lane of `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    unsafe { simd_fsqrt(a) }
}
1822
1823#[inline]
1828#[target_feature(enable = "sse2")]
1829#[cfg_attr(test, assert_instr(subsd))]
1830#[stable(feature = "simd_x86", since = "1.27.0")]
1831pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1832 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1833}
1834
/// Subtracts each double-precision lane of `b` from the corresponding lane of `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    // SAFETY: both operands share the same vector type.
    unsafe { simd_sub(a, b) }
}
1846
1847#[inline]
1852#[target_feature(enable = "sse2")]
1853#[cfg_attr(test, assert_instr(andps))]
1854#[stable(feature = "simd_x86", since = "1.27.0")]
1855pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1856 unsafe {
1857 let a: __m128i = transmute(a);
1858 let b: __m128i = transmute(b);
1859 transmute(_mm_and_si128(a, b))
1860 }
1861}
1862
1863#[inline]
1867#[target_feature(enable = "sse2")]
1868#[cfg_attr(test, assert_instr(andnps))]
1869#[stable(feature = "simd_x86", since = "1.27.0")]
1870pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1871 unsafe {
1872 let a: __m128i = transmute(a);
1873 let b: __m128i = transmute(b);
1874 transmute(_mm_andnot_si128(a, b))
1875 }
1876}
1877
1878#[inline]
1882#[target_feature(enable = "sse2")]
1883#[cfg_attr(test, assert_instr(orps))]
1884#[stable(feature = "simd_x86", since = "1.27.0")]
1885pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1886 unsafe {
1887 let a: __m128i = transmute(a);
1888 let b: __m128i = transmute(b);
1889 transmute(_mm_or_si128(a, b))
1890 }
1891}
1892
1893#[inline]
1897#[target_feature(enable = "sse2")]
1898#[cfg_attr(test, assert_instr(xorps))]
1899#[stable(feature = "simd_x86", since = "1.27.0")]
1900pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1901 unsafe {
1902 let a: __m128i = transmute(a);
1903 let b: __m128i = transmute(b);
1904 transmute(_mm_xor_si128(a, b))
1905 }
1906}
1907
/// Compares the low lanes of `a` and `b` for equality via the `cmpsd`
/// intrinsic.
///
/// Per the instruction definition, the low result lane is an all-ones or
/// all-zeros mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 0 is "equal" in the x86 compare encoding.
    unsafe { cmpsd(a, b, 0) }
}
1919
/// Compares the low lanes of `a` and `b` for less-than via the `cmpsd`
/// intrinsic.
///
/// Per the instruction definition, the low result lane is an all-ones or
/// all-zeros mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 1 is "less than" in the x86 compare encoding.
    unsafe { cmpsd(a, b, 1) }
}
1931
/// Compares the low lanes of `a` and `b` for less-than-or-equal via the
/// `cmpsd` intrinsic.
///
/// Per the instruction definition, the low result lane is an all-ones or
/// all-zeros mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 2 is "less than or equal" in the x86 compare encoding.
    unsafe { cmpsd(a, b, 2) }
}
1943
1944#[inline]
1949#[target_feature(enable = "sse2")]
1950#[cfg_attr(test, assert_instr(cmpltsd))]
1951#[stable(feature = "simd_x86", since = "1.27.0")]
1952pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1953 unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1954}
1955
1956#[inline]
1961#[target_feature(enable = "sse2")]
1962#[cfg_attr(test, assert_instr(cmplesd))]
1963#[stable(feature = "simd_x86", since = "1.27.0")]
1964pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1965 unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
1966}
1967
/// Tests whether the low lanes of `a` and `b` are ordered (neither is NaN)
/// via the `cmpsd` intrinsic.
///
/// Per the instruction definition, the low result lane is an all-ones or
/// all-zeros mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 7 is "ordered" in the x86 compare encoding.
    unsafe { cmpsd(a, b, 7) }
}
1981
/// Tests whether the low lanes of `a` and `b` are unordered (at least one is
/// NaN) via the `cmpsd` intrinsic.
///
/// Per the instruction definition, the low result lane is an all-ones or
/// all-zeros mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 3 is "unordered" in the x86 compare encoding.
    unsafe { cmpsd(a, b, 3) }
}
1994
/// Compares the low lanes of `a` and `b` for inequality via the `cmpsd`
/// intrinsic.
///
/// Per the instruction definition, the low result lane is an all-ones or
/// all-zeros mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 4 is "not equal" in the x86 compare encoding.
    unsafe { cmpsd(a, b, 4) }
}
2006
/// Compares the low lanes of `a` and `b` for not-less-than via the `cmpsd`
/// intrinsic.
///
/// Per the instruction definition, the low result lane is an all-ones or
/// all-zeros mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 5 is "not less than" in the x86 compare encoding.
    unsafe { cmpsd(a, b, 5) }
}
2018
/// Compares the low lanes of `a` and `b` for not-less-than-or-equal via the
/// `cmpsd` intrinsic.
///
/// Per the instruction definition, the low result lane is an all-ones or
/// all-zeros mask and the high lane is copied from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 6 is "not less than or equal" in the x86 compare encoding.
    unsafe { cmpsd(a, b, 6) }
}
2030
2031#[inline]
2036#[target_feature(enable = "sse2")]
2037#[cfg_attr(test, assert_instr(cmpnltsd))]
2038#[stable(feature = "simd_x86", since = "1.27.0")]
2039pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2040 unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2041}
2042
2043#[inline]
2048#[target_feature(enable = "sse2")]
2049#[cfg_attr(test, assert_instr(cmpnlesd))]
2050#[stable(feature = "simd_x86", since = "1.27.0")]
2051pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2052 unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2053}
2054
/// Compares corresponding lanes of `a` and `b` for equality via the `cmppd`
/// intrinsic; per the instruction definition each result lane is an all-ones
/// or all-zeros mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 0 is "equal" in the x86 compare encoding.
    unsafe { cmppd(a, b, 0) }
}
2065
/// Compares corresponding lanes of `a` and `b` for less-than via the `cmppd`
/// intrinsic; per the instruction definition each result lane is an all-ones
/// or all-zeros mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 1 is "less than" in the x86 compare encoding.
    unsafe { cmppd(a, b, 1) }
}
2076
/// Compares corresponding lanes of `a` and `b` for less-than-or-equal via the
/// `cmppd` intrinsic; per the instruction definition each result lane is an
/// all-ones or all-zeros mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 2 is "less than or equal" in the x86 compare encoding.
    unsafe { cmppd(a, b, 2) }
}
2087
/// Compares corresponding lanes of `a` and `b` for greater-than.
///
/// Implemented as `_mm_cmplt_pd` with the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmplt_pd(b, a)
}
2098
/// Compares corresponding lanes of `a` and `b` for greater-than-or-equal.
///
/// Implemented as `_mm_cmple_pd` with the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmple_pd(b, a)
}
2109
/// Tests whether corresponding lanes of `a` and `b` are ordered (neither is
/// NaN) via the `cmppd` intrinsic; per the instruction definition each result
/// lane is an all-ones or all-zeros mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 7 is "ordered" in the x86 compare encoding.
    unsafe { cmppd(a, b, 7) }
}
2120
/// Tests whether corresponding lanes of `a` and `b` are unordered (at least
/// one is NaN) via the `cmppd` intrinsic; per the instruction definition each
/// result lane is an all-ones or all-zeros mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 3 is "unordered" in the x86 compare encoding.
    unsafe { cmppd(a, b, 3) }
}
2131
/// Compares corresponding lanes of `a` and `b` for inequality via the `cmppd`
/// intrinsic; per the instruction definition each result lane is an all-ones
/// or all-zeros mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 4 is "not equal" in the x86 compare encoding.
    unsafe { cmppd(a, b, 4) }
}
2142
/// Compares corresponding lanes of `a` and `b` for not-less-than via the
/// `cmppd` intrinsic; per the instruction definition each result lane is an
/// all-ones or all-zeros mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 5 is "not less than" in the x86 compare encoding.
    unsafe { cmppd(a, b, 5) }
}
2153
/// Compares corresponding lanes of `a` and `b` for not-less-than-or-equal via
/// the `cmppd` intrinsic; per the instruction definition each result lane is
/// an all-ones or all-zeros mask.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate immediate 6 is "not less than or equal" in the x86 compare encoding.
    unsafe { cmppd(a, b, 6) }
}
2164
/// Compares corresponding lanes of `a` and `b` for not-greater-than.
///
/// Implemented as `_mm_cmpnlt_pd` with the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnlt_pd(b, a)
}
2175
/// Compares corresponding lanes of `a` and `b` for not-greater-than-or-equal.
///
/// Implemented as `_mm_cmpnle_pd` with the operands swapped.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnle_pd(b, a)
}
2187
/// Compares the low lanes of `a` and `b` for equality via the `comieqsd`
/// intrinsic and returns its integer result (per Intel's definition, 1 when
/// the relation holds and 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comieqsd(a, b) }
}
2198
/// Compares the low lanes of `a` and `b` for less-than via the `comiltsd`
/// intrinsic and returns its integer result (per Intel's definition, 1 when
/// the relation holds and 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comiltsd(a, b) }
}
2209
/// Compares the low lanes of `a` and `b` for less-than-or-equal via the
/// `comilesd` intrinsic and returns its integer result (per Intel's
/// definition, 1 when the relation holds and 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comilesd(a, b) }
}
2220
/// Compares the low lanes of `a` and `b` for greater-than via the `comigtsd`
/// intrinsic and returns its integer result (per Intel's definition, 1 when
/// the relation holds and 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comigtsd(a, b) }
}
2231
/// Compares the low lanes of `a` and `b` for greater-than-or-equal via the
/// `comigesd` intrinsic and returns its integer result (per Intel's
/// definition, 1 when the relation holds and 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comigesd(a, b) }
}
2242
/// Compares the low lanes of `a` and `b` for inequality via the `comineqsd`
/// intrinsic and returns its integer result (per Intel's definition, 1 when
/// the relation holds and 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { comineqsd(a, b) }
}
2253
/// Compares the low lanes of `a` and `b` for equality via the `ucomieqsd`
/// intrinsic (the `ucomisd` form of the comparison) and returns its integer
/// result (per Intel's definition, 1 when the relation holds and 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomieqsd(a, b) }
}
2264
/// Compares the low lanes of `a` and `b` for less-than via the `ucomiltsd`
/// intrinsic (the `ucomisd` form of the comparison) and returns its integer
/// result (per Intel's definition, 1 when the relation holds and 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomiltsd(a, b) }
}
2275
/// Compares the low lanes of `a` and `b` for less-than-or-equal via the
/// `ucomilesd` intrinsic (the `ucomisd` form of the comparison) and returns
/// its integer result (per Intel's definition, 1 when the relation holds and
/// 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomilesd(a, b) }
}
2286
/// Compares the low lanes of `a` and `b` for greater-than via the `ucomigtsd`
/// intrinsic (the `ucomisd` form of the comparison) and returns its integer
/// result (per Intel's definition, 1 when the relation holds and 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomigtsd(a, b) }
}
2297
/// Compares the low lanes of `a` and `b` for greater-than-or-equal via the
/// `ucomigesd` intrinsic (the `ucomisd` form of the comparison) and returns
/// its integer result (per Intel's definition, 1 when the relation holds and
/// 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomigesd(a, b) }
}
2308
/// Compares the low lanes of `a` and `b` for inequality via the `ucomineqsd`
/// intrinsic (the `ucomisd` form of the comparison) and returns its integer
/// result (per Intel's definition, 1 when the relation holds and 0 otherwise).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomineqsd(a, b) }
}
2319
2320#[inline]
2325#[target_feature(enable = "sse2")]
2326#[cfg_attr(test, assert_instr(cvtpd2ps))]
2327#[stable(feature = "simd_x86", since = "1.27.0")]
2328pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2329 unsafe {
2330 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2331 let zero = f32x2::ZERO;
2332 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2333 }
2334}
2335
2336#[inline]
2342#[target_feature(enable = "sse2")]
2343#[cfg_attr(test, assert_instr(cvtps2pd))]
2344#[stable(feature = "simd_x86", since = "1.27.0")]
2345pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
2346 unsafe {
2347 let a = a.as_f32x4();
2348 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2349 }
2350}
2351
2352#[inline]
2357#[target_feature(enable = "sse2")]
2358#[cfg_attr(test, assert_instr(cvtpd2dq))]
2359#[stable(feature = "simd_x86", since = "1.27.0")]
2360pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2361 unsafe { transmute(cvtpd2dq(a)) }
2362}
2363
/// Converts the low double-precision lane of `a` to a 32-bit integer through
/// the `cvtsd2si` intrinsic.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    unsafe { cvtsd2si(a) }
}
2375
/// Forwards `a` and `b` to the `cvtsd2ss` intrinsic.
///
/// Per Intel's definition, the low lane of the result is the low lane of `b`
/// converted to single precision and the upper three lanes come from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    unsafe { cvtsd2ss(a, b) }
}
2389
/// Returns the low double-precision lane of `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    // SAFETY: lane index 0 is valid for a two-lane vector.
    unsafe { simd_extract!(a, 0) }
}
2399
/// Forwards `a` and `b` to the `cvtss2sd` intrinsic.
///
/// Per Intel's definition, the low lane of the result is the low lane of `b`
/// converted to double precision and the high lane comes from `a`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
    unsafe { cvtss2sd(a, b) }
}
2413
2414#[inline]
2419#[target_feature(enable = "sse2")]
2420#[cfg_attr(test, assert_instr(cvttpd2dq))]
2421#[stable(feature = "simd_x86", since = "1.27.0")]
2422pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2423 unsafe { transmute(cvttpd2dq(a)) }
2424}
2425
/// Converts the low double-precision lane of `a` to a 32-bit integer through
/// the `cvttsd2si` intrinsic (the truncating form of the conversion).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    unsafe { cvttsd2si(a) }
}
2437
2438#[inline]
2443#[target_feature(enable = "sse2")]
2444#[cfg_attr(test, assert_instr(cvttps2dq))]
2445#[stable(feature = "simd_x86", since = "1.27.0")]
2446pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2447 unsafe { transmute(cvttps2dq(a)) }
2448}
2449
2450#[inline]
2455#[target_feature(enable = "sse2")]
2456#[stable(feature = "simd_x86", since = "1.27.0")]
2457pub fn _mm_set_sd(a: f64) -> __m128d {
2458 _mm_set_pd(0.0, a)
2459}
2460
2461#[inline]
2466#[target_feature(enable = "sse2")]
2467#[stable(feature = "simd_x86", since = "1.27.0")]
2468pub fn _mm_set1_pd(a: f64) -> __m128d {
2469 _mm_set_pd(a, a)
2470}
2471
2472#[inline]
2477#[target_feature(enable = "sse2")]
2478#[stable(feature = "simd_x86", since = "1.27.0")]
2479pub fn _mm_set_pd1(a: f64) -> __m128d {
2480 _mm_set_pd(a, a)
2481}
2482
2483#[inline]
2488#[target_feature(enable = "sse2")]
2489#[stable(feature = "simd_x86", since = "1.27.0")]
2490pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2491 __m128d([b, a])
2492}
2493
2494#[inline]
2499#[target_feature(enable = "sse2")]
2500#[stable(feature = "simd_x86", since = "1.27.0")]
2501pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2502 _mm_set_pd(b, a)
2503}
2504
/// Returns a vector whose bits are all zero (both lanes are `+0.0`).
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_setzero_pd() -> __m128d {
    // SAFETY: the all-zero bit pattern is a valid pair of `f64` lanes.
    const { unsafe { mem::zeroed() } }
}
2516
2517#[inline]
2524#[target_feature(enable = "sse2")]
2525#[cfg_attr(test, assert_instr(movmskpd))]
2526#[stable(feature = "simd_x86", since = "1.27.0")]
2527pub fn _mm_movemask_pd(a: __m128d) -> i32 {
2528 unsafe {
2531 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2532 simd_bitmask::<i64x2, u8>(mask).into()
2533 }
2534}
2535
2536#[inline]
2543#[target_feature(enable = "sse2")]
2544#[cfg_attr(
2545 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2546 assert_instr(movaps)
2547)]
2548#[stable(feature = "simd_x86", since = "1.27.0")]
2549#[allow(clippy::cast_ptr_alignment)]
2550pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2551 *(mem_addr as *const __m128d)
2552}
2553
2554#[inline]
2559#[target_feature(enable = "sse2")]
2560#[cfg_attr(test, assert_instr(movsd))]
2561#[stable(feature = "simd_x86", since = "1.27.0")]
2562pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2563 _mm_setr_pd(*mem_addr, 0.)
2564}
2565
2566#[inline]
2572#[target_feature(enable = "sse2")]
2573#[cfg_attr(test, assert_instr(movhps))]
2574#[stable(feature = "simd_x86", since = "1.27.0")]
2575pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2576 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2577}
2578
2579#[inline]
2585#[target_feature(enable = "sse2")]
2586#[cfg_attr(test, assert_instr(movlps))]
2587#[stable(feature = "simd_x86", since = "1.27.0")]
2588pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2589 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2590}
2591
/// Stores `a` to `mem_addr` using the `movntpd` instruction, emitted directly
/// through inline assembly.
///
/// # Safety
///
/// `mem_addr` must be valid for a 16-byte write. Per the instruction
/// definition, `movntpd` is a non-temporal store that requires a 16-byte
/// aligned address.
///
/// NOTE(review): non-temporal stores are weakly ordered on x86; callers
/// presumably need a store fence (e.g. `_mm_sfence`) before other code
/// observes the data. Confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    // `vps!` builds the instruction template; `{p}` is the destination address
    // register and `{a}` the source vector register.
    crate::arch::asm!(
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2620
2621#[inline]
2626#[target_feature(enable = "sse2")]
2627#[cfg_attr(test, assert_instr(movlps))]
2628#[stable(feature = "simd_x86", since = "1.27.0")]
2629pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2630 *mem_addr = simd_extract!(a, 0)
2631}
2632
2633#[inline]
2639#[target_feature(enable = "sse2")]
2640#[cfg_attr(
2641 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2642 assert_instr(movaps)
2643)]
2644#[stable(feature = "simd_x86", since = "1.27.0")]
2645#[allow(clippy::cast_ptr_alignment)]
2646pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2647 *(mem_addr as *mut __m128d) = a;
2648}
2649
2650#[inline]
2656#[target_feature(enable = "sse2")]
2657#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2659pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2660 mem_addr.cast::<__m128d>().write_unaligned(a);
2661}
2662
2663#[inline]
2669#[target_feature(enable = "sse2")]
2670#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2671pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2672 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2673}
2674
2675#[inline]
2681#[target_feature(enable = "sse2")]
2682#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2683pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2684 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2685}
2686
2687#[inline]
2693#[target_feature(enable = "sse2")]
2694#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2695pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2696 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2697}
2698
2699#[inline]
2705#[target_feature(enable = "sse2")]
2706#[stable(feature = "simd_x86", since = "1.27.0")]
2707#[allow(clippy::cast_ptr_alignment)]
2708pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2709 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2710 *(mem_addr as *mut __m128d) = b;
2711}
2712
2713#[inline]
2719#[target_feature(enable = "sse2")]
2720#[stable(feature = "simd_x86", since = "1.27.0")]
2721#[allow(clippy::cast_ptr_alignment)]
2722pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2723 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2724 *(mem_addr as *mut __m128d) = b;
2725}
2726
2727#[inline]
2734#[target_feature(enable = "sse2")]
2735#[stable(feature = "simd_x86", since = "1.27.0")]
2736#[allow(clippy::cast_ptr_alignment)]
2737pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2738 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2739 *(mem_addr as *mut __m128d) = b;
2740}
2741
2742#[inline]
2747#[target_feature(enable = "sse2")]
2748#[cfg_attr(test, assert_instr(movhps))]
2749#[stable(feature = "simd_x86", since = "1.27.0")]
2750pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2751 *mem_addr = simd_extract!(a, 1);
2752}
2753
2754#[inline]
2759#[target_feature(enable = "sse2")]
2760#[cfg_attr(test, assert_instr(movlps))]
2761#[stable(feature = "simd_x86", since = "1.27.0")]
2762pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2763 *mem_addr = simd_extract!(a, 0);
2764}
2765
2766#[inline]
2771#[target_feature(enable = "sse2")]
2772#[stable(feature = "simd_x86", since = "1.27.0")]
2774pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2775 let d = *mem_addr;
2776 _mm_setr_pd(d, d)
2777}
2778
2779#[inline]
2784#[target_feature(enable = "sse2")]
2785#[stable(feature = "simd_x86", since = "1.27.0")]
2787pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2788 _mm_load1_pd(mem_addr)
2789}
2790
2791#[inline]
2797#[target_feature(enable = "sse2")]
2798#[cfg_attr(
2799 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2800 assert_instr(movaps)
2801)]
2802#[stable(feature = "simd_x86", since = "1.27.0")]
2803pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2804 let a = _mm_load_pd(mem_addr);
2805 simd_shuffle!(a, a, [1, 0])
2806}
2807
2808#[inline]
2814#[target_feature(enable = "sse2")]
2815#[cfg_attr(test, assert_instr(movups))]
2816#[stable(feature = "simd_x86", since = "1.27.0")]
2817pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2818 let mut dst = _mm_undefined_pd();
2819 ptr::copy_nonoverlapping(
2820 mem_addr as *const u8,
2821 ptr::addr_of_mut!(dst) as *mut u8,
2822 mem::size_of::<__m128d>(),
2823 );
2824 dst
2825}
2826
2827#[inline]
2833#[target_feature(enable = "sse2")]
2834#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2835pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2836 transmute(i16x8::new(
2837 ptr::read_unaligned(mem_addr as *const i16),
2838 0,
2839 0,
2840 0,
2841 0,
2842 0,
2843 0,
2844 0,
2845 ))
2846}
2847
2848#[inline]
2854#[target_feature(enable = "sse2")]
2855#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2856pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2857 transmute(i32x4::new(
2858 ptr::read_unaligned(mem_addr as *const i32),
2859 0,
2860 0,
2861 0,
2862 ))
2863}
2864
2865#[inline]
2871#[target_feature(enable = "sse2")]
2872#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
2873pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
2874 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
2875}
2876
/// Builds a vector from one lane of `a` and one lane of `b`, chosen by `MASK`.
///
/// Bit 0 of `MASK` selects which lane of `a` becomes the low result lane and
/// bit 1 selects which lane of `b` becomes the high result lane. `MASK` is
/// checked at compile time to fit in eight bits; only bits 0 and 1 are used.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(MASK, 8);
    // Indices 0..=1 address `a`; adding 2 moves the second index into `b`.
    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
}
2891
2892#[inline]
2898#[target_feature(enable = "sse2")]
2899#[cfg_attr(test, assert_instr(movsd))]
2900#[stable(feature = "simd_x86", since = "1.27.0")]
2901pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2902 unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
2903}
2904
2905#[inline]
2910#[target_feature(enable = "sse2")]
2911#[stable(feature = "simd_x86", since = "1.27.0")]
2912pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
2913 unsafe { transmute(a) }
2914}
2915
2916#[inline]
2921#[target_feature(enable = "sse2")]
2922#[stable(feature = "simd_x86", since = "1.27.0")]
2923pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
2924 unsafe { transmute(a) }
2925}
2926
2927#[inline]
2932#[target_feature(enable = "sse2")]
2933#[stable(feature = "simd_x86", since = "1.27.0")]
2934pub fn _mm_castps_pd(a: __m128) -> __m128d {
2935 unsafe { transmute(a) }
2936}
2937
2938#[inline]
2943#[target_feature(enable = "sse2")]
2944#[stable(feature = "simd_x86", since = "1.27.0")]
2945pub fn _mm_castps_si128(a: __m128) -> __m128i {
2946 unsafe { transmute(a) }
2947}
2948
2949#[inline]
2954#[target_feature(enable = "sse2")]
2955#[stable(feature = "simd_x86", since = "1.27.0")]
2956pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2957 unsafe { transmute(a) }
2958}
2959
2960#[inline]
2965#[target_feature(enable = "sse2")]
2966#[stable(feature = "simd_x86", since = "1.27.0")]
2967pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2968 unsafe { transmute(a) }
2969}
2970
2971#[inline]
2978#[target_feature(enable = "sse2")]
2979#[stable(feature = "simd_x86", since = "1.27.0")]
2980pub fn _mm_undefined_pd() -> __m128d {
2981 const { unsafe { mem::zeroed() } }
2982}
2983
2984#[inline]
2991#[target_feature(enable = "sse2")]
2992#[stable(feature = "simd_x86", since = "1.27.0")]
2993pub fn _mm_undefined_si128() -> __m128i {
2994 const { unsafe { mem::zeroed() } }
2995}
2996
2997#[inline]
3005#[target_feature(enable = "sse2")]
3006#[cfg_attr(test, assert_instr(unpckhpd))]
3007#[stable(feature = "simd_x86", since = "1.27.0")]
3008pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
3009 unsafe { simd_shuffle!(a, b, [1, 3]) }
3010}
3011
3012#[inline]
3020#[target_feature(enable = "sse2")]
3021#[cfg_attr(test, assert_instr(movlhps))]
3022#[stable(feature = "simd_x86", since = "1.27.0")]
3023pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
3024 unsafe { simd_shuffle!(a, b, [0, 2]) }
3025}
3026
3027#[allow(improper_ctypes)]
3028unsafe extern "C" {
3029 #[link_name = "llvm.x86.sse2.pause"]
3030 fn pause();
3031 #[link_name = "llvm.x86.sse2.clflush"]
3032 fn clflush(p: *const u8);
3033 #[link_name = "llvm.x86.sse2.lfence"]
3034 fn lfence();
3035 #[link_name = "llvm.x86.sse2.mfence"]
3036 fn mfence();
3037 #[link_name = "llvm.x86.sse2.pmadd.wd"]
3038 fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
3039 #[link_name = "llvm.x86.sse2.psad.bw"]
3040 fn psadbw(a: u8x16, b: u8x16) -> u64x2;
3041 #[link_name = "llvm.x86.sse2.psll.w"]
3042 fn psllw(a: i16x8, count: i16x8) -> i16x8;
3043 #[link_name = "llvm.x86.sse2.psll.d"]
3044 fn pslld(a: i32x4, count: i32x4) -> i32x4;
3045 #[link_name = "llvm.x86.sse2.psll.q"]
3046 fn psllq(a: i64x2, count: i64x2) -> i64x2;
3047 #[link_name = "llvm.x86.sse2.psra.w"]
3048 fn psraw(a: i16x8, count: i16x8) -> i16x8;
3049 #[link_name = "llvm.x86.sse2.psra.d"]
3050 fn psrad(a: i32x4, count: i32x4) -> i32x4;
3051 #[link_name = "llvm.x86.sse2.psrl.w"]
3052 fn psrlw(a: i16x8, count: i16x8) -> i16x8;
3053 #[link_name = "llvm.x86.sse2.psrl.d"]
3054 fn psrld(a: i32x4, count: i32x4) -> i32x4;
3055 #[link_name = "llvm.x86.sse2.psrl.q"]
3056 fn psrlq(a: i64x2, count: i64x2) -> i64x2;
3057 #[link_name = "llvm.x86.sse2.cvtps2dq"]
3058 fn cvtps2dq(a: __m128) -> i32x4;
3059 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
3060 fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
3061 #[link_name = "llvm.x86.sse2.packsswb.128"]
3062 fn packsswb(a: i16x8, b: i16x8) -> i8x16;
3063 #[link_name = "llvm.x86.sse2.packssdw.128"]
3064 fn packssdw(a: i32x4, b: i32x4) -> i16x8;
3065 #[link_name = "llvm.x86.sse2.packuswb.128"]
3066 fn packuswb(a: i16x8, b: i16x8) -> u8x16;
3067 #[link_name = "llvm.x86.sse2.max.sd"]
3068 fn maxsd(a: __m128d, b: __m128d) -> __m128d;
3069 #[link_name = "llvm.x86.sse2.max.pd"]
3070 fn maxpd(a: __m128d, b: __m128d) -> __m128d;
3071 #[link_name = "llvm.x86.sse2.min.sd"]
3072 fn minsd(a: __m128d, b: __m128d) -> __m128d;
3073 #[link_name = "llvm.x86.sse2.min.pd"]
3074 fn minpd(a: __m128d, b: __m128d) -> __m128d;
3075 #[link_name = "llvm.x86.sse2.cmp.sd"]
3076 fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3077 #[link_name = "llvm.x86.sse2.cmp.pd"]
3078 fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3079 #[link_name = "llvm.x86.sse2.comieq.sd"]
3080 fn comieqsd(a: __m128d, b: __m128d) -> i32;
3081 #[link_name = "llvm.x86.sse2.comilt.sd"]
3082 fn comiltsd(a: __m128d, b: __m128d) -> i32;
3083 #[link_name = "llvm.x86.sse2.comile.sd"]
3084 fn comilesd(a: __m128d, b: __m128d) -> i32;
3085 #[link_name = "llvm.x86.sse2.comigt.sd"]
3086 fn comigtsd(a: __m128d, b: __m128d) -> i32;
3087 #[link_name = "llvm.x86.sse2.comige.sd"]
3088 fn comigesd(a: __m128d, b: __m128d) -> i32;
3089 #[link_name = "llvm.x86.sse2.comineq.sd"]
3090 fn comineqsd(a: __m128d, b: __m128d) -> i32;
3091 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
3092 fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
3093 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
3094 fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
3095 #[link_name = "llvm.x86.sse2.ucomile.sd"]
3096 fn ucomilesd(a: __m128d, b: __m128d) -> i32;
3097 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
3098 fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
3099 #[link_name = "llvm.x86.sse2.ucomige.sd"]
3100 fn ucomigesd(a: __m128d, b: __m128d) -> i32;
3101 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
3102 fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
3103 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
3104 fn cvtpd2dq(a: __m128d) -> i32x4;
3105 #[link_name = "llvm.x86.sse2.cvtsd2si"]
3106 fn cvtsd2si(a: __m128d) -> i32;
3107 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
3108 fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
3109 #[link_name = "llvm.x86.sse2.cvtss2sd"]
3110 fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
3111 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
3112 fn cvttpd2dq(a: __m128d) -> i32x4;
3113 #[link_name = "llvm.x86.sse2.cvttsd2si"]
3114 fn cvttsd2si(a: __m128d) -> i32;
3115 #[link_name = "llvm.x86.sse2.cvttps2dq"]
3116 fn cvttps2dq(a: __m128) -> i32x4;
3117}
3118
3119#[cfg(test)]
3120mod tests {
3121 use crate::{
3122 core_arch::{simd::*, x86::*},
3123 hint::black_box,
3124 };
3125 use std::{
3126 boxed, f32, f64,
3127 mem::{self, transmute},
3128 ptr,
3129 };
3130 use stdarch_test::simd_test;
3131
    /// Local shorthand for the `f64` NaN value used by the tests in this module.
    const NAN: f64 = f64::NAN;
3133
3134 #[test]
3135 fn test_mm_pause() {
3136 unsafe { _mm_pause() }
3137 }
3138
3139 #[simd_test(enable = "sse2")]
3140 unsafe fn test_mm_clflush() {
3141 let x = 0_u8;
3142 _mm_clflush(ptr::addr_of!(x));
3143 }
3144
    /// Smoke test: `_mm_lfence` must be callable without crashing.
    #[simd_test(enable = "sse2")]
    // Skipped under Miri — presumably because Miri cannot execute the fence; confirm.
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }
3151
    /// Smoke test: `_mm_mfence` must be callable without crashing.
    #[simd_test(enable = "sse2")]
    // Skipped under Miri — presumably because Miri cannot execute the fence; confirm.
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }
3158
3159 #[simd_test(enable = "sse2")]
3160 unsafe fn test_mm_add_epi8() {
3161 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3162 #[rustfmt::skip]
3163 let b = _mm_setr_epi8(
3164 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3165 );
3166 let r = _mm_add_epi8(a, b);
3167 #[rustfmt::skip]
3168 let e = _mm_setr_epi8(
3169 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3170 );
3171 assert_eq_m128i(r, e);
3172 }
3173
3174 #[simd_test(enable = "sse2")]
3175 unsafe fn test_mm_add_epi8_overflow() {
3176 let a = _mm_set1_epi8(0x7F);
3177 let b = _mm_set1_epi8(1);
3178 let r = _mm_add_epi8(a, b);
3179 assert_eq_m128i(r, _mm_set1_epi8(-128));
3180 }
3181
3182 #[simd_test(enable = "sse2")]
3183 unsafe fn test_mm_add_epi16() {
3184 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3185 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3186 let r = _mm_add_epi16(a, b);
3187 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3188 assert_eq_m128i(r, e);
3189 }
3190
3191 #[simd_test(enable = "sse2")]
3192 unsafe fn test_mm_add_epi32() {
3193 let a = _mm_setr_epi32(0, 1, 2, 3);
3194 let b = _mm_setr_epi32(4, 5, 6, 7);
3195 let r = _mm_add_epi32(a, b);
3196 let e = _mm_setr_epi32(4, 6, 8, 10);
3197 assert_eq_m128i(r, e);
3198 }
3199
3200 #[simd_test(enable = "sse2")]
3201 unsafe fn test_mm_add_epi64() {
3202 let a = _mm_setr_epi64x(0, 1);
3203 let b = _mm_setr_epi64x(2, 3);
3204 let r = _mm_add_epi64(a, b);
3205 let e = _mm_setr_epi64x(2, 4);
3206 assert_eq_m128i(r, e);
3207 }
3208
3209 #[simd_test(enable = "sse2")]
3210 unsafe fn test_mm_adds_epi8() {
3211 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3212 #[rustfmt::skip]
3213 let b = _mm_setr_epi8(
3214 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3215 );
3216 let r = _mm_adds_epi8(a, b);
3217 #[rustfmt::skip]
3218 let e = _mm_setr_epi8(
3219 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3220 );
3221 assert_eq_m128i(r, e);
3222 }
3223
3224 #[simd_test(enable = "sse2")]
3225 unsafe fn test_mm_adds_epi8_saturate_positive() {
3226 let a = _mm_set1_epi8(0x7F);
3227 let b = _mm_set1_epi8(1);
3228 let r = _mm_adds_epi8(a, b);
3229 assert_eq_m128i(r, a);
3230 }
3231
3232 #[simd_test(enable = "sse2")]
3233 unsafe fn test_mm_adds_epi8_saturate_negative() {
3234 let a = _mm_set1_epi8(-0x80);
3235 let b = _mm_set1_epi8(-1);
3236 let r = _mm_adds_epi8(a, b);
3237 assert_eq_m128i(r, a);
3238 }
3239
3240 #[simd_test(enable = "sse2")]
3241 unsafe fn test_mm_adds_epi16() {
3242 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3243 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3244 let r = _mm_adds_epi16(a, b);
3245 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3246 assert_eq_m128i(r, e);
3247 }
3248
3249 #[simd_test(enable = "sse2")]
3250 unsafe fn test_mm_adds_epi16_saturate_positive() {
3251 let a = _mm_set1_epi16(0x7FFF);
3252 let b = _mm_set1_epi16(1);
3253 let r = _mm_adds_epi16(a, b);
3254 assert_eq_m128i(r, a);
3255 }
3256
3257 #[simd_test(enable = "sse2")]
3258 unsafe fn test_mm_adds_epi16_saturate_negative() {
3259 let a = _mm_set1_epi16(-0x8000);
3260 let b = _mm_set1_epi16(-1);
3261 let r = _mm_adds_epi16(a, b);
3262 assert_eq_m128i(r, a);
3263 }
3264
3265 #[simd_test(enable = "sse2")]
3266 unsafe fn test_mm_adds_epu8() {
3267 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3268 #[rustfmt::skip]
3269 let b = _mm_setr_epi8(
3270 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3271 );
3272 let r = _mm_adds_epu8(a, b);
3273 #[rustfmt::skip]
3274 let e = _mm_setr_epi8(
3275 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3276 );
3277 assert_eq_m128i(r, e);
3278 }
3279
3280 #[simd_test(enable = "sse2")]
3281 unsafe fn test_mm_adds_epu8_saturate() {
3282 let a = _mm_set1_epi8(!0);
3283 let b = _mm_set1_epi8(1);
3284 let r = _mm_adds_epu8(a, b);
3285 assert_eq_m128i(r, a);
3286 }
3287
3288 #[simd_test(enable = "sse2")]
3289 unsafe fn test_mm_adds_epu16() {
3290 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3291 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3292 let r = _mm_adds_epu16(a, b);
3293 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3294 assert_eq_m128i(r, e);
3295 }
3296
3297 #[simd_test(enable = "sse2")]
3298 unsafe fn test_mm_adds_epu16_saturate() {
3299 let a = _mm_set1_epi16(!0);
3300 let b = _mm_set1_epi16(1);
3301 let r = _mm_adds_epu16(a, b);
3302 assert_eq_m128i(r, a);
3303 }
3304
3305 #[simd_test(enable = "sse2")]
3306 unsafe fn test_mm_avg_epu8() {
3307 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3308 let r = _mm_avg_epu8(a, b);
3309 assert_eq_m128i(r, _mm_set1_epi8(6));
3310 }
3311
3312 #[simd_test(enable = "sse2")]
3313 unsafe fn test_mm_avg_epu16() {
3314 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3315 let r = _mm_avg_epu16(a, b);
3316 assert_eq_m128i(r, _mm_set1_epi16(6));
3317 }
3318
3319 #[simd_test(enable = "sse2")]
3320 unsafe fn test_mm_madd_epi16() {
3321 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3322 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3323 let r = _mm_madd_epi16(a, b);
3324 let e = _mm_setr_epi32(29, 81, 149, 233);
3325 assert_eq_m128i(r, e);
3326
3327 let a = _mm_setr_epi16(
3330 i16::MAX,
3331 i16::MAX,
3332 i16::MIN,
3333 i16::MIN,
3334 i16::MIN,
3335 i16::MAX,
3336 0,
3337 0,
3338 );
3339 let b = _mm_setr_epi16(
3340 i16::MAX,
3341 i16::MAX,
3342 i16::MIN,
3343 i16::MIN,
3344 i16::MAX,
3345 i16::MIN,
3346 0,
3347 0,
3348 );
3349 let r = _mm_madd_epi16(a, b);
3350 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3351 assert_eq_m128i(r, e);
3352 }
3353
3354 #[simd_test(enable = "sse2")]
3355 unsafe fn test_mm_max_epi16() {
3356 let a = _mm_set1_epi16(1);
3357 let b = _mm_set1_epi16(-1);
3358 let r = _mm_max_epi16(a, b);
3359 assert_eq_m128i(r, a);
3360 }
3361
3362 #[simd_test(enable = "sse2")]
3363 unsafe fn test_mm_max_epu8() {
3364 let a = _mm_set1_epi8(1);
3365 let b = _mm_set1_epi8(!0);
3366 let r = _mm_max_epu8(a, b);
3367 assert_eq_m128i(r, b);
3368 }
3369
3370 #[simd_test(enable = "sse2")]
3371 unsafe fn test_mm_min_epi16() {
3372 let a = _mm_set1_epi16(1);
3373 let b = _mm_set1_epi16(-1);
3374 let r = _mm_min_epi16(a, b);
3375 assert_eq_m128i(r, b);
3376 }
3377
3378 #[simd_test(enable = "sse2")]
3379 unsafe fn test_mm_min_epu8() {
3380 let a = _mm_set1_epi8(1);
3381 let b = _mm_set1_epi8(!0);
3382 let r = _mm_min_epu8(a, b);
3383 assert_eq_m128i(r, a);
3384 }
3385
3386 #[simd_test(enable = "sse2")]
3387 unsafe fn test_mm_mulhi_epi16() {
3388 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3389 let r = _mm_mulhi_epi16(a, b);
3390 assert_eq_m128i(r, _mm_set1_epi16(-16));
3391 }
3392
3393 #[simd_test(enable = "sse2")]
3394 unsafe fn test_mm_mulhi_epu16() {
3395 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3396 let r = _mm_mulhi_epu16(a, b);
3397 assert_eq_m128i(r, _mm_set1_epi16(15));
3398 }
3399
3400 #[simd_test(enable = "sse2")]
3401 unsafe fn test_mm_mullo_epi16() {
3402 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3403 let r = _mm_mullo_epi16(a, b);
3404 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3405 }
3406
3407 #[simd_test(enable = "sse2")]
3408 unsafe fn test_mm_mul_epu32() {
3409 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3410 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3411 let r = _mm_mul_epu32(a, b);
3412 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3413 assert_eq_m128i(r, e);
3414 }
3415
3416 #[simd_test(enable = "sse2")]
3417 unsafe fn test_mm_sad_epu8() {
3418 #[rustfmt::skip]
3419 let a = _mm_setr_epi8(
3420 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3421 1, 2, 3, 4,
3422 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3423 1, 2, 3, 4,
3424 );
3425 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3426 let r = _mm_sad_epu8(a, b);
3427 let e = _mm_setr_epi64x(1020, 614);
3428 assert_eq_m128i(r, e);
3429 }
3430
3431 #[simd_test(enable = "sse2")]
3432 unsafe fn test_mm_sub_epi8() {
3433 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3434 let r = _mm_sub_epi8(a, b);
3435 assert_eq_m128i(r, _mm_set1_epi8(-1));
3436 }
3437
3438 #[simd_test(enable = "sse2")]
3439 unsafe fn test_mm_sub_epi16() {
3440 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3441 let r = _mm_sub_epi16(a, b);
3442 assert_eq_m128i(r, _mm_set1_epi16(-1));
3443 }
3444
3445 #[simd_test(enable = "sse2")]
3446 unsafe fn test_mm_sub_epi32() {
3447 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3448 let r = _mm_sub_epi32(a, b);
3449 assert_eq_m128i(r, _mm_set1_epi32(-1));
3450 }
3451
3452 #[simd_test(enable = "sse2")]
3453 unsafe fn test_mm_sub_epi64() {
3454 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3455 let r = _mm_sub_epi64(a, b);
3456 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3457 }
3458
3459 #[simd_test(enable = "sse2")]
3460 unsafe fn test_mm_subs_epi8() {
3461 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3462 let r = _mm_subs_epi8(a, b);
3463 assert_eq_m128i(r, _mm_set1_epi8(3));
3464 }
3465
3466 #[simd_test(enable = "sse2")]
3467 unsafe fn test_mm_subs_epi8_saturate_positive() {
3468 let a = _mm_set1_epi8(0x7F);
3469 let b = _mm_set1_epi8(-1);
3470 let r = _mm_subs_epi8(a, b);
3471 assert_eq_m128i(r, a);
3472 }
3473
3474 #[simd_test(enable = "sse2")]
3475 unsafe fn test_mm_subs_epi8_saturate_negative() {
3476 let a = _mm_set1_epi8(-0x80);
3477 let b = _mm_set1_epi8(1);
3478 let r = _mm_subs_epi8(a, b);
3479 assert_eq_m128i(r, a);
3480 }
3481
3482 #[simd_test(enable = "sse2")]
3483 unsafe fn test_mm_subs_epi16() {
3484 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3485 let r = _mm_subs_epi16(a, b);
3486 assert_eq_m128i(r, _mm_set1_epi16(3));
3487 }
3488
3489 #[simd_test(enable = "sse2")]
3490 unsafe fn test_mm_subs_epi16_saturate_positive() {
3491 let a = _mm_set1_epi16(0x7FFF);
3492 let b = _mm_set1_epi16(-1);
3493 let r = _mm_subs_epi16(a, b);
3494 assert_eq_m128i(r, a);
3495 }
3496
3497 #[simd_test(enable = "sse2")]
3498 unsafe fn test_mm_subs_epi16_saturate_negative() {
3499 let a = _mm_set1_epi16(-0x8000);
3500 let b = _mm_set1_epi16(1);
3501 let r = _mm_subs_epi16(a, b);
3502 assert_eq_m128i(r, a);
3503 }
3504
3505 #[simd_test(enable = "sse2")]
3506 unsafe fn test_mm_subs_epu8() {
3507 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3508 let r = _mm_subs_epu8(a, b);
3509 assert_eq_m128i(r, _mm_set1_epi8(3));
3510 }
3511
3512 #[simd_test(enable = "sse2")]
3513 unsafe fn test_mm_subs_epu8_saturate() {
3514 let a = _mm_set1_epi8(0);
3515 let b = _mm_set1_epi8(1);
3516 let r = _mm_subs_epu8(a, b);
3517 assert_eq_m128i(r, a);
3518 }
3519
3520 #[simd_test(enable = "sse2")]
3521 unsafe fn test_mm_subs_epu16() {
3522 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3523 let r = _mm_subs_epu16(a, b);
3524 assert_eq_m128i(r, _mm_set1_epi16(3));
3525 }
3526
3527 #[simd_test(enable = "sse2")]
3528 unsafe fn test_mm_subs_epu16_saturate() {
3529 let a = _mm_set1_epi16(0);
3530 let b = _mm_set1_epi16(1);
3531 let r = _mm_subs_epu16(a, b);
3532 assert_eq_m128i(r, a);
3533 }
3534
3535 #[simd_test(enable = "sse2")]
3536 unsafe fn test_mm_slli_si128() {
3537 #[rustfmt::skip]
3538 let a = _mm_setr_epi8(
3539 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3540 );
3541 let r = _mm_slli_si128::<1>(a);
3542 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3543 assert_eq_m128i(r, e);
3544
3545 #[rustfmt::skip]
3546 let a = _mm_setr_epi8(
3547 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3548 );
3549 let r = _mm_slli_si128::<15>(a);
3550 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3551 assert_eq_m128i(r, e);
3552
3553 #[rustfmt::skip]
3554 let a = _mm_setr_epi8(
3555 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3556 );
3557 let r = _mm_slli_si128::<16>(a);
3558 assert_eq_m128i(r, _mm_set1_epi8(0));
3559 }
3560
3561 #[simd_test(enable = "sse2")]
3562 unsafe fn test_mm_slli_epi16() {
3563 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3564 let r = _mm_slli_epi16::<4>(a);
3565 assert_eq_m128i(
3566 r,
3567 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3568 );
3569 let r = _mm_slli_epi16::<16>(a);
3570 assert_eq_m128i(r, _mm_set1_epi16(0));
3571 }
3572
3573 #[simd_test(enable = "sse2")]
3574 unsafe fn test_mm_sll_epi16() {
3575 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3576 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3577 assert_eq_m128i(
3578 r,
3579 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3580 );
3581 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3582 assert_eq_m128i(r, a);
3583 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3584 assert_eq_m128i(r, _mm_set1_epi16(0));
3585 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3586 assert_eq_m128i(r, _mm_set1_epi16(0));
3587 }
3588
3589 #[simd_test(enable = "sse2")]
3590 unsafe fn test_mm_slli_epi32() {
3591 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3592 let r = _mm_slli_epi32::<4>(a);
3593 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3594 let r = _mm_slli_epi32::<32>(a);
3595 assert_eq_m128i(r, _mm_set1_epi32(0));
3596 }
3597
3598 #[simd_test(enable = "sse2")]
3599 unsafe fn test_mm_sll_epi32() {
3600 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3601 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3602 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3603 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3604 assert_eq_m128i(r, a);
3605 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3606 assert_eq_m128i(r, _mm_set1_epi32(0));
3607 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3608 assert_eq_m128i(r, _mm_set1_epi32(0));
3609 }
3610
3611 #[simd_test(enable = "sse2")]
3612 unsafe fn test_mm_slli_epi64() {
3613 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3614 let r = _mm_slli_epi64::<4>(a);
3615 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3616 let r = _mm_slli_epi64::<64>(a);
3617 assert_eq_m128i(r, _mm_set1_epi64x(0));
3618 }
3619
3620 #[simd_test(enable = "sse2")]
3621 unsafe fn test_mm_sll_epi64() {
3622 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3623 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3624 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3625 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3626 assert_eq_m128i(r, a);
3627 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3628 assert_eq_m128i(r, _mm_set1_epi64x(0));
3629 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3630 assert_eq_m128i(r, _mm_set1_epi64x(0));
3631 }
3632
3633 #[simd_test(enable = "sse2")]
3634 unsafe fn test_mm_srai_epi16() {
3635 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3636 let r = _mm_srai_epi16::<4>(a);
3637 assert_eq_m128i(
3638 r,
3639 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3640 );
3641 let r = _mm_srai_epi16::<16>(a);
3642 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3643 }
3644
3645 #[simd_test(enable = "sse2")]
3646 unsafe fn test_mm_sra_epi16() {
3647 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3648 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3649 assert_eq_m128i(
3650 r,
3651 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3652 );
3653 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3654 assert_eq_m128i(r, a);
3655 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3656 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3657 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3658 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3659 }
3660
3661 #[simd_test(enable = "sse2")]
3662 unsafe fn test_mm_srai_epi32() {
3663 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3664 let r = _mm_srai_epi32::<4>(a);
3665 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3666 let r = _mm_srai_epi32::<32>(a);
3667 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3668 }
3669
3670 #[simd_test(enable = "sse2")]
3671 unsafe fn test_mm_sra_epi32() {
3672 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3673 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3674 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3675 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3676 assert_eq_m128i(r, a);
3677 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3678 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3679 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3680 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3681 }
3682
3683 #[simd_test(enable = "sse2")]
3684 unsafe fn test_mm_srli_si128() {
3685 #[rustfmt::skip]
3686 let a = _mm_setr_epi8(
3687 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3688 );
3689 let r = _mm_srli_si128::<1>(a);
3690 #[rustfmt::skip]
3691 let e = _mm_setr_epi8(
3692 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3693 );
3694 assert_eq_m128i(r, e);
3695
3696 #[rustfmt::skip]
3697 let a = _mm_setr_epi8(
3698 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3699 );
3700 let r = _mm_srli_si128::<15>(a);
3701 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3702 assert_eq_m128i(r, e);
3703
3704 #[rustfmt::skip]
3705 let a = _mm_setr_epi8(
3706 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3707 );
3708 let r = _mm_srli_si128::<16>(a);
3709 assert_eq_m128i(r, _mm_set1_epi8(0));
3710 }
3711
3712 #[simd_test(enable = "sse2")]
3713 unsafe fn test_mm_srli_epi16() {
3714 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3715 let r = _mm_srli_epi16::<4>(a);
3716 assert_eq_m128i(
3717 r,
3718 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3719 );
3720 let r = _mm_srli_epi16::<16>(a);
3721 assert_eq_m128i(r, _mm_set1_epi16(0));
3722 }
3723
3724 #[simd_test(enable = "sse2")]
3725 unsafe fn test_mm_srl_epi16() {
3726 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3727 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3728 assert_eq_m128i(
3729 r,
3730 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3731 );
3732 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3733 assert_eq_m128i(r, a);
3734 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3735 assert_eq_m128i(r, _mm_set1_epi16(0));
3736 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3737 assert_eq_m128i(r, _mm_set1_epi16(0));
3738 }
3739
3740 #[simd_test(enable = "sse2")]
3741 unsafe fn test_mm_srli_epi32() {
3742 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3743 let r = _mm_srli_epi32::<4>(a);
3744 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3745 let r = _mm_srli_epi32::<32>(a);
3746 assert_eq_m128i(r, _mm_set1_epi32(0));
3747 }
3748
3749 #[simd_test(enable = "sse2")]
3750 unsafe fn test_mm_srl_epi32() {
3751 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3752 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3753 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3754 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3755 assert_eq_m128i(r, a);
3756 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3757 assert_eq_m128i(r, _mm_set1_epi32(0));
3758 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3759 assert_eq_m128i(r, _mm_set1_epi32(0));
3760 }
3761
3762 #[simd_test(enable = "sse2")]
3763 unsafe fn test_mm_srli_epi64() {
3764 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3765 let r = _mm_srli_epi64::<4>(a);
3766 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3767 let r = _mm_srli_epi64::<64>(a);
3768 assert_eq_m128i(r, _mm_set1_epi64x(0));
3769 }
3770
3771 #[simd_test(enable = "sse2")]
3772 unsafe fn test_mm_srl_epi64() {
3773 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3774 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3775 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3776 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3777 assert_eq_m128i(r, a);
3778 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3779 assert_eq_m128i(r, _mm_set1_epi64x(0));
3780 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3781 assert_eq_m128i(r, _mm_set1_epi64x(0));
3782 }
3783
3784 #[simd_test(enable = "sse2")]
3785 unsafe fn test_mm_and_si128() {
3786 let a = _mm_set1_epi8(5);
3787 let b = _mm_set1_epi8(3);
3788 let r = _mm_and_si128(a, b);
3789 assert_eq_m128i(r, _mm_set1_epi8(1));
3790 }
3791
3792 #[simd_test(enable = "sse2")]
3793 unsafe fn test_mm_andnot_si128() {
3794 let a = _mm_set1_epi8(5);
3795 let b = _mm_set1_epi8(3);
3796 let r = _mm_andnot_si128(a, b);
3797 assert_eq_m128i(r, _mm_set1_epi8(2));
3798 }
3799
3800 #[simd_test(enable = "sse2")]
3801 unsafe fn test_mm_or_si128() {
3802 let a = _mm_set1_epi8(5);
3803 let b = _mm_set1_epi8(3);
3804 let r = _mm_or_si128(a, b);
3805 assert_eq_m128i(r, _mm_set1_epi8(7));
3806 }
3807
3808 #[simd_test(enable = "sse2")]
3809 unsafe fn test_mm_xor_si128() {
3810 let a = _mm_set1_epi8(5);
3811 let b = _mm_set1_epi8(3);
3812 let r = _mm_xor_si128(a, b);
3813 assert_eq_m128i(r, _mm_set1_epi8(6));
3814 }
3815
3816 #[simd_test(enable = "sse2")]
3817 unsafe fn test_mm_cmpeq_epi8() {
3818 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3819 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3820 let r = _mm_cmpeq_epi8(a, b);
3821 #[rustfmt::skip]
3822 assert_eq_m128i(
3823 r,
3824 _mm_setr_epi8(
3825 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3826 )
3827 );
3828 }
3829
3830 #[simd_test(enable = "sse2")]
3831 unsafe fn test_mm_cmpeq_epi16() {
3832 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3833 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3834 let r = _mm_cmpeq_epi16(a, b);
3835 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3836 }
3837
3838 #[simd_test(enable = "sse2")]
3839 unsafe fn test_mm_cmpeq_epi32() {
3840 let a = _mm_setr_epi32(0, 1, 2, 3);
3841 let b = _mm_setr_epi32(3, 2, 2, 0);
3842 let r = _mm_cmpeq_epi32(a, b);
3843 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3844 }
3845
3846 #[simd_test(enable = "sse2")]
3847 unsafe fn test_mm_cmpgt_epi8() {
3848 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3849 let b = _mm_set1_epi8(0);
3850 let r = _mm_cmpgt_epi8(a, b);
3851 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3852 assert_eq_m128i(r, e);
3853 }
3854
3855 #[simd_test(enable = "sse2")]
3856 unsafe fn test_mm_cmpgt_epi16() {
3857 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3858 let b = _mm_set1_epi16(0);
3859 let r = _mm_cmpgt_epi16(a, b);
3860 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3861 assert_eq_m128i(r, e);
3862 }
3863
3864 #[simd_test(enable = "sse2")]
3865 unsafe fn test_mm_cmpgt_epi32() {
3866 let a = _mm_set_epi32(5, 0, 0, 0);
3867 let b = _mm_set1_epi32(0);
3868 let r = _mm_cmpgt_epi32(a, b);
3869 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3870 }
3871
3872 #[simd_test(enable = "sse2")]
3873 unsafe fn test_mm_cmplt_epi8() {
3874 let a = _mm_set1_epi8(0);
3875 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3876 let r = _mm_cmplt_epi8(a, b);
3877 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3878 assert_eq_m128i(r, e);
3879 }
3880
3881 #[simd_test(enable = "sse2")]
3882 unsafe fn test_mm_cmplt_epi16() {
3883 let a = _mm_set1_epi16(0);
3884 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3885 let r = _mm_cmplt_epi16(a, b);
3886 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3887 assert_eq_m128i(r, e);
3888 }
3889
3890 #[simd_test(enable = "sse2")]
3891 unsafe fn test_mm_cmplt_epi32() {
3892 let a = _mm_set1_epi32(0);
3893 let b = _mm_set_epi32(5, 0, 0, 0);
3894 let r = _mm_cmplt_epi32(a, b);
3895 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3896 }
3897
3898 #[simd_test(enable = "sse2")]
3899 unsafe fn test_mm_cvtepi32_pd() {
3900 let a = _mm_set_epi32(35, 25, 15, 5);
3901 let r = _mm_cvtepi32_pd(a);
3902 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3903 }
3904
3905 #[simd_test(enable = "sse2")]
3906 unsafe fn test_mm_cvtsi32_sd() {
3907 let a = _mm_set1_pd(3.5);
3908 let r = _mm_cvtsi32_sd(a, 5);
3909 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3910 }
3911
3912 #[simd_test(enable = "sse2")]
3913 unsafe fn test_mm_cvtepi32_ps() {
3914 let a = _mm_setr_epi32(1, 2, 3, 4);
3915 let r = _mm_cvtepi32_ps(a);
3916 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3917 }
3918
3919 #[simd_test(enable = "sse2")]
3920 unsafe fn test_mm_cvtps_epi32() {
3921 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3922 let r = _mm_cvtps_epi32(a);
3923 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3924 }
3925
3926 #[simd_test(enable = "sse2")]
3927 unsafe fn test_mm_cvtsi32_si128() {
3928 let r = _mm_cvtsi32_si128(5);
3929 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3930 }
3931
3932 #[simd_test(enable = "sse2")]
3933 unsafe fn test_mm_cvtsi128_si32() {
3934 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3935 assert_eq!(r, 5);
3936 }
3937
3938 #[simd_test(enable = "sse2")]
3939 unsafe fn test_mm_set_epi64x() {
3940 let r = _mm_set_epi64x(0, 1);
3941 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3942 }
3943
3944 #[simd_test(enable = "sse2")]
3945 unsafe fn test_mm_set_epi32() {
3946 let r = _mm_set_epi32(0, 1, 2, 3);
3947 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3948 }
3949
3950 #[simd_test(enable = "sse2")]
3951 unsafe fn test_mm_set_epi16() {
3952 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3953 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3954 }
3955
3956 #[simd_test(enable = "sse2")]
3957 unsafe fn test_mm_set_epi8() {
3958 #[rustfmt::skip]
3959 let r = _mm_set_epi8(
3960 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3961 );
3962 #[rustfmt::skip]
3963 let e = _mm_setr_epi8(
3964 15, 14, 13, 12, 11, 10, 9, 8,
3965 7, 6, 5, 4, 3, 2, 1, 0,
3966 );
3967 assert_eq_m128i(r, e);
3968 }
3969
3970 #[simd_test(enable = "sse2")]
3971 unsafe fn test_mm_set1_epi64x() {
3972 let r = _mm_set1_epi64x(1);
3973 assert_eq_m128i(r, _mm_set1_epi64x(1));
3974 }
3975
3976 #[simd_test(enable = "sse2")]
3977 unsafe fn test_mm_set1_epi32() {
3978 let r = _mm_set1_epi32(1);
3979 assert_eq_m128i(r, _mm_set1_epi32(1));
3980 }
3981
3982 #[simd_test(enable = "sse2")]
3983 unsafe fn test_mm_set1_epi16() {
3984 let r = _mm_set1_epi16(1);
3985 assert_eq_m128i(r, _mm_set1_epi16(1));
3986 }
3987
3988 #[simd_test(enable = "sse2")]
3989 unsafe fn test_mm_set1_epi8() {
3990 let r = _mm_set1_epi8(1);
3991 assert_eq_m128i(r, _mm_set1_epi8(1));
3992 }
3993
3994 #[simd_test(enable = "sse2")]
3995 unsafe fn test_mm_setr_epi32() {
3996 let r = _mm_setr_epi32(0, 1, 2, 3);
3997 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
3998 }
3999
4000 #[simd_test(enable = "sse2")]
4001 unsafe fn test_mm_setr_epi16() {
4002 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4003 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
4004 }
4005
4006 #[simd_test(enable = "sse2")]
4007 unsafe fn test_mm_setr_epi8() {
4008 #[rustfmt::skip]
4009 let r = _mm_setr_epi8(
4010 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4011 );
4012 #[rustfmt::skip]
4013 let e = _mm_setr_epi8(
4014 0, 1, 2, 3, 4, 5, 6, 7,
4015 8, 9, 10, 11, 12, 13, 14, 15,
4016 );
4017 assert_eq_m128i(r, e);
4018 }
4019
4020 #[simd_test(enable = "sse2")]
4021 unsafe fn test_mm_setzero_si128() {
4022 let r = _mm_setzero_si128();
4023 assert_eq_m128i(r, _mm_set1_epi64x(0));
4024 }
4025
4026 #[simd_test(enable = "sse2")]
4027 unsafe fn test_mm_loadl_epi64() {
4028 let a = _mm_setr_epi64x(6, 5);
4029 let r = _mm_loadl_epi64(ptr::addr_of!(a));
4030 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4031 }
4032
4033 #[simd_test(enable = "sse2")]
4034 unsafe fn test_mm_load_si128() {
4035 let a = _mm_set_epi64x(5, 6);
4036 let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4037 assert_eq_m128i(a, r);
4038 }
4039
4040 #[simd_test(enable = "sse2")]
4041 unsafe fn test_mm_loadu_si128() {
4042 let a = _mm_set_epi64x(5, 6);
4043 let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4044 assert_eq_m128i(a, r);
4045 }
4046
4047 #[simd_test(enable = "sse2")]
4048 #[cfg_attr(miri, ignore)]
4051 unsafe fn test_mm_maskmoveu_si128() {
4052 let a = _mm_set1_epi8(9);
4053 #[rustfmt::skip]
4054 let mask = _mm_set_epi8(
4055 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4056 0, 0, 0, 0, 0, 0, 0, 0,
4057 );
4058 let mut r = _mm_set1_epi8(0);
4059 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4060 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4061 assert_eq_m128i(r, e);
4062 }
4063
4064 #[simd_test(enable = "sse2")]
4065 unsafe fn test_mm_store_si128() {
4066 let a = _mm_set1_epi8(9);
4067 let mut r = _mm_set1_epi8(0);
4068 _mm_store_si128(&mut r, a);
4069 assert_eq_m128i(r, a);
4070 }
4071
4072 #[simd_test(enable = "sse2")]
4073 unsafe fn test_mm_storeu_si128() {
4074 let a = _mm_set1_epi8(9);
4075 let mut r = _mm_set1_epi8(0);
4076 _mm_storeu_si128(&mut r, a);
4077 assert_eq_m128i(r, a);
4078 }
4079
4080 #[simd_test(enable = "sse2")]
4081 unsafe fn test_mm_storel_epi64() {
4082 let a = _mm_setr_epi64x(2, 9);
4083 let mut r = _mm_set1_epi8(0);
4084 _mm_storel_epi64(&mut r, a);
4085 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4086 }
4087
4088 #[simd_test(enable = "sse2")]
4089 #[cfg_attr(miri, ignore)]
4092 unsafe fn test_mm_stream_si128() {
4093 let a = _mm_setr_epi32(1, 2, 3, 4);
4094 let mut r = _mm_undefined_si128();
4095 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4096 assert_eq_m128i(r, a);
4097 }
4098
4099 #[simd_test(enable = "sse2")]
4100 #[cfg_attr(miri, ignore)]
4103 unsafe fn test_mm_stream_si32() {
4104 let a: i32 = 7;
4105 let mut mem = boxed::Box::<i32>::new(-1);
4106 _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4107 assert_eq!(a, *mem);
4108 }
4109
4110 #[simd_test(enable = "sse2")]
4111 unsafe fn test_mm_move_epi64() {
4112 let a = _mm_setr_epi64x(5, 6);
4113 let r = _mm_move_epi64(a);
4114 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4115 }
4116
4117 #[simd_test(enable = "sse2")]
4118 unsafe fn test_mm_packs_epi16() {
4119 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4120 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4121 let r = _mm_packs_epi16(a, b);
4122 #[rustfmt::skip]
4123 assert_eq_m128i(
4124 r,
4125 _mm_setr_epi8(
4126 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4127 )
4128 );
4129 }
4130
4131 #[simd_test(enable = "sse2")]
4132 unsafe fn test_mm_packs_epi32() {
4133 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4134 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4135 let r = _mm_packs_epi32(a, b);
4136 assert_eq_m128i(
4137 r,
4138 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4139 );
4140 }
4141
4142 #[simd_test(enable = "sse2")]
4143 unsafe fn test_mm_packus_epi16() {
4144 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4145 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4146 let r = _mm_packus_epi16(a, b);
4147 assert_eq_m128i(
4148 r,
4149 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4150 );
4151 }
4152
4153 #[simd_test(enable = "sse2")]
4154 unsafe fn test_mm_extract_epi16() {
4155 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4156 let r1 = _mm_extract_epi16::<0>(a);
4157 let r2 = _mm_extract_epi16::<3>(a);
4158 assert_eq!(r1, 0xFFFF);
4159 assert_eq!(r2, 3);
4160 }
4161
4162 #[simd_test(enable = "sse2")]
4163 unsafe fn test_mm_insert_epi16() {
4164 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4165 let r = _mm_insert_epi16::<0>(a, 9);
4166 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4167 assert_eq_m128i(r, e);
4168 }
4169
4170 #[simd_test(enable = "sse2")]
4171 unsafe fn test_mm_movemask_epi8() {
4172 #[rustfmt::skip]
4173 let a = _mm_setr_epi8(
4174 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4175 0b0101, 0b1111_0000u8 as i8, 0, 0,
4176 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4177 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4178 );
4179 let r = _mm_movemask_epi8(a);
4180 assert_eq!(r, 0b10100110_00100101);
4181 }
4182
4183 #[simd_test(enable = "sse2")]
4184 unsafe fn test_mm_shuffle_epi32() {
4185 let a = _mm_setr_epi32(5, 10, 15, 20);
4186 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4187 let e = _mm_setr_epi32(20, 10, 10, 5);
4188 assert_eq_m128i(r, e);
4189 }
4190
4191 #[simd_test(enable = "sse2")]
4192 unsafe fn test_mm_shufflehi_epi16() {
4193 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4194 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4195 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4196 assert_eq_m128i(r, e);
4197 }
4198
4199 #[simd_test(enable = "sse2")]
4200 unsafe fn test_mm_shufflelo_epi16() {
4201 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4202 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4203 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4204 assert_eq_m128i(r, e);
4205 }
4206
4207 #[simd_test(enable = "sse2")]
4208 unsafe fn test_mm_unpackhi_epi8() {
4209 #[rustfmt::skip]
4210 let a = _mm_setr_epi8(
4211 0, 1, 2, 3, 4, 5, 6, 7,
4212 8, 9, 10, 11, 12, 13, 14, 15,
4213 );
4214 #[rustfmt::skip]
4215 let b = _mm_setr_epi8(
4216 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4217 );
4218 let r = _mm_unpackhi_epi8(a, b);
4219 #[rustfmt::skip]
4220 let e = _mm_setr_epi8(
4221 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4222 );
4223 assert_eq_m128i(r, e);
4224 }
4225
4226 #[simd_test(enable = "sse2")]
4227 unsafe fn test_mm_unpackhi_epi16() {
4228 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4229 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4230 let r = _mm_unpackhi_epi16(a, b);
4231 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4232 assert_eq_m128i(r, e);
4233 }
4234
4235 #[simd_test(enable = "sse2")]
4236 unsafe fn test_mm_unpackhi_epi32() {
4237 let a = _mm_setr_epi32(0, 1, 2, 3);
4238 let b = _mm_setr_epi32(4, 5, 6, 7);
4239 let r = _mm_unpackhi_epi32(a, b);
4240 let e = _mm_setr_epi32(2, 6, 3, 7);
4241 assert_eq_m128i(r, e);
4242 }
4243
4244 #[simd_test(enable = "sse2")]
4245 unsafe fn test_mm_unpackhi_epi64() {
4246 let a = _mm_setr_epi64x(0, 1);
4247 let b = _mm_setr_epi64x(2, 3);
4248 let r = _mm_unpackhi_epi64(a, b);
4249 let e = _mm_setr_epi64x(1, 3);
4250 assert_eq_m128i(r, e);
4251 }
4252
4253 #[simd_test(enable = "sse2")]
4254 unsafe fn test_mm_unpacklo_epi8() {
4255 #[rustfmt::skip]
4256 let a = _mm_setr_epi8(
4257 0, 1, 2, 3, 4, 5, 6, 7,
4258 8, 9, 10, 11, 12, 13, 14, 15,
4259 );
4260 #[rustfmt::skip]
4261 let b = _mm_setr_epi8(
4262 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4263 );
4264 let r = _mm_unpacklo_epi8(a, b);
4265 #[rustfmt::skip]
4266 let e = _mm_setr_epi8(
4267 0, 16, 1, 17, 2, 18, 3, 19,
4268 4, 20, 5, 21, 6, 22, 7, 23,
4269 );
4270 assert_eq_m128i(r, e);
4271 }
4272
4273 #[simd_test(enable = "sse2")]
4274 unsafe fn test_mm_unpacklo_epi16() {
4275 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4276 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4277 let r = _mm_unpacklo_epi16(a, b);
4278 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4279 assert_eq_m128i(r, e);
4280 }
4281
4282 #[simd_test(enable = "sse2")]
4283 unsafe fn test_mm_unpacklo_epi32() {
4284 let a = _mm_setr_epi32(0, 1, 2, 3);
4285 let b = _mm_setr_epi32(4, 5, 6, 7);
4286 let r = _mm_unpacklo_epi32(a, b);
4287 let e = _mm_setr_epi32(0, 4, 1, 5);
4288 assert_eq_m128i(r, e);
4289 }
4290
4291 #[simd_test(enable = "sse2")]
4292 unsafe fn test_mm_unpacklo_epi64() {
4293 let a = _mm_setr_epi64x(0, 1);
4294 let b = _mm_setr_epi64x(2, 3);
4295 let r = _mm_unpacklo_epi64(a, b);
4296 let e = _mm_setr_epi64x(0, 2);
4297 assert_eq_m128i(r, e);
4298 }
4299
4300 #[simd_test(enable = "sse2")]
4301 unsafe fn test_mm_add_sd() {
4302 let a = _mm_setr_pd(1.0, 2.0);
4303 let b = _mm_setr_pd(5.0, 10.0);
4304 let r = _mm_add_sd(a, b);
4305 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4306 }
4307
4308 #[simd_test(enable = "sse2")]
4309 unsafe fn test_mm_add_pd() {
4310 let a = _mm_setr_pd(1.0, 2.0);
4311 let b = _mm_setr_pd(5.0, 10.0);
4312 let r = _mm_add_pd(a, b);
4313 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4314 }
4315
4316 #[simd_test(enable = "sse2")]
4317 unsafe fn test_mm_div_sd() {
4318 let a = _mm_setr_pd(1.0, 2.0);
4319 let b = _mm_setr_pd(5.0, 10.0);
4320 let r = _mm_div_sd(a, b);
4321 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4322 }
4323
4324 #[simd_test(enable = "sse2")]
4325 unsafe fn test_mm_div_pd() {
4326 let a = _mm_setr_pd(1.0, 2.0);
4327 let b = _mm_setr_pd(5.0, 10.0);
4328 let r = _mm_div_pd(a, b);
4329 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4330 }
4331
4332 #[simd_test(enable = "sse2")]
4333 unsafe fn test_mm_max_sd() {
4334 let a = _mm_setr_pd(1.0, 2.0);
4335 let b = _mm_setr_pd(5.0, 10.0);
4336 let r = _mm_max_sd(a, b);
4337 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4338 }
4339
4340 #[simd_test(enable = "sse2")]
4341 unsafe fn test_mm_max_pd() {
4342 let a = _mm_setr_pd(1.0, 2.0);
4343 let b = _mm_setr_pd(5.0, 10.0);
4344 let r = _mm_max_pd(a, b);
4345 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4346
4347 let a = _mm_setr_pd(-0.0, 0.0);
4349 let b = _mm_setr_pd(0.0, 0.0);
4350 let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
4351 let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
4352 let a: [u8; 16] = transmute(a);
4353 let b: [u8; 16] = transmute(b);
4354 assert_eq!(r1, b);
4355 assert_eq!(r2, a);
4356 assert_ne!(a, b); }
4358
4359 #[simd_test(enable = "sse2")]
4360 unsafe fn test_mm_min_sd() {
4361 let a = _mm_setr_pd(1.0, 2.0);
4362 let b = _mm_setr_pd(5.0, 10.0);
4363 let r = _mm_min_sd(a, b);
4364 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4365 }
4366
4367 #[simd_test(enable = "sse2")]
4368 unsafe fn test_mm_min_pd() {
4369 let a = _mm_setr_pd(1.0, 2.0);
4370 let b = _mm_setr_pd(5.0, 10.0);
4371 let r = _mm_min_pd(a, b);
4372 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4373
4374 let a = _mm_setr_pd(-0.0, 0.0);
4376 let b = _mm_setr_pd(0.0, 0.0);
4377 let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
4378 let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
4379 let a: [u8; 16] = transmute(a);
4380 let b: [u8; 16] = transmute(b);
4381 assert_eq!(r1, b);
4382 assert_eq!(r2, a);
4383 assert_ne!(a, b); }
4385
4386 #[simd_test(enable = "sse2")]
4387 unsafe fn test_mm_mul_sd() {
4388 let a = _mm_setr_pd(1.0, 2.0);
4389 let b = _mm_setr_pd(5.0, 10.0);
4390 let r = _mm_mul_sd(a, b);
4391 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4392 }
4393
4394 #[simd_test(enable = "sse2")]
4395 unsafe fn test_mm_mul_pd() {
4396 let a = _mm_setr_pd(1.0, 2.0);
4397 let b = _mm_setr_pd(5.0, 10.0);
4398 let r = _mm_mul_pd(a, b);
4399 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4400 }
4401
4402 #[simd_test(enable = "sse2")]
4403 unsafe fn test_mm_sqrt_sd() {
4404 let a = _mm_setr_pd(1.0, 2.0);
4405 let b = _mm_setr_pd(5.0, 10.0);
4406 let r = _mm_sqrt_sd(a, b);
4407 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4408 }
4409
4410 #[simd_test(enable = "sse2")]
4411 unsafe fn test_mm_sqrt_pd() {
4412 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4413 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4414 }
4415
4416 #[simd_test(enable = "sse2")]
4417 unsafe fn test_mm_sub_sd() {
4418 let a = _mm_setr_pd(1.0, 2.0);
4419 let b = _mm_setr_pd(5.0, 10.0);
4420 let r = _mm_sub_sd(a, b);
4421 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4422 }
4423
4424 #[simd_test(enable = "sse2")]
4425 unsafe fn test_mm_sub_pd() {
4426 let a = _mm_setr_pd(1.0, 2.0);
4427 let b = _mm_setr_pd(5.0, 10.0);
4428 let r = _mm_sub_pd(a, b);
4429 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4430 }
4431
4432 #[simd_test(enable = "sse2")]
4433 unsafe fn test_mm_and_pd() {
4434 let a = transmute(u64x2::splat(5));
4435 let b = transmute(u64x2::splat(3));
4436 let r = _mm_and_pd(a, b);
4437 let e = transmute(u64x2::splat(1));
4438 assert_eq_m128d(r, e);
4439 }
4440
4441 #[simd_test(enable = "sse2")]
4442 unsafe fn test_mm_andnot_pd() {
4443 let a = transmute(u64x2::splat(5));
4444 let b = transmute(u64x2::splat(3));
4445 let r = _mm_andnot_pd(a, b);
4446 let e = transmute(u64x2::splat(2));
4447 assert_eq_m128d(r, e);
4448 }
4449
4450 #[simd_test(enable = "sse2")]
4451 unsafe fn test_mm_or_pd() {
4452 let a = transmute(u64x2::splat(5));
4453 let b = transmute(u64x2::splat(3));
4454 let r = _mm_or_pd(a, b);
4455 let e = transmute(u64x2::splat(7));
4456 assert_eq_m128d(r, e);
4457 }
4458
4459 #[simd_test(enable = "sse2")]
4460 unsafe fn test_mm_xor_pd() {
4461 let a = transmute(u64x2::splat(5));
4462 let b = transmute(u64x2::splat(3));
4463 let r = _mm_xor_pd(a, b);
4464 let e = transmute(u64x2::splat(6));
4465 assert_eq_m128d(r, e);
4466 }
4467
4468 #[simd_test(enable = "sse2")]
4469 unsafe fn test_mm_cmpeq_sd() {
4470 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4471 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4472 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4473 assert_eq_m128i(r, e);
4474 }
4475
4476 #[simd_test(enable = "sse2")]
4477 unsafe fn test_mm_cmplt_sd() {
4478 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4479 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4480 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4481 assert_eq_m128i(r, e);
4482 }
4483
4484 #[simd_test(enable = "sse2")]
4485 unsafe fn test_mm_cmple_sd() {
4486 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4487 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4488 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4489 assert_eq_m128i(r, e);
4490 }
4491
4492 #[simd_test(enable = "sse2")]
4493 unsafe fn test_mm_cmpgt_sd() {
4494 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4495 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4496 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4497 assert_eq_m128i(r, e);
4498 }
4499
4500 #[simd_test(enable = "sse2")]
4501 unsafe fn test_mm_cmpge_sd() {
4502 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4503 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4504 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4505 assert_eq_m128i(r, e);
4506 }
4507
4508 #[simd_test(enable = "sse2")]
4509 unsafe fn test_mm_cmpord_sd() {
4510 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4511 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4512 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4513 assert_eq_m128i(r, e);
4514 }
4515
4516 #[simd_test(enable = "sse2")]
4517 unsafe fn test_mm_cmpunord_sd() {
4518 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4519 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4520 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4521 assert_eq_m128i(r, e);
4522 }
4523
4524 #[simd_test(enable = "sse2")]
4525 unsafe fn test_mm_cmpneq_sd() {
4526 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4527 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4528 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4529 assert_eq_m128i(r, e);
4530 }
4531
4532 #[simd_test(enable = "sse2")]
4533 unsafe fn test_mm_cmpnlt_sd() {
4534 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4535 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4536 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4537 assert_eq_m128i(r, e);
4538 }
4539
4540 #[simd_test(enable = "sse2")]
4541 unsafe fn test_mm_cmpnle_sd() {
4542 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4543 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4544 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4545 assert_eq_m128i(r, e);
4546 }
4547
4548 #[simd_test(enable = "sse2")]
4549 unsafe fn test_mm_cmpngt_sd() {
4550 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4551 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4552 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4553 assert_eq_m128i(r, e);
4554 }
4555
4556 #[simd_test(enable = "sse2")]
4557 unsafe fn test_mm_cmpnge_sd() {
4558 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4559 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4560 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4561 assert_eq_m128i(r, e);
4562 }
4563
4564 #[simd_test(enable = "sse2")]
4565 unsafe fn test_mm_cmpeq_pd() {
4566 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4567 let e = _mm_setr_epi64x(!0, 0);
4568 let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4569 assert_eq_m128i(r, e);
4570 }
4571
4572 #[simd_test(enable = "sse2")]
4573 unsafe fn test_mm_cmplt_pd() {
4574 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4575 let e = _mm_setr_epi64x(0, !0);
4576 let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4577 assert_eq_m128i(r, e);
4578 }
4579
4580 #[simd_test(enable = "sse2")]
4581 unsafe fn test_mm_cmple_pd() {
4582 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4583 let e = _mm_setr_epi64x(!0, !0);
4584 let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4585 assert_eq_m128i(r, e);
4586 }
4587
4588 #[simd_test(enable = "sse2")]
4589 unsafe fn test_mm_cmpgt_pd() {
4590 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4591 let e = _mm_setr_epi64x(0, 0);
4592 let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4593 assert_eq_m128i(r, e);
4594 }
4595
4596 #[simd_test(enable = "sse2")]
4597 unsafe fn test_mm_cmpge_pd() {
4598 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4599 let e = _mm_setr_epi64x(!0, 0);
4600 let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4601 assert_eq_m128i(r, e);
4602 }
4603
4604 #[simd_test(enable = "sse2")]
4605 unsafe fn test_mm_cmpord_pd() {
4606 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4607 let e = _mm_setr_epi64x(0, !0);
4608 let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4609 assert_eq_m128i(r, e);
4610 }
4611
4612 #[simd_test(enable = "sse2")]
4613 unsafe fn test_mm_cmpunord_pd() {
4614 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4615 let e = _mm_setr_epi64x(!0, 0);
4616 let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4617 assert_eq_m128i(r, e);
4618 }
4619
4620 #[simd_test(enable = "sse2")]
4621 unsafe fn test_mm_cmpneq_pd() {
4622 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4623 let e = _mm_setr_epi64x(!0, !0);
4624 let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4625 assert_eq_m128i(r, e);
4626 }
4627
4628 #[simd_test(enable = "sse2")]
4629 unsafe fn test_mm_cmpnlt_pd() {
4630 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4631 let e = _mm_setr_epi64x(0, 0);
4632 let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4633 assert_eq_m128i(r, e);
4634 }
4635
4636 #[simd_test(enable = "sse2")]
4637 unsafe fn test_mm_cmpnle_pd() {
4638 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4639 let e = _mm_setr_epi64x(0, 0);
4640 let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4641 assert_eq_m128i(r, e);
4642 }
4643
4644 #[simd_test(enable = "sse2")]
4645 unsafe fn test_mm_cmpngt_pd() {
4646 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4647 let e = _mm_setr_epi64x(0, !0);
4648 let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4649 assert_eq_m128i(r, e);
4650 }
4651
4652 #[simd_test(enable = "sse2")]
4653 unsafe fn test_mm_cmpnge_pd() {
4654 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4655 let e = _mm_setr_epi64x(0, !0);
4656 let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4657 assert_eq_m128i(r, e);
4658 }
4659
4660 #[simd_test(enable = "sse2")]
4661 unsafe fn test_mm_comieq_sd() {
4662 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4663 assert!(_mm_comieq_sd(a, b) != 0);
4664
4665 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4666 assert!(_mm_comieq_sd(a, b) == 0);
4667 }
4668
4669 #[simd_test(enable = "sse2")]
4670 unsafe fn test_mm_comilt_sd() {
4671 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4672 assert!(_mm_comilt_sd(a, b) == 0);
4673 }
4674
4675 #[simd_test(enable = "sse2")]
4676 unsafe fn test_mm_comile_sd() {
4677 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4678 assert!(_mm_comile_sd(a, b) != 0);
4679 }
4680
4681 #[simd_test(enable = "sse2")]
4682 unsafe fn test_mm_comigt_sd() {
4683 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4684 assert!(_mm_comigt_sd(a, b) == 0);
4685 }
4686
4687 #[simd_test(enable = "sse2")]
4688 unsafe fn test_mm_comige_sd() {
4689 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4690 assert!(_mm_comige_sd(a, b) != 0);
4691 }
4692
4693 #[simd_test(enable = "sse2")]
4694 unsafe fn test_mm_comineq_sd() {
4695 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4696 assert!(_mm_comineq_sd(a, b) == 0);
4697 }
4698
4699 #[simd_test(enable = "sse2")]
4700 unsafe fn test_mm_ucomieq_sd() {
4701 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4702 assert!(_mm_ucomieq_sd(a, b) != 0);
4703
4704 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4705 assert!(_mm_ucomieq_sd(a, b) == 0);
4706 }
4707
4708 #[simd_test(enable = "sse2")]
4709 unsafe fn test_mm_ucomilt_sd() {
4710 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4711 assert!(_mm_ucomilt_sd(a, b) == 0);
4712 }
4713
4714 #[simd_test(enable = "sse2")]
4715 unsafe fn test_mm_ucomile_sd() {
4716 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4717 assert!(_mm_ucomile_sd(a, b) != 0);
4718 }
4719
4720 #[simd_test(enable = "sse2")]
4721 unsafe fn test_mm_ucomigt_sd() {
4722 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4723 assert!(_mm_ucomigt_sd(a, b) == 0);
4724 }
4725
4726 #[simd_test(enable = "sse2")]
4727 unsafe fn test_mm_ucomige_sd() {
4728 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4729 assert!(_mm_ucomige_sd(a, b) != 0);
4730 }
4731
4732 #[simd_test(enable = "sse2")]
4733 unsafe fn test_mm_ucomineq_sd() {
4734 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4735 assert!(_mm_ucomineq_sd(a, b) == 0);
4736 }
4737
4738 #[simd_test(enable = "sse2")]
4739 unsafe fn test_mm_movemask_pd() {
4740 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4741 assert_eq!(r, 0b01);
4742
4743 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4744 assert_eq!(r, 0b11);
4745 }
4746
    // Scratch buffer of four `f64` values whose storage is aligned to 16 bytes.
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }
4751
4752 #[simd_test(enable = "sse2")]
4753 unsafe fn test_mm_load_pd() {
4754 let mem = Memory {
4755 data: [1.0f64, 2.0, 3.0, 4.0],
4756 };
4757 let vals = &mem.data;
4758 let d = vals.as_ptr();
4759
4760 let r = _mm_load_pd(d);
4761 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4762 }
4763
4764 #[simd_test(enable = "sse2")]
4765 unsafe fn test_mm_load_sd() {
4766 let a = 1.;
4767 let expected = _mm_setr_pd(a, 0.);
4768 let r = _mm_load_sd(&a);
4769 assert_eq_m128d(r, expected);
4770 }
4771
4772 #[simd_test(enable = "sse2")]
4773 unsafe fn test_mm_loadh_pd() {
4774 let a = _mm_setr_pd(1., 2.);
4775 let b = 3.;
4776 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4777 let r = _mm_loadh_pd(a, &b);
4778 assert_eq_m128d(r, expected);
4779 }
4780
4781 #[simd_test(enable = "sse2")]
4782 unsafe fn test_mm_loadl_pd() {
4783 let a = _mm_setr_pd(1., 2.);
4784 let b = 3.;
4785 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4786 let r = _mm_loadl_pd(a, &b);
4787 assert_eq_m128d(r, expected);
4788 }
4789
4790 #[simd_test(enable = "sse2")]
4791 #[cfg_attr(miri, ignore)]
4794 unsafe fn test_mm_stream_pd() {
4795 #[repr(align(128))]
4796 struct Memory {
4797 pub data: [f64; 2],
4798 }
4799 let a = _mm_set1_pd(7.0);
4800 let mut mem = Memory { data: [-1.0; 2] };
4801
4802 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4803 for i in 0..2 {
4804 assert_eq!(mem.data[i], get_m128d(a, i));
4805 }
4806 }
4807
4808 #[simd_test(enable = "sse2")]
4809 unsafe fn test_mm_store_sd() {
4810 let mut dest = 0.;
4811 let a = _mm_setr_pd(1., 2.);
4812 _mm_store_sd(&mut dest, a);
4813 assert_eq!(dest, _mm_cvtsd_f64(a));
4814 }
4815
4816 #[simd_test(enable = "sse2")]
4817 unsafe fn test_mm_store_pd() {
4818 let mut mem = Memory { data: [0.0f64; 4] };
4819 let vals = &mut mem.data;
4820 let a = _mm_setr_pd(1.0, 2.0);
4821 let d = vals.as_mut_ptr();
4822
4823 _mm_store_pd(d, *black_box(&a));
4824 assert_eq!(vals[0], 1.0);
4825 assert_eq!(vals[1], 2.0);
4826 }
4827
4828 #[simd_test(enable = "sse2")]
4829 unsafe fn test_mm_storeu_pd() {
4830 let mut mem = Memory { data: [0.0f64; 4] };
4831 let vals = &mut mem.data;
4832 let a = _mm_setr_pd(1.0, 2.0);
4833
4834 let mut ofs = 0;
4835 let mut p = vals.as_mut_ptr();
4836
4837 if (p as usize) & 0xf == 0 {
4839 ofs = 1;
4840 p = p.add(1);
4841 }
4842
4843 _mm_storeu_pd(p, *black_box(&a));
4844
4845 if ofs > 0 {
4846 assert_eq!(vals[ofs - 1], 0.0);
4847 }
4848 assert_eq!(vals[ofs + 0], 1.0);
4849 assert_eq!(vals[ofs + 1], 2.0);
4850 }
4851
4852 #[simd_test(enable = "sse2")]
4853 unsafe fn test_mm_storeu_si16() {
4854 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4855 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
4856 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
4857 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
4858 assert_eq_m128i(r, e);
4859 }
4860
4861 #[simd_test(enable = "sse2")]
4862 unsafe fn test_mm_storeu_si32() {
4863 let a = _mm_setr_epi32(1, 2, 3, 4);
4864 let mut r = _mm_setr_epi32(5, 6, 7, 8);
4865 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
4866 let e = _mm_setr_epi32(1, 6, 7, 8);
4867 assert_eq_m128i(r, e);
4868 }
4869
4870 #[simd_test(enable = "sse2")]
4871 unsafe fn test_mm_storeu_si64() {
4872 let a = _mm_setr_epi64x(1, 2);
4873 let mut r = _mm_setr_epi64x(3, 4);
4874 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
4875 let e = _mm_setr_epi64x(1, 4);
4876 assert_eq_m128i(r, e);
4877 }
4878
4879 #[simd_test(enable = "sse2")]
4880 unsafe fn test_mm_store1_pd() {
4881 let mut mem = Memory { data: [0.0f64; 4] };
4882 let vals = &mut mem.data;
4883 let a = _mm_setr_pd(1.0, 2.0);
4884 let d = vals.as_mut_ptr();
4885
4886 _mm_store1_pd(d, *black_box(&a));
4887 assert_eq!(vals[0], 1.0);
4888 assert_eq!(vals[1], 1.0);
4889 }
4890
4891 #[simd_test(enable = "sse2")]
4892 unsafe fn test_mm_store_pd1() {
4893 let mut mem = Memory { data: [0.0f64; 4] };
4894 let vals = &mut mem.data;
4895 let a = _mm_setr_pd(1.0, 2.0);
4896 let d = vals.as_mut_ptr();
4897
4898 _mm_store_pd1(d, *black_box(&a));
4899 assert_eq!(vals[0], 1.0);
4900 assert_eq!(vals[1], 1.0);
4901 }
4902
4903 #[simd_test(enable = "sse2")]
4904 unsafe fn test_mm_storer_pd() {
4905 let mut mem = Memory { data: [0.0f64; 4] };
4906 let vals = &mut mem.data;
4907 let a = _mm_setr_pd(1.0, 2.0);
4908 let d = vals.as_mut_ptr();
4909
4910 _mm_storer_pd(d, *black_box(&a));
4911 assert_eq!(vals[0], 2.0);
4912 assert_eq!(vals[1], 1.0);
4913 }
4914
4915 #[simd_test(enable = "sse2")]
4916 unsafe fn test_mm_storeh_pd() {
4917 let mut dest = 0.;
4918 let a = _mm_setr_pd(1., 2.);
4919 _mm_storeh_pd(&mut dest, a);
4920 assert_eq!(dest, get_m128d(a, 1));
4921 }
4922
4923 #[simd_test(enable = "sse2")]
4924 unsafe fn test_mm_storel_pd() {
4925 let mut dest = 0.;
4926 let a = _mm_setr_pd(1., 2.);
4927 _mm_storel_pd(&mut dest, a);
4928 assert_eq!(dest, _mm_cvtsd_f64(a));
4929 }
4930
4931 #[simd_test(enable = "sse2")]
4932 unsafe fn test_mm_loadr_pd() {
4933 let mut mem = Memory {
4934 data: [1.0f64, 2.0, 3.0, 4.0],
4935 };
4936 let vals = &mut mem.data;
4937 let d = vals.as_ptr();
4938
4939 let r = _mm_loadr_pd(d);
4940 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
4941 }
4942
4943 #[simd_test(enable = "sse2")]
4944 unsafe fn test_mm_loadu_pd() {
4945 let mut mem = Memory {
4946 data: [1.0f64, 2.0, 3.0, 4.0],
4947 };
4948 let vals = &mut mem.data;
4949 let mut d = vals.as_ptr();
4950
4951 let mut offset = 0;
4953 if (d as usize) & 0xf == 0 {
4954 offset = 1;
4955 d = d.add(offset);
4956 }
4957
4958 let r = _mm_loadu_pd(d);
4959 let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
4960 assert_eq_m128d(r, e);
4961 }
4962
4963 #[simd_test(enable = "sse2")]
4964 unsafe fn test_mm_loadu_si16() {
4965 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
4966 let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
4967 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
4968 }
4969
4970 #[simd_test(enable = "sse2")]
4971 unsafe fn test_mm_loadu_si32() {
4972 let a = _mm_setr_epi32(1, 2, 3, 4);
4973 let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
4974 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
4975 }
4976
4977 #[simd_test(enable = "sse2")]
4978 unsafe fn test_mm_loadu_si64() {
4979 let a = _mm_setr_epi64x(5, 6);
4980 let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
4981 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4982 }
4983
4984 #[simd_test(enable = "sse2")]
4985 unsafe fn test_mm_cvtpd_ps() {
4986 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
4987 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
4988
4989 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
4990 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
4991
4992 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
4993 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
4994
4995 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
4996 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
4997 }
4998
4999 #[simd_test(enable = "sse2")]
5000 unsafe fn test_mm_cvtps_pd() {
5001 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
5002 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
5003
5004 let r = _mm_cvtps_pd(_mm_setr_ps(
5005 f32::MAX,
5006 f32::INFINITY,
5007 f32::NEG_INFINITY,
5008 f32::MIN,
5009 ));
5010 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
5011 }
5012
5013 #[simd_test(enable = "sse2")]
5014 unsafe fn test_mm_cvtpd_epi32() {
5015 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5016 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5017
5018 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5019 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5020
5021 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5022 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5023
5024 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5025 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5026
5027 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5028 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5029 }
5030
5031 #[simd_test(enable = "sse2")]
5032 unsafe fn test_mm_cvtsd_si32() {
5033 let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5034 assert_eq!(r, -2);
5035
5036 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5037 assert_eq!(r, i32::MIN);
5038
5039 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5040 assert_eq!(r, i32::MIN);
5041 }
5042
5043 #[simd_test(enable = "sse2")]
5044 unsafe fn test_mm_cvtsd_ss() {
5045 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5046 let b = _mm_setr_pd(2.0, -5.0);
5047
5048 let r = _mm_cvtsd_ss(a, b);
5049
5050 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5051
5052 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5053 let b = _mm_setr_pd(f64::INFINITY, -5.0);
5054
5055 let r = _mm_cvtsd_ss(a, b);
5056
5057 assert_eq_m128(
5058 r,
5059 _mm_setr_ps(
5060 f32::INFINITY,
5061 f32::NEG_INFINITY,
5062 f32::MAX,
5063 f32::NEG_INFINITY,
5064 ),
5065 );
5066 }
5067
5068 #[simd_test(enable = "sse2")]
5069 unsafe fn test_mm_cvtsd_f64() {
5070 let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5071 assert_eq!(r, -1.1);
5072 }
5073
5074 #[simd_test(enable = "sse2")]
5075 unsafe fn test_mm_cvtss_sd() {
5076 let a = _mm_setr_pd(-1.1, 2.2);
5077 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5078
5079 let r = _mm_cvtss_sd(a, b);
5080 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5081
5082 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5083 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5084
5085 let r = _mm_cvtss_sd(a, b);
5086 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5087 }
5088
5089 #[simd_test(enable = "sse2")]
5090 unsafe fn test_mm_cvttpd_epi32() {
5091 let a = _mm_setr_pd(-1.1, 2.2);
5092 let r = _mm_cvttpd_epi32(a);
5093 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5094
5095 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5096 let r = _mm_cvttpd_epi32(a);
5097 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5098 }
5099
5100 #[simd_test(enable = "sse2")]
5101 unsafe fn test_mm_cvttsd_si32() {
5102 let a = _mm_setr_pd(-1.1, 2.2);
5103 let r = _mm_cvttsd_si32(a);
5104 assert_eq!(r, -1);
5105
5106 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5107 let r = _mm_cvttsd_si32(a);
5108 assert_eq!(r, i32::MIN);
5109 }
5110
5111 #[simd_test(enable = "sse2")]
5112 unsafe fn test_mm_cvttps_epi32() {
5113 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5114 let r = _mm_cvttps_epi32(a);
5115 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5116
5117 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5118 let r = _mm_cvttps_epi32(a);
5119 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5120 }
5121
5122 #[simd_test(enable = "sse2")]
5123 unsafe fn test_mm_set_sd() {
5124 let r = _mm_set_sd(-1.0_f64);
5125 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5126 }
5127
5128 #[simd_test(enable = "sse2")]
5129 unsafe fn test_mm_set1_pd() {
5130 let r = _mm_set1_pd(-1.0_f64);
5131 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5132 }
5133
5134 #[simd_test(enable = "sse2")]
5135 unsafe fn test_mm_set_pd1() {
5136 let r = _mm_set_pd1(-2.0_f64);
5137 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5138 }
5139
5140 #[simd_test(enable = "sse2")]
5141 unsafe fn test_mm_set_pd() {
5142 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5143 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5144 }
5145
5146 #[simd_test(enable = "sse2")]
5147 unsafe fn test_mm_setr_pd() {
5148 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5149 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5150 }
5151
5152 #[simd_test(enable = "sse2")]
5153 unsafe fn test_mm_setzero_pd() {
5154 let r = _mm_setzero_pd();
5155 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5156 }
5157
5158 #[simd_test(enable = "sse2")]
5159 unsafe fn test_mm_load1_pd() {
5160 let d = -5.0;
5161 let r = _mm_load1_pd(&d);
5162 assert_eq_m128d(r, _mm_setr_pd(d, d));
5163 }
5164
5165 #[simd_test(enable = "sse2")]
5166 unsafe fn test_mm_load_pd1() {
5167 let d = -5.0;
5168 let r = _mm_load_pd1(&d);
5169 assert_eq_m128d(r, _mm_setr_pd(d, d));
5170 }
5171
5172 #[simd_test(enable = "sse2")]
5173 unsafe fn test_mm_unpackhi_pd() {
5174 let a = _mm_setr_pd(1.0, 2.0);
5175 let b = _mm_setr_pd(3.0, 4.0);
5176 let r = _mm_unpackhi_pd(a, b);
5177 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5178 }
5179
5180 #[simd_test(enable = "sse2")]
5181 unsafe fn test_mm_unpacklo_pd() {
5182 let a = _mm_setr_pd(1.0, 2.0);
5183 let b = _mm_setr_pd(3.0, 4.0);
5184 let r = _mm_unpacklo_pd(a, b);
5185 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5186 }
5187
5188 #[simd_test(enable = "sse2")]
5189 unsafe fn test_mm_shuffle_pd() {
5190 let a = _mm_setr_pd(1., 2.);
5191 let b = _mm_setr_pd(3., 4.);
5192 let expected = _mm_setr_pd(1., 3.);
5193 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5194 assert_eq_m128d(r, expected);
5195 }
5196
5197 #[simd_test(enable = "sse2")]
5198 unsafe fn test_mm_move_sd() {
5199 let a = _mm_setr_pd(1., 2.);
5200 let b = _mm_setr_pd(3., 4.);
5201 let expected = _mm_setr_pd(3., 2.);
5202 let r = _mm_move_sd(a, b);
5203 assert_eq_m128d(r, expected);
5204 }
5205
5206 #[simd_test(enable = "sse2")]
5207 unsafe fn test_mm_castpd_ps() {
5208 let a = _mm_set1_pd(0.);
5209 let expected = _mm_set1_ps(0.);
5210 let r = _mm_castpd_ps(a);
5211 assert_eq_m128(r, expected);
5212 }
5213
5214 #[simd_test(enable = "sse2")]
5215 unsafe fn test_mm_castpd_si128() {
5216 let a = _mm_set1_pd(0.);
5217 let expected = _mm_set1_epi64x(0);
5218 let r = _mm_castpd_si128(a);
5219 assert_eq_m128i(r, expected);
5220 }
5221
5222 #[simd_test(enable = "sse2")]
5223 unsafe fn test_mm_castps_pd() {
5224 let a = _mm_set1_ps(0.);
5225 let expected = _mm_set1_pd(0.);
5226 let r = _mm_castps_pd(a);
5227 assert_eq_m128d(r, expected);
5228 }
5229
5230 #[simd_test(enable = "sse2")]
5231 unsafe fn test_mm_castps_si128() {
5232 let a = _mm_set1_ps(0.);
5233 let expected = _mm_set1_epi32(0);
5234 let r = _mm_castps_si128(a);
5235 assert_eq_m128i(r, expected);
5236 }
5237
5238 #[simd_test(enable = "sse2")]
5239 unsafe fn test_mm_castsi128_pd() {
5240 let a = _mm_set1_epi64x(0);
5241 let expected = _mm_set1_pd(0.);
5242 let r = _mm_castsi128_pd(a);
5243 assert_eq_m128d(r, expected);
5244 }
5245
5246 #[simd_test(enable = "sse2")]
5247 unsafe fn test_mm_castsi128_ps() {
5248 let a = _mm_set1_epi32(0);
5249 let expected = _mm_set1_ps(0.);
5250 let r = _mm_castsi128_ps(a);
5251 assert_eq_m128(r, expected);
5252 }
5253}