use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem, ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_add(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_add(a, b) }
}
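
// Example (illustrative sketch, not part of this module): summing two arrays
// of four `f64`s with `_mm256_add_pd` via unaligned loads and stores. The
// helper `add_f64x4` is hypothetical; callers must first confirm AVX support
// at runtime, e.g. with `is_x86_feature_detected!("avx")`.
//
//     #[target_feature(enable = "avx")]
//     unsafe fn add_f64x4(a: &[f64; 4], b: &[f64; 4]) -> [f64; 4] {
//         let sum = _mm256_add_pd(_mm256_loadu_pd(a.as_ptr()), _mm256_loadu_pd(b.as_ptr()));
//         let mut out = [0.0f64; 4];
//         _mm256_storeu_pd(out.as_mut_ptr(), sum);
//         out
//     }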
48
49#[inline]
54#[target_feature(enable = "avx")]
55#[cfg_attr(test, assert_instr(vandp))]
57#[stable(feature = "simd_x86", since = "1.27.0")]
58pub fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
59 unsafe {
60 let a: u64x4 = transmute(a);
61 let b: u64x4 = transmute(b);
62 transmute(simd_and(a, b))
63 }
64}
65
66#[inline]
71#[target_feature(enable = "avx")]
72#[cfg_attr(test, assert_instr(vandps))]
73#[stable(feature = "simd_x86", since = "1.27.0")]
74pub fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
75 unsafe {
76 let a: u32x8 = transmute(a);
77 let b: u32x8 = transmute(b);
78 transmute(simd_and(a, b))
79 }
80}
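
// Example (illustrative sketch): the float bitwise ops are typically used for
// sign-bit manipulation. Clearing the top bit of every lane with
// `_mm256_and_ps` gives a lane-wise absolute value:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn abs_ps(v: __m256) -> __m256 {
//         let mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fff_ffff));
//         _mm256_and_ps(v, mask)
//     }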
81
82#[inline]
87#[target_feature(enable = "avx")]
88#[cfg_attr(test, assert_instr(vorp))]
90#[stable(feature = "simd_x86", since = "1.27.0")]
91pub fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
92 unsafe {
93 let a: u64x4 = transmute(a);
94 let b: u64x4 = transmute(b);
95 transmute(simd_or(a, b))
96 }
97}
98
99#[inline]
104#[target_feature(enable = "avx")]
105#[cfg_attr(test, assert_instr(vorps))]
106#[stable(feature = "simd_x86", since = "1.27.0")]
107pub fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
108 unsafe {
109 let a: u32x8 = transmute(a);
110 let b: u32x8 = transmute(b);
111 transmute(simd_or(a, b))
112 }
113}
114
115#[inline]
120#[target_feature(enable = "avx")]
121#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
122#[rustc_legacy_const_generics(2)]
123#[stable(feature = "simd_x86", since = "1.27.0")]
124pub fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
125 static_assert_uimm_bits!(MASK, 8);
126 unsafe {
127 simd_shuffle!(
128 a,
129 b,
130 [
131 MASK as u32 & 0b1,
132 ((MASK as u32 >> 1) & 0b1) + 4,
133 ((MASK as u32 >> 2) & 0b1) + 2,
134 ((MASK as u32 >> 3) & 0b1) + 6,
135 ],
136 )
137 }
138}
139
140#[inline]
145#[target_feature(enable = "avx")]
146#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
147#[rustc_legacy_const_generics(2)]
148#[stable(feature = "simd_x86", since = "1.27.0")]
149pub fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
150 static_assert_uimm_bits!(MASK, 8);
151 unsafe {
152 simd_shuffle!(
153 a,
154 b,
155 [
156 MASK as u32 & 0b11,
157 (MASK as u32 >> 2) & 0b11,
158 ((MASK as u32 >> 4) & 0b11) + 8,
159 ((MASK as u32 >> 6) & 0b11) + 8,
160 (MASK as u32 & 0b11) + 4,
161 ((MASK as u32 >> 2) & 0b11) + 4,
162 ((MASK as u32 >> 4) & 0b11) + 12,
163 ((MASK as u32 >> 6) & 0b11) + 12,
164 ],
165 )
166 }
167}
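
// Example (illustrative sketch): `MASK` packs four 2-bit source indices that
// are applied within each 128-bit half. `MASK = 0b01_00_11_10` selects `a`'s
// upper pair into each result half's lower pair and `b`'s lower pair into its
// upper pair:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn cross_pairs(a: __m256, b: __m256) -> __m256 {
//         _mm256_shuffle_ps::<0b01_00_11_10>(a, b)
//     }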

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { vmaxpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { vmaxps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { vminpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { vminps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_mul(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_mul(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let add = simd_add(a, b);
        let sub = simd_sub(a, b);
        simd_shuffle!(add, sub, [4, 1, 6, 3])
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let add = simd_add(a, b);
        let sub = simd_sub(a, b);
        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
    }
}
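
// Example (illustrative sketch): `addsub` exists mainly for interleaved
// complex arithmetic. Multiplying packed complex numbers stored as
// `[re, im, re, im, ...]`:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn cmul_ps(a: __m256, b: __m256) -> __m256 {
//         let re = _mm256_moveldup_ps(a);                        // broadcast real parts
//         let im = _mm256_movehdup_ps(a);                        // broadcast imaginary parts
//         let b_swapped = _mm256_permute_ps::<0b10_11_00_01>(b); // [b.im, b.re, ...]
//         _mm256_addsub_ps(_mm256_mul_ps(re, b), _mm256_mul_ps(im, b_swapped))
//     }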

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_sub(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_sub(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_div(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_div(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundpd256(a, ROUNDING) }
}
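
// Example (illustrative sketch): `ROUNDING` takes the SSE4.1 `_MM_FROUND_*`
// flags, e.g. round-to-nearest without raising a precision exception:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn round_nearest(a: __m256d) -> __m256d {
//         _mm256_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
//     }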

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_ceil_pd(a: __m256d) -> __m256d {
    unsafe { simd_ceil(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_floor_pd(a: __m256d) -> __m256d {
    unsafe { simd_floor(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundps256(a, ROUNDING) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_ceil_ps(a: __m256) -> __m256 {
    unsafe { simd_ceil(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_floor_ps(a: __m256) -> __m256 {
    unsafe { simd_floor(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sqrt_ps(a: __m256) -> __m256 {
    unsafe { simd_fsqrt(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
    unsafe { simd_fsqrt(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                ((IMM4 as u32 >> 0) & 1) * 4 + 0,
                ((IMM4 as u32 >> 1) & 1) * 4 + 1,
                ((IMM4 as u32 >> 2) & 1) * 4 + 2,
                ((IMM4 as u32 >> 3) & 1) * 4 + 3,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                ((IMM8 as u32 >> 0) & 1) * 8 + 0,
                ((IMM8 as u32 >> 1) & 1) * 8 + 1,
                ((IMM8 as u32 >> 2) & 1) * 8 + 2,
                ((IMM8 as u32 >> 3) & 1) * 8 + 3,
                ((IMM8 as u32 >> 4) & 1) * 8 + 4,
                ((IMM8 as u32 >> 5) & 1) * 8 + 5,
                ((IMM8 as u32 >> 6) & 1) * 8 + 6,
                ((IMM8 as u32 >> 7) & 1) * 8 + 7,
            ],
        )
    }
}
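
// Example (illustrative sketch): bit `i` of `IMM8` selects lane `i` of the
// result from `b` (bit set) or `a` (bit clear). `IMM8 = 0b1111_0000`
// therefore keeps `a`'s low 128 bits and `b`'s high 128 bits:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn low_a_high_b(a: __m256, b: __m256) -> __m256 {
//         _mm256_blend_ps::<0b1111_0000>(a, b)
//     }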

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe {
        let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
        transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe {
        let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
        transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
    }
}
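
// Example (illustrative sketch): only the sign bit of each `c` lane matters,
// so a comparison result (all-ones or all-zeros per lane) is a valid mask.
// Lane-wise `max(v, 0.0)` via compare-and-select:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn relu_ps(v: __m256) -> __m256 {
//         let zero = _mm256_setzero_ps();
//         let is_neg = _mm256_cmp_ps::<_CMP_LT_OQ>(v, zero); // all-ones where v < 0
//         _mm256_blendv_ps(v, zero, is_neg)                  // take 0.0 where mask set
//     }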

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { vdpps(a, b, IMM8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { vhaddpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { vhaddps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { vhsubpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { vhsubps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_xor(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_xor(a, b))
    }
}

#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OQ: i32 = 0x00;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OS: i32 = 0x01;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OS: i32 = 0x02;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_Q: i32 = 0x03;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_UQ: i32 = 0x04;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_US: i32 = 0x05;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_US: i32 = 0x06;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_Q: i32 = 0x07;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_UQ: i32 = 0x08;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_US: i32 = 0x09;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_US: i32 = 0x0a;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OQ: i32 = 0x0b;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OQ: i32 = 0x0c;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OS: i32 = 0x0d;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OS: i32 = 0x0e;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_UQ: i32 = 0x0f;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OS: i32 = 0x10;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OQ: i32 = 0x11;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OQ: i32 = 0x12;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_S: i32 = 0x13;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_US: i32 = 0x14;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_UQ: i32 = 0x15;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_UQ: i32 = 0x16;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_S: i32 = 0x17;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_US: i32 = 0x18;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_UQ: i32 = 0x19;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_UQ: i32 = 0x1a;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OS: i32 = 0x1b;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OS: i32 = 0x1c;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OQ: i32 = 0x1d;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OQ: i32 = 0x1e;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_US: i32 = 0x1f;
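
// Naming scheme for the predicates above: O/U = ordered/unordered (how a lane
// containing NaN compares), Q/S = quiet (non-signaling) vs. signaling
// (whether comparing against a quiet NaN raises an invalid-operation
// exception).
//
// Example (illustrative sketch): counting lanes of `a` below `b` by combining
// `_mm256_cmp_ps` with `_mm256_movemask_ps`:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn count_lt(a: __m256, b: __m256) -> u32 {
//         let mask = _mm256_cmp_ps::<_CMP_LT_OQ>(a, b);
//         _mm256_movemask_ps(mask).count_ones()
//     }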

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmppd(a, b, const { IMM5 as i8 }) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmppd256(a, b, IMM5 as u8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpps(a, b, const { IMM5 as i8 }) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpps256(a, b, const { IMM5 as u8 }) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpsd(a, b, IMM5 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpss(a, b, IMM5 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
    unsafe { simd_cast(a.as_i32x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
    unsafe { simd_cast(a.as_i32x8()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
    unsafe { simd_cast(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtps_pd(a: __m128) -> __m256d {
    unsafe { simd_cast(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
    unsafe { simd_extract!(a, 0) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 3);
    unsafe { simd_extract!(a.as_i32x8(), INDEX as u32) }
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
    unsafe { simd_extract!(a.as_i32x8(), 0) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroall))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_zeroall() {
    unsafe { vzeroall() }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroupper))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_zeroupper() {
    unsafe { vzeroupper() }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
    unsafe { vpermilps256(a, b.as_i32x8()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
    unsafe { vpermilps(a, b.as_i32x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [
                (IMM8 as u32 >> 0) & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                ((IMM8 as u32 >> 0) & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ps(),
            [
                (IMM8 as u32 >> 0) & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
    unsafe { vpermilpd256(a, b.as_i64x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
    unsafe { vpermilpd(a, b.as_i64x2()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_pd(),
            [
                ((IMM4 as u32 >> 0) & 1),
                ((IMM4 as u32 >> 1) & 1),
                ((IMM4 as u32 >> 2) & 1) + 2,
                ((IMM4 as u32 >> 3) & 1) + 2,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_pd(),
            [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { vperm2f128ps256(a, b, IMM8 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { vperm2f128pd256(a, b, IMM8 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8)) }
}
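
// Example (illustrative sketch): for `permute2f128`, bits 0-1 and 4-5 of
// `IMM8` select the result's low and high 128-bit halves (0/1 = `a`'s halves,
// 2/3 = `b`'s; bit 3 or 7 zeroes that half instead). `IMM8 = 0x01` therefore
// swaps the two halves of a single vector:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn swap_halves(a: __m256i) -> __m256i {
//         _mm256_permute2f128_si256::<0x01>(a, a)
//     }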

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 {
    _mm256_set1_ps(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 {
    _mm_set1_ps(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
    _mm256_set1_pd(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
    simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
    simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_castps128_ps256(b),
            [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_castpd128_pd256(b),
            [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let dst: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            _mm256_castsi128_si256(b).as_i64x4(),
            [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
        );
        transmute(dst)
    }
}
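
// Example (illustrative sketch): `insertf128`/`extractf128` give structured
// access to the two 128-bit halves, e.g. reducing a `__m256d` to a `__m128d`
// by adding its halves:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn sum_halves(v: __m256d) -> __m128d {
//         let lo = _mm256_castpd256_pd128(v);     // low half, compiles to nothing
//         let hi = _mm256_extractf128_pd::<1>(v); // high half
//         _mm_add_pd(lo, hi)
//     }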

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
    static_assert_uimm_bits!(INDEX, 5);
    unsafe { transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
    static_assert_uimm_bits!(INDEX, 4);
    unsafe { transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
    static_assert_uimm_bits!(INDEX, 3);
    unsafe { transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovap))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
    *(mem_addr as *const __m256d)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovap))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
    *(mem_addr as *mut __m256d) = a;
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
    *(mem_addr as *const __m256)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
    *(mem_addr as *mut __m256) = a;
}
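
// Example (illustrative sketch): the aligned load/store intrinsics require
// 32-byte alignment, which plain arrays don't guarantee; a `#[repr(align(32))]`
// wrapper (or the `loadu`/`storeu` variants below) avoids undefined behavior:
//
//     #[repr(align(32))]
//     struct Aligned([f32; 8]);
//
//     #[target_feature(enable = "avx")]
//     unsafe fn load_aligned(data: &Aligned) -> __m256 {
//         _mm256_load_ps(data.0.as_ptr())
//     }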

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
    let mut dst = _mm256_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256d>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
    mem_addr.cast::<__m256d>().write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
    let mut dst = _mm256_undefined_ps();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
    mem_addr.cast::<__m256>().write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
    *mem_addr
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
    *mem_addr = a;
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
    let mut dst = _mm256_undefined_si256();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256i>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
    mem_addr.write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
    maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
    maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
    maskloadpd(mem_addr as *const i8, mask.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
    maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
    maskloadps256(mem_addr as *const i8, mask.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
    maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
    maskloadps(mem_addr as *const i8, mask.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
    maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
}
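
// Example (illustrative sketch): masked loads are handy for the tail of a
// slice, reading only the first `n` lanes (`n <= 8`) without touching memory
// past the end. Lanes whose mask top bit is clear load as zero:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn load_prefix(data: *const f32, n: i32) -> __m256 {
//         let m = |i: i32| if i < n { -1 } else { 0 };
//         let mask = _mm256_setr_epi32(m(0), m(1), m(2), m(3), m(4), m(5), m(6), m(7));
//         _mm256_maskload_ps(data, mask)
//     }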

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movehdup_ps(a: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_moveldup_ps(a: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movedup_pd(a: __m256d) -> __m256d {
    unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vlddqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
    transmute(vlddqu(mem_addr as *const i8))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}
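
// Example (illustrative sketch): the `stream` stores above are non-temporal
// and weakly ordered, so after a batch of them the data should be published
// to other threads only after an `_mm_sfence()`:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn fill_nt(dst: *mut f32, v: __m256, blocks: usize) {
//         for i in 0..blocks {
//             _mm256_stream_ps(dst.add(i * 8), v); // dst must be 32-byte aligned
//         }
//         _mm_sfence(); // order the non-temporal stores before later stores
//     }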

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrcpps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_rcp_ps(a: __m256) -> __m256 {
    unsafe { vrcpps(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
    unsafe { vrsqrtps(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe { ptestz256(a.as_i64x4(), b.as_i64x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe { ptestc256(a.as_i64x4(), b.as_i64x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe { ptestnzc256(a.as_i64x4(), b.as_i64x4()) }
}
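
// Example (illustrative sketch): `testz` with both arguments equal answers
// "is the whole vector zero?" in a single `vptest`:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn is_all_zero(v: __m256i) -> bool {
//         _mm256_testz_si256(v, v) == 1
//     }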

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 {
    unsafe { vtestzpd256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 {
    unsafe { vtestcpd256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
    unsafe { vtestnzcpd256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe { vtestzpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe { vtestcpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe { vtestnzcpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 {
    unsafe { vtestzps256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 {
    unsafe { vtestcps256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
    unsafe { vtestnzcps256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
    unsafe { vtestzps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
    unsafe { vtestcps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
    unsafe { vtestnzcps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_pd(a: __m256d) -> i32 {
    unsafe {
        let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
        simd_bitmask::<i64x4, u8>(mask).into()
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_ps(a: __m256) -> i32 {
    unsafe {
        let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
        simd_bitmask::<i32x8, u8>(mask).into()
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setzero_pd() -> __m256d {
    const { unsafe { mem::zeroed() } }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setzero_ps() -> __m256 {
    const { unsafe { mem::zeroed() } }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxor))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setzero_si256() -> __m256i {
    const { unsafe { mem::zeroed() } }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
    _mm256_setr_pd(d, c, b, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
    _mm256_setr_ps(h, g, f, e, d, c, b, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_epi8(
    e00: i8,
    e01: i8,
    e02: i8,
    e03: i8,
    e04: i8,
    e05: i8,
    e06: i8,
    e07: i8,
    e08: i8,
    e09: i8,
    e10: i8,
    e11: i8,
    e12: i8,
    e13: i8,
    e14: i8,
    e15: i8,
    e16: i8,
    e17: i8,
    e18: i8,
    e19: i8,
    e20: i8,
    e21: i8,
    e22: i8,
    e23: i8,
    e24: i8,
    e25: i8,
    e26: i8,
    e27: i8,
    e28: i8,
    e29: i8,
    e30: i8,
    e31: i8,
) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        e31, e30, e29, e28, e27, e26, e25, e24,
        e23, e22, e21, e20, e19, e18, e17, e16,
        e15, e14, e13, e12, e11, e10, e09, e08,
        e07, e06, e05, e04, e03, e02, e01, e00,
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_epi16(
    e00: i16,
    e01: i16,
    e02: i16,
    e03: i16,
    e04: i16,
    e05: i16,
    e06: i16,
    e07: i16,
    e08: i16,
    e09: i16,
    e10: i16,
    e11: i16,
    e12: i16,
    e13: i16,
    e14: i16,
    e15: i16,
) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi16(
        e15, e14, e13, e12,
        e11, e10, e09, e08,
        e07, e06, e05, e04,
        e03, e02, e01, e00,
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_epi32(
    e0: i32,
    e1: i32,
    e2: i32,
    e3: i32,
    e4: i32,
    e5: i32,
    e6: i32,
    e7: i32,
) -> __m256i {
    _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    _mm256_setr_epi64x(d, c, b, a)
}
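
// Example (illustrative sketch): `set` takes arguments from the most
// significant element down, while `setr` ("reversed") takes them from the
// least significant element up, so these two calls build the same vector:
//
//     #[target_feature(enable = "avx")]
//     unsafe fn same_vector() -> (__m256i, __m256i) {
//         (_mm256_set_epi64x(3, 2, 1, 0), _mm256_setr_epi64x(0, 1, 2, 3))
//     }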
2384
2385#[inline]
2390#[target_feature(enable = "avx")]
2391#[stable(feature = "simd_x86", since = "1.27.0")]
2393pub fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
2394 __m256d([a, b, c, d])
2395}
2396
2397#[inline]
2402#[target_feature(enable = "avx")]
2403#[stable(feature = "simd_x86", since = "1.27.0")]
2405pub fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
2406 __m256([a, b, c, d, e, f, g, h])
2407}
2408
2409#[inline]
2414#[target_feature(enable = "avx")]
2415#[stable(feature = "simd_x86", since = "1.27.0")]
2417pub fn _mm256_setr_epi8(
2418 e00: i8,
2419 e01: i8,
2420 e02: i8,
2421 e03: i8,
2422 e04: i8,
2423 e05: i8,
2424 e06: i8,
2425 e07: i8,
2426 e08: i8,
2427 e09: i8,
2428 e10: i8,
2429 e11: i8,
2430 e12: i8,
2431 e13: i8,
2432 e14: i8,
2433 e15: i8,
2434 e16: i8,
2435 e17: i8,
2436 e18: i8,
2437 e19: i8,
2438 e20: i8,
2439 e21: i8,
2440 e22: i8,
2441 e23: i8,
2442 e24: i8,
2443 e25: i8,
2444 e26: i8,
2445 e27: i8,
2446 e28: i8,
2447 e29: i8,
2448 e30: i8,
2449 e31: i8,
2450) -> __m256i {
2451 unsafe {
2452 #[rustfmt::skip]
2453 transmute(i8x32::new(
2454 e00, e01, e02, e03, e04, e05, e06, e07,
2455 e08, e09, e10, e11, e12, e13, e14, e15,
2456 e16, e17, e18, e19, e20, e21, e22, e23,
2457 e24, e25, e26, e27, e28, e29, e30, e31,
2458 ))
2459 }
2460}
2461
2462#[inline]
2467#[target_feature(enable = "avx")]
2468#[stable(feature = "simd_x86", since = "1.27.0")]
2470pub fn _mm256_setr_epi16(
2471 e00: i16,
2472 e01: i16,
2473 e02: i16,
2474 e03: i16,
2475 e04: i16,
2476 e05: i16,
2477 e06: i16,
2478 e07: i16,
2479 e08: i16,
2480 e09: i16,
2481 e10: i16,
2482 e11: i16,
2483 e12: i16,
2484 e13: i16,
2485 e14: i16,
2486 e15: i16,
2487) -> __m256i {
2488 unsafe {
2489 #[rustfmt::skip]
2490 transmute(i16x16::new(
2491 e00, e01, e02, e03,
2492 e04, e05, e06, e07,
2493 e08, e09, e10, e11,
2494 e12, e13, e14, e15,
2495 ))
2496 }
2497}
2498
2499#[inline]
2504#[target_feature(enable = "avx")]
2505#[stable(feature = "simd_x86", since = "1.27.0")]
2507pub fn _mm256_setr_epi32(
2508 e0: i32,
2509 e1: i32,
2510 e2: i32,
2511 e3: i32,
2512 e4: i32,
2513 e5: i32,
2514 e6: i32,
2515 e7: i32,
2516) -> __m256i {
2517 unsafe { transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
2518}
2519
2520#[inline]
2525#[target_feature(enable = "avx")]
2526#[stable(feature = "simd_x86", since = "1.27.0")]
2528pub fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
2529 unsafe { transmute(i64x4::new(a, b, c, d)) }
2530}
2531
2532#[inline]
2537#[target_feature(enable = "avx")]
2538#[stable(feature = "simd_x86", since = "1.27.0")]
2540pub fn _mm256_set1_pd(a: f64) -> __m256d {
2541 _mm256_setr_pd(a, a, a, a)
2542}
2543
2544#[inline]
2549#[target_feature(enable = "avx")]
2550#[stable(feature = "simd_x86", since = "1.27.0")]
2552pub fn _mm256_set1_ps(a: f32) -> __m256 {
2553 _mm256_setr_ps(a, a, a, a, a, a, a, a)
2554}
2555
2556#[inline]
2561#[target_feature(enable = "avx")]
2562#[stable(feature = "simd_x86", since = "1.27.0")]
2564pub fn _mm256_set1_epi8(a: i8) -> __m256i {
2565 #[rustfmt::skip]
2566 _mm256_setr_epi8(
2567 a, a, a, a, a, a, a, a,
2568 a, a, a, a, a, a, a, a,
2569 a, a, a, a, a, a, a, a,
2570 a, a, a, a, a, a, a, a,
2571 )
2572}
2573
2574#[inline]
2579#[target_feature(enable = "avx")]
2580#[cfg_attr(test, assert_instr(vinsertf128))]
2582#[stable(feature = "simd_x86", since = "1.27.0")]
2584pub fn _mm256_set1_epi16(a: i16) -> __m256i {
2585 _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
2586}
2587
2588#[inline]
2593#[target_feature(enable = "avx")]
2594#[stable(feature = "simd_x86", since = "1.27.0")]
2596pub fn _mm256_set1_epi32(a: i32) -> __m256i {
2597 _mm256_setr_epi32(a, a, a, a, a, a, a, a)
2598}
2599
2600#[inline]
2605#[target_feature(enable = "avx")]
2606#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))]
2607#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
2608#[stable(feature = "simd_x86", since = "1.27.0")]
2610pub fn _mm256_set1_epi64x(a: i64) -> __m256i {
2611 _mm256_setr_epi64x(a, a, a, a)
2612}
2613
2614#[inline]
2618#[target_feature(enable = "avx")]
2619#[stable(feature = "simd_x86", since = "1.27.0")]
2622pub fn _mm256_castpd_ps(a: __m256d) -> __m256 {
2623 unsafe { transmute(a) }
2624}
2625
2626#[inline]
2630#[target_feature(enable = "avx")]
2631#[stable(feature = "simd_x86", since = "1.27.0")]
2634pub fn _mm256_castps_pd(a: __m256) -> __m256d {
2635 unsafe { transmute(a) }
2636}
2637
2638#[inline]
2642#[target_feature(enable = "avx")]
2643#[stable(feature = "simd_x86", since = "1.27.0")]
2646pub fn _mm256_castps_si256(a: __m256) -> __m256i {
2647 unsafe { transmute(a) }
2648}
2649
2650#[inline]
2654#[target_feature(enable = "avx")]
2655#[stable(feature = "simd_x86", since = "1.27.0")]
2658pub fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
2659 unsafe { transmute(a) }
2660}
2661
2662#[inline]
2666#[target_feature(enable = "avx")]
2667#[stable(feature = "simd_x86", since = "1.27.0")]
2670pub fn _mm256_castpd_si256(a: __m256d) -> __m256i {
2671 unsafe { transmute(a) }
2672}
2673
2674#[inline]
2678#[target_feature(enable = "avx")]
2679#[stable(feature = "simd_x86", since = "1.27.0")]
2682pub fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
2683 unsafe { transmute(a) }
2684}
2685
2686#[inline]
2690#[target_feature(enable = "avx")]
2691#[stable(feature = "simd_x86", since = "1.27.0")]
2694pub fn _mm256_castps256_ps128(a: __m256) -> __m128 {
2695 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
2696}
2697
2698#[inline]
2702#[target_feature(enable = "avx")]
2703#[stable(feature = "simd_x86", since = "1.27.0")]
2706pub fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
2707 unsafe { simd_shuffle!(a, a, [0, 1]) }
2708}
2709
2710#[inline]
2714#[target_feature(enable = "avx")]
2715#[stable(feature = "simd_x86", since = "1.27.0")]
2718pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
2719 unsafe {
2720 let a = a.as_i64x4();
2721 let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
2722 transmute(dst)
2723 }
2724}
2725
2726#[inline]
2731#[target_feature(enable = "avx")]
2732#[stable(feature = "simd_x86", since = "1.27.0")]
2735pub fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2736 unsafe { simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) }
2737}
2738
2739#[inline]
2744#[target_feature(enable = "avx")]
2745#[stable(feature = "simd_x86", since = "1.27.0")]
2748pub fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2749 unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) }
2750}
2751
2752#[inline]
2757#[target_feature(enable = "avx")]
2758#[stable(feature = "simd_x86", since = "1.27.0")]
2761pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
2762 unsafe {
2763 let a = a.as_i64x2();
2764 let undefined = i64x2::ZERO;
2765 let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
2766 transmute(dst)
2767 }
2768}
2769
2770#[inline]
2776#[target_feature(enable = "avx")]
2777#[stable(feature = "simd_x86", since = "1.27.0")]
2780pub fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
2781 unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) }
2782}
2783
2784#[inline]
2790#[target_feature(enable = "avx")]
2791#[stable(feature = "simd_x86", since = "1.27.0")]
2794pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2795 unsafe {
2796 let b = i64x2::ZERO;
2797 let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
2798 transmute(dst)
2799 }
2800}
2801
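/// Constructs a 256-bit floating-point vector from `a`: the lower 128 bits
/// contain `a` and the upper 128 bits are guaranteed to be zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256)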
2802#[inline]
2809#[target_feature(enable = "avx")]
2810#[stable(feature = "simd_x86", since = "1.27.0")]
2813pub fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
2814 unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3]) }
2815}
2816
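/// Returns vector of type `__m256` with indeterminate elements. In this
/// implementation the vector is zeroed, but callers must not rely on any
/// particular contents.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_ps)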
2817#[inline]
2823#[target_feature(enable = "avx")]
2824#[stable(feature = "simd_x86", since = "1.27.0")]
2826pub fn _mm256_undefined_ps() -> __m256 {
2827 const { unsafe { mem::zeroed() } }
2828}
2829
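/// Returns vector of type `__m256d` with indeterminate elements. In this
/// implementation the vector is zeroed, but callers must not rely on any
/// particular contents.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_pd)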
2830#[inline]
2836#[target_feature(enable = "avx")]
2837#[stable(feature = "simd_x86", since = "1.27.0")]
2839pub fn _mm256_undefined_pd() -> __m256d {
2840 const { unsafe { mem::zeroed() } }
2841}
2842
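/// Returns vector of type `__m256i` with indeterminate elements. In this
/// implementation the vector is zeroed, but callers must not rely on any
/// particular contents.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_si256)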
2843#[inline]
2849#[target_feature(enable = "avx")]
2850#[stable(feature = "simd_x86", since = "1.27.0")]
2852pub fn _mm256_undefined_si256() -> __m256i {
2853 const { unsafe { mem::zeroed() } }
2854}
2855
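/// Sets packed `__m256` returned vector from the two supplied `__m128`
/// halves: `hi` becomes bits `[255:128]` and `lo` bits `[127:0]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128)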
2856#[inline]
2860#[target_feature(enable = "avx")]
2861#[cfg_attr(test, assert_instr(vinsertf128))]
2862#[stable(feature = "simd_x86", since = "1.27.0")]
2863pub fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
2864 unsafe { simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) }
2865}
2866
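/// Sets packed `__m256d` returned vector from the two supplied `__m128d`
/// halves: `hi` becomes bits `[255:128]` and `lo` bits `[127:0]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128d)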
2867#[inline]
2871#[target_feature(enable = "avx")]
2872#[cfg_attr(test, assert_instr(vinsertf128))]
2873#[stable(feature = "simd_x86", since = "1.27.0")]
2874pub fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
2875 unsafe {
2876 let hi: __m128 = transmute(hi);
2877 let lo: __m128 = transmute(lo);
2878 transmute(_mm256_set_m128(hi, lo))
2879 }
2880}
2881
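/// Sets packed `__m256i` returned vector from the two supplied `__m128i`
/// halves: `hi` becomes bits `[255:128]` and `lo` bits `[127:0]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128i)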
2882#[inline]
2886#[target_feature(enable = "avx")]
2887#[cfg_attr(test, assert_instr(vinsertf128))]
2888#[stable(feature = "simd_x86", since = "1.27.0")]
2889pub fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
2890 unsafe {
2891 let hi: __m128 = transmute(hi);
2892 let lo: __m128 = transmute(lo);
2893 transmute(_mm256_set_m128(hi, lo))
2894 }
2895}
2896
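/// Sets packed `__m256` returned vector with `lo` in bits `[127:0]` and
/// `hi` in bits `[255:128]`; the arguments of `_mm256_set_m128` in reverse
/// order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128)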
2897#[inline]
2901#[target_feature(enable = "avx")]
2902#[cfg_attr(test, assert_instr(vinsertf128))]
2903#[stable(feature = "simd_x86", since = "1.27.0")]
2904pub fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
2905 _mm256_set_m128(hi, lo)
2906}
2907
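/// Sets packed `__m256d` returned vector with `lo` in bits `[127:0]` and
/// `hi` in bits `[255:128]`; the arguments of `_mm256_set_m128d` in reverse
/// order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128d)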
2908#[inline]
2912#[target_feature(enable = "avx")]
2913#[cfg_attr(test, assert_instr(vinsertf128))]
2914#[stable(feature = "simd_x86", since = "1.27.0")]
2915pub fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
2916 _mm256_set_m128d(hi, lo)
2917}
2918
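/// Sets packed `__m256i` returned vector with `lo` in bits `[127:0]` and
/// `hi` in bits `[255:128]`; the arguments of `_mm256_set_m128i` in reverse
/// order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128i)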
2919#[inline]
2923#[target_feature(enable = "avx")]
2924#[cfg_attr(test, assert_instr(vinsertf128))]
2925#[stable(feature = "simd_x86", since = "1.27.0")]
2926pub fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
2927 _mm256_set_m128i(hi, lo)
2928}
2929
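/// Loads two 128-bit values (each composed of 4 packed single-precision
/// (32-bit) floating-point elements) from memory and combines them into a
/// 256-bit value. `hiaddr` and `loaddr` do not need to be aligned on any
/// particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128)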
2930#[inline]
2937#[target_feature(enable = "avx")]
2938#[stable(feature = "simd_x86", since = "1.27.0")]
2940pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
2941 let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
2942 _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr))
2943}
2944
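/// Loads two 128-bit values (each composed of 2 packed double-precision
/// (64-bit) floating-point elements) from memory and combines them into a
/// 256-bit value. `hiaddr` and `loaddr` do not need to be aligned on any
/// particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128d)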
2945#[inline]
2952#[target_feature(enable = "avx")]
2953#[stable(feature = "simd_x86", since = "1.27.0")]
2955pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
2956 let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
2957 _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr))
2958}
2959
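/// Loads two 128-bit values (composed of integer data) from memory and
/// combines them into a 256-bit value. `hiaddr` and `loaddr` do not need to
/// be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128i)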
2960#[inline]
2966#[target_feature(enable = "avx")]
2967#[stable(feature = "simd_x86", since = "1.27.0")]
2969pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
2970 let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
2971 _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr))
2972}
2973
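/// Stores the high and low 128-bit halves of `a` (each composed of 4 packed
/// single-precision (32-bit) floating-point elements) to `hiaddr` and
/// `loaddr`, respectively. Neither address needs to be aligned on any
/// particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128)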
2974#[inline]
2981#[target_feature(enable = "avx")]
2982#[stable(feature = "simd_x86", since = "1.27.0")]
2984pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
2985 let lo = _mm256_castps256_ps128(a);
2986 _mm_storeu_ps(loaddr, lo);
2987 let hi = _mm256_extractf128_ps::<1>(a);
2988 _mm_storeu_ps(hiaddr, hi);
2989}
2990
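/// Stores the high and low 128-bit halves of `a` (each composed of 2 packed
/// double-precision (64-bit) floating-point elements) to `hiaddr` and
/// `loaddr`, respectively. Neither address needs to be aligned on any
/// particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128d)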
2991#[inline]
2998#[target_feature(enable = "avx")]
2999#[stable(feature = "simd_x86", since = "1.27.0")]
3001pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
3002 let lo = _mm256_castpd256_pd128(a);
3003 _mm_storeu_pd(loaddr, lo);
3004 let hi = _mm256_extractf128_pd::<1>(a);
3005 _mm_storeu_pd(hiaddr, hi);
3006}
3007
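/// Stores the high and low 128-bit halves of `a` (composed of integer data)
/// to `hiaddr` and `loaddr`, respectively. Neither address needs to be
/// aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128i)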
3008#[inline]
3014#[target_feature(enable = "avx")]
3015#[stable(feature = "simd_x86", since = "1.27.0")]
3017pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
3018 let lo = _mm256_castsi256_si128(a);
3019 _mm_storeu_si128(loaddr, lo);
3020 let hi = _mm256_extractf128_si256::<1>(a);
3021 _mm_storeu_si128(hiaddr, hi);
3022}
3023
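/// Returns the first (lowest) single-precision (32-bit) floating-point
/// element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtss_f32)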
3024#[inline]
3028#[target_feature(enable = "avx")]
3029#[stable(feature = "simd_x86", since = "1.27.0")]
3031pub fn _mm256_cvtss_f32(a: __m256) -> f32 {
3032 unsafe { simd_extract!(a, 0) }
3033}
3034
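// LLVM intrinsic declarations backing the AVX intrinsics defined above.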
3035#[allow(improper_ctypes)]
3037unsafe extern "C" {
3038 #[link_name = "llvm.x86.avx.round.pd.256"]
3039 fn roundpd256(a: __m256d, b: i32) -> __m256d;
3040 #[link_name = "llvm.x86.avx.round.ps.256"]
3041 fn roundps256(a: __m256, b: i32) -> __m256;
3042 #[link_name = "llvm.x86.avx.dp.ps.256"]
3043 fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
3044 #[link_name = "llvm.x86.avx.hadd.pd.256"]
3045 fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
3046 #[link_name = "llvm.x86.avx.hadd.ps.256"]
3047 fn vhaddps(a: __m256, b: __m256) -> __m256;
3048 #[link_name = "llvm.x86.avx.hsub.pd.256"]
3049 fn vhsubpd(a: __m256d, b: __m256d) -> __m256d;
3050 #[link_name = "llvm.x86.avx.hsub.ps.256"]
3051 fn vhsubps(a: __m256, b: __m256) -> __m256;
3052 #[link_name = "llvm.x86.sse2.cmp.pd"]
3053 fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3054 #[link_name = "llvm.x86.avx.cmp.pd.256"]
3055 fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d;
3056 #[link_name = "llvm.x86.sse.cmp.ps"]
3057 fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
3058 #[link_name = "llvm.x86.avx.cmp.ps.256"]
3059 fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256;
3060 #[link_name = "llvm.x86.sse2.cmp.sd"]
3061 fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3062 #[link_name = "llvm.x86.sse.cmp.ss"]
3063 fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
3064 #[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
3065 fn vcvtps2dq(a: __m256) -> i32x8;
3066 #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
3067 fn vcvttpd2dq(a: __m256d) -> i32x4;
3068 #[link_name = "llvm.x86.avx.cvt.pd2dq.256"]
3069 fn vcvtpd2dq(a: __m256d) -> i32x4;
3070 #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"]
3071 fn vcvttps2dq(a: __m256) -> i32x8;
3072 #[link_name = "llvm.x86.avx.vzeroall"]
3073 fn vzeroall();
3074 #[link_name = "llvm.x86.avx.vzeroupper"]
3075 fn vzeroupper();
3076 #[link_name = "llvm.x86.avx.vpermilvar.ps.256"]
3077 fn vpermilps256(a: __m256, b: i32x8) -> __m256;
3078 #[link_name = "llvm.x86.avx.vpermilvar.ps"]
3079 fn vpermilps(a: __m128, b: i32x4) -> __m128;
3080 #[link_name = "llvm.x86.avx.vpermilvar.pd.256"]
3081 fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
3082 #[link_name = "llvm.x86.avx.vpermilvar.pd"]
3083 fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
3084 #[link_name = "llvm.x86.avx.vperm2f128.ps.256"]
3085 fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256;
3086 #[link_name = "llvm.x86.avx.vperm2f128.pd.256"]
3087 fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d;
3088 #[link_name = "llvm.x86.avx.vperm2f128.si.256"]
3089 fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8;
3090 #[link_name = "llvm.x86.avx.maskload.pd.256"]
3091 fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
3092 #[link_name = "llvm.x86.avx.maskstore.pd.256"]
3093 fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
3094 #[link_name = "llvm.x86.avx.maskload.pd"]
3095 fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
3096 #[link_name = "llvm.x86.avx.maskstore.pd"]
3097 fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
3098 #[link_name = "llvm.x86.avx.maskload.ps.256"]
3099 fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
3100 #[link_name = "llvm.x86.avx.maskstore.ps.256"]
3101 fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
3102 #[link_name = "llvm.x86.avx.maskload.ps"]
3103 fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
3104 #[link_name = "llvm.x86.avx.maskstore.ps"]
3105 fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
3106 #[link_name = "llvm.x86.avx.ldu.dq.256"]
3107 fn vlddqu(mem_addr: *const i8) -> i8x32;
3108 #[link_name = "llvm.x86.avx.rcp.ps.256"]
3109 fn vrcpps(a: __m256) -> __m256;
3110 #[link_name = "llvm.x86.avx.rsqrt.ps.256"]
3111 fn vrsqrtps(a: __m256) -> __m256;
3112 #[link_name = "llvm.x86.avx.ptestz.256"]
3113 fn ptestz256(a: i64x4, b: i64x4) -> i32;
3114 #[link_name = "llvm.x86.avx.ptestc.256"]
3115 fn ptestc256(a: i64x4, b: i64x4) -> i32;
3116 #[link_name = "llvm.x86.avx.ptestnzc.256"]
3117 fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
3118 #[link_name = "llvm.x86.avx.vtestz.pd.256"]
3119 fn vtestzpd256(a: __m256d, b: __m256d) -> i32;
3120 #[link_name = "llvm.x86.avx.vtestc.pd.256"]
3121 fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
3122 #[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
3123 fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
3124 #[link_name = "llvm.x86.avx.vtestz.pd"]
3125 fn vtestzpd(a: __m128d, b: __m128d) -> i32;
3126 #[link_name = "llvm.x86.avx.vtestc.pd"]
3127 fn vtestcpd(a: __m128d, b: __m128d) -> i32;
3128 #[link_name = "llvm.x86.avx.vtestnzc.pd"]
3129 fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
3130 #[link_name = "llvm.x86.avx.vtestz.ps.256"]
3131 fn vtestzps256(a: __m256, b: __m256) -> i32;
3132 #[link_name = "llvm.x86.avx.vtestc.ps.256"]
3133 fn vtestcps256(a: __m256, b: __m256) -> i32;
3134 #[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
3135 fn vtestnzcps256(a: __m256, b: __m256) -> i32;
3136 #[link_name = "llvm.x86.avx.vtestz.ps"]
3137 fn vtestzps(a: __m128, b: __m128) -> i32;
3138 #[link_name = "llvm.x86.avx.vtestc.ps"]
3139 fn vtestcps(a: __m128, b: __m128) -> i32;
3140 #[link_name = "llvm.x86.avx.vtestnzc.ps"]
3141 fn vtestnzcps(a: __m128, b: __m128) -> i32;
3142 #[link_name = "llvm.x86.avx.min.ps.256"]
3143 fn vminps(a: __m256, b: __m256) -> __m256;
3144 #[link_name = "llvm.x86.avx.max.ps.256"]
3145 fn vmaxps(a: __m256, b: __m256) -> __m256;
3146 #[link_name = "llvm.x86.avx.min.pd.256"]
3147 fn vminpd(a: __m256d, b: __m256d) -> __m256d;
3148 #[link_name = "llvm.x86.avx.max.pd.256"]
3149 fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
3150}
3151
3152#[cfg(test)]
3153mod tests {
3154 use crate::hint::black_box;
3155 use crate::ptr;
3156 use stdarch_test::simd_test;
3157
3158 use crate::core_arch::x86::*;
3159
3160 #[simd_test(enable = "avx")]
3161 unsafe fn test_mm256_add_pd() {
3162 let a = _mm256_setr_pd(1., 2., 3., 4.);
3163 let b = _mm256_setr_pd(5., 6., 7., 8.);
3164 let r = _mm256_add_pd(a, b);
3165 let e = _mm256_setr_pd(6., 8., 10., 12.);
3166 assert_eq_m256d(r, e);
3167 }
3168
3169 #[simd_test(enable = "avx")]
3170 unsafe fn test_mm256_add_ps() {
3171 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3172 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3173 let r = _mm256_add_ps(a, b);
3174 let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.);
3175 assert_eq_m256(r, e);
3176 }
3177
3178 #[simd_test(enable = "avx")]
3179 unsafe fn test_mm256_and_pd() {
3180 let a = _mm256_set1_pd(1.);
3181 let b = _mm256_set1_pd(0.6);
3182 let r = _mm256_and_pd(a, b);
3183 let e = _mm256_set1_pd(0.5);
3184 assert_eq_m256d(r, e);
3185 }
3186
3187 #[simd_test(enable = "avx")]
3188 unsafe fn test_mm256_and_ps() {
3189 let a = _mm256_set1_ps(1.);
3190 let b = _mm256_set1_ps(0.6);
3191 let r = _mm256_and_ps(a, b);
3192 let e = _mm256_set1_ps(0.5);
3193 assert_eq_m256(r, e);
3194 }
3195
3196 #[simd_test(enable = "avx")]
3197 unsafe fn test_mm256_or_pd() {
3198 let a = _mm256_set1_pd(1.);
3199 let b = _mm256_set1_pd(0.6);
3200 let r = _mm256_or_pd(a, b);
3201 let e = _mm256_set1_pd(1.2);
3202 assert_eq_m256d(r, e);
3203 }
3204
3205 #[simd_test(enable = "avx")]
3206 unsafe fn test_mm256_or_ps() {
3207 let a = _mm256_set1_ps(1.);
3208 let b = _mm256_set1_ps(0.6);
3209 let r = _mm256_or_ps(a, b);
3210 let e = _mm256_set1_ps(1.2);
3211 assert_eq_m256(r, e);
3212 }
3213
3214 #[simd_test(enable = "avx")]
3215 unsafe fn test_mm256_shuffle_pd() {
3216 let a = _mm256_setr_pd(1., 4., 5., 8.);
3217 let b = _mm256_setr_pd(2., 3., 6., 7.);
3218 let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
3219 let e = _mm256_setr_pd(4., 3., 8., 7.);
3220 assert_eq_m256d(r, e);
3221 }
3222
3223 #[simd_test(enable = "avx")]
3224 unsafe fn test_mm256_shuffle_ps() {
3225 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3226 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3227 let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
3228 let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
3229 assert_eq_m256(r, e);
3230 }
3231
3232 #[simd_test(enable = "avx")]
3233 unsafe fn test_mm256_andnot_pd() {
3234 let a = _mm256_set1_pd(0.);
3235 let b = _mm256_set1_pd(0.6);
3236 let r = _mm256_andnot_pd(a, b);
3237 assert_eq_m256d(r, b);
3238 }
3239
3240 #[simd_test(enable = "avx")]
3241 unsafe fn test_mm256_andnot_ps() {
3242 let a = _mm256_set1_ps(0.);
3243 let b = _mm256_set1_ps(0.6);
3244 let r = _mm256_andnot_ps(a, b);
3245 assert_eq_m256(r, b);
3246 }
3247
3248 #[simd_test(enable = "avx")]
3249 unsafe fn test_mm256_max_pd() {
3250 let a = _mm256_setr_pd(1., 4., 5., 8.);
3251 let b = _mm256_setr_pd(2., 3., 6., 7.);
3252 let r = _mm256_max_pd(a, b);
3253 let e = _mm256_setr_pd(2., 4., 6., 8.);
3254 assert_eq_m256d(r, e);
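        // When both inputs are 0.0 of either sign, `vmaxpd` returns the
        // second (source) operand rather than the numeric maximum.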
3255 let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3258 let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3259 let wu: [u64; 4] = transmute(w);
3260 let xu: [u64; 4] = transmute(x);
3261 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3262 assert_eq!(xu, [0u64; 4]);
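        // When one input is NaN, `vmaxpd` returns the second (source)
        // operand, whether or not that operand is the NaN.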
3263 let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3267 let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3268 let yf: [f64; 4] = transmute(y);
3269 let zf: [f64; 4] = transmute(z);
3270 assert_eq!(yf, [0.0; 4]);
3271 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3272 }
3273
3274 #[simd_test(enable = "avx")]
3275 unsafe fn test_mm256_max_ps() {
3276 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3277 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3278 let r = _mm256_max_ps(a, b);
3279 let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
3280 assert_eq_m256(r, e);
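        // When both inputs are 0.0 of either sign, `vmaxps` returns the
        // second (source) operand rather than the numeric maximum.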
3281 let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3284 let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3285 let wu: [u32; 8] = transmute(w);
3286 let xu: [u32; 8] = transmute(x);
3287 assert_eq!(wu, [0x8000_0000u32; 8]);
3288 assert_eq!(xu, [0u32; 8]);
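        // When one input is NaN, `vmaxps` returns the second (source)
        // operand, whether or not that operand is the NaN.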
3289 let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3293 let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3294 let yf: [f32; 8] = transmute(y);
3295 let zf: [f32; 8] = transmute(z);
3296 assert_eq!(yf, [0.0; 8]);
3297 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3298 }
3299
3300 #[simd_test(enable = "avx")]
3301 unsafe fn test_mm256_min_pd() {
3302 let a = _mm256_setr_pd(1., 4., 5., 8.);
3303 let b = _mm256_setr_pd(2., 3., 6., 7.);
3304 let r = _mm256_min_pd(a, b);
3305 let e = _mm256_setr_pd(1., 3., 5., 7.);
3306 assert_eq_m256d(r, e);
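        // When both inputs are 0.0 of either sign, `vminpd` returns the
        // second (source) operand rather than the numeric minimum.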
3307 let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3310 let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3311 let wu: [u64; 4] = transmute(w);
3312 let xu: [u64; 4] = transmute(x);
3313 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3314 assert_eq!(xu, [0u64; 4]);
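        // When one input is NaN, `vminpd` returns the second (source)
        // operand, whether or not that operand is the NaN.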
3315 let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3319 let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3320 let yf: [f64; 4] = transmute(y);
3321 let zf: [f64; 4] = transmute(z);
3322 assert_eq!(yf, [0.0; 4]);
3323 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3324 }
3325
3326 #[simd_test(enable = "avx")]
3327 unsafe fn test_mm256_min_ps() {
3328 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3329 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3330 let r = _mm256_min_ps(a, b);
3331 let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
3332 assert_eq_m256(r, e);
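        // When both inputs are 0.0 of either sign, `vminps` returns the
        // second (source) operand rather than the numeric minimum.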
3333 let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3336 let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3337 let wu: [u32; 8] = transmute(w);
3338 let xu: [u32; 8] = transmute(x);
3339 assert_eq!(wu, [0x8000_0000u32; 8]);
3340 assert_eq!(xu, [0u32; 8]);
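        // When one input is NaN, `vminps` returns the second (source)
        // operand, whether or not that operand is the NaN.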
3341 let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3345 let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3346 let yf: [f32; 8] = transmute(y);
3347 let zf: [f32; 8] = transmute(z);
3348 assert_eq!(yf, [0.0; 8]);
3349 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3350 }
3351
3352 #[simd_test(enable = "avx")]
3353 unsafe fn test_mm256_mul_pd() {
3354 let a = _mm256_setr_pd(1., 2., 3., 4.);
3355 let b = _mm256_setr_pd(5., 6., 7., 8.);
3356 let r = _mm256_mul_pd(a, b);
3357 let e = _mm256_setr_pd(5., 12., 21., 32.);
3358 assert_eq_m256d(r, e);
3359 }
3360
3361 #[simd_test(enable = "avx")]
3362 unsafe fn test_mm256_mul_ps() {
3363 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3364 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3365 let r = _mm256_mul_ps(a, b);
3366 let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.);
3367 assert_eq_m256(r, e);
3368 }
3369
3370 #[simd_test(enable = "avx")]
3371 unsafe fn test_mm256_addsub_pd() {
3372 let a = _mm256_setr_pd(1., 2., 3., 4.);
3373 let b = _mm256_setr_pd(5., 6., 7., 8.);
3374 let r = _mm256_addsub_pd(a, b);
3375 let e = _mm256_setr_pd(-4., 8., -4., 12.);
3376 assert_eq_m256d(r, e);
3377 }
3378
3379 #[simd_test(enable = "avx")]
3380 unsafe fn test_mm256_addsub_ps() {
3381 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3382 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3383 let r = _mm256_addsub_ps(a, b);
3384 let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.);
3385 assert_eq_m256(r, e);
3386 }
3387
3388 #[simd_test(enable = "avx")]
3389 unsafe fn test_mm256_sub_pd() {
3390 let a = _mm256_setr_pd(1., 2., 3., 4.);
3391 let b = _mm256_setr_pd(5., 6., 7., 8.);
3392 let r = _mm256_sub_pd(a, b);
3393 let e = _mm256_setr_pd(-4., -4., -4., -4.);
3394 assert_eq_m256d(r, e);
3395 }
3396
3397 #[simd_test(enable = "avx")]
3398 unsafe fn test_mm256_sub_ps() {
3399 let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.);
3400 let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.);
3401 let r = _mm256_sub_ps(a, b);
3402 let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.);
3403 assert_eq_m256(r, e);
3404 }
3405
3406 #[simd_test(enable = "avx")]
3407 unsafe fn test_mm256_round_pd() {
3408 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3409 let result_closest = _mm256_round_pd::<0b0000>(a);
3410 let result_down = _mm256_round_pd::<0b0001>(a);
3411 let result_up = _mm256_round_pd::<0b0010>(a);
3412 let expected_closest = _mm256_setr_pd(2., 2., 4., -1.);
3413 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3414 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3415 assert_eq_m256d(result_closest, expected_closest);
3416 assert_eq_m256d(result_down, expected_down);
3417 assert_eq_m256d(result_up, expected_up);
3418 }
3419
3420 #[simd_test(enable = "avx")]
3421 unsafe fn test_mm256_floor_pd() {
3422 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3423 let result_down = _mm256_floor_pd(a);
3424 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3425 assert_eq_m256d(result_down, expected_down);
3426 }
3427
3428 #[simd_test(enable = "avx")]
3429 unsafe fn test_mm256_ceil_pd() {
3430 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3431 let result_up = _mm256_ceil_pd(a);
3432 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3433 assert_eq_m256d(result_up, expected_up);
3434 }
3435
3436 #[simd_test(enable = "avx")]
3437 unsafe fn test_mm256_round_ps() {
3438 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3439 let result_closest = _mm256_round_ps::<0b0000>(a);
3440 let result_down = _mm256_round_ps::<0b0001>(a);
3441 let result_up = _mm256_round_ps::<0b0010>(a);
3442 let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.);
3443 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3444 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3445 assert_eq_m256(result_closest, expected_closest);
3446 assert_eq_m256(result_down, expected_down);
3447 assert_eq_m256(result_up, expected_up);
3448 }
3449
3450 #[simd_test(enable = "avx")]
3451 unsafe fn test_mm256_floor_ps() {
3452 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3453 let result_down = _mm256_floor_ps(a);
3454 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3455 assert_eq_m256(result_down, expected_down);
3456 }
3457
3458 #[simd_test(enable = "avx")]
3459 unsafe fn test_mm256_ceil_ps() {
3460 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3461 let result_up = _mm256_ceil_ps(a);
3462 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3463 assert_eq_m256(result_up, expected_up);
3464 }
3465
3466 #[simd_test(enable = "avx")]
3467 unsafe fn test_mm256_sqrt_pd() {
3468 let a = _mm256_setr_pd(4., 9., 16., 25.);
3469 let r = _mm256_sqrt_pd(a);
3470 let e = _mm256_setr_pd(2., 3., 4., 5.);
3471 assert_eq_m256d(r, e);
3472 }
3473
3474 #[simd_test(enable = "avx")]
3475 unsafe fn test_mm256_sqrt_ps() {
3476 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3477 let r = _mm256_sqrt_ps(a);
3478 let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.);
3479 assert_eq_m256(r, e);
3480 }
3481
3482 #[simd_test(enable = "avx")]
3483 unsafe fn test_mm256_div_ps() {
3484 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3485 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3486 let r = _mm256_div_ps(a, b);
3487 let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5);
3488 assert_eq_m256(r, e);
3489 }
3490
3491 #[simd_test(enable = "avx")]
3492 unsafe fn test_mm256_div_pd() {
3493 let a = _mm256_setr_pd(4., 9., 16., 25.);
3494 let b = _mm256_setr_pd(4., 3., 2., 5.);
3495 let r = _mm256_div_pd(a, b);
3496 let e = _mm256_setr_pd(1., 3., 8., 5.);
3497 assert_eq_m256d(r, e);
3498 }
3499
3500 #[simd_test(enable = "avx")]
3501 unsafe fn test_mm256_blend_pd() {
3502 let a = _mm256_setr_pd(4., 9., 16., 25.);
3503 let b = _mm256_setr_pd(4., 3., 2., 5.);
3504 let r = _mm256_blend_pd::<0x0>(a, b);
3505 assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.));
3506 let r = _mm256_blend_pd::<0x3>(a, b);
3507 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.));
3508 let r = _mm256_blend_pd::<0xF>(a, b);
3509 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.));
3510 }
3511
3512 #[simd_test(enable = "avx")]
3513 unsafe fn test_mm256_blend_ps() {
3514 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3515 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3516 let r = _mm256_blend_ps::<0x0>(a, b);
3517 assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
3518 let r = _mm256_blend_ps::<0x3>(a, b);
3519 assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
3520 let r = _mm256_blend_ps::<0xF>(a, b);
3521 assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
3522 }
3523
3524 #[simd_test(enable = "avx")]
3525 unsafe fn test_mm256_blendv_pd() {
3526 let a = _mm256_setr_pd(4., 9., 16., 25.);
3527 let b = _mm256_setr_pd(4., 3., 2., 5.);
3528 let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64);
3529 let r = _mm256_blendv_pd(a, b, c);
3530 let e = _mm256_setr_pd(4., 9., 2., 5.);
3531 assert_eq_m256d(r, e);
3532 }
3533
3534 #[simd_test(enable = "avx")]
3535 unsafe fn test_mm256_blendv_ps() {
3536 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3537 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3538 #[rustfmt::skip]
3539 let c = _mm256_setr_ps(
3540 0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32,
3541 );
3542 let r = _mm256_blendv_ps(a, b, c);
3543 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3544 assert_eq_m256(r, e);
3545 }
3546
3547 #[simd_test(enable = "avx")]
3548 unsafe fn test_mm256_dp_ps() {
3549 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3550 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3551 let r = _mm256_dp_ps::<0xFF>(a, b);
3552 let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
3553 assert_eq_m256(r, e);
3554 }
3555
3556 #[simd_test(enable = "avx")]
3557 unsafe fn test_mm256_hadd_pd() {
3558 let a = _mm256_setr_pd(4., 9., 16., 25.);
3559 let b = _mm256_setr_pd(4., 3., 2., 5.);
3560 let r = _mm256_hadd_pd(a, b);
3561 let e = _mm256_setr_pd(13., 7., 41., 7.);
3562 assert_eq_m256d(r, e);
3563
3564 let a = _mm256_setr_pd(1., 2., 3., 4.);
3565 let b = _mm256_setr_pd(5., 6., 7., 8.);
3566 let r = _mm256_hadd_pd(a, b);
3567 let e = _mm256_setr_pd(3., 11., 7., 15.);
3568 assert_eq_m256d(r, e);
3569 }
3570
3571 #[simd_test(enable = "avx")]
3572 unsafe fn test_mm256_hadd_ps() {
3573 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3574 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3575 let r = _mm256_hadd_ps(a, b);
3576 let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.);
3577 assert_eq_m256(r, e);
3578
3579 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3580 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3581 let r = _mm256_hadd_ps(a, b);
3582 let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.);
3583 assert_eq_m256(r, e);
3584 }
3585
3586 #[simd_test(enable = "avx")]
3587 unsafe fn test_mm256_hsub_pd() {
3588 let a = _mm256_setr_pd(4., 9., 16., 25.);
3589 let b = _mm256_setr_pd(4., 3., 2., 5.);
3590 let r = _mm256_hsub_pd(a, b);
3591 let e = _mm256_setr_pd(-5., 1., -9., -3.);
3592 assert_eq_m256d(r, e);
3593
3594 let a = _mm256_setr_pd(1., 2., 3., 4.);
3595 let b = _mm256_setr_pd(5., 6., 7., 8.);
3596 let r = _mm256_hsub_pd(a, b);
3597 let e = _mm256_setr_pd(-1., -1., -1., -1.);
3598 assert_eq_m256d(r, e);
3599 }
3600
3601 #[simd_test(enable = "avx")]
3602 unsafe fn test_mm256_hsub_ps() {
3603 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3604 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3605 let r = _mm256_hsub_ps(a, b);
3606 let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.);
3607 assert_eq_m256(r, e);
3608
3609 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3610 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3611 let r = _mm256_hsub_ps(a, b);
3612 let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.);
3613 assert_eq_m256(r, e);
3614 }
3615
3616 #[simd_test(enable = "avx")]
3617 unsafe fn test_mm256_xor_pd() {
3618 let a = _mm256_setr_pd(4., 9., 16., 25.);
3619 let b = _mm256_set1_pd(0.);
3620 let r = _mm256_xor_pd(a, b);
3621 assert_eq_m256d(r, a);
3622 }
3623
3624 #[simd_test(enable = "avx")]
3625 unsafe fn test_mm256_xor_ps() {
3626 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3627 let b = _mm256_set1_ps(0.);
3628 let r = _mm256_xor_ps(a, b);
3629 assert_eq_m256(r, a);
3630 }
3631
3632 #[simd_test(enable = "avx")]
3633 unsafe fn test_mm_cmp_pd() {
3634 let a = _mm_setr_pd(4., 9.);
3635 let b = _mm_setr_pd(4., 3.);
3636 let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
3637 assert!(get_m128d(r, 0).is_nan());
3638 assert!(get_m128d(r, 1).is_nan());
3639 }
3640
3641 #[simd_test(enable = "avx")]
3642 unsafe fn test_mm256_cmp_pd() {
3643 let a = _mm256_setr_pd(1., 2., 3., 4.);
3644 let b = _mm256_setr_pd(5., 6., 7., 8.);
3645 let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
3646 let e = _mm256_set1_pd(0.);
3647 assert_eq_m256d(r, e);
3648 }
3649
3650 #[simd_test(enable = "avx")]
3651 unsafe fn test_mm_cmp_ps() {
3652 let a = _mm_setr_ps(4., 3., 2., 5.);
3653 let b = _mm_setr_ps(4., 9., 16., 25.);
3654 let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
3655 assert!(get_m128(r, 0).is_nan());
3656 assert_eq!(get_m128(r, 1), 0.);
3657 assert_eq!(get_m128(r, 2), 0.);
3658 assert_eq!(get_m128(r, 3), 0.);
3659 }
3660
3661 #[simd_test(enable = "avx")]
3662 unsafe fn test_mm256_cmp_ps() {
3663 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3664 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3665 let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
3666 let e = _mm256_set1_ps(0.);
3667 assert_eq_m256(r, e);
3668 }
3669
3670 #[simd_test(enable = "avx")]
3671 unsafe fn test_mm_cmp_sd() {
3672 let a = _mm_setr_pd(4., 9.);
3673 let b = _mm_setr_pd(4., 3.);
3674 let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
3675 assert!(get_m128d(r, 0).is_nan());
3676 assert_eq!(get_m128d(r, 1), 9.);
3677 }
3678
3679 #[simd_test(enable = "avx")]
3680 unsafe fn test_mm_cmp_ss() {
3681 let a = _mm_setr_ps(4., 3., 2., 5.);
3682 let b = _mm_setr_ps(4., 9., 16., 25.);
3683 let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
3684 assert!(get_m128(r, 0).is_nan());
3685 assert_eq!(get_m128(r, 1), 3.);
3686 assert_eq!(get_m128(r, 2), 2.);
3687 assert_eq!(get_m128(r, 3), 5.);
3688 }
3689
3690 #[simd_test(enable = "avx")]
3691 unsafe fn test_mm256_cvtepi32_pd() {
3692 let a = _mm_setr_epi32(4, 9, 16, 25);
3693 let r = _mm256_cvtepi32_pd(a);
3694 let e = _mm256_setr_pd(4., 9., 16., 25.);
3695 assert_eq_m256d(r, e);
3696 }
3697
3698 #[simd_test(enable = "avx")]
3699 unsafe fn test_mm256_cvtepi32_ps() {
3700 let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3701 let r = _mm256_cvtepi32_ps(a);
3702 let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3703 assert_eq_m256(r, e);
3704 }
3705
3706 #[simd_test(enable = "avx")]
3707 unsafe fn test_mm256_cvtpd_ps() {
3708 let a = _mm256_setr_pd(4., 9., 16., 25.);
3709 let r = _mm256_cvtpd_ps(a);
3710 let e = _mm_setr_ps(4., 9., 16., 25.);
3711 assert_eq_m128(r, e);
3712 }
3713
3714 #[simd_test(enable = "avx")]
3715 unsafe fn test_mm256_cvtps_epi32() {
3716 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3717 let r = _mm256_cvtps_epi32(a);
3718 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3719 assert_eq_m256i(r, e);
3720 }
3721
3722 #[simd_test(enable = "avx")]
3723 unsafe fn test_mm256_cvtps_pd() {
3724 let a = _mm_setr_ps(4., 9., 16., 25.);
3725 let r = _mm256_cvtps_pd(a);
3726 let e = _mm256_setr_pd(4., 9., 16., 25.);
3727 assert_eq_m256d(r, e);
3728 }
3729
3730 #[simd_test(enable = "avx")]
3731 unsafe fn test_mm256_cvtsd_f64() {
3732 let a = _mm256_setr_pd(1., 2., 3., 4.);
3733 let r = _mm256_cvtsd_f64(a);
3734 assert_eq!(r, 1.);
3735 }
3736
3737 #[simd_test(enable = "avx")]
3738 unsafe fn test_mm256_cvttpd_epi32() {
3739 let a = _mm256_setr_pd(4., 9., 16., 25.);
3740 let r = _mm256_cvttpd_epi32(a);
3741 let e = _mm_setr_epi32(4, 9, 16, 25);
3742 assert_eq_m128i(r, e);
3743 }
3744
3745 #[simd_test(enable = "avx")]
3746 unsafe fn test_mm256_cvtpd_epi32() {
3747 let a = _mm256_setr_pd(4., 9., 16., 25.);
3748 let r = _mm256_cvtpd_epi32(a);
3749 let e = _mm_setr_epi32(4, 9, 16, 25);
3750 assert_eq_m128i(r, e);
3751 }
3752
3753 #[simd_test(enable = "avx")]
3754 unsafe fn test_mm256_cvttps_epi32() {
3755 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3756 let r = _mm256_cvttps_epi32(a);
3757 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3758 assert_eq_m256i(r, e);
3759 }
3760
3761 #[simd_test(enable = "avx")]
3762 unsafe fn test_mm256_extractf128_ps() {
3763 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3764 let r = _mm256_extractf128_ps::<0>(a);
3765 let e = _mm_setr_ps(4., 3., 2., 5.);
3766 assert_eq_m128(r, e);
3767 }
3768
3769 #[simd_test(enable = "avx")]
3770 unsafe fn test_mm256_extractf128_pd() {
3771 let a = _mm256_setr_pd(4., 3., 2., 5.);
3772 let r = _mm256_extractf128_pd::<0>(a);
3773 let e = _mm_setr_pd(4., 3.);
3774 assert_eq_m128d(r, e);
3775 }
3776
3777 #[simd_test(enable = "avx")]
3778 unsafe fn test_mm256_extractf128_si256() {
3779 let a = _mm256_setr_epi64x(4, 3, 2, 5);
3780 let r = _mm256_extractf128_si256::<0>(a);
3781 let e = _mm_setr_epi64x(4, 3);
3782 assert_eq_m128i(r, e);
3783 }
3784
3785 #[simd_test(enable = "avx")]
3786 unsafe fn test_mm256_extract_epi32() {
3787 let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
3788 let r1 = _mm256_extract_epi32::<0>(a);
3789 let r2 = _mm256_extract_epi32::<3>(a);
3790 assert_eq!(r1, -1);
3791 assert_eq!(r2, 3);
3792 }
3793
3794 #[simd_test(enable = "avx")]
3795 unsafe fn test_mm256_cvtsi256_si32() {
3796 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3797 let r = _mm256_cvtsi256_si32(a);
3798 assert_eq!(r, 1);
3799 }
3800
3801 #[simd_test(enable = "avx")]
    #[cfg_attr(miri, ignore)] // Register-level operation, not supported by Miri
    unsafe fn test_mm256_zeroall() {
3804 _mm256_zeroall();
3805 }
3806
3807 #[simd_test(enable = "avx")]
    #[cfg_attr(miri, ignore)] // Register-level operation, not supported by Miri
    unsafe fn test_mm256_zeroupper() {
3810 _mm256_zeroupper();
3811 }
3812
3813 #[simd_test(enable = "avx")]
3814 unsafe fn test_mm256_permutevar_ps() {
3815 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3816 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3817 let r = _mm256_permutevar_ps(a, b);
3818 let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.);
3819 assert_eq_m256(r, e);
3820 }
3821
3822 #[simd_test(enable = "avx")]
3823 unsafe fn test_mm_permutevar_ps() {
3824 let a = _mm_setr_ps(4., 3., 2., 5.);
3825 let b = _mm_setr_epi32(1, 2, 3, 4);
3826 let r = _mm_permutevar_ps(a, b);
3827 let e = _mm_setr_ps(3., 2., 5., 4.);
3828 assert_eq_m128(r, e);
3829 }
3830
3831 #[simd_test(enable = "avx")]
3832 unsafe fn test_mm256_permute_ps() {
3833 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3834 let r = _mm256_permute_ps::<0x1b>(a);
3835 let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.);
3836 assert_eq_m256(r, e);
3837 }
3838
3839 #[simd_test(enable = "avx")]
3840 unsafe fn test_mm_permute_ps() {
3841 let a = _mm_setr_ps(4., 3., 2., 5.);
3842 let r = _mm_permute_ps::<0x1b>(a);
3843 let e = _mm_setr_ps(5., 2., 3., 4.);
3844 assert_eq_m128(r, e);
3845 }
3846
3847 #[simd_test(enable = "avx")]
3848 unsafe fn test_mm256_permutevar_pd() {
3849 let a = _mm256_setr_pd(4., 3., 2., 5.);
3850 let b = _mm256_setr_epi64x(1, 2, 3, 4);
3851 let r = _mm256_permutevar_pd(a, b);
3852 let e = _mm256_setr_pd(4., 3., 5., 2.);
3853 assert_eq_m256d(r, e);
3854 }
3855
3856 #[simd_test(enable = "avx")]
3857 unsafe fn test_mm_permutevar_pd() {
3858 let a = _mm_setr_pd(4., 3.);
3859 let b = _mm_setr_epi64x(3, 0);
3860 let r = _mm_permutevar_pd(a, b);
3861 let e = _mm_setr_pd(3., 4.);
3862 assert_eq_m128d(r, e);
3863 }
3864
3865 #[simd_test(enable = "avx")]
3866 unsafe fn test_mm256_permute_pd() {
3867 let a = _mm256_setr_pd(4., 3., 2., 5.);
3868 let r = _mm256_permute_pd::<5>(a);
3869 let e = _mm256_setr_pd(3., 4., 5., 2.);
3870 assert_eq_m256d(r, e);
3871 }
3872
3873 #[simd_test(enable = "avx")]
3874 unsafe fn test_mm_permute_pd() {
3875 let a = _mm_setr_pd(4., 3.);
3876 let r = _mm_permute_pd::<1>(a);
3877 let e = _mm_setr_pd(3., 4.);
3878 assert_eq_m128d(r, e);
3879 }
3880
3881 #[simd_test(enable = "avx")]
3882 unsafe fn test_mm256_permute2f128_ps() {
3883 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3884 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3885 let r = _mm256_permute2f128_ps::<0x13>(a, b);
3886 let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
3887 assert_eq_m256(r, e);
3888 }
3889
3890 #[simd_test(enable = "avx")]
3891 unsafe fn test_mm256_permute2f128_pd() {
3892 let a = _mm256_setr_pd(1., 2., 3., 4.);
3893 let b = _mm256_setr_pd(5., 6., 7., 8.);
3894 let r = _mm256_permute2f128_pd::<0x31>(a, b);
3895 let e = _mm256_setr_pd(3., 4., 7., 8.);
3896 assert_eq_m256d(r, e);
3897 }
3898
3899 #[simd_test(enable = "avx")]
3900 unsafe fn test_mm256_permute2f128_si256() {
3901 let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
3902 let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
3903 let r = _mm256_permute2f128_si256::<0x20>(a, b);
3904 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3905 assert_eq_m256i(r, e);
3906 }
3907
3908 #[simd_test(enable = "avx")]
3909 unsafe fn test_mm256_broadcast_ss() {
3910 let r = _mm256_broadcast_ss(&3.);
3911 let e = _mm256_set1_ps(3.);
3912 assert_eq_m256(r, e);
3913 }
3914
3915 #[simd_test(enable = "avx")]
3916 unsafe fn test_mm_broadcast_ss() {
3917 let r = _mm_broadcast_ss(&3.);
3918 let e = _mm_set1_ps(3.);
3919 assert_eq_m128(r, e);
3920 }
3921
3922 #[simd_test(enable = "avx")]
3923 unsafe fn test_mm256_broadcast_sd() {
3924 let r = _mm256_broadcast_sd(&3.);
3925 let e = _mm256_set1_pd(3.);
3926 assert_eq_m256d(r, e);
3927 }
3928
3929 #[simd_test(enable = "avx")]
3930 unsafe fn test_mm256_broadcast_ps() {
3931 let a = _mm_setr_ps(4., 3., 2., 5.);
3932 let r = _mm256_broadcast_ps(&a);
3933 let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.);
3934 assert_eq_m256(r, e);
3935 }
3936
3937 #[simd_test(enable = "avx")]
3938 unsafe fn test_mm256_broadcast_pd() {
3939 let a = _mm_setr_pd(4., 3.);
3940 let r = _mm256_broadcast_pd(&a);
3941 let e = _mm256_setr_pd(4., 3., 4., 3.);
3942 assert_eq_m256d(r, e);
3943 }
3944
3945 #[simd_test(enable = "avx")]
3946 unsafe fn test_mm256_insertf128_ps() {
3947 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3948 let b = _mm_setr_ps(4., 9., 16., 25.);
3949 let r = _mm256_insertf128_ps::<0>(a, b);
3950 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3951 assert_eq_m256(r, e);
3952 }
3953
3954 #[simd_test(enable = "avx")]
3955 unsafe fn test_mm256_insertf128_pd() {
3956 let a = _mm256_setr_pd(1., 2., 3., 4.);
3957 let b = _mm_setr_pd(5., 6.);
3958 let r = _mm256_insertf128_pd::<0>(a, b);
3959 let e = _mm256_setr_pd(5., 6., 3., 4.);
3960 assert_eq_m256d(r, e);
3961 }
3962
3963 #[simd_test(enable = "avx")]
3964 unsafe fn test_mm256_insertf128_si256() {
3965 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3966 let b = _mm_setr_epi64x(5, 6);
3967 let r = _mm256_insertf128_si256::<0>(a, b);
3968 let e = _mm256_setr_epi64x(5, 6, 3, 4);
3969 assert_eq_m256i(r, e);
3970 }
3971
3972 #[simd_test(enable = "avx")]
3973 unsafe fn test_mm256_insert_epi8() {
3974 #[rustfmt::skip]
3975 let a = _mm256_setr_epi8(
3976 1, 2, 3, 4, 5, 6, 7, 8,
3977 9, 10, 11, 12, 13, 14, 15, 16,
3978 17, 18, 19, 20, 21, 22, 23, 24,
3979 25, 26, 27, 28, 29, 30, 31, 32,
3980 );
3981 let r = _mm256_insert_epi8::<31>(a, 0);
3982 #[rustfmt::skip]
3983 let e = _mm256_setr_epi8(
3984 1, 2, 3, 4, 5, 6, 7, 8,
3985 9, 10, 11, 12, 13, 14, 15, 16,
3986 17, 18, 19, 20, 21, 22, 23, 24,
3987 25, 26, 27, 28, 29, 30, 31, 0,
3988 );
3989 assert_eq_m256i(r, e);
3990 }
3991
3992 #[simd_test(enable = "avx")]
3993 unsafe fn test_mm256_insert_epi16() {
3994 #[rustfmt::skip]
3995 let a = _mm256_setr_epi16(
3996 0, 1, 2, 3, 4, 5, 6, 7,
3997 8, 9, 10, 11, 12, 13, 14, 15,
3998 );
3999 let r = _mm256_insert_epi16::<15>(a, 0);
4000 #[rustfmt::skip]
4001 let e = _mm256_setr_epi16(
4002 0, 1, 2, 3, 4, 5, 6, 7,
4003 8, 9, 10, 11, 12, 13, 14, 0,
4004 );
4005 assert_eq_m256i(r, e);
4006 }
4007
4008 #[simd_test(enable = "avx")]
4009 unsafe fn test_mm256_insert_epi32() {
4010 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4011 let r = _mm256_insert_epi32::<7>(a, 0);
4012 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
4013 assert_eq_m256i(r, e);
4014 }
4015
4016 #[simd_test(enable = "avx")]
4017 unsafe fn test_mm256_load_pd() {
4018 let a = _mm256_setr_pd(1., 2., 3., 4.);
4019 let p = ptr::addr_of!(a) as *const f64;
4020 let r = _mm256_load_pd(p);
4021 let e = _mm256_setr_pd(1., 2., 3., 4.);
4022 assert_eq_m256d(r, e);
4023 }
4024
4025 #[simd_test(enable = "avx")]
4026 unsafe fn test_mm256_store_pd() {
4027 let a = _mm256_setr_pd(1., 2., 3., 4.);
4028 let mut r = _mm256_undefined_pd();
4029 _mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
4030 assert_eq_m256d(r, a);
4031 }
4032
4033 #[simd_test(enable = "avx")]
4034 unsafe fn test_mm256_load_ps() {
4035 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4036 let p = ptr::addr_of!(a) as *const f32;
4037 let r = _mm256_load_ps(p);
4038 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4039 assert_eq_m256(r, e);
4040 }
4041
4042 #[simd_test(enable = "avx")]
4043 unsafe fn test_mm256_store_ps() {
4044 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4045 let mut r = _mm256_undefined_ps();
4046 _mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
4047 assert_eq_m256(r, a);
4048 }
4049
4050 #[simd_test(enable = "avx")]
4051 unsafe fn test_mm256_loadu_pd() {
4052 let a = &[1.0f64, 2., 3., 4.];
4053 let p = a.as_ptr();
4054 let r = _mm256_loadu_pd(black_box(p));
4055 let e = _mm256_setr_pd(1., 2., 3., 4.);
4056 assert_eq_m256d(r, e);
4057 }
4058
4059 #[simd_test(enable = "avx")]
4060 unsafe fn test_mm256_storeu_pd() {
4061 let a = _mm256_set1_pd(9.);
4062 let mut r = _mm256_undefined_pd();
4063 _mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
4064 assert_eq_m256d(r, a);
4065 }
4066
4067 #[simd_test(enable = "avx")]
4068 unsafe fn test_mm256_loadu_ps() {
4069 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
4070 let p = a.as_ptr();
4071 let r = _mm256_loadu_ps(black_box(p));
4072 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4073 assert_eq_m256(r, e);
4074 }
4075
4076 #[simd_test(enable = "avx")]
4077 unsafe fn test_mm256_storeu_ps() {
4078 let a = _mm256_set1_ps(9.);
4079 let mut r = _mm256_undefined_ps();
4080 _mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
4081 assert_eq_m256(r, a);
4082 }
4083
4084 #[simd_test(enable = "avx")]
4085 unsafe fn test_mm256_load_si256() {
4086 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4087 let p = ptr::addr_of!(a);
4088 let r = _mm256_load_si256(p);
4089 let e = _mm256_setr_epi64x(1, 2, 3, 4);
4090 assert_eq_m256i(r, e);
4091 }
4092
4093 #[simd_test(enable = "avx")]
4094 unsafe fn test_mm256_store_si256() {
4095 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4096 let mut r = _mm256_undefined_si256();
4097 _mm256_store_si256(ptr::addr_of_mut!(r), a);
4098 assert_eq_m256i(r, a);
4099 }
4100
4101 #[simd_test(enable = "avx")]
4102 unsafe fn test_mm256_loadu_si256() {
4103 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4104 let p = ptr::addr_of!(a);
4105 let r = _mm256_loadu_si256(black_box(p));
4106 let e = _mm256_setr_epi64x(1, 2, 3, 4);
4107 assert_eq_m256i(r, e);
4108 }
4109
4110 #[simd_test(enable = "avx")]
4111 unsafe fn test_mm256_storeu_si256() {
4112 let a = _mm256_set1_epi8(9);
4113 let mut r = _mm256_undefined_si256();
4114 _mm256_storeu_si256(ptr::addr_of_mut!(r), a);
4115 assert_eq_m256i(r, a);
4116 }
4117
4118 #[simd_test(enable = "avx")]
4119 unsafe fn test_mm256_maskload_pd() {
4120 let a = &[1.0f64, 2., 3., 4.];
4121 let p = a.as_ptr();
4122 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4123 let r = _mm256_maskload_pd(black_box(p), mask);
4124 let e = _mm256_setr_pd(0., 2., 0., 4.);
4125 assert_eq_m256d(r, e);
4126 }
4127
4128 #[simd_test(enable = "avx")]
4129 unsafe fn test_mm256_maskstore_pd() {
4130 let mut r = _mm256_set1_pd(0.);
4131 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4132 let a = _mm256_setr_pd(1., 2., 3., 4.);
4133 _mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4134 let e = _mm256_setr_pd(0., 2., 0., 4.);
4135 assert_eq_m256d(r, e);
4136 }
4137
4138 #[simd_test(enable = "avx")]
4139 unsafe fn test_mm_maskload_pd() {
4140 let a = &[1.0f64, 2.];
4141 let p = a.as_ptr();
4142 let mask = _mm_setr_epi64x(0, !0);
4143 let r = _mm_maskload_pd(black_box(p), mask);
4144 let e = _mm_setr_pd(0., 2.);
4145 assert_eq_m128d(r, e);
4146 }
4147
4148 #[simd_test(enable = "avx")]
4149 unsafe fn test_mm_maskstore_pd() {
4150 let mut r = _mm_set1_pd(0.);
4151 let mask = _mm_setr_epi64x(0, !0);
4152 let a = _mm_setr_pd(1., 2.);
4153 _mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4154 let e = _mm_setr_pd(0., 2.);
4155 assert_eq_m128d(r, e);
4156 }
4157
4158 #[simd_test(enable = "avx")]
4159 unsafe fn test_mm256_maskload_ps() {
4160 let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
4161 let p = a.as_ptr();
4162 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4163 let r = _mm256_maskload_ps(black_box(p), mask);
4164 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4165 assert_eq_m256(r, e);
4166 }
4167
4168 #[simd_test(enable = "avx")]
4169 unsafe fn test_mm256_maskstore_ps() {
4170 let mut r = _mm256_set1_ps(0.);
4171 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4172 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4173 _mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4174 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4175 assert_eq_m256(r, e);
4176 }
4177
4178 #[simd_test(enable = "avx")]
4179 unsafe fn test_mm_maskload_ps() {
4180 let a = &[1.0f32, 2., 3., 4.];
4181 let p = a.as_ptr();
4182 let mask = _mm_setr_epi32(0, !0, 0, !0);
4183 let r = _mm_maskload_ps(black_box(p), mask);
4184 let e = _mm_setr_ps(0., 2., 0., 4.);
4185 assert_eq_m128(r, e);
4186 }
4187
4188 #[simd_test(enable = "avx")]
4189 unsafe fn test_mm_maskstore_ps() {
4190 let mut r = _mm_set1_ps(0.);
4191 let mask = _mm_setr_epi32(0, !0, 0, !0);
4192 let a = _mm_setr_ps(1., 2., 3., 4.);
4193 _mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4194 let e = _mm_setr_ps(0., 2., 0., 4.);
4195 assert_eq_m128(r, e);
4196 }
4197
4198 #[simd_test(enable = "avx")]
4199 unsafe fn test_mm256_movehdup_ps() {
4200 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4201 let r = _mm256_movehdup_ps(a);
4202 let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.);
4203 assert_eq_m256(r, e);
4204 }
4205
4206 #[simd_test(enable = "avx")]
4207 unsafe fn test_mm256_moveldup_ps() {
4208 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4209 let r = _mm256_moveldup_ps(a);
4210 let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.);
4211 assert_eq_m256(r, e);
4212 }
4213
4214 #[simd_test(enable = "avx")]
4215 unsafe fn test_mm256_movedup_pd() {
4216 let a = _mm256_setr_pd(1., 2., 3., 4.);
4217 let r = _mm256_movedup_pd(a);
4218 let e = _mm256_setr_pd(1., 1., 3., 3.);
4219 assert_eq_m256d(r, e);
4220 }
4221
4222 #[simd_test(enable = "avx")]
4223 unsafe fn test_mm256_lddqu_si256() {
4224 #[rustfmt::skip]
4225 let a = _mm256_setr_epi8(
4226 1, 2, 3, 4, 5, 6, 7, 8,
4227 9, 10, 11, 12, 13, 14, 15, 16,
4228 17, 18, 19, 20, 21, 22, 23, 24,
4229 25, 26, 27, 28, 29, 30, 31, 32,
4230 );
4231 let p = ptr::addr_of!(a);
4232 let r = _mm256_lddqu_si256(black_box(p));
4233 #[rustfmt::skip]
4234 let e = _mm256_setr_epi8(
4235 1, 2, 3, 4, 5, 6, 7, 8,
4236 9, 10, 11, 12, 13, 14, 15, 16,
4237 17, 18, 19, 20, 21, 22, 23, 24,
4238 25, 26, 27, 28, 29, 30, 31, 32,
4239 );
4240 assert_eq_m256i(r, e);
4241 }
4242
4243 #[simd_test(enable = "avx")]
    #[cfg_attr(miri, ignore)] // Non-temporal store, not supported by Miri
    unsafe fn test_mm256_stream_si256() {
4246 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4247 let mut r = _mm256_undefined_si256();
4248 _mm256_stream_si256(ptr::addr_of_mut!(r), a);
4249 assert_eq_m256i(r, a);
4250 }
4251
4252 #[simd_test(enable = "avx")]
    #[cfg_attr(miri, ignore)] // Non-temporal store, not supported by Miri
    unsafe fn test_mm256_stream_pd() {
4255 #[repr(align(32))]
4256 struct Memory {
4257 pub data: [f64; 4],
4258 }
4259 let a = _mm256_set1_pd(7.0);
4260 let mut mem = Memory { data: [-1.0; 4] };
4261
4262 _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4263 for i in 0..4 {
4264 assert_eq!(mem.data[i], get_m256d(a, i));
4265 }
4266 }
4267
4268 #[simd_test(enable = "avx")]
    #[cfg_attr(miri, ignore)] // Non-temporal store, not supported by Miri
    unsafe fn test_mm256_stream_ps() {
4271 #[repr(align(32))]
4272 struct Memory {
4273 pub data: [f32; 8],
4274 }
4275 let a = _mm256_set1_ps(7.0);
4276 let mut mem = Memory { data: [-1.0; 8] };
4277
4278 _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
4279 for i in 0..8 {
4280 assert_eq!(mem.data[i], get_m256(a, i));
4281 }
4282 }
4283
4284 #[simd_test(enable = "avx")]
4285 unsafe fn test_mm256_rcp_ps() {
4286 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4287 let r = _mm256_rcp_ps(a);
4288 #[rustfmt::skip]
4289 let e = _mm256_setr_ps(
4290 0.99975586, 0.49987793, 0.33325195, 0.24993896,
4291 0.19995117, 0.16662598, 0.14282227, 0.12496948,
4292 );
4293 let rel_err = 0.00048828125;
4294 for i in 0..8 {
4295 assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
4296 }
4297 }
4298
4299 #[simd_test(enable = "avx")]
4300 unsafe fn test_mm256_rsqrt_ps() {
4301 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4302 let r = _mm256_rsqrt_ps(a);
4303 #[rustfmt::skip]
4304 let e = _mm256_setr_ps(
4305 0.99975586, 0.7069092, 0.5772705, 0.49987793,
4306 0.44714355, 0.40820313, 0.3779297, 0.3534546,
4307 );
4308 let rel_err = 0.00048828125;
4309 for i in 0..8 {
4310 assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
4311 }
4312 }
4313
4314 #[simd_test(enable = "avx")]
4315 unsafe fn test_mm256_unpackhi_pd() {
4316 let a = _mm256_setr_pd(1., 2., 3., 4.);
4317 let b = _mm256_setr_pd(5., 6., 7., 8.);
4318 let r = _mm256_unpackhi_pd(a, b);
4319 let e = _mm256_setr_pd(2., 6., 4., 8.);
4320 assert_eq_m256d(r, e);
4321 }
4322
4323 #[simd_test(enable = "avx")]
4324 unsafe fn test_mm256_unpackhi_ps() {
4325 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4326 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
4327 let r = _mm256_unpackhi_ps(a, b);
4328 let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.);
4329 assert_eq_m256(r, e);
4330 }
4331
4332 #[simd_test(enable = "avx")]
4333 unsafe fn test_mm256_unpacklo_pd() {
4334 let a = _mm256_setr_pd(1., 2., 3., 4.);
4335 let b = _mm256_setr_pd(5., 6., 7., 8.);
4336 let r = _mm256_unpacklo_pd(a, b);
4337 let e = _mm256_setr_pd(1., 5., 3., 7.);
4338 assert_eq_m256d(r, e);
4339 }
4340
4341 #[simd_test(enable = "avx")]
4342 unsafe fn test_mm256_unpacklo_ps() {
4343 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4344 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
4345 let r = _mm256_unpacklo_ps(a, b);
4346 let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.);
4347 assert_eq_m256(r, e);
4348 }
4349
4350 #[simd_test(enable = "avx")]
4351 unsafe fn test_mm256_testz_si256() {
4352 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4353 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4354 let r = _mm256_testz_si256(a, b);
4355 assert_eq!(r, 0);
4356 let b = _mm256_set1_epi64x(0);
4357 let r = _mm256_testz_si256(a, b);
4358 assert_eq!(r, 1);
4359 }
4360
4361 #[simd_test(enable = "avx")]
4362 unsafe fn test_mm256_testc_si256() {
4363 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4364 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4365 let r = _mm256_testc_si256(a, b);
4366 assert_eq!(r, 0);
4367 let b = _mm256_set1_epi64x(0);
4368 let r = _mm256_testc_si256(a, b);
4369 assert_eq!(r, 1);
4370 }
4371
4372 #[simd_test(enable = "avx")]
4373 unsafe fn test_mm256_testnzc_si256() {
4374 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4375 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4376 let r = _mm256_testnzc_si256(a, b);
4377 assert_eq!(r, 1);
4378 let a = _mm256_setr_epi64x(0, 0, 0, 0);
4379 let b = _mm256_setr_epi64x(0, 0, 0, 0);
4380 let r = _mm256_testnzc_si256(a, b);
4381 assert_eq!(r, 0);
4382 }
4383
4384 #[simd_test(enable = "avx")]
4385 unsafe fn test_mm256_testz_pd() {
4386 let a = _mm256_setr_pd(1., 2., 3., 4.);
4387 let b = _mm256_setr_pd(5., 6., 7., 8.);
4388 let r = _mm256_testz_pd(a, b);
4389 assert_eq!(r, 1);
4390 let a = _mm256_set1_pd(-1.);
4391 let r = _mm256_testz_pd(a, a);
4392 assert_eq!(r, 0);
4393 }
4394
4395 #[simd_test(enable = "avx")]
4396 unsafe fn test_mm256_testc_pd() {
4397 let a = _mm256_setr_pd(1., 2., 3., 4.);
4398 let b = _mm256_setr_pd(5., 6., 7., 8.);
4399 let r = _mm256_testc_pd(a, b);
4400 assert_eq!(r, 1);
4401 let a = _mm256_set1_pd(1.);
4402 let b = _mm256_set1_pd(-1.);
4403 let r = _mm256_testc_pd(a, b);
4404 assert_eq!(r, 0);
4405 }
4406
4407 #[simd_test(enable = "avx")]
4408 unsafe fn test_mm256_testnzc_pd() {
4409 let a = _mm256_setr_pd(1., 2., 3., 4.);
4410 let b = _mm256_setr_pd(5., 6., 7., 8.);
4411 let r = _mm256_testnzc_pd(a, b);
4412 assert_eq!(r, 0);
4413 let a = _mm256_setr_pd(1., -1., -1., -1.);
4414 let b = _mm256_setr_pd(-1., -1., 1., 1.);
4415 let r = _mm256_testnzc_pd(a, b);
4416 assert_eq!(r, 1);
4417 }
4418
4419 #[simd_test(enable = "avx")]
4420 unsafe fn test_mm_testz_pd() {
4421 let a = _mm_setr_pd(1., 2.);
4422 let b = _mm_setr_pd(5., 6.);
4423 let r = _mm_testz_pd(a, b);
4424 assert_eq!(r, 1);
4425 let a = _mm_set1_pd(-1.);
4426 let r = _mm_testz_pd(a, a);
4427 assert_eq!(r, 0);
4428 }
4429
4430 #[simd_test(enable = "avx")]
4431 unsafe fn test_mm_testc_pd() {
4432 let a = _mm_setr_pd(1., 2.);
4433 let b = _mm_setr_pd(5., 6.);
4434 let r = _mm_testc_pd(a, b);
4435 assert_eq!(r, 1);
4436 let a = _mm_set1_pd(1.);
4437 let b = _mm_set1_pd(-1.);
4438 let r = _mm_testc_pd(a, b);
4439 assert_eq!(r, 0);
4440 }
4441
4442 #[simd_test(enable = "avx")]
4443 unsafe fn test_mm_testnzc_pd() {
4444 let a = _mm_setr_pd(1., 2.);
4445 let b = _mm_setr_pd(5., 6.);
4446 let r = _mm_testnzc_pd(a, b);
4447 assert_eq!(r, 0);
4448 let a = _mm_setr_pd(1., -1.);
4449 let b = _mm_setr_pd(-1., -1.);
4450 let r = _mm_testnzc_pd(a, b);
4451 assert_eq!(r, 1);
4452 }
4453
4454 #[simd_test(enable = "avx")]
4455 unsafe fn test_mm256_testz_ps() {
4456 let a = _mm256_set1_ps(1.);
4457 let r = _mm256_testz_ps(a, a);
4458 assert_eq!(r, 1);
4459 let a = _mm256_set1_ps(-1.);
4460 let r = _mm256_testz_ps(a, a);
4461 assert_eq!(r, 0);
4462 }
4463
4464 #[simd_test(enable = "avx")]
4465 unsafe fn test_mm256_testc_ps() {
4466 let a = _mm256_set1_ps(1.);
4467 let r = _mm256_testc_ps(a, a);
4468 assert_eq!(r, 1);
4469 let b = _mm256_set1_ps(-1.);
4470 let r = _mm256_testc_ps(a, b);
4471 assert_eq!(r, 0);
4472 }
4473
4474 #[simd_test(enable = "avx")]
4475 unsafe fn test_mm256_testnzc_ps() {
4476 let a = _mm256_set1_ps(1.);
4477 let r = _mm256_testnzc_ps(a, a);
4478 assert_eq!(r, 0);
4479 let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.);
4480 let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.);
4481 let r = _mm256_testnzc_ps(a, b);
4482 assert_eq!(r, 1);
4483 }
4484
4485 #[simd_test(enable = "avx")]
4486 unsafe fn test_mm_testz_ps() {
4487 let a = _mm_set1_ps(1.);
4488 let r = _mm_testz_ps(a, a);
4489 assert_eq!(r, 1);
4490 let a = _mm_set1_ps(-1.);
4491 let r = _mm_testz_ps(a, a);
4492 assert_eq!(r, 0);
4493 }
4494
4495 #[simd_test(enable = "avx")]
4496 unsafe fn test_mm_testc_ps() {
4497 let a = _mm_set1_ps(1.);
4498 let r = _mm_testc_ps(a, a);
4499 assert_eq!(r, 1);
4500 let b = _mm_set1_ps(-1.);
4501 let r = _mm_testc_ps(a, b);
4502 assert_eq!(r, 0);
4503 }
4504
4505 #[simd_test(enable = "avx")]
4506 unsafe fn test_mm_testnzc_ps() {
4507 let a = _mm_set1_ps(1.);
4508 let r = _mm_testnzc_ps(a, a);
4509 assert_eq!(r, 0);
4510 let a = _mm_setr_ps(1., -1., -1., -1.);
4511 let b = _mm_setr_ps(-1., -1., 1., 1.);
4512 let r = _mm_testnzc_ps(a, b);
4513 assert_eq!(r, 1);
4514 }
4515
4516 #[simd_test(enable = "avx")]
4517 unsafe fn test_mm256_movemask_pd() {
4518 let a = _mm256_setr_pd(1., -2., 3., -4.);
4519 let r = _mm256_movemask_pd(a);
4520 assert_eq!(r, 0xA);
4521 }
4522
4523 #[simd_test(enable = "avx")]
4524 unsafe fn test_mm256_movemask_ps() {
4525 let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.);
4526 let r = _mm256_movemask_ps(a);
4527 assert_eq!(r, 0xAA);
4528 }
4529
4530 #[simd_test(enable = "avx")]
4531 unsafe fn test_mm256_setzero_pd() {
4532 let r = _mm256_setzero_pd();
4533 assert_eq_m256d(r, _mm256_set1_pd(0.));
4534 }
4535
4536 #[simd_test(enable = "avx")]
4537 unsafe fn test_mm256_setzero_ps() {
4538 let r = _mm256_setzero_ps();
4539 assert_eq_m256(r, _mm256_set1_ps(0.));
4540 }
4541
4542 #[simd_test(enable = "avx")]
4543 unsafe fn test_mm256_setzero_si256() {
4544 let r = _mm256_setzero_si256();
4545 assert_eq_m256i(r, _mm256_set1_epi8(0));
4546 }
4547
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_pd() {
        let r = _mm256_set_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_ps() {
        let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi8() {
        #[rustfmt::skip]
        let r = _mm256_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 31, 30, 29, 28, 27, 26, 25,
            24, 23, 22, 21, 20, 19, 18, 17,
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi16() {
        #[rustfmt::skip]
        let r = _mm256_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi32() {
        let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi64x() {
        let r = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_pd() {
        let r = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_ps() {
        let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi16() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi32() {
        let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi64x() {
        let r = _mm256_setr_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_pd() {
        let r = _mm256_set1_pd(1.);
        assert_eq_m256d(r, _mm256_set1_pd(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_ps() {
        let r = _mm256_set1_ps(1.);
        assert_eq_m256(r, _mm256_set1_ps(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi8() {
        let r = _mm256_set1_epi8(1);
        assert_eq_m256i(r, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi16() {
        let r = _mm256_set1_epi16(1);
        assert_eq_m256i(r, _mm256_set1_epi16(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi32() {
        let r = _mm256_set1_epi32(1);
        assert_eq_m256i(r, _mm256_set1_epi32(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi64x() {
        let r = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, _mm256_set1_epi64x(1));
    }

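    // The casts below reinterpret bits without any conversion. E.g. 1.0f64 is
    // 0x3FF0_0000_0000_0000, which read as two f32 lanes is 0.0 (low word)
    // and 1.875 (0x3FF0_0000).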
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_ps() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_ps(a);
        let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_pd() {
        let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        let r = _mm256_castps_pd(a);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_si256() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps_si256(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_ps() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        let r = _mm256_castsi256_ps(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_si256() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_si256(a);
        assert_eq_m256d(transmute(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_pd() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_pd(a);
        assert_eq_m256d(r, transmute(a));
    }

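    // Casting a 256-bit vector down to 128 bits keeps the low half and drops
    // the upper half.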
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps256_ps128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps256_ps128(a);
        assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd256_pd128() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd256_pd128(a);
        assert_eq_m128d(r, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_si128() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_si128(a);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
    }

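    // Casting 128-bit up to 256 bits leaves the upper half undefined, so
    // these tests only compare the low 128 bits.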
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps128_ps256() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_castps128_ps256(a);
        assert_eq_m128(_mm256_castps256_ps128(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd128_pd256() {
        let a = _mm_setr_pd(1., 2.);
        let r = _mm256_castpd128_pd256(a);
        assert_eq_m128d(_mm256_castpd256_pd128(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi128_si256() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm256_castsi128_si256(a);
        assert_eq_m128i(_mm256_castsi256_si128(r), a);
    }

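    // Unlike the casts above, the `zext` conversions zero the upper 128 bits,
    // so the full 256-bit result can be checked.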
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextps128_ps256() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_zextps128_ps256(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextsi128_si256() {
        let a = _mm_setr_epi64x(1, 2);
        let r = _mm256_zextsi128_si256(a);
        let e = _mm256_setr_epi64x(1, 2, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextpd128_pd256() {
        let a = _mm_setr_pd(1., 2.);
        let r = _mm256_zextpd128_pd256(a);
        let e = _mm256_setr_pd(1., 2., 0., 0.);
        assert_eq_m256d(r, e);
    }

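    // `_mm256_set_m128*` places `lo` in the lower and `hi` in the upper 128
    // bits; `_mm256_setr_m128*` takes the two halves in the opposite order.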
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128() {
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_set_m128(hi, lo);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128d() {
        let hi = _mm_setr_pd(3., 4.);
        let lo = _mm_setr_pd(1., 2.);
        let r = _mm256_set_m128d(hi, lo);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm256_set_m128i(hi, lo);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128() {
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let r = _mm256_setr_m128(lo, hi);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128d() {
        let lo = _mm_setr_pd(1., 2.);
        let hi = _mm_setr_pd(3., 4.);
        let r = _mm256_setr_m128d(lo, hi);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128i() {
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_setr_m128i(lo, hi);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

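    // `_mm256_loadu2_m128*` performs two unaligned 128-bit loads: `hiaddr`
    // fills the upper half of the result and `loaddr` the lower half.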
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128() {
        let hi = &[5., 6., 7., 8.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2., 3., 4.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128(hiaddr, loaddr);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128d() {
        let hi = &[3., 4.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128d(hiaddr, loaddr);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

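    // `_mm256_storeu2_m128*` is the inverse: it stores the upper half to
    // `hiaddr` and the lower half to `loaddr`, neither of which needs to be
    // aligned.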
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let mut hi = _mm_undefined_ps();
        let mut lo = _mm_undefined_ps();
        _mm256_storeu2_m128(
            ptr::addr_of_mut!(hi) as *mut f32,
            ptr::addr_of_mut!(lo) as *mut f32,
            a,
        );
        assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
        assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128d() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let mut hi = _mm_undefined_pd();
        let mut lo = _mm_undefined_pd();
        _mm256_storeu2_m128d(
            ptr::addr_of_mut!(hi) as *mut f64,
            ptr::addr_of_mut!(lo) as *mut f64,
            a,
        );
        assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
        assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128i() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let mut hi = _mm_undefined_si128();
        let mut lo = _mm_undefined_si128();
        _mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
        #[rustfmt::skip]
        let e_hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e_lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m128i(hi, e_hi);
        assert_eq_m128i(lo, e_lo);
    }

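    // `_mm256_cvtss_f32` extracts the lowest f32 lane as a scalar.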
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtss_f32() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_cvtss_f32(a);
        assert_eq!(r, 1.);
    }
}