use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem, ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_add(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_add(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_and(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_and(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_or(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_or(a, b))
    }
}

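/// # Examples
///
/// Illustrative sketch, assuming AVX support has been verified at runtime
/// (e.g. with `is_x86_feature_detected!("avx")`). Each of the low four mask
/// bits picks the low (0) or high (1) element of `a` or `b` within a 128-bit
/// lane:
///
/// ```ignore
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// let b = _mm256_setr_pd(5.0, 6.0, 7.0, 8.0);
/// // Result elements come from (a, b, a, b): 0b0101 -> (2.0, 5.0, 4.0, 7.0).
/// let r = _mm256_shuffle_pd::<0b0101>(a, b);
/// ```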
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b1,
                ((MASK as u32 >> 1) & 0b1) + 4,
                ((MASK as u32 >> 2) & 0b1) + 2,
                ((MASK as u32 >> 3) & 0b1) + 6,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        )
    }
}

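/// # Examples
///
/// Illustrative sketch, assuming AVX is available: `(!a) & b` clears exactly
/// the bits of `b` that are set in `a`, e.g. a branch-free absolute value by
/// masking off the sign bit:
///
/// ```ignore
/// let sign = _mm256_set1_pd(-0.0); // only the sign bit set
/// let x = _mm256_setr_pd(-1.5, 2.0, -0.0, -8.25);
/// let abs = _mm256_andnot_pd(sign, x); // (1.5, 2.0, 0.0, 8.25)
/// ```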
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { vmaxpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { vmaxps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { vminpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { vminps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_mul(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_mul(a, b) }
}

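/// # Examples
///
/// Illustrative sketch, assuming AVX is available: even-indexed elements are
/// subtracted and odd-indexed elements are added, the building block of
/// complex multiplication:
///
/// ```ignore
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// let b = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
/// let r = _mm256_addsub_pd(a, b); // (-9.0, 22.0, -27.0, 44.0)
/// ```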
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let add = simd_add(a, b);
        let sub = simd_sub(a, b);
        simd_shuffle!(add, sub, [4, 1, 6, 3])
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let add = simd_add(a, b);
        let sub = simd_sub(a, b);
        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_sub(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_sub(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_div(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_div(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundpd256(a, ROUNDING) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_ceil_pd(a: __m256d) -> __m256d {
    unsafe { simd_ceil(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_floor_pd(a: __m256d) -> __m256d {
    unsafe { simd_floor(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(ROUNDING, 4);
    unsafe { roundps256(a, ROUNDING) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_ceil_ps(a: __m256) -> __m256 {
    unsafe { simd_ceil(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_floor_ps(a: __m256) -> __m256 {
    unsafe { simd_floor(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sqrt_ps(a: __m256) -> __m256 {
    unsafe { simd_fsqrt(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
    unsafe { simd_fsqrt(a) }
}

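/// # Examples
///
/// Illustrative sketch, assuming AVX is available: bit `i` of the immediate
/// selects element `i` from `b` (bit set) or `a` (bit clear):
///
/// ```ignore
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// let b = _mm256_setr_pd(5.0, 6.0, 7.0, 8.0);
/// let r = _mm256_blend_pd::<0b0110>(a, b); // (1.0, 6.0, 7.0, 4.0)
/// ```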
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                ((IMM4 as u32 >> 0) & 1) * 4 + 0,
                ((IMM4 as u32 >> 1) & 1) * 4 + 1,
                ((IMM4 as u32 >> 2) & 1) * 4 + 2,
                ((IMM4 as u32 >> 3) & 1) * 4 + 3,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            b,
            [
                ((IMM8 as u32 >> 0) & 1) * 8 + 0,
                ((IMM8 as u32 >> 1) & 1) * 8 + 1,
                ((IMM8 as u32 >> 2) & 1) * 8 + 2,
                ((IMM8 as u32 >> 3) & 1) * 8 + 3,
                ((IMM8 as u32 >> 4) & 1) * 8 + 4,
                ((IMM8 as u32 >> 5) & 1) * 8 + 5,
                ((IMM8 as u32 >> 6) & 1) * 8 + 6,
                ((IMM8 as u32 >> 7) & 1) * 8 + 7,
            ],
        )
    }
}

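/// # Examples
///
/// Illustrative sketch, assuming AVX is available: unlike the immediate
/// blends above, the selector `c` is a runtime vector; the sign bit of each
/// element of `c` picks `b` (set) or `a` (clear):
///
/// ```ignore
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// let b = _mm256_setr_pd(5.0, 6.0, 7.0, 8.0);
/// let c = _mm256_setr_pd(-1.0, 1.0, -1.0, 1.0);
/// let r = _mm256_blendv_pd(a, b, c); // (5.0, 2.0, 7.0, 4.0)
/// ```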
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe {
        let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
        transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe {
        let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
        transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { vdpps(a, b, IMM8 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { vhaddpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { vhaddps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { vhsubpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { vhsubps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_xor(a, b))
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_xor(a, b))
    }
}

/// Equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OQ: i32 = 0x00;
/// Less-than (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OS: i32 = 0x01;
/// Less-than-or-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OS: i32 = 0x02;
/// Unordered (non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_Q: i32 = 0x03;
/// Not-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_UQ: i32 = 0x04;
/// Not-less-than (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_US: i32 = 0x05;
/// Not-less-than-or-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_US: i32 = 0x06;
/// Ordered (non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_Q: i32 = 0x07;
/// Equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_UQ: i32 = 0x08;
/// Not-greater-than-or-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_US: i32 = 0x09;
/// Not-greater-than (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_US: i32 = 0x0a;
/// False (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OQ: i32 = 0x0b;
/// Not-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OQ: i32 = 0x0c;
/// Greater-than-or-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OS: i32 = 0x0d;
/// Greater-than (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OS: i32 = 0x0e;
/// True (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_UQ: i32 = 0x0f;
/// Equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OS: i32 = 0x10;
/// Less-than (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OQ: i32 = 0x11;
/// Less-than-or-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OQ: i32 = 0x12;
/// Unordered (signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_S: i32 = 0x13;
/// Not-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_US: i32 = 0x14;
/// Not-less-than (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_UQ: i32 = 0x15;
/// Not-less-than-or-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_UQ: i32 = 0x16;
/// Ordered (signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_S: i32 = 0x17;
/// Equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_US: i32 = 0x18;
/// Not-greater-than-or-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_UQ: i32 = 0x19;
/// Not-greater-than (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_UQ: i32 = 0x1a;
/// False (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OS: i32 = 0x1b;
/// Not-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OS: i32 = 0x1c;
/// Greater-than-or-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OQ: i32 = 0x1d;
/// Greater-than (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OQ: i32 = 0x1e;
/// True (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_US: i32 = 0x1f;

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmppd(a, b, const { IMM5 as i8 }) }
}

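/// # Examples
///
/// Illustrative sketch, assuming AVX is available: a comparison produces
/// all-ones or all-zeros per element, which pairs naturally with
/// `_mm256_movemask_pd`:
///
/// ```ignore
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// let b = _mm256_set1_pd(2.0);
/// let m = _mm256_cmp_pd::<_CMP_LT_OS>(a, b);
/// assert_eq!(_mm256_movemask_pd(m), 0b0001); // only element 0 is < 2.0
/// ```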
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmppd256(a, b, IMM5 as u8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpps(a, b, const { IMM5 as i8 }) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpps256(a, b, const { IMM5 as u8 }) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpsd(a, b, IMM5 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpss(a, b, IMM5 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
    unsafe { simd_cast(a.as_i32x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
    unsafe { simd_cast(a.as_i32x8()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
    unsafe { simd_cast(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtps_pd(a: __m128) -> __m256d {
    unsafe { simd_cast(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
    unsafe { simd_extract!(a, 0) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2dq(a)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq(a)) }
}

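/// # Examples
///
/// Illustrative sketch, assuming AVX is available: `IMM1` selects the low
/// (0) or high (1) 128-bit half:
///
/// ```ignore
/// let a = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let hi = _mm256_extractf128_ps::<1>(a); // (4.0, 5.0, 6.0, 7.0)
/// ```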
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 3);
    unsafe { simd_extract!(a.as_i32x8(), INDEX as u32) }
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
    unsafe { simd_extract!(a.as_i32x8(), 0) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroall))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_zeroall() {
    unsafe { vzeroall() }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroupper))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_zeroupper() {
    unsafe { vzeroupper() }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
    unsafe { vpermilps256(a, b.as_i32x8()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
    unsafe { vpermilps(a, b.as_i32x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [
                (IMM8 as u32 >> 0) & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                ((IMM8 as u32 >> 0) & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ps(),
            [
                (IMM8 as u32 >> 0) & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
    unsafe { vpermilpd256(a, b.as_i64x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
    unsafe { vpermilpd(a, b.as_i64x2()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_pd(),
            [
                ((IMM4 as u32 >> 0) & 1),
                ((IMM4 as u32 >> 1) & 1),
                ((IMM4 as u32 >> 2) & 1) + 2,
                ((IMM4 as u32 >> 3) & 1) + 2,
            ],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_pd(),
            [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { vperm2f128ps256(a, b, IMM8 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { vperm2f128pd256(a, b, IMM8 as i8) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 {
    _mm256_set1_ps(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 {
    _mm_set1_ps(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
    _mm256_set1_pd(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
    simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
    simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1])
}

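/// # Examples
///
/// Illustrative sketch, assuming AVX is available: the counterpart of
/// `_mm256_extractf128_ps`, replacing the 128-bit half selected by `IMM1`:
///
/// ```ignore
/// let a = _mm256_setzero_ps();
/// let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// // (0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0)
/// let r = _mm256_insertf128_ps::<1>(a, b);
/// ```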
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_castps128_ps256(b),
            [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        simd_shuffle!(
            a,
            _mm256_castpd128_pd256(b),
            [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
        )
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let dst: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            _mm256_castsi128_si256(b).as_i64x4(),
            [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
        );
        transmute(dst)
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
    static_assert_uimm_bits!(INDEX, 5);
    unsafe { transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
    static_assert_uimm_bits!(INDEX, 4);
    unsafe { transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
    static_assert_uimm_bits!(INDEX, 3);
    unsafe { transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i)) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovap)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
    *(mem_addr as *const __m256d)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovap)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
    *(mem_addr as *mut __m256d) = a;
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
    *(mem_addr as *const __m256)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
    *(mem_addr as *mut __m256) = a;
}

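/// # Examples
///
/// Illustrative sketch, assuming AVX is available: unlike `_mm256_load_pd`,
/// the unaligned variant places no 32-byte alignment requirement on
/// `mem_addr`:
///
/// ```ignore
/// let data = [1.0f64, 2.0, 3.0, 4.0];
/// let v = unsafe { _mm256_loadu_pd(data.as_ptr()) };
/// ```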
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
    let mut dst = _mm256_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256d>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
    mem_addr.cast::<__m256d>().write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
    let mut dst = _mm256_undefined_ps();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
    mem_addr.cast::<__m256>().write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
    *mem_addr
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
    *mem_addr = a;
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
    let mut dst = _mm256_undefined_si256();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256i>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
    mem_addr.write_unaligned(a);
}

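/// # Examples
///
/// Illustrative sketch, assuming AVX is available: only elements whose mask
/// has its highest bit set are read; the rest are zeroed, which avoids
/// touching memory past the end of a buffer:
///
/// ```ignore
/// let data = [1.0f64, 2.0, 3.0, 4.0];
/// let mask = _mm256_setr_epi64x(-1, 0, -1, 0);
/// // (1.0, 0.0, 3.0, 0.0)
/// let v = unsafe { _mm256_maskload_pd(data.as_ptr(), mask) };
/// ```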
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
    maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
    maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
    maskloadpd(mem_addr as *const i8, mask.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
    maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
    maskloadps256(mem_addr as *const i8, mask.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
    maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
    maskloadps(mem_addr as *const i8, mask.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
    maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
}

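/// # Examples
///
/// Illustrative sketch, assuming AVX is available: duplicates each
/// odd-indexed element into the even slot below it:
///
/// ```ignore
/// let a = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let r = _mm256_movehdup_ps(a); // (1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0)
/// ```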
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movehdup_ps(a: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_moveldup_ps(a: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movedup_pd(a: __m256d) -> __m256d {
    unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vlddqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
    transmute(vlddqu(mem_addr as *const i8))
}

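/// # Examples
///
/// Illustrative sketch, assuming AVX is available: non-temporal stores bypass
/// the cache and are weakly ordered, so `mem_addr` must be 32-byte aligned
/// and a fence is needed before another thread may observe the data:
///
/// ```ignore
/// unsafe {
///     _mm256_stream_si256(dst, v); // dst: *mut __m256i, 32-byte aligned
///     _mm_sfence(); // order the non-temporal store
/// }
/// ```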
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrcpps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_rcp_ps(a: __m256) -> __m256 {
    unsafe { vrcpps(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
    unsafe { vrsqrtps(a) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) }
}

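/// # Examples
///
/// Illustrative sketch, assuming AVX is available: returns 1 exactly when
/// `a & b` is all zeros (the ZF result of `vptest`):
///
/// ```ignore
/// let a = _mm256_set1_epi64x(0b0011);
/// let b = _mm256_set1_epi64x(0b1100);
/// assert_eq!(_mm256_testz_si256(a, b), 1); // disjoint bit patterns
/// ```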
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe { ptestz256(a.as_i64x4(), b.as_i64x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe { ptestc256(a.as_i64x4(), b.as_i64x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe { ptestnzc256(a.as_i64x4(), b.as_i64x4()) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 {
    unsafe { vtestzpd256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 {
    unsafe { vtestcpd256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
    unsafe { vtestnzcpd256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe { vtestzpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe { vtestcpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe { vtestnzcpd(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 {
    unsafe { vtestzps256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 {
    unsafe { vtestcps256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
    unsafe { vtestnzcps256(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
    unsafe { vtestzps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
    unsafe { vtestcps(a, b) }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
    unsafe { vtestnzcps(a, b) }
}

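/// # Examples
///
/// Illustrative sketch, assuming AVX is available: bit `i` of the result is
/// the sign bit of element `i`:
///
/// ```ignore
/// let a = _mm256_setr_pd(-1.0, 1.0, -3.0, 4.0);
/// assert_eq!(_mm256_movemask_pd(a), 0b0101);
/// ```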
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_pd(a: __m256d) -> i32 {
    unsafe {
        // Compare against zero to spread each sign bit over its whole
        // element, since `simd_bitmask` expects all-ones or all-zeros lanes.
        let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
        simd_bitmask::<i64x4, u8>(mask).into()
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_movemask_ps(a: __m256) -> i32 {
    unsafe {
        // Same trick as `_mm256_movemask_pd`: turn each sign bit into a
        // full lane mask before extracting the bitmask.
        let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
        simd_bitmask::<i32x8, u8>(mask).into()
    }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setzero_pd() -> __m256d {
    const { unsafe { mem::zeroed() } }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setzero_ps() -> __m256 {
    const { unsafe { mem::zeroed() } }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxor))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_setzero_si256() -> __m256i {
    const { unsafe { mem::zeroed() } }
}

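/// # Examples
///
/// Illustrative sketch, assuming AVX is available: `set` takes its arguments
/// highest element first, so it is the reverse of `setr`:
///
/// ```ignore
/// let a = _mm256_set_pd(3.0, 2.0, 1.0, 0.0);
/// let b = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
/// // `a` and `b` hold identical lanes; element 0 of both is 0.0.
/// ```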
2245#[inline]
2250#[target_feature(enable = "avx")]
2251#[cfg_attr(test, assert_instr(vinsertf128))]
2253#[stable(feature = "simd_x86", since = "1.27.0")]
2254pub fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
2255 _mm256_setr_pd(d, c, b, a)
2256}
2257
2258#[inline]
2263#[target_feature(enable = "avx")]
2264#[stable(feature = "simd_x86", since = "1.27.0")]
2266pub fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
2267 _mm256_setr_ps(h, g, f, e, d, c, b, a)
2268}
2269
2270#[inline]
2274#[target_feature(enable = "avx")]
2275#[stable(feature = "simd_x86", since = "1.27.0")]
2277pub fn _mm256_set_epi8(
2278 e00: i8,
2279 e01: i8,
2280 e02: i8,
2281 e03: i8,
2282 e04: i8,
2283 e05: i8,
2284 e06: i8,
2285 e07: i8,
2286 e08: i8,
2287 e09: i8,
2288 e10: i8,
2289 e11: i8,
2290 e12: i8,
2291 e13: i8,
2292 e14: i8,
2293 e15: i8,
2294 e16: i8,
2295 e17: i8,
2296 e18: i8,
2297 e19: i8,
2298 e20: i8,
2299 e21: i8,
2300 e22: i8,
2301 e23: i8,
2302 e24: i8,
2303 e25: i8,
2304 e26: i8,
2305 e27: i8,
2306 e28: i8,
2307 e29: i8,
2308 e30: i8,
2309 e31: i8,
2310) -> __m256i {
2311 #[rustfmt::skip]
2312 _mm256_setr_epi8(
2313 e31, e30, e29, e28, e27, e26, e25, e24,
2314 e23, e22, e21, e20, e19, e18, e17, e16,
2315 e15, e14, e13, e12, e11, e10, e09, e08,
2316 e07, e06, e05, e04, e03, e02, e01, e00,
2317 )
2318}
2319
2320#[inline]
2324#[target_feature(enable = "avx")]
2325#[stable(feature = "simd_x86", since = "1.27.0")]
2327pub fn _mm256_set_epi16(
2328 e00: i16,
2329 e01: i16,
2330 e02: i16,
2331 e03: i16,
2332 e04: i16,
2333 e05: i16,
2334 e06: i16,
2335 e07: i16,
2336 e08: i16,
2337 e09: i16,
2338 e10: i16,
2339 e11: i16,
2340 e12: i16,
2341 e13: i16,
2342 e14: i16,
2343 e15: i16,
2344) -> __m256i {
2345 #[rustfmt::skip]
2346 _mm256_setr_epi16(
2347 e15, e14, e13, e12,
2348 e11, e10, e09, e08,
2349 e07, e06, e05, e04,
2350 e03, e02, e01, e00,
2351 )
2352}
2353
2354#[inline]
2358#[target_feature(enable = "avx")]
2359#[stable(feature = "simd_x86", since = "1.27.0")]
2361pub fn _mm256_set_epi32(
2362 e0: i32,
2363 e1: i32,
2364 e2: i32,
2365 e3: i32,
2366 e4: i32,
2367 e5: i32,
2368 e6: i32,
2369 e7: i32,
2370) -> __m256i {
2371 _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
2372}
2373
2374#[inline]
2378#[target_feature(enable = "avx")]
2379#[stable(feature = "simd_x86", since = "1.27.0")]
2381pub fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
2382 _mm256_setr_epi64x(d, c, b, a)
2383}
2384
2385#[inline]
2390#[target_feature(enable = "avx")]
2391#[stable(feature = "simd_x86", since = "1.27.0")]
2393pub fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
2394 __m256d([a, b, c, d])
2395}
2396
2397#[inline]
2402#[target_feature(enable = "avx")]
2403#[stable(feature = "simd_x86", since = "1.27.0")]
2405pub fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
2406 __m256([a, b, c, d, e, f, g, h])
2407}
2408
2409#[inline]
2414#[target_feature(enable = "avx")]
2415#[stable(feature = "simd_x86", since = "1.27.0")]
2417pub fn _mm256_setr_epi8(
2418 e00: i8,
2419 e01: i8,
2420 e02: i8,
2421 e03: i8,
2422 e04: i8,
2423 e05: i8,
2424 e06: i8,
2425 e07: i8,
2426 e08: i8,
2427 e09: i8,
2428 e10: i8,
2429 e11: i8,
2430 e12: i8,
2431 e13: i8,
2432 e14: i8,
2433 e15: i8,
2434 e16: i8,
2435 e17: i8,
2436 e18: i8,
2437 e19: i8,
2438 e20: i8,
2439 e21: i8,
2440 e22: i8,
2441 e23: i8,
2442 e24: i8,
2443 e25: i8,
2444 e26: i8,
2445 e27: i8,
2446 e28: i8,
2447 e29: i8,
2448 e30: i8,
2449 e31: i8,
2450) -> __m256i {
2451 unsafe {
2452 #[rustfmt::skip]
2453 transmute(i8x32::new(
2454 e00, e01, e02, e03, e04, e05, e06, e07,
2455 e08, e09, e10, e11, e12, e13, e14, e15,
2456 e16, e17, e18, e19, e20, e21, e22, e23,
2457 e24, e25, e26, e27, e28, e29, e30, e31,
2458 ))
2459 }
2460}
2461
2462#[inline]
2467#[target_feature(enable = "avx")]
2468#[stable(feature = "simd_x86", since = "1.27.0")]
2470pub fn _mm256_setr_epi16(
2471 e00: i16,
2472 e01: i16,
2473 e02: i16,
2474 e03: i16,
2475 e04: i16,
2476 e05: i16,
2477 e06: i16,
2478 e07: i16,
2479 e08: i16,
2480 e09: i16,
2481 e10: i16,
2482 e11: i16,
2483 e12: i16,
2484 e13: i16,
2485 e14: i16,
2486 e15: i16,
2487) -> __m256i {
2488 unsafe {
2489 #[rustfmt::skip]
2490 transmute(i16x16::new(
2491 e00, e01, e02, e03,
2492 e04, e05, e06, e07,
2493 e08, e09, e10, e11,
2494 e12, e13, e14, e15,
2495 ))
2496 }
2497}
2498
2499#[inline]
2504#[target_feature(enable = "avx")]
2505#[stable(feature = "simd_x86", since = "1.27.0")]
2507pub fn _mm256_setr_epi32(
2508 e0: i32,
2509 e1: i32,
2510 e2: i32,
2511 e3: i32,
2512 e4: i32,
2513 e5: i32,
2514 e6: i32,
2515 e7: i32,
2516) -> __m256i {
2517 unsafe { transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
2518}
2519
2520#[inline]
2525#[target_feature(enable = "avx")]
2526#[stable(feature = "simd_x86", since = "1.27.0")]
2528pub fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
2529 unsafe { transmute(i64x4::new(a, b, c, d)) }
2530}
2531
2532#[inline]
2537#[target_feature(enable = "avx")]
2538#[stable(feature = "simd_x86", since = "1.27.0")]
2540pub fn _mm256_set1_pd(a: f64) -> __m256d {
2541 _mm256_setr_pd(a, a, a, a)
2542}
2543
2544#[inline]
2549#[target_feature(enable = "avx")]
2550#[stable(feature = "simd_x86", since = "1.27.0")]
2552pub fn _mm256_set1_ps(a: f32) -> __m256 {
2553 _mm256_setr_ps(a, a, a, a, a, a, a, a)
2554}
2555
2556#[inline]
2561#[target_feature(enable = "avx")]
2562#[stable(feature = "simd_x86", since = "1.27.0")]
2564pub fn _mm256_set1_epi8(a: i8) -> __m256i {
2565 #[rustfmt::skip]
2566 _mm256_setr_epi8(
2567 a, a, a, a, a, a, a, a,
2568 a, a, a, a, a, a, a, a,
2569 a, a, a, a, a, a, a, a,
2570 a, a, a, a, a, a, a, a,
2571 )
2572}
2573
2574#[inline]
2579#[target_feature(enable = "avx")]
2580#[cfg_attr(test, assert_instr(vinsertf128))]
2582#[stable(feature = "simd_x86", since = "1.27.0")]
2584pub fn _mm256_set1_epi16(a: i16) -> __m256i {
2585 _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
2586}
2587
2588#[inline]
2593#[target_feature(enable = "avx")]
2594#[stable(feature = "simd_x86", since = "1.27.0")]
2596pub fn _mm256_set1_epi32(a: i32) -> __m256i {
2597 _mm256_setr_epi32(a, a, a, a, a, a, a, a)
2598}
2599
2600#[inline]
2605#[target_feature(enable = "avx")]
2606#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))]
2607#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
2608#[stable(feature = "simd_x86", since = "1.27.0")]
2610pub fn _mm256_set1_epi64x(a: i64) -> __m256i {
2611 _mm256_setr_epi64x(a, a, a, a)
2612}
2613
2614#[inline]
2618#[target_feature(enable = "avx")]
2619#[stable(feature = "simd_x86", since = "1.27.0")]
2622pub fn _mm256_castpd_ps(a: __m256d) -> __m256 {
2623 unsafe { transmute(a) }
2624}
2625
2626#[inline]
2630#[target_feature(enable = "avx")]
2631#[stable(feature = "simd_x86", since = "1.27.0")]
2634pub fn _mm256_castps_pd(a: __m256) -> __m256d {
2635 unsafe { transmute(a) }
2636}
2637
2638#[inline]
2642#[target_feature(enable = "avx")]
2643#[stable(feature = "simd_x86", since = "1.27.0")]
2646pub fn _mm256_castps_si256(a: __m256) -> __m256i {
2647 unsafe { transmute(a) }
2648}
2649
2650#[inline]
2654#[target_feature(enable = "avx")]
2655#[stable(feature = "simd_x86", since = "1.27.0")]
2658pub fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
2659 unsafe { transmute(a) }
2660}
2661
2662#[inline]
2666#[target_feature(enable = "avx")]
2667#[stable(feature = "simd_x86", since = "1.27.0")]
2670pub fn _mm256_castpd_si256(a: __m256d) -> __m256i {
2671 unsafe { transmute(a) }
2672}
2673
2674#[inline]
2678#[target_feature(enable = "avx")]
2679#[stable(feature = "simd_x86", since = "1.27.0")]
2682pub fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
2683 unsafe { transmute(a) }
2684}
2685
2686#[inline]
2690#[target_feature(enable = "avx")]
2691#[stable(feature = "simd_x86", since = "1.27.0")]
2694pub fn _mm256_castps256_ps128(a: __m256) -> __m128 {
2695 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
2696}
2697
2698#[inline]
2702#[target_feature(enable = "avx")]
2703#[stable(feature = "simd_x86", since = "1.27.0")]
2706pub fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
2707 unsafe { simd_shuffle!(a, a, [0, 1]) }
2708}
2709
2710#[inline]
2714#[target_feature(enable = "avx")]
2715#[stable(feature = "simd_x86", since = "1.27.0")]
2718pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
2719 unsafe {
2720 let a = a.as_i64x4();
2721 let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
2722 transmute(dst)
2723 }
2724}
2725
2726#[inline]
2731#[target_feature(enable = "avx")]
2732#[stable(feature = "simd_x86", since = "1.27.0")]
2735pub fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2736 unsafe { simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) }
2737}
2738
2739#[inline]
2744#[target_feature(enable = "avx")]
2745#[stable(feature = "simd_x86", since = "1.27.0")]
2748pub fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2749 unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) }
2750}
2751
2752#[inline]
2757#[target_feature(enable = "avx")]
2758#[stable(feature = "simd_x86", since = "1.27.0")]
2761pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
2762 unsafe {
2763 let a = a.as_i64x2();
2764 let undefined = i64x2::ZERO;
2765 let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
2766 transmute(dst)
2767 }
2768}
2769
2770#[inline]
2776#[target_feature(enable = "avx")]
2777#[stable(feature = "simd_x86", since = "1.27.0")]
2780pub fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
2781 unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) }
2782}
2783
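/// Constructs a `__m256i` from `a` in the lower 128 bits, with the upper
/// 128 bits zeroed.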
2784#[inline]
2790#[target_feature(enable = "avx")]
2791#[stable(feature = "simd_x86", since = "1.27.0")]
2794pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2795 unsafe {
2796 let b = i64x2::ZERO;
2797 let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
2798 transmute(dst)
2799 }
2800}
2801
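/// Constructs a `__m256d` from `a` in the lower 128 bits, with the upper
/// 128 bits zeroed.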
2802#[inline]
2809#[target_feature(enable = "avx")]
2810#[stable(feature = "simd_x86", since = "1.27.0")]
2813pub fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
2814 unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3]) }
2815}
2816
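/// Returns a `__m256` with undefined contents. This implementation returns
/// an all-zero vector.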
2817#[inline]
2824#[target_feature(enable = "avx")]
2825#[stable(feature = "simd_x86", since = "1.27.0")]
2827pub fn _mm256_undefined_ps() -> __m256 {
2828 const { unsafe { mem::zeroed() } }
2829}
2830
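/// Returns a `__m256d` with undefined contents. This implementation returns
/// an all-zero vector.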
2831#[inline]
2838#[target_feature(enable = "avx")]
2839#[stable(feature = "simd_x86", since = "1.27.0")]
2841pub fn _mm256_undefined_pd() -> __m256d {
2842 const { unsafe { mem::zeroed() } }
2843}
2844
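/// Returns a `__m256i` with undefined contents. This implementation returns
/// an all-zero vector.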
2845#[inline]
2852#[target_feature(enable = "avx")]
2853#[stable(feature = "simd_x86", since = "1.27.0")]
2855pub fn _mm256_undefined_si256() -> __m256i {
2856 const { unsafe { mem::zeroed() } }
2857}
2858
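/// Combines `hi` and `lo` into a single `__m256`, with `lo` in the lower
/// 128 bits and `hi` in the upper 128 bits.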
2859#[inline]
2863#[target_feature(enable = "avx")]
2864#[cfg_attr(test, assert_instr(vinsertf128))]
2865#[stable(feature = "simd_x86", since = "1.27.0")]
2866pub fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
2867 unsafe { simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) }
2868}
2869
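/// Combines `hi` and `lo` into a single `__m256d`, with `lo` in the lower
/// 128 bits and `hi` in the upper 128 bits.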
2870#[inline]
2874#[target_feature(enable = "avx")]
2875#[cfg_attr(test, assert_instr(vinsertf128))]
2876#[stable(feature = "simd_x86", since = "1.27.0")]
2877pub fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
2878 unsafe {
2879 let hi: __m128 = transmute(hi);
2880 let lo: __m128 = transmute(lo);
2881 transmute(_mm256_set_m128(hi, lo))
2882 }
2883}
2884
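/// Combines `hi` and `lo` into a single `__m256i`, with `lo` in the lower
/// 128 bits and `hi` in the upper 128 bits.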
2885#[inline]
2889#[target_feature(enable = "avx")]
2890#[cfg_attr(test, assert_instr(vinsertf128))]
2891#[stable(feature = "simd_x86", since = "1.27.0")]
2892pub fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
2893 unsafe {
2894 let hi: __m128 = transmute(hi);
2895 let lo: __m128 = transmute(lo);
2896 transmute(_mm256_set_m128(hi, lo))
2897 }
2898}
2899
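/// Same as `_mm256_set_m128`, but with the arguments given in low-to-high
/// order.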
2900#[inline]
2904#[target_feature(enable = "avx")]
2905#[cfg_attr(test, assert_instr(vinsertf128))]
2906#[stable(feature = "simd_x86", since = "1.27.0")]
2907pub fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
2908 _mm256_set_m128(hi, lo)
2909}
2910
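/// Same as `_mm256_set_m128d`, but with the arguments given in low-to-high
/// order.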
2911#[inline]
2915#[target_feature(enable = "avx")]
2916#[cfg_attr(test, assert_instr(vinsertf128))]
2917#[stable(feature = "simd_x86", since = "1.27.0")]
2918pub fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
2919 _mm256_set_m128d(hi, lo)
2920}
2921
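/// Same as `_mm256_set_m128i`, but with the arguments given in low-to-high
/// order.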
2922#[inline]
2926#[target_feature(enable = "avx")]
2927#[cfg_attr(test, assert_instr(vinsertf128))]
2928#[stable(feature = "simd_x86", since = "1.27.0")]
2929pub fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
2930 _mm256_set_m128i(hi, lo)
2931}
2932
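/// Loads two 128-bit values from memory and combines them into a `__m256`:
/// `loaddr` supplies the lower half and `hiaddr` the upper half. Neither
/// pointer needs to be aligned.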
2933#[inline]
2940#[target_feature(enable = "avx")]
2941#[stable(feature = "simd_x86", since = "1.27.0")]
2943pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
2944 let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
2945 _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr))
2946}
2947
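/// Loads two 128-bit values from memory and combines them into a `__m256d`:
/// `loaddr` supplies the lower half and `hiaddr` the upper half. Neither
/// pointer needs to be aligned.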
2948#[inline]
2955#[target_feature(enable = "avx")]
2956#[stable(feature = "simd_x86", since = "1.27.0")]
2958pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
2959 let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
2960 _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr))
2961}
2962
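/// Loads two 128-bit values from memory and combines them into a `__m256i`:
/// `loaddr` supplies the lower half and `hiaddr` the upper half. Neither
/// pointer needs to be aligned.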
2963#[inline]
2969#[target_feature(enable = "avx")]
2970#[stable(feature = "simd_x86", since = "1.27.0")]
2972pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
2973 let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
2974 _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr))
2975}
2976
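/// Stores the lower 128 bits of `a` to `loaddr` and the upper 128 bits to
/// `hiaddr`. Neither pointer needs to be aligned.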
2977#[inline]
2984#[target_feature(enable = "avx")]
2985#[stable(feature = "simd_x86", since = "1.27.0")]
2987pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
2988 let lo = _mm256_castps256_ps128(a);
2989 _mm_storeu_ps(loaddr, lo);
2990 let hi = _mm256_extractf128_ps::<1>(a);
2991 _mm_storeu_ps(hiaddr, hi);
2992}
2993
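/// Stores the lower 128 bits of `a` to `loaddr` and the upper 128 bits to
/// `hiaddr`. Neither pointer needs to be aligned.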
2994#[inline]
3001#[target_feature(enable = "avx")]
3002#[stable(feature = "simd_x86", since = "1.27.0")]
3004pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
3005 let lo = _mm256_castpd256_pd128(a);
3006 _mm_storeu_pd(loaddr, lo);
3007 let hi = _mm256_extractf128_pd::<1>(a);
3008 _mm_storeu_pd(hiaddr, hi);
3009}
3010
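/// Stores the lower 128 bits of `a` to `loaddr` and the upper 128 bits to
/// `hiaddr`. Neither pointer needs to be aligned.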
3011#[inline]
3017#[target_feature(enable = "avx")]
3018#[stable(feature = "simd_x86", since = "1.27.0")]
3020pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
3021 let lo = _mm256_castsi256_si128(a);
3022 _mm_storeu_si128(loaddr, lo);
3023 let hi = _mm256_extractf128_si256::<1>(a);
3024 _mm_storeu_si128(hiaddr, hi);
3025}
3026
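/// Returns the first (lowest) single-precision element of `a`.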
3027#[inline]
3031#[target_feature(enable = "avx")]
3032#[stable(feature = "simd_x86", since = "1.27.0")]
3034pub fn _mm256_cvtss_f32(a: __m256) -> f32 {
3035 unsafe { simd_extract!(a, 0) }
3036}
3037
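// LLVM intrinsics backing the AVX operations above that cannot be expressed
// with the portable `simd_*` primitives.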
3038#[allow(improper_ctypes)]
3040unsafe extern "C" {
3041 #[link_name = "llvm.x86.avx.round.pd.256"]
3042 fn roundpd256(a: __m256d, b: i32) -> __m256d;
3043 #[link_name = "llvm.x86.avx.round.ps.256"]
3044 fn roundps256(a: __m256, b: i32) -> __m256;
3045 #[link_name = "llvm.x86.avx.dp.ps.256"]
3046 fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256;
3047 #[link_name = "llvm.x86.avx.hadd.pd.256"]
3048 fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
3049 #[link_name = "llvm.x86.avx.hadd.ps.256"]
3050 fn vhaddps(a: __m256, b: __m256) -> __m256;
3051 #[link_name = "llvm.x86.avx.hsub.pd.256"]
3052 fn vhsubpd(a: __m256d, b: __m256d) -> __m256d;
3053 #[link_name = "llvm.x86.avx.hsub.ps.256"]
3054 fn vhsubps(a: __m256, b: __m256) -> __m256;
3055 #[link_name = "llvm.x86.sse2.cmp.pd"]
3056 fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3057 #[link_name = "llvm.x86.avx.cmp.pd.256"]
3058 fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d;
3059 #[link_name = "llvm.x86.sse.cmp.ps"]
3060 fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
3061 #[link_name = "llvm.x86.avx.cmp.ps.256"]
3062 fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256;
3063 #[link_name = "llvm.x86.sse2.cmp.sd"]
3064 fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3065 #[link_name = "llvm.x86.sse.cmp.ss"]
3066 fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
3067 #[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
3068 fn vcvtps2dq(a: __m256) -> i32x8;
3069 #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
3070 fn vcvttpd2dq(a: __m256d) -> i32x4;
3071 #[link_name = "llvm.x86.avx.cvt.pd2dq.256"]
3072 fn vcvtpd2dq(a: __m256d) -> i32x4;
3073 #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"]
3074 fn vcvttps2dq(a: __m256) -> i32x8;
3075 #[link_name = "llvm.x86.avx.vzeroall"]
3076 fn vzeroall();
3077 #[link_name = "llvm.x86.avx.vzeroupper"]
3078 fn vzeroupper();
3079 #[link_name = "llvm.x86.avx.vpermilvar.ps.256"]
3080 fn vpermilps256(a: __m256, b: i32x8) -> __m256;
3081 #[link_name = "llvm.x86.avx.vpermilvar.ps"]
3082 fn vpermilps(a: __m128, b: i32x4) -> __m128;
3083 #[link_name = "llvm.x86.avx.vpermilvar.pd.256"]
3084 fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
3085 #[link_name = "llvm.x86.avx.vpermilvar.pd"]
3086 fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
3087 #[link_name = "llvm.x86.avx.vperm2f128.ps.256"]
3088 fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256;
3089 #[link_name = "llvm.x86.avx.vperm2f128.pd.256"]
3090 fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d;
3091 #[link_name = "llvm.x86.avx.vperm2f128.si.256"]
3092 fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8;
3093 #[link_name = "llvm.x86.avx.maskload.pd.256"]
3094 fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
3095 #[link_name = "llvm.x86.avx.maskstore.pd.256"]
3096 fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
3097 #[link_name = "llvm.x86.avx.maskload.pd"]
3098 fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
3099 #[link_name = "llvm.x86.avx.maskstore.pd"]
3100 fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
3101 #[link_name = "llvm.x86.avx.maskload.ps.256"]
3102 fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
3103 #[link_name = "llvm.x86.avx.maskstore.ps.256"]
3104 fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
3105 #[link_name = "llvm.x86.avx.maskload.ps"]
3106 fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
3107 #[link_name = "llvm.x86.avx.maskstore.ps"]
3108 fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
3109 #[link_name = "llvm.x86.avx.ldu.dq.256"]
3110 fn vlddqu(mem_addr: *const i8) -> i8x32;
3111 #[link_name = "llvm.x86.avx.rcp.ps.256"]
3112 fn vrcpps(a: __m256) -> __m256;
3113 #[link_name = "llvm.x86.avx.rsqrt.ps.256"]
3114 fn vrsqrtps(a: __m256) -> __m256;
3115 #[link_name = "llvm.x86.avx.ptestz.256"]
3116 fn ptestz256(a: i64x4, b: i64x4) -> i32;
3117 #[link_name = "llvm.x86.avx.ptestc.256"]
3118 fn ptestc256(a: i64x4, b: i64x4) -> i32;
3119 #[link_name = "llvm.x86.avx.ptestnzc.256"]
3120 fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
3121 #[link_name = "llvm.x86.avx.vtestz.pd.256"]
3122 fn vtestzpd256(a: __m256d, b: __m256d) -> i32;
3123 #[link_name = "llvm.x86.avx.vtestc.pd.256"]
3124 fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
3125 #[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
3126 fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
3127 #[link_name = "llvm.x86.avx.vtestz.pd"]
3128 fn vtestzpd(a: __m128d, b: __m128d) -> i32;
3129 #[link_name = "llvm.x86.avx.vtestc.pd"]
3130 fn vtestcpd(a: __m128d, b: __m128d) -> i32;
3131 #[link_name = "llvm.x86.avx.vtestnzc.pd"]
3132 fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
3133 #[link_name = "llvm.x86.avx.vtestz.ps.256"]
3134 fn vtestzps256(a: __m256, b: __m256) -> i32;
3135 #[link_name = "llvm.x86.avx.vtestc.ps.256"]
3136 fn vtestcps256(a: __m256, b: __m256) -> i32;
3137 #[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
3138 fn vtestnzcps256(a: __m256, b: __m256) -> i32;
3139 #[link_name = "llvm.x86.avx.vtestz.ps"]
3140 fn vtestzps(a: __m128, b: __m128) -> i32;
3141 #[link_name = "llvm.x86.avx.vtestc.ps"]
3142 fn vtestcps(a: __m128, b: __m128) -> i32;
3143 #[link_name = "llvm.x86.avx.vtestnzc.ps"]
3144 fn vtestnzcps(a: __m128, b: __m128) -> i32;
3145 #[link_name = "llvm.x86.avx.min.ps.256"]
3146 fn vminps(a: __m256, b: __m256) -> __m256;
3147 #[link_name = "llvm.x86.avx.max.ps.256"]
3148 fn vmaxps(a: __m256, b: __m256) -> __m256;
3149 #[link_name = "llvm.x86.avx.min.pd.256"]
3150 fn vminpd(a: __m256d, b: __m256d) -> __m256d;
3151 #[link_name = "llvm.x86.avx.max.pd.256"]
3152 fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
3153}
3154
3155#[cfg(test)]
3156mod tests {
3157 use crate::hint::black_box;
3158 use crate::ptr;
3159 use stdarch_test::simd_test;
3160
3161 use crate::core_arch::x86::*;
3162
3163 #[simd_test(enable = "avx")]
3164 unsafe fn test_mm256_add_pd() {
3165 let a = _mm256_setr_pd(1., 2., 3., 4.);
3166 let b = _mm256_setr_pd(5., 6., 7., 8.);
3167 let r = _mm256_add_pd(a, b);
3168 let e = _mm256_setr_pd(6., 8., 10., 12.);
3169 assert_eq_m256d(r, e);
3170 }
3171
3172 #[simd_test(enable = "avx")]
3173 unsafe fn test_mm256_add_ps() {
3174 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3175 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3176 let r = _mm256_add_ps(a, b);
3177 let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.);
3178 assert_eq_m256(r, e);
3179 }
3180
3181 #[simd_test(enable = "avx")]
3182 unsafe fn test_mm256_and_pd() {
3183 let a = _mm256_set1_pd(1.);
3184 let b = _mm256_set1_pd(0.6);
3185 let r = _mm256_and_pd(a, b);
3186 let e = _mm256_set1_pd(0.5);
3187 assert_eq_m256d(r, e);
3188 }
3189
3190 #[simd_test(enable = "avx")]
3191 unsafe fn test_mm256_and_ps() {
3192 let a = _mm256_set1_ps(1.);
3193 let b = _mm256_set1_ps(0.6);
3194 let r = _mm256_and_ps(a, b);
3195 let e = _mm256_set1_ps(0.5);
3196 assert_eq_m256(r, e);
3197 }
3198
3199 #[simd_test(enable = "avx")]
3200 unsafe fn test_mm256_or_pd() {
3201 let a = _mm256_set1_pd(1.);
3202 let b = _mm256_set1_pd(0.6);
3203 let r = _mm256_or_pd(a, b);
3204 let e = _mm256_set1_pd(1.2);
3205 assert_eq_m256d(r, e);
3206 }
3207
3208 #[simd_test(enable = "avx")]
3209 unsafe fn test_mm256_or_ps() {
3210 let a = _mm256_set1_ps(1.);
3211 let b = _mm256_set1_ps(0.6);
3212 let r = _mm256_or_ps(a, b);
3213 let e = _mm256_set1_ps(1.2);
3214 assert_eq_m256(r, e);
3215 }
3216
3217 #[simd_test(enable = "avx")]
3218 unsafe fn test_mm256_shuffle_pd() {
3219 let a = _mm256_setr_pd(1., 4., 5., 8.);
3220 let b = _mm256_setr_pd(2., 3., 6., 7.);
3221 let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
3222 let e = _mm256_setr_pd(4., 3., 8., 7.);
3223 assert_eq_m256d(r, e);
3224 }
3225
3226 #[simd_test(enable = "avx")]
3227 unsafe fn test_mm256_shuffle_ps() {
3228 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3229 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3230 let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
3231 let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
3232 assert_eq_m256(r, e);
3233 }
3234
3235 #[simd_test(enable = "avx")]
3236 unsafe fn test_mm256_andnot_pd() {
3237 let a = _mm256_set1_pd(0.);
3238 let b = _mm256_set1_pd(0.6);
3239 let r = _mm256_andnot_pd(a, b);
3240 assert_eq_m256d(r, b);
3241 }
3242
3243 #[simd_test(enable = "avx")]
3244 unsafe fn test_mm256_andnot_ps() {
3245 let a = _mm256_set1_ps(0.);
3246 let b = _mm256_set1_ps(0.6);
3247 let r = _mm256_andnot_ps(a, b);
3248 assert_eq_m256(r, b);
3249 }
3250
3251 #[simd_test(enable = "avx")]
3252 unsafe fn test_mm256_max_pd() {
3253 let a = _mm256_setr_pd(1., 4., 5., 8.);
3254 let b = _mm256_setr_pd(2., 3., 6., 7.);
3255 let r = _mm256_max_pd(a, b);
3256 let e = _mm256_setr_pd(2., 4., 6., 8.);
3257 assert_eq_m256d(r, e);
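// Per the (V)MAXPD semantics: if both values are 0.0 of either sign, the
// second argument (the source operand) is returned.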
3258 let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3261 let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3262 let wu: [u64; 4] = transmute(w);
3263 let xu: [u64; 4] = transmute(x);
3264 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3265 assert_eq!(xu, [0u64; 4]);
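// If only one value is a NaN, the second argument is returned, whether or
// not it is the NaN.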
3266 let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3270 let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3271 let yf: [f64; 4] = transmute(y);
3272 let zf: [f64; 4] = transmute(z);
3273 assert_eq!(yf, [0.0; 4]);
3274 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3275 }
3276
3277 #[simd_test(enable = "avx")]
3278 unsafe fn test_mm256_max_ps() {
3279 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3280 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3281 let r = _mm256_max_ps(a, b);
3282 let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
3283 assert_eq_m256(r, e);
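// Per the (V)MAXPS semantics: if both values are 0.0 of either sign, the
// second argument (the source operand) is returned.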
3284 let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3287 let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3288 let wu: [u32; 8] = transmute(w);
3289 let xu: [u32; 8] = transmute(x);
3290 assert_eq!(wu, [0x8000_0000u32; 8]);
3291 assert_eq!(xu, [0u32; 8]);
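// If only one value is a NaN, the second argument is returned, whether or
// not it is the NaN.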
3292 let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3296 let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3297 let yf: [f32; 8] = transmute(y);
3298 let zf: [f32; 8] = transmute(z);
3299 assert_eq!(yf, [0.0; 8]);
3300 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3301 }
3302
3303 #[simd_test(enable = "avx")]
3304 unsafe fn test_mm256_min_pd() {
3305 let a = _mm256_setr_pd(1., 4., 5., 8.);
3306 let b = _mm256_setr_pd(2., 3., 6., 7.);
3307 let r = _mm256_min_pd(a, b);
3308 let e = _mm256_setr_pd(1., 3., 5., 7.);
3309 assert_eq_m256d(r, e);
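// Per the (V)MINPD semantics: if both values are 0.0 of either sign, the
// second argument (the source operand) is returned.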
3310 let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3313 let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3314 let wu: [u64; 4] = transmute(w);
3315 let xu: [u64; 4] = transmute(x);
3316 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3317 assert_eq!(xu, [0u64; 4]);
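// If only one value is a NaN, the second argument is returned, whether or
// not it is the NaN.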
3318 let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3322 let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3323 let yf: [f64; 4] = transmute(y);
3324 let zf: [f64; 4] = transmute(z);
3325 assert_eq!(yf, [0.0; 4]);
3326 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3327 }
3328
3329 #[simd_test(enable = "avx")]
3330 unsafe fn test_mm256_min_ps() {
3331 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3332 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3333 let r = _mm256_min_ps(a, b);
3334 let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
3335 assert_eq_m256(r, e);
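// Per the (V)MINPS semantics: if both values are 0.0 of either sign, the
// second argument (the source operand) is returned.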
3336 let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3339 let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3340 let wu: [u32; 8] = transmute(w);
3341 let xu: [u32; 8] = transmute(x);
3342 assert_eq!(wu, [0x8000_0000u32; 8]);
3343 assert_eq!(xu, [0u32; 8]);
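// If only one value is a NaN, the second argument is returned, whether or
// not it is the NaN.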
3344 let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3348 let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3349 let yf: [f32; 8] = transmute(y);
3350 let zf: [f32; 8] = transmute(z);
3351 assert_eq!(yf, [0.0; 8]);
3352 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3353 }
3354
3355 #[simd_test(enable = "avx")]
3356 unsafe fn test_mm256_mul_pd() {
3357 let a = _mm256_setr_pd(1., 2., 3., 4.);
3358 let b = _mm256_setr_pd(5., 6., 7., 8.);
3359 let r = _mm256_mul_pd(a, b);
3360 let e = _mm256_setr_pd(5., 12., 21., 32.);
3361 assert_eq_m256d(r, e);
3362 }
3363
3364 #[simd_test(enable = "avx")]
3365 unsafe fn test_mm256_mul_ps() {
3366 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3367 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3368 let r = _mm256_mul_ps(a, b);
3369 let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.);
3370 assert_eq_m256(r, e);
3371 }
3372
3373 #[simd_test(enable = "avx")]
3374 unsafe fn test_mm256_addsub_pd() {
3375 let a = _mm256_setr_pd(1., 2., 3., 4.);
3376 let b = _mm256_setr_pd(5., 6., 7., 8.);
3377 let r = _mm256_addsub_pd(a, b);
3378 let e = _mm256_setr_pd(-4., 8., -4., 12.);
3379 assert_eq_m256d(r, e);
3380 }
3381
3382 #[simd_test(enable = "avx")]
3383 unsafe fn test_mm256_addsub_ps() {
3384 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3385 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3386 let r = _mm256_addsub_ps(a, b);
3387 let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.);
3388 assert_eq_m256(r, e);
3389 }
3390
3391 #[simd_test(enable = "avx")]
3392 unsafe fn test_mm256_sub_pd() {
3393 let a = _mm256_setr_pd(1., 2., 3., 4.);
3394 let b = _mm256_setr_pd(5., 6., 7., 8.);
3395 let r = _mm256_sub_pd(a, b);
3396 let e = _mm256_setr_pd(-4., -4., -4., -4.);
3397 assert_eq_m256d(r, e);
3398 }
3399
3400 #[simd_test(enable = "avx")]
3401 unsafe fn test_mm256_sub_ps() {
3402 let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.);
3403 let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.);
3404 let r = _mm256_sub_ps(a, b);
3405 let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.);
3406 assert_eq_m256(r, e);
3407 }
3408
3409 #[simd_test(enable = "avx")]
3410 unsafe fn test_mm256_round_pd() {
3411 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3412 let result_closest = _mm256_round_pd::<0b0000>(a);
3413 let result_down = _mm256_round_pd::<0b0001>(a);
3414 let result_up = _mm256_round_pd::<0b0010>(a);
3415 let expected_closest = _mm256_setr_pd(2., 2., 4., -1.);
3416 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3417 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3418 assert_eq_m256d(result_closest, expected_closest);
3419 assert_eq_m256d(result_down, expected_down);
3420 assert_eq_m256d(result_up, expected_up);
3421 }
3422
3423 #[simd_test(enable = "avx")]
3424 unsafe fn test_mm256_floor_pd() {
3425 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3426 let result_down = _mm256_floor_pd(a);
3427 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3428 assert_eq_m256d(result_down, expected_down);
3429 }
3430
3431 #[simd_test(enable = "avx")]
3432 unsafe fn test_mm256_ceil_pd() {
3433 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3434 let result_up = _mm256_ceil_pd(a);
3435 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3436 assert_eq_m256d(result_up, expected_up);
3437 }
3438
3439 #[simd_test(enable = "avx")]
3440 unsafe fn test_mm256_round_ps() {
3441 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3442 let result_closest = _mm256_round_ps::<0b0000>(a);
3443 let result_down = _mm256_round_ps::<0b0001>(a);
3444 let result_up = _mm256_round_ps::<0b0010>(a);
3445 let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.);
3446 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3447 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3448 assert_eq_m256(result_closest, expected_closest);
3449 assert_eq_m256(result_down, expected_down);
3450 assert_eq_m256(result_up, expected_up);
3451 }
3452
3453 #[simd_test(enable = "avx")]
3454 unsafe fn test_mm256_floor_ps() {
3455 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3456 let result_down = _mm256_floor_ps(a);
3457 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3458 assert_eq_m256(result_down, expected_down);
3459 }
3460
3461 #[simd_test(enable = "avx")]
3462 unsafe fn test_mm256_ceil_ps() {
3463 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3464 let result_up = _mm256_ceil_ps(a);
3465 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3466 assert_eq_m256(result_up, expected_up);
3467 }
3468
3469 #[simd_test(enable = "avx")]
3470 unsafe fn test_mm256_sqrt_pd() {
3471 let a = _mm256_setr_pd(4., 9., 16., 25.);
3472 let r = _mm256_sqrt_pd(a);
3473 let e = _mm256_setr_pd(2., 3., 4., 5.);
3474 assert_eq_m256d(r, e);
3475 }
3476
3477 #[simd_test(enable = "avx")]
3478 unsafe fn test_mm256_sqrt_ps() {
3479 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3480 let r = _mm256_sqrt_ps(a);
3481 let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.);
3482 assert_eq_m256(r, e);
3483 }
3484
3485 #[simd_test(enable = "avx")]
3486 unsafe fn test_mm256_div_ps() {
3487 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3488 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3489 let r = _mm256_div_ps(a, b);
3490 let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5);
3491 assert_eq_m256(r, e);
3492 }
3493
3494 #[simd_test(enable = "avx")]
3495 unsafe fn test_mm256_div_pd() {
3496 let a = _mm256_setr_pd(4., 9., 16., 25.);
3497 let b = _mm256_setr_pd(4., 3., 2., 5.);
3498 let r = _mm256_div_pd(a, b);
3499 let e = _mm256_setr_pd(1., 3., 8., 5.);
3500 assert_eq_m256d(r, e);
3501 }
3502
3503 #[simd_test(enable = "avx")]
3504 unsafe fn test_mm256_blend_pd() {
3505 let a = _mm256_setr_pd(4., 9., 16., 25.);
3506 let b = _mm256_setr_pd(4., 3., 2., 5.);
3507 let r = _mm256_blend_pd::<0x0>(a, b);
3508 assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.));
3509 let r = _mm256_blend_pd::<0x3>(a, b);
3510 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.));
3511 let r = _mm256_blend_pd::<0xF>(a, b);
3512 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.));
3513 }
3514
3515 #[simd_test(enable = "avx")]
3516 unsafe fn test_mm256_blend_ps() {
3517 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3518 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3519 let r = _mm256_blend_ps::<0x0>(a, b);
3520 assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
3521 let r = _mm256_blend_ps::<0x3>(a, b);
3522 assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
3523 let r = _mm256_blend_ps::<0xF>(a, b);
3524 assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
3525 }
3526
3527 #[simd_test(enable = "avx")]
3528 unsafe fn test_mm256_blendv_pd() {
3529 let a = _mm256_setr_pd(4., 9., 16., 25.);
3530 let b = _mm256_setr_pd(4., 3., 2., 5.);
3531 let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64);
3532 let r = _mm256_blendv_pd(a, b, c);
3533 let e = _mm256_setr_pd(4., 9., 2., 5.);
3534 assert_eq_m256d(r, e);
3535 }
3536
3537 #[simd_test(enable = "avx")]
3538 unsafe fn test_mm256_blendv_ps() {
3539 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3540 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3541 #[rustfmt::skip]
3542 let c = _mm256_setr_ps(
3543 0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32,
3544 );
3545 let r = _mm256_blendv_ps(a, b, c);
3546 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3547 assert_eq_m256(r, e);
3548 }
3549
3550 #[simd_test(enable = "avx")]
3551 unsafe fn test_mm256_dp_ps() {
3552 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3553 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3554 let r = _mm256_dp_ps::<0xFF>(a, b);
3555 let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
3556 assert_eq_m256(r, e);
3557 }
3558
3559 #[simd_test(enable = "avx")]
3560 unsafe fn test_mm256_hadd_pd() {
3561 let a = _mm256_setr_pd(4., 9., 16., 25.);
3562 let b = _mm256_setr_pd(4., 3., 2., 5.);
3563 let r = _mm256_hadd_pd(a, b);
3564 let e = _mm256_setr_pd(13., 7., 41., 7.);
3565 assert_eq_m256d(r, e);
3566
3567 let a = _mm256_setr_pd(1., 2., 3., 4.);
3568 let b = _mm256_setr_pd(5., 6., 7., 8.);
3569 let r = _mm256_hadd_pd(a, b);
3570 let e = _mm256_setr_pd(3., 11., 7., 15.);
3571 assert_eq_m256d(r, e);
3572 }
3573
3574 #[simd_test(enable = "avx")]
3575 unsafe fn test_mm256_hadd_ps() {
3576 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3577 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3578 let r = _mm256_hadd_ps(a, b);
3579 let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.);
3580 assert_eq_m256(r, e);
3581
3582 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3583 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3584 let r = _mm256_hadd_ps(a, b);
3585 let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.);
3586 assert_eq_m256(r, e);
3587 }
3588
3589 #[simd_test(enable = "avx")]
3590 unsafe fn test_mm256_hsub_pd() {
3591 let a = _mm256_setr_pd(4., 9., 16., 25.);
3592 let b = _mm256_setr_pd(4., 3., 2., 5.);
3593 let r = _mm256_hsub_pd(a, b);
3594 let e = _mm256_setr_pd(-5., 1., -9., -3.);
3595 assert_eq_m256d(r, e);
3596
3597 let a = _mm256_setr_pd(1., 2., 3., 4.);
3598 let b = _mm256_setr_pd(5., 6., 7., 8.);
3599 let r = _mm256_hsub_pd(a, b);
3600 let e = _mm256_setr_pd(-1., -1., -1., -1.);
3601 assert_eq_m256d(r, e);
3602 }
3603
3604 #[simd_test(enable = "avx")]
3605 unsafe fn test_mm256_hsub_ps() {
3606 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3607 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3608 let r = _mm256_hsub_ps(a, b);
3609 let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.);
3610 assert_eq_m256(r, e);
3611
3612 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3613 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3614 let r = _mm256_hsub_ps(a, b);
3615 let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.);
3616 assert_eq_m256(r, e);
3617 }
3618
3619 #[simd_test(enable = "avx")]
3620 unsafe fn test_mm256_xor_pd() {
3621 let a = _mm256_setr_pd(4., 9., 16., 25.);
3622 let b = _mm256_set1_pd(0.);
3623 let r = _mm256_xor_pd(a, b);
3624 assert_eq_m256d(r, a);
3625 }
3626
3627 #[simd_test(enable = "avx")]
3628 unsafe fn test_mm256_xor_ps() {
3629 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3630 let b = _mm256_set1_ps(0.);
3631 let r = _mm256_xor_ps(a, b);
3632 assert_eq_m256(r, a);
3633 }
3634
3635 #[simd_test(enable = "avx")]
3636 unsafe fn test_mm_cmp_pd() {
3637 let a = _mm_setr_pd(4., 9.);
3638 let b = _mm_setr_pd(4., 3.);
3639 let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
3640 assert!(get_m128d(r, 0).is_nan());
3641 assert!(get_m128d(r, 1).is_nan());
3642 }
3643
3644 #[simd_test(enable = "avx")]
3645 unsafe fn test_mm256_cmp_pd() {
3646 let a = _mm256_setr_pd(1., 2., 3., 4.);
3647 let b = _mm256_setr_pd(5., 6., 7., 8.);
3648 let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
3649 let e = _mm256_set1_pd(0.);
3650 assert_eq_m256d(r, e);
3651 }
3652
3653 #[simd_test(enable = "avx")]
3654 unsafe fn test_mm_cmp_ps() {
3655 let a = _mm_setr_ps(4., 3., 2., 5.);
3656 let b = _mm_setr_ps(4., 9., 16., 25.);
3657 let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
3658 assert!(get_m128(r, 0).is_nan());
3659 assert_eq!(get_m128(r, 1), 0.);
3660 assert_eq!(get_m128(r, 2), 0.);
3661 assert_eq!(get_m128(r, 3), 0.);
3662 }
3663
3664 #[simd_test(enable = "avx")]
3665 unsafe fn test_mm256_cmp_ps() {
3666 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3667 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3668 let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
3669 let e = _mm256_set1_ps(0.);
3670 assert_eq_m256(r, e);
3671 }
3672
3673 #[simd_test(enable = "avx")]
3674 unsafe fn test_mm_cmp_sd() {
3675 let a = _mm_setr_pd(4., 9.);
3676 let b = _mm_setr_pd(4., 3.);
3677 let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
3678 assert!(get_m128d(r, 0).is_nan());
3679 assert_eq!(get_m128d(r, 1), 9.);
3680 }
3681
3682 #[simd_test(enable = "avx")]
3683 unsafe fn test_mm_cmp_ss() {
3684 let a = _mm_setr_ps(4., 3., 2., 5.);
3685 let b = _mm_setr_ps(4., 9., 16., 25.);
3686 let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
3687 assert!(get_m128(r, 0).is_nan());
3688 assert_eq!(get_m128(r, 1), 3.);
3689 assert_eq!(get_m128(r, 2), 2.);
3690 assert_eq!(get_m128(r, 3), 5.);
3691 }
3692
3693 #[simd_test(enable = "avx")]
3694 unsafe fn test_mm256_cvtepi32_pd() {
3695 let a = _mm_setr_epi32(4, 9, 16, 25);
3696 let r = _mm256_cvtepi32_pd(a);
3697 let e = _mm256_setr_pd(4., 9., 16., 25.);
3698 assert_eq_m256d(r, e);
3699 }
3700
3701 #[simd_test(enable = "avx")]
3702 unsafe fn test_mm256_cvtepi32_ps() {
3703 let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3704 let r = _mm256_cvtepi32_ps(a);
3705 let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3706 assert_eq_m256(r, e);
3707 }
3708
3709 #[simd_test(enable = "avx")]
3710 unsafe fn test_mm256_cvtpd_ps() {
3711 let a = _mm256_setr_pd(4., 9., 16., 25.);
3712 let r = _mm256_cvtpd_ps(a);
3713 let e = _mm_setr_ps(4., 9., 16., 25.);
3714 assert_eq_m128(r, e);
3715 }
3716
3717 #[simd_test(enable = "avx")]
3718 unsafe fn test_mm256_cvtps_epi32() {
3719 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3720 let r = _mm256_cvtps_epi32(a);
3721 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3722 assert_eq_m256i(r, e);
3723 }
3724
3725 #[simd_test(enable = "avx")]
3726 unsafe fn test_mm256_cvtps_pd() {
3727 let a = _mm_setr_ps(4., 9., 16., 25.);
3728 let r = _mm256_cvtps_pd(a);
3729 let e = _mm256_setr_pd(4., 9., 16., 25.);
3730 assert_eq_m256d(r, e);
3731 }
3732
3733 #[simd_test(enable = "avx")]
3734 unsafe fn test_mm256_cvtsd_f64() {
3735 let a = _mm256_setr_pd(1., 2., 3., 4.);
3736 let r = _mm256_cvtsd_f64(a);
3737 assert_eq!(r, 1.);
3738 }
3739
3740 #[simd_test(enable = "avx")]
3741 unsafe fn test_mm256_cvttpd_epi32() {
3742 let a = _mm256_setr_pd(4., 9., 16., 25.);
3743 let r = _mm256_cvttpd_epi32(a);
3744 let e = _mm_setr_epi32(4, 9, 16, 25);
3745 assert_eq_m128i(r, e);
3746 }
3747
3748 #[simd_test(enable = "avx")]
3749 unsafe fn test_mm256_cvtpd_epi32() {
3750 let a = _mm256_setr_pd(4., 9., 16., 25.);
3751 let r = _mm256_cvtpd_epi32(a);
3752 let e = _mm_setr_epi32(4, 9, 16, 25);
3753 assert_eq_m128i(r, e);
3754 }
3755
3756 #[simd_test(enable = "avx")]
3757 unsafe fn test_mm256_cvttps_epi32() {
3758 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3759 let r = _mm256_cvttps_epi32(a);
3760 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3761 assert_eq_m256i(r, e);
3762 }
3763
3764 #[simd_test(enable = "avx")]
3765 unsafe fn test_mm256_extractf128_ps() {
3766 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3767 let r = _mm256_extractf128_ps::<0>(a);
3768 let e = _mm_setr_ps(4., 3., 2., 5.);
3769 assert_eq_m128(r, e);
3770 }
3771
3772 #[simd_test(enable = "avx")]
3773 unsafe fn test_mm256_extractf128_pd() {
3774 let a = _mm256_setr_pd(4., 3., 2., 5.);
3775 let r = _mm256_extractf128_pd::<0>(a);
3776 let e = _mm_setr_pd(4., 3.);
3777 assert_eq_m128d(r, e);
3778 }
3779
3780 #[simd_test(enable = "avx")]
3781 unsafe fn test_mm256_extractf128_si256() {
3782 let a = _mm256_setr_epi64x(4, 3, 2, 5);
3783 let r = _mm256_extractf128_si256::<0>(a);
3784 let e = _mm_setr_epi64x(4, 3);
3785 assert_eq_m128i(r, e);
3786 }
3787
3788 #[simd_test(enable = "avx")]
3789 unsafe fn test_mm256_extract_epi32() {
3790 let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
3791 let r1 = _mm256_extract_epi32::<0>(a);
3792 let r2 = _mm256_extract_epi32::<3>(a);
3793 assert_eq!(r1, -1);
3794 assert_eq!(r2, 3);
3795 }
3796
3797 #[simd_test(enable = "avx")]
3798 unsafe fn test_mm256_cvtsi256_si32() {
3799 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3800 let r = _mm256_cvtsi256_si32(a);
3801 assert_eq!(r, 1);
3802 }
3803
3804 #[simd_test(enable = "avx")]
#[cfg_attr(miri, ignore)] // `vzeroall` is not supported by Miri
unsafe fn test_mm256_zeroall() {
3807 _mm256_zeroall();
3808 }
3809
3810 #[simd_test(enable = "avx")]
#[cfg_attr(miri, ignore)] // `vzeroupper` is not supported by Miri
unsafe fn test_mm256_zeroupper() {
3813 _mm256_zeroupper();
3814 }
3815
3816 #[simd_test(enable = "avx")]
3817 unsafe fn test_mm256_permutevar_ps() {
3818 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3819 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3820 let r = _mm256_permutevar_ps(a, b);
3821 let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.);
3822 assert_eq_m256(r, e);
3823 }
3824
3825 #[simd_test(enable = "avx")]
3826 unsafe fn test_mm_permutevar_ps() {
3827 let a = _mm_setr_ps(4., 3., 2., 5.);
3828 let b = _mm_setr_epi32(1, 2, 3, 4);
3829 let r = _mm_permutevar_ps(a, b);
3830 let e = _mm_setr_ps(3., 2., 5., 4.);
3831 assert_eq_m128(r, e);
3832 }
3833
3834 #[simd_test(enable = "avx")]
3835 unsafe fn test_mm256_permute_ps() {
3836 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3837 let r = _mm256_permute_ps::<0x1b>(a);
3838 let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.);
3839 assert_eq_m256(r, e);
3840 }
3841
3842 #[simd_test(enable = "avx")]
3843 unsafe fn test_mm_permute_ps() {
3844 let a = _mm_setr_ps(4., 3., 2., 5.);
3845 let r = _mm_permute_ps::<0x1b>(a);
3846 let e = _mm_setr_ps(5., 2., 3., 4.);
3847 assert_eq_m128(r, e);
3848 }
3849
3850 #[simd_test(enable = "avx")]
3851 unsafe fn test_mm256_permutevar_pd() {
3852 let a = _mm256_setr_pd(4., 3., 2., 5.);
3853 let b = _mm256_setr_epi64x(1, 2, 3, 4);
3854 let r = _mm256_permutevar_pd(a, b);
3855 let e = _mm256_setr_pd(4., 3., 5., 2.);
3856 assert_eq_m256d(r, e);
3857 }
3858
3859 #[simd_test(enable = "avx")]
3860 unsafe fn test_mm_permutevar_pd() {
3861 let a = _mm_setr_pd(4., 3.);
3862 let b = _mm_setr_epi64x(3, 0);
3863 let r = _mm_permutevar_pd(a, b);
3864 let e = _mm_setr_pd(3., 4.);
3865 assert_eq_m128d(r, e);
3866 }
3867
3868 #[simd_test(enable = "avx")]
3869 unsafe fn test_mm256_permute_pd() {
3870 let a = _mm256_setr_pd(4., 3., 2., 5.);
3871 let r = _mm256_permute_pd::<5>(a);
3872 let e = _mm256_setr_pd(3., 4., 5., 2.);
3873 assert_eq_m256d(r, e);
3874 }
3875
3876 #[simd_test(enable = "avx")]
3877 unsafe fn test_mm_permute_pd() {
3878 let a = _mm_setr_pd(4., 3.);
3879 let r = _mm_permute_pd::<1>(a);
3880 let e = _mm_setr_pd(3., 4.);
3881 assert_eq_m128d(r, e);
3882 }
3883
3884 #[simd_test(enable = "avx")]
3885 unsafe fn test_mm256_permute2f128_ps() {
3886 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3887 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3888 let r = _mm256_permute2f128_ps::<0x13>(a, b);
3889 let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
3890 assert_eq_m256(r, e);
3891 }
3892
3893 #[simd_test(enable = "avx")]
3894 unsafe fn test_mm256_permute2f128_pd() {
3895 let a = _mm256_setr_pd(1., 2., 3., 4.);
3896 let b = _mm256_setr_pd(5., 6., 7., 8.);
3897 let r = _mm256_permute2f128_pd::<0x31>(a, b);
3898 let e = _mm256_setr_pd(3., 4., 7., 8.);
3899 assert_eq_m256d(r, e);
3900 }
3901
3902 #[simd_test(enable = "avx")]
3903 unsafe fn test_mm256_permute2f128_si256() {
3904 let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
3905 let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
3906 let r = _mm256_permute2f128_si256::<0x20>(a, b);
3907 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3908 assert_eq_m256i(r, e);
3909 }
3910
3911 #[simd_test(enable = "avx")]
3912 unsafe fn test_mm256_broadcast_ss() {
3913 let r = _mm256_broadcast_ss(&3.);
3914 let e = _mm256_set1_ps(3.);
3915 assert_eq_m256(r, e);
3916 }
3917
3918 #[simd_test(enable = "avx")]
3919 unsafe fn test_mm_broadcast_ss() {
3920 let r = _mm_broadcast_ss(&3.);
3921 let e = _mm_set1_ps(3.);
3922 assert_eq_m128(r, e);
3923 }
3924
3925 #[simd_test(enable = "avx")]
3926 unsafe fn test_mm256_broadcast_sd() {
3927 let r = _mm256_broadcast_sd(&3.);
3928 let e = _mm256_set1_pd(3.);
3929 assert_eq_m256d(r, e);
3930 }
3931
3932 #[simd_test(enable = "avx")]
3933 unsafe fn test_mm256_broadcast_ps() {
3934 let a = _mm_setr_ps(4., 3., 2., 5.);
3935 let r = _mm256_broadcast_ps(&a);
3936 let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.);
3937 assert_eq_m256(r, e);
3938 }
3939
3940 #[simd_test(enable = "avx")]
3941 unsafe fn test_mm256_broadcast_pd() {
3942 let a = _mm_setr_pd(4., 3.);
3943 let r = _mm256_broadcast_pd(&a);
3944 let e = _mm256_setr_pd(4., 3., 4., 3.);
3945 assert_eq_m256d(r, e);
3946 }
3947
3948 #[simd_test(enable = "avx")]
3949 unsafe fn test_mm256_insertf128_ps() {
3950 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3951 let b = _mm_setr_ps(4., 9., 16., 25.);
3952 let r = _mm256_insertf128_ps::<0>(a, b);
3953 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3954 assert_eq_m256(r, e);
3955 }
3956
3957 #[simd_test(enable = "avx")]
3958 unsafe fn test_mm256_insertf128_pd() {
3959 let a = _mm256_setr_pd(1., 2., 3., 4.);
3960 let b = _mm_setr_pd(5., 6.);
3961 let r = _mm256_insertf128_pd::<0>(a, b);
3962 let e = _mm256_setr_pd(5., 6., 3., 4.);
3963 assert_eq_m256d(r, e);
3964 }
3965
3966 #[simd_test(enable = "avx")]
3967 unsafe fn test_mm256_insertf128_si256() {
3968 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3969 let b = _mm_setr_epi64x(5, 6);
3970 let r = _mm256_insertf128_si256::<0>(a, b);
3971 let e = _mm256_setr_epi64x(5, 6, 3, 4);
3972 assert_eq_m256i(r, e);
3973 }
3974
3975 #[simd_test(enable = "avx")]
3976 unsafe fn test_mm256_insert_epi8() {
3977 #[rustfmt::skip]
3978 let a = _mm256_setr_epi8(
3979 1, 2, 3, 4, 5, 6, 7, 8,
3980 9, 10, 11, 12, 13, 14, 15, 16,
3981 17, 18, 19, 20, 21, 22, 23, 24,
3982 25, 26, 27, 28, 29, 30, 31, 32,
3983 );
3984 let r = _mm256_insert_epi8::<31>(a, 0);
3985 #[rustfmt::skip]
3986 let e = _mm256_setr_epi8(
3987 1, 2, 3, 4, 5, 6, 7, 8,
3988 9, 10, 11, 12, 13, 14, 15, 16,
3989 17, 18, 19, 20, 21, 22, 23, 24,
3990 25, 26, 27, 28, 29, 30, 31, 0,
3991 );
3992 assert_eq_m256i(r, e);
3993 }
3994
3995 #[simd_test(enable = "avx")]
3996 unsafe fn test_mm256_insert_epi16() {
3997 #[rustfmt::skip]
3998 let a = _mm256_setr_epi16(
3999 0, 1, 2, 3, 4, 5, 6, 7,
4000 8, 9, 10, 11, 12, 13, 14, 15,
4001 );
4002 let r = _mm256_insert_epi16::<15>(a, 0);
4003 #[rustfmt::skip]
4004 let e = _mm256_setr_epi16(
4005 0, 1, 2, 3, 4, 5, 6, 7,
4006 8, 9, 10, 11, 12, 13, 14, 0,
4007 );
4008 assert_eq_m256i(r, e);
4009 }
4010
4011 #[simd_test(enable = "avx")]
4012 unsafe fn test_mm256_insert_epi32() {
4013 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4014 let r = _mm256_insert_epi32::<7>(a, 0);
4015 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
4016 assert_eq_m256i(r, e);
4017 }
4018
4019 #[simd_test(enable = "avx")]
4020 unsafe fn test_mm256_load_pd() {
4021 let a = _mm256_setr_pd(1., 2., 3., 4.);
4022 let p = ptr::addr_of!(a) as *const f64;
4023 let r = _mm256_load_pd(p);
4024 let e = _mm256_setr_pd(1., 2., 3., 4.);
4025 assert_eq_m256d(r, e);
4026 }
4027
4028 #[simd_test(enable = "avx")]
4029 unsafe fn test_mm256_store_pd() {
4030 let a = _mm256_setr_pd(1., 2., 3., 4.);
4031 let mut r = _mm256_undefined_pd();
4032 _mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
4033 assert_eq_m256d(r, a);
4034 }
4035
4036 #[simd_test(enable = "avx")]
4037 unsafe fn test_mm256_load_ps() {
4038 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4039 let p = ptr::addr_of!(a) as *const f32;
4040 let r = _mm256_load_ps(p);
4041 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4042 assert_eq_m256(r, e);
4043 }
4044
4045 #[simd_test(enable = "avx")]
4046 unsafe fn test_mm256_store_ps() {
4047 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4048 let mut r = _mm256_undefined_ps();
4049 _mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
4050 assert_eq_m256(r, a);
4051 }
4052
4053 #[simd_test(enable = "avx")]
4054 unsafe fn test_mm256_loadu_pd() {
4055 let a = &[1.0f64, 2., 3., 4.];
4056 let p = a.as_ptr();
4057 let r = _mm256_loadu_pd(black_box(p));
4058 let e = _mm256_setr_pd(1., 2., 3., 4.);
4059 assert_eq_m256d(r, e);
4060 }
4061
4062 #[simd_test(enable = "avx")]
4063 unsafe fn test_mm256_storeu_pd() {
4064 let a = _mm256_set1_pd(9.);
4065 let mut r = _mm256_undefined_pd();
4066 _mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
4067 assert_eq_m256d(r, a);
4068 }
4069
4070 #[simd_test(enable = "avx")]
4071 unsafe fn test_mm256_loadu_ps() {
4072 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
4073 let p = a.as_ptr();
4074 let r = _mm256_loadu_ps(black_box(p));
4075 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4076 assert_eq_m256(r, e);
4077 }
4078
4079 #[simd_test(enable = "avx")]
4080 unsafe fn test_mm256_storeu_ps() {
4081 let a = _mm256_set1_ps(9.);
4082 let mut r = _mm256_undefined_ps();
4083 _mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
4084 assert_eq_m256(r, a);
4085 }
4086
4087 #[simd_test(enable = "avx")]
4088 unsafe fn test_mm256_load_si256() {
4089 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4090 let p = ptr::addr_of!(a);
4091 let r = _mm256_load_si256(p);
4092 let e = _mm256_setr_epi64x(1, 2, 3, 4);
4093 assert_eq_m256i(r, e);
4094 }
4095
4096 #[simd_test(enable = "avx")]
4097 unsafe fn test_mm256_store_si256() {
4098 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4099 let mut r = _mm256_undefined_si256();
4100 _mm256_store_si256(ptr::addr_of_mut!(r), a);
4101 assert_eq_m256i(r, a);
4102 }
4103
4104 #[simd_test(enable = "avx")]
4105 unsafe fn test_mm256_loadu_si256() {
4106 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4107 let p = ptr::addr_of!(a);
4108 let r = _mm256_loadu_si256(black_box(p));
4109 let e = _mm256_setr_epi64x(1, 2, 3, 4);
4110 assert_eq_m256i(r, e);
4111 }
4112
4113 #[simd_test(enable = "avx")]
4114 unsafe fn test_mm256_storeu_si256() {
4115 let a = _mm256_set1_epi8(9);
4116 let mut r = _mm256_undefined_si256();
4117 _mm256_storeu_si256(ptr::addr_of_mut!(r), a);
4118 assert_eq_m256i(r, a);
4119 }
4120
4121 #[simd_test(enable = "avx")]
4122 unsafe fn test_mm256_maskload_pd() {
4123 let a = &[1.0f64, 2., 3., 4.];
4124 let p = a.as_ptr();
4125 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4126 let r = _mm256_maskload_pd(black_box(p), mask);
4127 let e = _mm256_setr_pd(0., 2., 0., 4.);
4128 assert_eq_m256d(r, e);
4129 }
4130
4131 #[simd_test(enable = "avx")]
4132 unsafe fn test_mm256_maskstore_pd() {
4133 let mut r = _mm256_set1_pd(0.);
4134 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4135 let a = _mm256_setr_pd(1., 2., 3., 4.);
4136 _mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4137 let e = _mm256_setr_pd(0., 2., 0., 4.);
4138 assert_eq_m256d(r, e);
4139 }
4140
4141 #[simd_test(enable = "avx")]
4142 unsafe fn test_mm_maskload_pd() {
4143 let a = &[1.0f64, 2.];
4144 let p = a.as_ptr();
4145 let mask = _mm_setr_epi64x(0, !0);
4146 let r = _mm_maskload_pd(black_box(p), mask);
4147 let e = _mm_setr_pd(0., 2.);
4148 assert_eq_m128d(r, e);
4149 }
4150
4151 #[simd_test(enable = "avx")]
4152 unsafe fn test_mm_maskstore_pd() {
4153 let mut r = _mm_set1_pd(0.);
4154 let mask = _mm_setr_epi64x(0, !0);
4155 let a = _mm_setr_pd(1., 2.);
4156 _mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4157 let e = _mm_setr_pd(0., 2.);
4158 assert_eq_m128d(r, e);
4159 }
4160
4161 #[simd_test(enable = "avx")]
4162 unsafe fn test_mm256_maskload_ps() {
4163 let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
4164 let p = a.as_ptr();
4165 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4166 let r = _mm256_maskload_ps(black_box(p), mask);
4167 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4168 assert_eq_m256(r, e);
4169 }
4170
4171 #[simd_test(enable = "avx")]
4172 unsafe fn test_mm256_maskstore_ps() {
4173 let mut r = _mm256_set1_ps(0.);
4174 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4175 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4176 _mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4177 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4178 assert_eq_m256(r, e);
4179 }
4180
4181 #[simd_test(enable = "avx")]
4182 unsafe fn test_mm_maskload_ps() {
4183 let a = &[1.0f32, 2., 3., 4.];
4184 let p = a.as_ptr();
4185 let mask = _mm_setr_epi32(0, !0, 0, !0);
4186 let r = _mm_maskload_ps(black_box(p), mask);
4187 let e = _mm_setr_ps(0., 2., 0., 4.);
4188 assert_eq_m128(r, e);
4189 }
4190
4191 #[simd_test(enable = "avx")]
4192 unsafe fn test_mm_maskstore_ps() {
4193 let mut r = _mm_set1_ps(0.);
4194 let mask = _mm_setr_epi32(0, !0, 0, !0);
4195 let a = _mm_setr_ps(1., 2., 3., 4.);
4196 _mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4197 let e = _mm_setr_ps(0., 2., 0., 4.);
4198 assert_eq_m128(r, e);
4199 }
4200
4201 #[simd_test(enable = "avx")]
4202 unsafe fn test_mm256_movehdup_ps() {
4203 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4204 let r = _mm256_movehdup_ps(a);
4205 let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.);
4206 assert_eq_m256(r, e);
4207 }
4208
4209 #[simd_test(enable = "avx")]
4210 unsafe fn test_mm256_moveldup_ps() {
4211 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4212 let r = _mm256_moveldup_ps(a);
4213 let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.);
4214 assert_eq_m256(r, e);
4215 }
4216
4217 #[simd_test(enable = "avx")]
4218 unsafe fn test_mm256_movedup_pd() {
4219 let a = _mm256_setr_pd(1., 2., 3., 4.);
4220 let r = _mm256_movedup_pd(a);
4221 let e = _mm256_setr_pd(1., 1., 3., 3.);
4222 assert_eq_m256d(r, e);
4223 }
4224
4225 #[simd_test(enable = "avx")]
4226 unsafe fn test_mm256_lddqu_si256() {
4227 #[rustfmt::skip]
4228 let a = _mm256_setr_epi8(
4229 1, 2, 3, 4, 5, 6, 7, 8,
4230 9, 10, 11, 12, 13, 14, 15, 16,
4231 17, 18, 19, 20, 21, 22, 23, 24,
4232 25, 26, 27, 28, 29, 30, 31, 32,
4233 );
4234 let p = ptr::addr_of!(a);
4235 let r = _mm256_lddqu_si256(black_box(p));
4236 #[rustfmt::skip]
4237 let e = _mm256_setr_epi8(
4238 1, 2, 3, 4, 5, 6, 7, 8,
4239 9, 10, 11, 12, 13, 14, 15, 16,
4240 17, 18, 19, 20, 21, 22, 23, 24,
4241 25, 26, 27, 28, 29, 30, 31, 32,
4242 );
4243 assert_eq_m256i(r, e);
4244 }
4245
4246 #[simd_test(enable = "avx")]
#[cfg_attr(miri, ignore)] // Non-temporal stores are not supported by Miri
unsafe fn test_mm256_stream_si256() {
4249 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4250 let mut r = _mm256_undefined_si256();
4251 _mm256_stream_si256(ptr::addr_of_mut!(r), a);
4252 assert_eq_m256i(r, a);
4253 }
4254
4255 #[simd_test(enable = "avx")]
#[cfg_attr(miri, ignore)] // Non-temporal stores are not supported by Miri
unsafe fn test_mm256_stream_pd() {
4258 #[repr(align(32))]
4259 struct Memory {
4260 pub data: [f64; 4],
4261 }
4262 let a = _mm256_set1_pd(7.0);
4263 let mut mem = Memory { data: [-1.0; 4] };
4264
4265 _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4266 for i in 0..4 {
4267 assert_eq!(mem.data[i], get_m256d(a, i));
4268 }
4269 }
4270
4271 #[simd_test(enable = "avx")]
#[cfg_attr(miri, ignore)] // Non-temporal stores are not supported by Miri
unsafe fn test_mm256_stream_ps() {
4274 #[repr(align(32))]
4275 struct Memory {
4276 pub data: [f32; 8],
4277 }
4278 let a = _mm256_set1_ps(7.0);
4279 let mut mem = Memory { data: [-1.0; 8] };
4280
4281 _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
4282 for i in 0..8 {
4283 assert_eq!(mem.data[i], get_m256(a, i));
4284 }
4285 }
4286
4287 #[simd_test(enable = "avx")]
4288 unsafe fn test_mm256_rcp_ps() {
4289 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4290 let r = _mm256_rcp_ps(a);
4291 #[rustfmt::skip]
4292 let e = _mm256_setr_ps(
4293 0.99975586, 0.49987793, 0.33325195, 0.24993896,
4294 0.19995117, 0.16662598, 0.14282227, 0.12496948,
4295 );
4296 let rel_err = 0.00048828125;
4297 for i in 0..8 {
4298 assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
4299 }
4300 }
4301
4302 #[simd_test(enable = "avx")]
4303 unsafe fn test_mm256_rsqrt_ps() {
4304 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4305 let r = _mm256_rsqrt_ps(a);
4306 #[rustfmt::skip]
4307 let e = _mm256_setr_ps(
4308 0.99975586, 0.7069092, 0.5772705, 0.49987793,
4309 0.44714355, 0.40820313, 0.3779297, 0.3534546,
4310 );
4311 let rel_err = 0.00048828125;
4312 for i in 0..8 {
4313 assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
4314 }
4315 }
4316
4317 #[simd_test(enable = "avx")]
4318 unsafe fn test_mm256_unpackhi_pd() {
4319 let a = _mm256_setr_pd(1., 2., 3., 4.);
4320 let b = _mm256_setr_pd(5., 6., 7., 8.);
4321 let r = _mm256_unpackhi_pd(a, b);
4322 let e = _mm256_setr_pd(2., 6., 4., 8.);
4323 assert_eq_m256d(r, e);
4324 }
4325
4326 #[simd_test(enable = "avx")]
4327 unsafe fn test_mm256_unpackhi_ps() {
4328 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4329 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
4330 let r = _mm256_unpackhi_ps(a, b);
4331 let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.);
4332 assert_eq_m256(r, e);
4333 }
4334
4335 #[simd_test(enable = "avx")]
4336 unsafe fn test_mm256_unpacklo_pd() {
4337 let a = _mm256_setr_pd(1., 2., 3., 4.);
4338 let b = _mm256_setr_pd(5., 6., 7., 8.);
4339 let r = _mm256_unpacklo_pd(a, b);
4340 let e = _mm256_setr_pd(1., 5., 3., 7.);
4341 assert_eq_m256d(r, e);
4342 }
4343
4344 #[simd_test(enable = "avx")]
4345 unsafe fn test_mm256_unpacklo_ps() {
4346 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4347 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
4348 let r = _mm256_unpacklo_ps(a, b);
4349 let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.);
4350 assert_eq_m256(r, e);
4351 }
4352
4353 #[simd_test(enable = "avx")]
4354 unsafe fn test_mm256_testz_si256() {
4355 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4356 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4357 let r = _mm256_testz_si256(a, b);
4358 assert_eq!(r, 0);
4359 let b = _mm256_set1_epi64x(0);
4360 let r = _mm256_testz_si256(a, b);
4361 assert_eq!(r, 1);
4362 }
4363
4364 #[simd_test(enable = "avx")]
4365 unsafe fn test_mm256_testc_si256() {
4366 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4367 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4368 let r = _mm256_testc_si256(a, b);
4369 assert_eq!(r, 0);
4370 let b = _mm256_set1_epi64x(0);
4371 let r = _mm256_testc_si256(a, b);
4372 assert_eq!(r, 1);
4373 }
4374
4375 #[simd_test(enable = "avx")]
4376 unsafe fn test_mm256_testnzc_si256() {
4377 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4378 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4379 let r = _mm256_testnzc_si256(a, b);
4380 assert_eq!(r, 1);
4381 let a = _mm256_setr_epi64x(0, 0, 0, 0);
4382 let b = _mm256_setr_epi64x(0, 0, 0, 0);
4383 let r = _mm256_testnzc_si256(a, b);
4384 assert_eq!(r, 0);
4385 }
4386
4387 #[simd_test(enable = "avx")]
4388 unsafe fn test_mm256_testz_pd() {
4389 let a = _mm256_setr_pd(1., 2., 3., 4.);
4390 let b = _mm256_setr_pd(5., 6., 7., 8.);
4391 let r = _mm256_testz_pd(a, b);
4392 assert_eq!(r, 1);
4393 let a = _mm256_set1_pd(-1.);
4394 let r = _mm256_testz_pd(a, a);
4395 assert_eq!(r, 0);
4396 }
4397
4398 #[simd_test(enable = "avx")]
4399 unsafe fn test_mm256_testc_pd() {
4400 let a = _mm256_setr_pd(1., 2., 3., 4.);
4401 let b = _mm256_setr_pd(5., 6., 7., 8.);
4402 let r = _mm256_testc_pd(a, b);
4403 assert_eq!(r, 1);
4404 let a = _mm256_set1_pd(1.);
4405 let b = _mm256_set1_pd(-1.);
4406 let r = _mm256_testc_pd(a, b);
4407 assert_eq!(r, 0);
4408 }
4409
4410 #[simd_test(enable = "avx")]
4411 unsafe fn test_mm256_testnzc_pd() {
4412 let a = _mm256_setr_pd(1., 2., 3., 4.);
4413 let b = _mm256_setr_pd(5., 6., 7., 8.);
4414 let r = _mm256_testnzc_pd(a, b);
4415 assert_eq!(r, 0);
4416 let a = _mm256_setr_pd(1., -1., -1., -1.);
4417 let b = _mm256_setr_pd(-1., -1., 1., 1.);
4418 let r = _mm256_testnzc_pd(a, b);
4419 assert_eq!(r, 1);
4420 }
4421
4422 #[simd_test(enable = "avx")]
4423 unsafe fn test_mm_testz_pd() {
4424 let a = _mm_setr_pd(1., 2.);
4425 let b = _mm_setr_pd(5., 6.);
4426 let r = _mm_testz_pd(a, b);
4427 assert_eq!(r, 1);
4428 let a = _mm_set1_pd(-1.);
4429 let r = _mm_testz_pd(a, a);
4430 assert_eq!(r, 0);
4431 }
4432
4433 #[simd_test(enable = "avx")]
4434 unsafe fn test_mm_testc_pd() {
4435 let a = _mm_setr_pd(1., 2.);
4436 let b = _mm_setr_pd(5., 6.);
4437 let r = _mm_testc_pd(a, b);
4438 assert_eq!(r, 1);
4439 let a = _mm_set1_pd(1.);
4440 let b = _mm_set1_pd(-1.);
4441 let r = _mm_testc_pd(a, b);
4442 assert_eq!(r, 0);
4443 }
4444
4445 #[simd_test(enable = "avx")]
4446 unsafe fn test_mm_testnzc_pd() {
4447 let a = _mm_setr_pd(1., 2.);
4448 let b = _mm_setr_pd(5., 6.);
4449 let r = _mm_testnzc_pd(a, b);
4450 assert_eq!(r, 0);
4451 let a = _mm_setr_pd(1., -1.);
4452 let b = _mm_setr_pd(-1., -1.);
4453 let r = _mm_testnzc_pd(a, b);
4454 assert_eq!(r, 1);
4455 }
4456
4457 #[simd_test(enable = "avx")]
4458 unsafe fn test_mm256_testz_ps() {
4459 let a = _mm256_set1_ps(1.);
4460 let r = _mm256_testz_ps(a, a);
4461 assert_eq!(r, 1);
4462 let a = _mm256_set1_ps(-1.);
4463 let r = _mm256_testz_ps(a, a);
4464 assert_eq!(r, 0);
4465 }
4466
4467 #[simd_test(enable = "avx")]
4468 unsafe fn test_mm256_testc_ps() {
4469 let a = _mm256_set1_ps(1.);
4470 let r = _mm256_testc_ps(a, a);
4471 assert_eq!(r, 1);
4472 let b = _mm256_set1_ps(-1.);
4473 let r = _mm256_testc_ps(a, b);
4474 assert_eq!(r, 0);
4475 }
4476
4477 #[simd_test(enable = "avx")]
4478 unsafe fn test_mm256_testnzc_ps() {
4479 let a = _mm256_set1_ps(1.);
4480 let r = _mm256_testnzc_ps(a, a);
4481 assert_eq!(r, 0);
4482 let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.);
4483 let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.);
4484 let r = _mm256_testnzc_ps(a, b);
4485 assert_eq!(r, 1);
4486 }
4487
4488 #[simd_test(enable = "avx")]
4489 unsafe fn test_mm_testz_ps() {
4490 let a = _mm_set1_ps(1.);
4491 let r = _mm_testz_ps(a, a);
4492 assert_eq!(r, 1);
4493 let a = _mm_set1_ps(-1.);
4494 let r = _mm_testz_ps(a, a);
4495 assert_eq!(r, 0);
4496 }
4497
4498 #[simd_test(enable = "avx")]
4499 unsafe fn test_mm_testc_ps() {
4500 let a = _mm_set1_ps(1.);
4501 let r = _mm_testc_ps(a, a);
4502 assert_eq!(r, 1);
4503 let b = _mm_set1_ps(-1.);
4504 let r = _mm_testc_ps(a, b);
4505 assert_eq!(r, 0);
4506 }
4507
4508 #[simd_test(enable = "avx")]
4509 unsafe fn test_mm_testnzc_ps() {
4510 let a = _mm_set1_ps(1.);
4511 let r = _mm_testnzc_ps(a, a);
4512 assert_eq!(r, 0);
4513 let a = _mm_setr_ps(1., -1., -1., -1.);
4514 let b = _mm_setr_ps(-1., -1., 1., 1.);
4515 let r = _mm_testnzc_ps(a, b);
4516 assert_eq!(r, 1);
4517 }
4518
4519 #[simd_test(enable = "avx")]
4520 unsafe fn test_mm256_movemask_pd() {
4521 let a = _mm256_setr_pd(1., -2., 3., -4.);
4522 let r = _mm256_movemask_pd(a);
4523 assert_eq!(r, 0xA);
4524 }
4525
4526 #[simd_test(enable = "avx")]
4527 unsafe fn test_mm256_movemask_ps() {
4528 let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.);
4529 let r = _mm256_movemask_ps(a);
4530 assert_eq!(r, 0xAA);
4531 }
4532
4533 #[simd_test(enable = "avx")]
4534 unsafe fn test_mm256_setzero_pd() {
4535 let r = _mm256_setzero_pd();
4536 assert_eq_m256d(r, _mm256_set1_pd(0.));
4537 }
4538
4539 #[simd_test(enable = "avx")]
4540 unsafe fn test_mm256_setzero_ps() {
4541 let r = _mm256_setzero_ps();
4542 assert_eq_m256(r, _mm256_set1_ps(0.));
4543 }
4544
4545 #[simd_test(enable = "avx")]
4546 unsafe fn test_mm256_setzero_si256() {
4547 let r = _mm256_setzero_si256();
4548 assert_eq_m256i(r, _mm256_set1_epi8(0));
4549 }
4550
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_pd() {
        let r = _mm256_set_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_ps() {
        let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi8() {
        #[rustfmt::skip]
        let r = _mm256_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 31, 30, 29, 28, 27, 26, 25,
            24, 23, 22, 21, 20, 19, 18, 17,
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi16() {
        #[rustfmt::skip]
        let r = _mm256_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi32() {
        let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi64x() {
        let r = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_pd() {
        let r = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_ps() {
        let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi16() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi32() {
        let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi64x() {
        let r = _mm256_setr_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_pd() {
        let r = _mm256_set1_pd(1.);
        assert_eq_m256d(r, _mm256_set1_pd(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_ps() {
        let r = _mm256_set1_ps(1.);
        assert_eq_m256(r, _mm256_set1_ps(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi8() {
        let r = _mm256_set1_epi8(1);
        assert_eq_m256i(r, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi16() {
        let r = _mm256_set1_epi16(1);
        assert_eq_m256i(r, _mm256_set1_epi16(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi32() {
        let r = _mm256_set1_epi32(1);
        assert_eq_m256i(r, _mm256_set1_epi32(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi64x() {
        let r = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, _mm256_set1_epi64x(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_ps() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_ps(a);
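        // A pure bit-level cast: each f64 is reinterpreted as two f32 lanes.
        // The low 32 bits of 1.0f64 (0x3FF0_0000_0000_0000) are 0x0000_0000
        // -> 0.0f32, and the high 32 bits are 0x3FF0_0000 -> 1.875f32.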
        let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_pd() {
        let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        let r = _mm256_castps_pd(a);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_si256() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps_si256(a);
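        // The expected bytes are the little-endian IEEE-754 bit pattern of
        // each f32, e.g. 1.0f32 = 0x3F80_0000 -> bytes (0, 0, -128, 63).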
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_ps() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        let r = _mm256_castsi256_ps(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_si256() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_si256(a);
        assert_eq_m256d(transmute(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_pd() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_pd(a);
        assert_eq_m256d(r, transmute(a));
    }

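    // The 256 -> 128 casts keep the lower 128 bits and drop the upper half;
    // the 128 -> 256 casts leave the upper half undefined, which is why the
    // tests below compare only the lower half of the widened result.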
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps256_ps128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps256_ps128(a);
        assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd256_pd128() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd256_pd128(a);
        assert_eq_m128d(r, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_si128() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_si128(a);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps128_ps256() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_castps128_ps256(a);
        assert_eq_m128(_mm256_castps256_ps128(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd128_pd256() {
        let a = _mm_setr_pd(1., 2.);
        let r = _mm256_castpd128_pd256(a);
        assert_eq_m128d(_mm256_castpd256_pd128(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi128_si256() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm256_castsi128_si256(a);
        assert_eq_m128i(_mm256_castsi256_si128(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextps128_ps256() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_zextps128_ps256(a);
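        // Unlike the plain 128 -> 256 cast, `zext` guarantees the upper
        // 128 bits are zeroed, so the full result can be compared.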
        let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextsi128_si256() {
        let a = _mm_setr_epi64x(1, 2);
        let r = _mm256_zextsi128_si256(a);
        let e = _mm256_setr_epi64x(1, 2, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextpd128_pd256() {
        let a = _mm_setr_pd(1., 2.);
        let r = _mm256_zextpd128_pd256(a);
        let e = _mm256_setr_pd(1., 2., 0., 0.);
        assert_eq_m256d(r, e);
    }

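    // `_mm256_set_m128(hi, lo)` places `lo` in the lower 128 bits and `hi`
    // in the upper 128 bits; the `setr` variants take the two halves in the
    // opposite order.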
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128() {
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_set_m128(hi, lo);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128d() {
        let hi = _mm_setr_pd(3., 4.);
        let lo = _mm_setr_pd(1., 2.);
        let r = _mm256_set_m128d(hi, lo);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm256_set_m128i(hi, lo);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128() {
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let r = _mm256_setr_m128(lo, hi);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128d() {
        let lo = _mm_setr_pd(1., 2.);
        let hi = _mm_setr_pd(3., 4.);
        let r = _mm256_setr_m128d(lo, hi);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128i() {
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_setr_m128i(lo, hi);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

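    // `loadu2` performs two independent unaligned 128-bit loads and combines
    // them, with the `lo` address filling the lower half of the result.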
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128() {
        let hi = &[5., 6., 7., 8.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2., 3., 4.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128(hiaddr, loaddr);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128d() {
        let hi = &[3., 4.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128d(hiaddr, loaddr);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

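    // `storeu2` is the inverse of `loadu2`: it writes the two 128-bit halves
    // of `a` to two independent, possibly unaligned addresses.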
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let mut hi = _mm_undefined_ps();
        let mut lo = _mm_undefined_ps();
        _mm256_storeu2_m128(
            ptr::addr_of_mut!(hi) as *mut f32,
            ptr::addr_of_mut!(lo) as *mut f32,
            a,
        );
        assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
        assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128d() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let mut hi = _mm_undefined_pd();
        let mut lo = _mm_undefined_pd();
        _mm256_storeu2_m128d(
            ptr::addr_of_mut!(hi) as *mut f64,
            ptr::addr_of_mut!(lo) as *mut f64,
            a,
        );
        assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
        assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128i() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let mut hi = _mm_undefined_si128();
        let mut lo = _mm_undefined_si128();
        _mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
        #[rustfmt::skip]
        let e_hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e_lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m128i(hi, e_hi);
        assert_eq_m128i(lo, e_lo);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtss_f32() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_cvtss_f32(a);
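        // `_mm256_cvtss_f32` extracts the lowest f32 lane of the vector.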
        assert_eq!(r, 1.);
    }
}