// core/stdarch/crates/core_arch/src/x86/avx512dq.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem::transmute,
};

// And

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let and = _mm_and_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, and, src.as_f64x2()))
    }
}
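
// A minimal usage sketch (illustrative only, not part of the upstream API):
// lane i of the result takes `a AND b` when bit i of `k` is set, and a copy
// of the corresponding `src` lane otherwise. Assumes it only runs on a CPU
// with avx512dq and avx512vl, e.g. behind `is_x86_feature_detected!`.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
fn example_mm_mask_and_pd() -> __m128d {
    let a = _mm_set1_pd(1.0); // 1.0 AND 1.0 is a bitwise identity
    let src = _mm_set1_pd(2.0);
    // k = 0b01: lane 0 becomes 1.0, lane 1 is copied from src (2.0).
    _mm_mask_and_pd(src, 0b01, a, a)
}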

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let and = _mm_and_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, and, f64x2::ZERO))
    }
}
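
// Companion sketch for the zeromask form (illustrative only): lanes whose
// mask bit is clear become 0.0 instead of being taken from a src operand.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
fn example_mm_maskz_and_pd() -> __m128d {
    let a = _mm_set1_pd(1.0);
    // k = 0b01: lane 0 becomes 1.0, lane 1 is zeroed.
    _mm_maskz_and_pd(0b01, a, a)
}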

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let and = _mm256_and_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, and, src.as_f64x4()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let and = _mm256_and_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, and, f64x4::ZERO))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}
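
// The body above has no dedicated float AND: lanes are reinterpreted as
// u64x8, ANDed bitwise, and reinterpreted back, so no rounding occurs. One
// classic use (a sketch, not part of the upstream API) is isolating sign bits:
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
fn example_sign_bits_via_and_pd(v: __m512d) -> __m512d {
    // -0.0 is exactly the sign bit, so the AND keeps only each lane's sign.
    _mm512_and_pd(v, _mm512_set1_pd(-0.0))
}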

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let and = _mm512_and_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, and, src.as_f64x8()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let and = _mm512_and_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, and, f64x8::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let and = _mm_and_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, and, src.as_f32x4()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let and = _mm_and_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, and, f32x4::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let and = _mm256_and_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, and, src.as_f32x8()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let and = _mm256_and_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, and, f32x8::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_and(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let and = _mm512_and_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, and, src.as_f32x16()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let and = _mm512_and_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, and, f32x16::ZERO))
    }
}
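
// Sketch of the 16-lane form (illustrative only, not part of the upstream
// API): at 512 bits the ps variants take a __mmask16, one bit per f32 lane.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
fn example_mm512_maskz_and_ps() -> __m512 {
    let a = _mm512_set1_ps(1.0);
    // Keep the low eight lanes, zero the high eight.
    _mm512_maskz_and_ps(0b0000_0000_1111_1111, a, a)
}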

// Andnot

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let andnot = _mm_andnot_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x2()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let andnot = _mm_andnot_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, andnot, f64x2::ZERO))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let andnot = _mm256_andnot_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x4()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let andnot = _mm256_andnot_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, andnot, f64x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) }
}
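
// The body above synthesises NOT by XORing `a` with an all-ones pattern
// (`_mm512_set1_epi64(-1)` viewed as f64 lanes) and then ANDing with `b`,
// giving `(!a) & b` in vandnpd operand order. A common use (a sketch, not
// part of the upstream API) is a branch-free absolute value:
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
fn example_abs_via_andnot_pd(v: __m512d) -> __m512d {
    // The first operand holds only sign bits, so (!a) & v clears them: |v|.
    _mm512_andnot_pd(_mm512_set1_pd(-0.0), v)
}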

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let andnot = _mm512_andnot_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x8()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let andnot = _mm512_andnot_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, andnot, f64x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let andnot = _mm_andnot_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x4()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let andnot = _mm_andnot_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, andnot, f32x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let andnot = _mm256_andnot_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x8()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let andnot = _mm256_andnot_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, andnot, f32x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) }
}
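
// The and/andnot/or trio composes into a branch-free copysign; a sketch
// (illustrative only, not part of the upstream API):
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
fn example_copysign_ps(magnitude: __m512, sign_source: __m512) -> __m512 {
    let sign_bit = _mm512_set1_ps(-0.0);
    let mag = _mm512_andnot_ps(sign_bit, magnitude); // strip the signs
    let sgn = _mm512_and_ps(sign_bit, sign_source); // keep only the signs
    _mm512_or_ps(mag, sgn) // recombine
}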

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let andnot = _mm512_andnot_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x16()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let andnot = _mm512_andnot_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, andnot, f32x16::ZERO))
    }
}

// Or

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let or = _mm_or_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, or, src.as_f64x2()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let or = _mm_or_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, or, f64x2::ZERO))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let or = _mm256_or_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, or, src.as_f64x4()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let or = _mm256_or_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, or, f64x4::ZERO))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}
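
// Mirror image of the AND-based sign tricks above (a sketch, not part of the
// upstream API): ORing in the sign bit forces every lane negative, -|v|.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
fn example_negative_abs_via_or_pd(v: __m512d) -> __m512d {
    _mm512_or_pd(v, _mm512_set1_pd(-0.0))
}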

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let or = _mm512_or_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, or, src.as_f64x8()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let or = _mm512_or_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, or, f64x8::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let or = _mm_or_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, or, src.as_f32x4()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let or = _mm_or_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, or, f32x4::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let or = _mm256_or_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, or, src.as_f32x8()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let or = _mm256_or_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, or, f32x8::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_or(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let or = _mm512_or_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, or, src.as_f32x16()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let or = _mm512_or_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, or, f32x16::ZERO))
    }
}

// Xor

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let xor = _mm_xor_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, xor, src.as_f64x2()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let xor = _mm_xor_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, xor, f64x2::ZERO))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let xor = _mm256_xor_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, xor, src.as_f64x4()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let xor = _mm256_xor_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, xor, f64x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}
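
// XOR with the sign bit flips every lane's sign, a branch-free negation;
// XORing a register with itself zeroes it. A sketch (illustrative only, not
// part of the upstream API):
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
fn example_negate_via_xor_pd(v: __m512d) -> __m512d {
    _mm512_xor_pd(v, _mm512_set1_pd(-0.0))
}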

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let xor = _mm512_xor_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, xor, src.as_f64x8()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let xor = _mm512_xor_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, xor, f64x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let xor = _mm_xor_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, xor, src.as_f32x4()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let xor = _mm_xor_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, xor, f32x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let xor = _mm256_xor_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, xor, src.as_f32x8()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let xor = _mm256_xor_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, xor, f32x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_xor(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let xor = _mm512_xor_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, xor, src.as_f32x16()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let xor = _mm512_xor_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, xor, f32x16::ZERO))
    }
}

// Broadcast

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
    unsafe {
        let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}
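
// Lane-pattern sketch (illustrative only, not part of the upstream API):
// the low two f32 lanes of `a` repeat four times across the destination.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
fn example_mm256_broadcast_f32x2() -> __m256 {
    let a = _mm_set_ps(9.0, 9.0, 2.0, 1.0); // lanes are [1.0, 2.0, 9.0, 9.0]
    // Result lanes: [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
    _mm256_broadcast_f32x2(a)
}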

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let b = _mm256_broadcast_f32x2(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let b = _mm256_broadcast_f32x2(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
    unsafe {
        let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let b = _mm512_broadcast_f32x2(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, src.as_f32x16()))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let b = _mm512_broadcast_f32x2(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, f32x16::ZERO))
    }
}

/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
    unsafe {
        let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        transmute(b)
    }
}
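
// Unlike the x2 broadcasts, this copies a whole 256-bit group: both halves
// of the result hold `a`. A sketch (not part of the upstream API):
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
fn example_mm512_broadcast_f32x8() -> __m512 {
    let a = _mm256_set_ps(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
    // Result lanes: [1.0, 2.0, ..., 8.0] repeated in both 256-bit halves.
    _mm512_broadcast_f32x8(a)
}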
1042
1043/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
1044/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
1045///
1046/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522)
1047#[inline]
1048#[target_feature(enable = "avx512dq")]
1049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1051pub const fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 {
1052    unsafe {
1053        let b = _mm512_broadcast_f32x8(a).as_f32x16();
1054        transmute(simd_select_bitmask(k, b, src.as_f32x16()))
1055    }
1056}
1057
1058/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
1059/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1060///
1061/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523)
1062#[inline]
1063#[target_feature(enable = "avx512dq")]
1064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1065#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1066pub const fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
1067    unsafe {
1068        let b = _mm512_broadcast_f32x8(a).as_f32x16();
1069        transmute(simd_select_bitmask(k, b, f32x16::ZERO))
1070    }
1071}
1072
1073/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1074/// elements of dst.
1075///
1076/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524)
1077#[inline]
1078#[target_feature(enable = "avx512dq,avx512vl")]
1079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1081pub const fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
1082    unsafe {
1083        let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
1084        transmute(b)
1085    }
1086}
1087
1088/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1089/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
1090///
1091/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525)
1092#[inline]
1093#[target_feature(enable = "avx512dq,avx512vl")]
1094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1095#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1096pub const fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
1097    unsafe {
1098        let b = _mm256_broadcast_f64x2(a).as_f64x4();
1099        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
1100    }
1101}
1102
1103/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1104/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1105///
1106/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526)
1107#[inline]
1108#[target_feature(enable = "avx512dq,avx512vl")]
1109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1111pub const fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
1112    unsafe {
1113        let b = _mm256_broadcast_f64x2(a).as_f64x4();
1114        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
1115    }
1116}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d {
    unsafe {
        let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let b = _mm512_broadcast_f64x2(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let b = _mm512_broadcast_f64x2(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcast_i32x2(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
        transmute(b)
    }
}
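
// Editorial sketch (same harness assumption as above): only the lower pair of
// 32-bit lanes is broadcast; the upper two source lanes are ignored.
#[cfg(test)]
mod broadcast_i32x2_128_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn uses_lower_pair_only() {
        let a = _mm_setr_epi32(7, 8, 99, 100);
        let r: [i32; 4] = transmute(_mm_broadcast_i32x2(a));
        assert_eq!(r, [7, 8, 7, 8]); // 99 and 100 never appear in dst
    }
}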

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let b = _mm_broadcast_i32x2(a).as_i32x4();
        transmute(simd_select_bitmask(k, b, src.as_i32x4()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let b = _mm_broadcast_i32x2(a).as_i32x4();
        transmute(simd_select_bitmask(k, b, i32x4::ZERO))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i32x2(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i32x2(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x2(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x2(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
    }
}

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i32x8();
        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        transmute(b)
    }
}
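
// Editorial sketch (same harness assumption as above): the full 8-lane block
// of the 256-bit source repeats twice across the 512-bit destination.
#[cfg(test)]
mod broadcast_i32x8_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512dq")]
    unsafe fn repeats_the_block() {
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r: [i32; 16] = transmute(_mm512_broadcast_i32x8(a));
        assert_eq!(r, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
    }
}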

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x8(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
    }
}

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x8(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i64x2();
        let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i64x2(a).as_i64x4();
        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i64x2(a).as_i64x4();
        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i64x2();
        let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i64x2(a).as_i64x8();
        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i64x2(a).as_i64x8();
        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
    }
}

// Extract

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        }
    }
}
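
// Editorial sketch (same harness assumption as above): IMM8 = 0 selects the
// low 256-bit half of the source, IMM8 = 1 the high half.
#[cfg(test)]
mod extractf32x8_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512dq")]
    unsafe fn selects_a_half() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let lo: [f32; 8] = transmute(_mm512_extractf32x8_ps::<0>(a));
        let hi: [f32; 8] = transmute(_mm512_extractf32x8_ps::<1>(a));
        assert_eq!(lo, [0., 1., 2., 3., 4., 5., 6., 7.]);
        assert_eq!(hi, [8., 9., 10., 11., 12., 13., 14., 15.]);
    }
}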

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extractf32x8_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8()))
    }
}

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extractf32x8_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1]),
            _ => simd_shuffle!(a, a, [2, 3]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m256d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extractf64x2_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extractf64x2_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        match IMM8 & 3 {
            0 => simd_shuffle!(a, a, [0, 1]),
            1 => simd_shuffle!(a, a, [2, 3]),
            2 => simd_shuffle!(a, a, [4, 5]),
            _ => simd_shuffle!(a, a, [6, 7]),
        }
    }
}
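
// Editorial sketch (same harness assumption as above): IMM8 picks one of the
// four 128-bit lanes, so IMM8 = 2 yields elements 4 and 5.
#[cfg(test)]
mod extractf64x2_512_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512dq")]
    unsafe fn picks_a_128bit_lane() {
        let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
        let r: [f64; 2] = transmute(_mm512_extractf64x2_pd::<2>(a));
        assert_eq!(r, [4., 5.]);
    }
}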

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m512d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x16();
        let b: i32x8 = match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        };
        transmute(b)
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
    }
}
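
// Editorial sketch (same harness assumption as above): the mask applies to the
// eight elements *after* extraction, zeroing those whose bit is clear.
#[cfg(test)]
mod maskz_extracti32x8_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512dq")]
    unsafe fn masks_the_extracted_half() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Extract the high half [8..=15], then keep only its lanes 0..4.
        let r: [i32; 8] = transmute(_mm512_maskz_extracti32x8_epi32::<1>(0b0000_1111, a));
        assert_eq!(r, [8, 9, 10, 11, 0, 0, 0, 0]);
    }
}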

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i64x4();
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1]),
            _ => simd_shuffle!(a, a, [2, 3]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i64x8();
        match IMM8 & 3 {
            0 => simd_shuffle!(a, a, [0, 1]),
            1 => simd_shuffle!(a, a, [2, 3]),
            2 => simd_shuffle!(a, a, [4, 5]),
            _ => simd_shuffle!(a, a, [6, 7]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m512i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
}

// Insert

/// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_castps256_ps512(b);
        match IMM8 & 1 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
                )
            }
            _ => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
                )
            }
        }
    }
}
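
// Editorial sketch (same harness assumption as above): IMM8 = 0 overwrites the
// low 256 bits of `a` with `b`; IMM8 = 1 overwrites the high 256 bits.
#[cfg(test)]
mod insertf32x8_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512dq")]
    unsafe fn replaces_one_half() {
        let a = _mm512_setzero_ps();
        let b = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r: [f32; 16] = transmute(_mm512_insertf32x8::<1>(a, b));
        assert_eq!(
            r,
            [0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 3., 4., 5., 6., 7., 8.]
        );
    }
}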

/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_insertf32x8<const IMM8: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m256,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_insertf32x8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_insertf32x8<const IMM8: i32>(
    k: __mmask16,
    a: __m512,
    b: __m256,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, c, f32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_castpd128_pd256(b);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
        }
    }
}
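
// Editorial sketch (same harness assumption as above): the selected 128-bit
// half of `a` is replaced by `b`; the other half is passed through.
#[cfg(test)]
mod insertf64x2_256_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn replaces_one_half() {
        let a = _mm256_setr_pd(10., 11., 12., 13.);
        let b = _mm_setr_pd(1., 2.);
        let r: [f64; 4] = transmute(_mm256_insertf64x2::<0>(a, b));
        assert_eq!(r, [1., 2., 12., 13.]);
    }
}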

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_insertf64x2<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m128d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_insertf64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_insertf64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m256d,
    b: __m128d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, c, f64x4::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_castpd128_pd512(b);
        match IMM8 & 3 {
            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_insertf64x2<const IMM8: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m128d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_insertf64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_insertf64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m128d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, c, f64x8::ZERO))
    }
}

/// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x16();
        let b = _mm512_castsi256_si512(b).as_i32x16();
        let r: i32x16 = match IMM8 & 1 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
                )
            }
            _ => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
                )
            }
        };
        transmute(r)
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_inserti32x8<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m256i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_inserti32x8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_inserti32x8<const IMM8: i32>(
    k: __mmask16,
    a: __m512i,
    b: __m256i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, c, i32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        match IMM8 & 1 {
            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_inserti64x2<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m128i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_inserti64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_inserti64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m128i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, c, i64x4::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i64x8();
        let b = _mm512_castsi128_si512(b).as_i64x8();
        match IMM8 & 3 {
            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
        }
    }
}
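
// Editorial sketch (same harness assumption as above): IMM8 selects which of
// the four 128-bit lanes of `a` receives `b`.
#[cfg(test)]
mod inserti64x2_512_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512dq")]
    unsafe fn replaces_one_lane() {
        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi64x(-2, -1); // element 0 = -1, element 1 = -2
        let r: [i64; 8] = transmute(_mm512_inserti64x2::<2>(a, b));
        assert_eq!(r, [0, 1, 2, 3, -1, -2, 6, 7]);
    }
}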

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_inserti64x2<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m128i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_inserti64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_inserti64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m128i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, c, i64x8::ZERO))
    }
}

// Convert

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_pd&ig_expand=1437)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundepi64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING))
    }
}
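
// Editorial sketch (same harness assumption as above): with round-to-nearest
// and exceptions suppressed, integers in the exactly representable f64 range
// convert without rounding.
#[cfg(test)]
mod cvt_roundepi64_pd_sketch {
    use crate::core_arch::x86::*;
    use crate::mem::transmute;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512dq")]
    unsafe fn converts_small_values_exactly() {
        let a = _mm512_setr_epi64(-4, -3, -2, -1, 0, 1, 2, 3);
        let r: [f64; 8] = transmute(_mm512_cvt_roundepi64_pd::<
            { _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC },
        >(a));
        assert_eq!(r, [-4., -3., -2., -1., 0., 1., 2., 3.]);
    }
}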

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_pd&ig_expand=1438)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundepi64_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512i,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_pd&ig_expand=1439)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundepi64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}
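
// Illustrative sketch (not upstream; helper name is made up): contrasts the
// writemask and zeromask variants. Mask bit i governs lane i: a set bit takes
// the converted value, a clear bit keeps the lane from `src` (writemask) or
// zeroes it (zeromask).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512f")]
fn _example_mask_vs_maskz_cvt_roundepi64_pd() {
    const RC: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    let a = _mm512_set1_epi64(7);
    let src = _mm512_set1_pd(-1.0);
    // Only bit 0 is set, so only lane 0 receives the converted value.
    let m = _mm512_mask_cvt_roundepi64_pd::<RC>(src, 0b0000_0001, a);
    let z = _mm512_maskz_cvt_roundepi64_pd::<RC>(0b0000_0001, a);
    assert_eq!(_mm512_cvtsd_f64(m), 7.0); // lane 0 converted in both variants
    assert_eq!(_mm512_cvtsd_f64(z), 7.0);
    // Lanes 1..8 hold -1.0 in `m` (copied from `src`) and 0.0 in `z`.
}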

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_pd&ig_expand=1705)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtepi64_pd(a: __m128i) -> __m128d {
    unsafe { transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_pd&ig_expand=1706)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let b = _mm_cvtepi64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_pd&ig_expand=1707)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let b = _mm_cvtepi64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
    }
}
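
// Illustrative sketch (not upstream; helper name is made up): the non-`round`
// converts use the current MXCSR rounding mode (`_MM_FROUND_CUR_DIRECTION`),
// which is round-to-nearest-even unless changed via `_MM_SET_ROUNDING_MODE`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
fn _example_cvtepi64_pd() {
    let a = _mm_set_epi64x(3, -7); // lane 1 = 3, lane 0 = -7
    let r = _mm_cvtepi64_pd(a);
    assert_eq!(_mm_cvtsd_f64(r), -7.0); // lane 0; lane 1 holds 3.0
}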

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_pd&ig_expand=1708)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d {
    unsafe { transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_pd&ig_expand=1709)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
    unsafe {
        let b = _mm256_cvtepi64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_pd&ig_expand=1710)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d {
    unsafe {
        let b = _mm256_cvtepi64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd&ig_expand=1711)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d {
    unsafe { transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_pd&ig_expand=1712)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        let b = _mm512_cvtepi64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_pd&ig_expand=1713)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        let b = _mm512_cvtepi64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}
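
// Note (illustrative): the 256- and 512-bit `cvtepi64_pd` variants above are
// lane-wise identical to the 128-bit form; each signed 64-bit lane is
// converted independently under the current MXCSR rounding mode.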

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_ps&ig_expand=1443)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundepi64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_ps&ig_expand=1444)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundepi64_ps<const ROUNDING: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512i,
) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_ps&ig_expand=1445)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundepi64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}
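
// Illustrative sketch (not upstream; helper name is made up): converting to
// f32 keeps only a 24-bit significand, so precision is lost far earlier than
// with the f64 converts above.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512f")]
fn _example_cvt_roundepi64_ps() {
    const RC: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    // 2^24 + 1 is the first positive integer with no exact f32 representation.
    let a = _mm512_set1_epi64((1i64 << 24) + 1);
    // Eight i64 lanes narrow to eight f32 lanes in a __m256.
    let r = _mm512_cvt_roundepi64_ps::<RC>(a);
    assert_eq!(_mm256_cvtss_f32(r), 16777216.0); // rounded to 2^24, ties-to-even
}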

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_ps&ig_expand=1723)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtepi64_ps(a: __m128i) -> __m128 {
    _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a)
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_ps&ig_expand=1724)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe { transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_ps&ig_expand=1725)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 {
    _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a)
}
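
// Note (illustrative): the two converted f32 values occupy the low 64 bits of
// the returned __m128; per Intel's documentation the upper elements of the
// destination are zeroed.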

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_ps&ig_expand=1726)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 {
    unsafe { transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_ps&ig_expand=1727)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
    unsafe {
        let b = _mm256_cvtepi64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_ps&ig_expand=1728)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 {
    unsafe {
        let b = _mm256_cvtepi64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_ps&ig_expand=1729)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 {
    unsafe { transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_ps&ig_expand=1730)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        let b = _mm512_cvtepi64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_ps&ig_expand=1731)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        let b = _mm512_cvtepi64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_pd&ig_expand=1455)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundepu64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_pd&ig_expand=1456)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundepu64_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512i,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_pd&ig_expand=1457)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundepu64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}
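
// Illustrative sketch (not upstream; helper name is made up): the `epu64`
// converts read the same 64 bits as the `epi64` converts but interpret them
// as unsigned, which matters once the top bit is set.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512f")]
fn _example_cvt_roundepu64_pd() {
    const RC: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    // Bit pattern 2^63: -2^63 as i64, +2^63 as u64.
    let a = _mm512_set1_epi64(i64::MIN);
    let s = _mm512_cvt_roundepi64_pd::<RC>(a);
    let u = _mm512_cvt_roundepu64_pd::<RC>(a);
    assert_eq!(_mm512_cvtsd_f64(s), -9223372036854775808.0);
    assert_eq!(_mm512_cvtsd_f64(u), 9223372036854775808.0);
}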

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_pd&ig_expand=1827)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtepu64_pd(a: __m128i) -> __m128d {
    unsafe { transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_pd&ig_expand=1828)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let b = _mm_cvtepu64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_pd&ig_expand=1829)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let b = _mm_cvtepu64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_pd&ig_expand=1830)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d {
    unsafe { transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_pd&ig_expand=1831)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
    unsafe {
        let b = _mm256_cvtepu64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_pd&ig_expand=1832)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d {
    unsafe {
        let b = _mm256_cvtepu64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_pd&ig_expand=1833)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d {
    unsafe { transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_pd&ig_expand=1834)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        let b = _mm512_cvtepu64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_pd&ig_expand=1835)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        let b = _mm512_cvtepu64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_ps&ig_expand=1461)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundepu64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_ps&ig_expand=1462)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundepu64_ps<const ROUNDING: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512i,
) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_ps&ig_expand=1463)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundepu64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_ps&ig_expand=1845)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtepu64_ps(a: __m128i) -> __m128 {
    _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a)
}
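
// Note (illustrative): unlike the wider variants, the 128-bit `uqq2ps` LLVM
// intrinsic takes the source vector and mask directly, so the unmasked form
// above is written as a full-mask (0xff) call on an undefined source rather
// than as a separate unmasked intrinsic.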

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_ps&ig_expand=1846)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe { transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_ps&ig_expand=1847)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 {
    _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a)
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_ps&ig_expand=1848)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 {
    unsafe { transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_ps&ig_expand=1849)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
    unsafe {
        let b = _mm256_cvtepu64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_ps&ig_expand=1850)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 {
    unsafe {
        let b = _mm256_cvtepu64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_ps&ig_expand=1851)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 {
    unsafe { transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_ps&ig_expand=1852)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        let b = _mm512_cvtepu64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_ps&ig_expand=1853)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        let b = _mm512_cvtepu64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64&ig_expand=1472)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundpd_epi64<const ROUNDING: i32>(a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi64&ig_expand=1473)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundpd_epi64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi64&ig_expand=1474)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundpd_epi64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
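
// Illustrative sketch (not upstream; helper name is made up): in the
// float-to-integer direction the rounding mode picks the integer, e.g. 2.5
// rounds to 2 under ties-to-even but to 3 when rounding toward +infinity.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512f")]
fn _example_cvt_roundpd_epi64() {
    let a = _mm512_set1_pd(2.5);
    let near = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    // Round-trip through the exact i64 -> f64 conversion to inspect lane 0.
    assert_eq!(_mm512_cvtsd_f64(_mm512_cvtepi64_pd(near)), 2.0);
    assert_eq!(_mm512_cvtsd_f64(_mm512_cvtepi64_pd(up)), 3.0);
}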

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi64&ig_expand=1941)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtpd_epi64(a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi64&ig_expand=1942)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi64&ig_expand=1943)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi64&ig_expand=1944)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi64&ig_expand=1945)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi64&ig_expand=1946)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi64&ig_expand=1947)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi64&ig_expand=1948)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvtpd2qq_512(
            a.as_f64x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi64&ig_expand=1949)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a)
}
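
// Note (illustrative): as with the other AVX-512 float-to-integer converts,
// inputs that cannot be represented in the destination (NaN, infinities,
// out-of-range values) produce the integer indefinite value
// 0x8000_0000_0000_0000 per Intel's documentation.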

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi64&ig_expand=1514)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundps_epi64<const ROUNDING: i32>(a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi64&ig_expand=1515)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundps_epi64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi64&ig_expand=1516)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundps_epi64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi64&ig_expand=2075)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtps_epi64(a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi64&ig_expand=2076)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi64&ig_expand=2077)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a)
}
3337
3338/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3339/// and store the results in dst.
3340///
3341/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi64&ig_expand=2078)
3342#[inline]
3343#[target_feature(enable = "avx512dq,avx512vl")]
3344#[cfg_attr(test, assert_instr(vcvtps2qq))]
3345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3346pub fn _mm256_cvtps_epi64(a: __m128) -> __m256i {
3347    _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a)
3348}
3349
3350/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3351/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3352/// not set).
3353///
3354/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi64&ig_expand=2079)
3355#[inline]
3356#[target_feature(enable = "avx512dq,avx512vl")]
3357#[cfg_attr(test, assert_instr(vcvtps2qq))]
3358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3359pub fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
3360    unsafe { transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
3361}
3362
3363/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3364/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3365///
3366/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi64&ig_expand=2080)
3367#[inline]
3368#[target_feature(enable = "avx512dq,avx512vl")]
3369#[cfg_attr(test, assert_instr(vcvtps2qq))]
3370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3371pub fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i {
3372    _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a)
3373}
3374
3375/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3376/// and store the results in dst.
3377///
3378/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi64&ig_expand=2081)
3379#[inline]
3380#[target_feature(enable = "avx512dq")]
3381#[cfg_attr(test, assert_instr(vcvtps2qq))]
3382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3383pub fn _mm512_cvtps_epi64(a: __m256) -> __m512i {
3384    _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a)
3385}
3386
3387/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3388/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3389/// not set).
3390///
3391/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi64&ig_expand=2082)
3392#[inline]
3393#[target_feature(enable = "avx512dq")]
3394#[cfg_attr(test, assert_instr(vcvtps2qq))]
3395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3396pub fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
3397    unsafe {
3398        transmute(vcvtps2qq_512(
3399            a.as_f32x8(),
3400            src.as_i64x8(),
3401            k,
3402            _MM_FROUND_CUR_DIRECTION,
3403        ))
3404    }
3405}
3406
3407/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3408/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3409///
3410/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi64&ig_expand=2083)
3411#[inline]
3412#[target_feature(enable = "avx512dq")]
3413#[cfg_attr(test, assert_instr(vcvtps2qq))]
3414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3415pub fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i {
3416    _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a)
3417}
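
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): write-masked conversion of the two lower f32 lanes of `a` to i64.
// With mask 0b01 only lane 0 receives the converted value; lane 1 is copied
// through from `src`. The helper name is hypothetical; `avx512dq` and
// `avx512vl` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn example_mask_cvtps_epi64(src: __m128i, a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(src, 0b01, a)
}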
3418
3419/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3420/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3421///
3422/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3423/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3424/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3425/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3426/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3427///
3428/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu64&ig_expand=1478)
3429#[inline]
3430#[target_feature(enable = "avx512dq")]
3431#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3432#[rustc_legacy_const_generics(1)]
3433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3434pub fn _mm512_cvt_roundpd_epu64<const ROUNDING: i32>(a: __m512d) -> __m512i {
3435    static_assert_rounding!(ROUNDING);
3436    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
3437}
3438
3439/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3440/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3441/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3442///
3443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3448///
3449/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu64&ig_expand=1479)
3450#[inline]
3451#[target_feature(enable = "avx512dq")]
3452#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3453#[rustc_legacy_const_generics(3)]
3454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3455pub fn _mm512_mask_cvt_roundpd_epu64<const ROUNDING: i32>(
3456    src: __m512i,
3457    k: __mmask8,
3458    a: __m512d,
3459) -> __m512i {
3460    unsafe {
3461        static_assert_rounding!(ROUNDING);
3462        transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING))
3463    }
3464}
3465
3466/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3467/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3468/// Rounding is done according to the ROUNDING parameter, which can be one of:
3469///
3470/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3471/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3472/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3473/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3474/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3475///
3476/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu64&ig_expand=1480)
3477#[inline]
3478#[target_feature(enable = "avx512dq")]
3479#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3480#[rustc_legacy_const_generics(2)]
3481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3482pub fn _mm512_maskz_cvt_roundpd_epu64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
3483    static_assert_rounding!(ROUNDING);
3484    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
3485}
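
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): unsigned conversion with a directed rounding mode.
// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC rounds each f64 lane up (ceiling)
// and suppresses floating-point exceptions. The helper name is hypothetical;
// `avx512dq` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn example_cvt_roundpd_epu64_ceil(a: __m512d) -> __m512i {
    _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
}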
3486
3487/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3488/// and store the results in dst.
3489///
3490/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu64&ig_expand=1959)
3491#[inline]
3492#[target_feature(enable = "avx512dq,avx512vl")]
3493#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3495pub fn _mm_cvtpd_epu64(a: __m128d) -> __m128i {
3496    _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a)
3497}
3498
3499/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3500/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3501/// not set).
3502///
3503/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu64&ig_expand=1960)
3504#[inline]
3505#[target_feature(enable = "avx512dq,avx512vl")]
3506#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3508pub fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
3509    unsafe { transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
3510}
3511
3512/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3513/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3514///
3515/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu64&ig_expand=1961)
3516#[inline]
3517#[target_feature(enable = "avx512dq,avx512vl")]
3518#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3520pub fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
3521    _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a)
3522}
3523
3524/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3525/// and store the results in dst.
3526///
3527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu64&ig_expand=1962)
3528#[inline]
3529#[target_feature(enable = "avx512dq,avx512vl")]
3530#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3532pub fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i {
3533    _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a)
3534}
3535
3536/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3537/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3538/// not set).
3539///
3540/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu64&ig_expand=1963)
3541#[inline]
3542#[target_feature(enable = "avx512dq,avx512vl")]
3543#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3545pub fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
3546    unsafe { transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
3547}
3548
3549/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3550/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3551///
3552/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu64&ig_expand=1964)
3553#[inline]
3554#[target_feature(enable = "avx512dq,avx512vl")]
3555#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3557pub fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
3558    _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a)
3559}
3560
3561/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3562/// and store the results in dst.
3563///
3564/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu64&ig_expand=1965)
3565#[inline]
3566#[target_feature(enable = "avx512dq")]
3567#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3569pub fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i {
3570    _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a)
3571}
3572
3573/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3574/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3575/// not set).
3576///
3577/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu64&ig_expand=1966)
3578#[inline]
3579#[target_feature(enable = "avx512dq")]
3580#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3582pub fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
3583    unsafe {
3584        transmute(vcvtpd2uqq_512(
3585            a.as_f64x8(),
3586            src.as_u64x8(),
3587            k,
3588            _MM_FROUND_CUR_DIRECTION,
3589        ))
3590    }
3591}
3592
3593/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3594/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3595///
3596/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu64&ig_expand=1967)
3597#[inline]
3598#[target_feature(enable = "avx512dq")]
3599#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3601pub fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
3602    _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a)
3603}
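
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): zero-masked 256-bit conversion. With mask 0b0101 lanes 0 and 2
// hold converted values while lanes 1 and 3 are zeroed; rounding follows the
// current MXCSR.RC setting. The helper name is hypothetical; `avx512dq` and
// `avx512vl` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn example_maskz_cvtpd_epu64(a: __m256d) -> __m256i {
    _mm256_maskz_cvtpd_epu64(0b0101, a)
}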
3604
3605/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3606/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3607///
3608/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3609/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3610/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3611/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3612/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3613///
3614/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu64&ig_expand=1520)
3615#[inline]
3616#[target_feature(enable = "avx512dq")]
3617#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3618#[rustc_legacy_const_generics(1)]
3619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3620pub fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i {
3621    static_assert_rounding!(ROUNDING);
3622    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
3623}
3624
3625/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3626/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3627/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3628///
3629/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3630/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3631/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3632/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3633/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3634///
3635/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu64&ig_expand=1521)
3636#[inline]
3637#[target_feature(enable = "avx512dq")]
3638#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3639#[rustc_legacy_const_generics(3)]
3640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3641pub fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>(
3642    src: __m512i,
3643    k: __mmask8,
3644    a: __m256,
3645) -> __m512i {
3646    unsafe {
3647        static_assert_rounding!(ROUNDING);
3648        transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING))
3649    }
3650}
3651
3652/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3653/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3654/// Rounding is done according to the ROUNDING parameter, which can be one of:
3655///
3656/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3657/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3658/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3659/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3660/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3661///
3662/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu64&ig_expand=1522)
3663#[inline]
3664#[target_feature(enable = "avx512dq")]
3665#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3666#[rustc_legacy_const_generics(2)]
3667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3668pub fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
3669    static_assert_rounding!(ROUNDING);
3670    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
3671}
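
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): write-masked unsigned conversion that truncates toward zero via
// the ROUNDING parameter rather than the dedicated vcvttps2uqq form. The
// helper name is hypothetical; `avx512dq` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn example_mask_cvt_roundps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, k, a)
}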
3672
3673/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3674/// and store the results in dst.
3675///
3676/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu64&ig_expand=2093)
3677#[inline]
3678#[target_feature(enable = "avx512dq,avx512vl")]
3679#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3681pub fn _mm_cvtps_epu64(a: __m128) -> __m128i {
3682    _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a)
3683}
3684
3685/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3686/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3687/// not set).
3688///
3689/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu64&ig_expand=2094)
3690#[inline]
3691#[target_feature(enable = "avx512dq,avx512vl")]
3692#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3694pub fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
3695    unsafe { transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
3696}
3697
3698/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3699/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3700///
3701/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu64&ig_expand=2095)
3702#[inline]
3703#[target_feature(enable = "avx512dq,avx512vl")]
3704#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3706pub fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i {
3707    _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a)
3708}
3709
3710/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3711/// and store the results in dst.
3712///
3713/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu64&ig_expand=2096)
3714#[inline]
3715#[target_feature(enable = "avx512dq,avx512vl")]
3716#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3718pub fn _mm256_cvtps_epu64(a: __m128) -> __m256i {
3719    _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a)
3720}
3721
3722/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3723/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3724/// not set).
3725///
3726/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu64&ig_expand=2097)
3727#[inline]
3728#[target_feature(enable = "avx512dq,avx512vl")]
3729#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3731pub fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
3732    unsafe { transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
3733}
3734
3735/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3736/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3737///
3738/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu64&ig_expand=2098)
3739#[inline]
3740#[target_feature(enable = "avx512dq,avx512vl")]
3741#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3743pub fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i {
3744    _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a)
3745}
3746
3747/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3748/// and store the results in dst.
3749///
3750/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu64&ig_expand=2099)
3751#[inline]
3752#[target_feature(enable = "avx512dq")]
3753#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3755pub fn _mm512_cvtps_epu64(a: __m256) -> __m512i {
3756    _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a)
3757}
3758
3759/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3760/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3761/// not set).
3762///
3763/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu64&ig_expand=2100)
3764#[inline]
3765#[target_feature(enable = "avx512dq")]
3766#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3768pub fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
3769    unsafe {
3770        transmute(vcvtps2uqq_512(
3771            a.as_f32x8(),
3772            src.as_u64x8(),
3773            k,
3774            _MM_FROUND_CUR_DIRECTION,
3775        ))
3776    }
3777}
3778
3779/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3780/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3781///
3782/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu64&ig_expand=2101)
3783#[inline]
3784#[target_feature(enable = "avx512dq")]
3785#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3787pub fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i {
3788    _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a)
3789}
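
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): full-width unsigned conversion of eight f32 lanes. Only values
// representable as u64 convert meaningfully; negative inputs are out of range
// for an unsigned destination. The helper name is hypothetical; `avx512dq`
// support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn example_cvtps_epu64(a: __m256) -> __m512i {
    _mm512_cvtps_epu64(a)
}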
3790
3791/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3792/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
3793/// to the sae parameter.
3794///
3795/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi64&ig_expand=2264)
3796#[inline]
3797#[target_feature(enable = "avx512dq")]
3798#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3799#[rustc_legacy_const_generics(1)]
3800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3801pub fn _mm512_cvtt_roundpd_epi64<const SAE: i32>(a: __m512d) -> __m512i {
3802    static_assert_sae!(SAE);
3803    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
3804}
3805
3806/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3807/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3808/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3809///
3810/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi64&ig_expand=2265)
3811#[inline]
3812#[target_feature(enable = "avx512dq")]
3813#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3814#[rustc_legacy_const_generics(3)]
3815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3816pub fn _mm512_mask_cvtt_roundpd_epi64<const SAE: i32>(
3817    src: __m512i,
3818    k: __mmask8,
3819    a: __m512d,
3820) -> __m512i {
3821    unsafe {
3822        static_assert_sae!(SAE);
3823        transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE))
3824    }
3825}
3826
3827/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3828/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3829/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3830///
3831/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi64&ig_expand=2266)
3832#[inline]
3833#[target_feature(enable = "avx512dq")]
3834#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3835#[rustc_legacy_const_generics(2)]
3836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3837pub fn _mm512_maskz_cvtt_roundpd_epi64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
3838    static_assert_sae!(SAE);
3839    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_setzero_si512(), k, a)
3840}
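
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): truncating conversion with floating-point exceptions suppressed
// through the SAE parameter. Passing _MM_FROUND_CUR_DIRECTION instead would
// leave exception reporting enabled. The helper name is hypothetical;
// `avx512dq` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn example_cvtt_roundpd_epi64(a: __m512d) -> __m512i {
    _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a)
}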
3841
3842/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3843/// with truncation, and store the result in dst.
3844///
3845/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi64&ig_expand=2329)
3846#[inline]
3847#[target_feature(enable = "avx512dq,avx512vl")]
3848#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3850pub fn _mm_cvttpd_epi64(a: __m128d) -> __m128i {
3851    _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a)
3852}
3853
3854/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3855/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3856/// corresponding bit is not set).
3857///
3858/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi64&ig_expand=2330)
3859#[inline]
3860#[target_feature(enable = "avx512dq,avx512vl")]
3861#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3863pub fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
3864    unsafe { transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
3865}
3866
3867/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3868/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3869/// bit is not set).
3870///
3871/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi64&ig_expand=2331)
3872#[inline]
3873#[target_feature(enable = "avx512dq,avx512vl")]
3874#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3876pub fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
3877    _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a)
3878}
3879
3880/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3881/// with truncation, and store the result in dst.
3882///
3883/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi64&ig_expand=2332)
3884#[inline]
3885#[target_feature(enable = "avx512dq,avx512vl")]
3886#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3888pub fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i {
3889    _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a)
3890}
3891
3892/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3893/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3894/// corresponding bit is not set).
3895///
3896/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi64&ig_expand=2333)
3897#[inline]
3898#[target_feature(enable = "avx512dq,avx512vl")]
3899#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3901pub fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
3902    unsafe { transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
3903}
3904
3905/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3906/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3907/// bit is not set).
3908///
3909/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi64&ig_expand=2334)
3910#[inline]
3911#[target_feature(enable = "avx512dq,avx512vl")]
3912#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3914pub fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
3915    _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a)
3916}
3917
3918/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3919/// with truncation, and store the result in dst.
3920///
3921/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi64&ig_expand=2335)
3922#[inline]
3923#[target_feature(enable = "avx512dq")]
3924#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3926pub fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i {
3927    _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a)
3928}
3929
3930/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3931/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3932/// corresponding bit is not set).
3933///
3934/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi64&ig_expand=2336)
3935#[inline]
3936#[target_feature(enable = "avx512dq")]
3937#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3939pub fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
3940    unsafe {
3941        transmute(vcvttpd2qq_512(
3942            a.as_f64x8(),
3943            src.as_i64x8(),
3944            k,
3945            _MM_FROUND_CUR_DIRECTION,
3946        ))
3947    }
3948}
3949
3950/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3951/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3952/// bit is not set).
3953///
3954/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi64&ig_expand=2337)
3955#[inline]
3956#[target_feature(enable = "avx512dq")]
3957#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3959pub fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
3960    _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a)
3961}
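
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): truncation rounds toward zero regardless of MXCSR.RC, so 1.9
// becomes 1 and -1.9 becomes -1. The helper name is hypothetical; `avx512dq`
// and `avx512vl` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn example_cvttpd_epi64(a: __m128d) -> __m128i {
    _mm_cvttpd_epi64(a)
}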
3962
3963/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3964/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
3965/// to the sae parameter.
3966///
3967/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi64&ig_expand=2294)
3968#[inline]
3969#[target_feature(enable = "avx512dq")]
3970#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
3971#[rustc_legacy_const_generics(1)]
3972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3973pub fn _mm512_cvtt_roundps_epi64<const SAE: i32>(a: __m256) -> __m512i {
3974    static_assert_sae!(SAE);
3975    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
3976}
3977
3978/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3979/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3980/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3981///
3982/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi64&ig_expand=2295)
3983#[inline]
3984#[target_feature(enable = "avx512dq")]
3985#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
3986#[rustc_legacy_const_generics(3)]
3987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3988pub fn _mm512_mask_cvtt_roundps_epi64<const SAE: i32>(
3989    src: __m512i,
3990    k: __mmask8,
3991    a: __m256,
3992) -> __m512i {
3993    unsafe {
3994        static_assert_sae!(SAE);
3995        transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE))
3996    }
3997}
3998
3999/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4000/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4001/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4002///
4003/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi64&ig_expand=2296)
4004#[inline]
4005#[target_feature(enable = "avx512dq")]
4006#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
4007#[rustc_legacy_const_generics(2)]
4008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4009pub fn _mm512_maskz_cvtt_roundps_epi64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
4010    static_assert_sae!(SAE);
4011    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_setzero_si512(), k, a)
4012}
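
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): zero-masked truncating conversion with exceptions suppressed;
// lanes with a clear mask bit are zeroed. The helper name is hypothetical;
// `avx512dq` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn example_maskz_cvtt_roundps_epi64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(k, a)
}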
4013
4014/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4015/// with truncation, and store the result in dst.
4016///
4017/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi64&ig_expand=2420)
4018#[inline]
4019#[target_feature(enable = "avx512dq,avx512vl")]
4020#[cfg_attr(test, assert_instr(vcvttps2qq))]
4021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4022pub fn _mm_cvttps_epi64(a: __m128) -> __m128i {
4023    _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a)
4024}
4025
4026/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4027/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4028/// corresponding bit is not set).
4029///
4030/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi64&ig_expand=2421)
4031#[inline]
4032#[target_feature(enable = "avx512dq,avx512vl")]
4033#[cfg_attr(test, assert_instr(vcvttps2qq))]
4034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4035pub fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
4036    unsafe { transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
4037}
4038
4039/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4040/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4041/// bit is not set).
4042///
4043/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi64&ig_expand=2422)
4044#[inline]
4045#[target_feature(enable = "avx512dq,avx512vl")]
4046#[cfg_attr(test, assert_instr(vcvttps2qq))]
4047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4048pub fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i {
4049    _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a)
4050}
4051
4052/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4053/// with truncation, and store the result in dst.
4054///
4055/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi64&ig_expand=2423)
4056#[inline]
4057#[target_feature(enable = "avx512dq,avx512vl")]
4058#[cfg_attr(test, assert_instr(vcvttps2qq))]
4059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4060pub fn _mm256_cvttps_epi64(a: __m128) -> __m256i {
4061    _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a)
4062}
4063
4064/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4065/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4066/// corresponding bit is not set).
4067///
4068/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi64&ig_expand=2424)
4069#[inline]
4070#[target_feature(enable = "avx512dq,avx512vl")]
4071#[cfg_attr(test, assert_instr(vcvttps2qq))]
4072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4073pub fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
4074    unsafe { transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
4075}
4076
4077/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4078/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4079/// bit is not set).
4080///
4081/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi64&ig_expand=2425)
4082#[inline]
4083#[target_feature(enable = "avx512dq,avx512vl")]
4084#[cfg_attr(test, assert_instr(vcvttps2qq))]
4085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4086pub fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i {
4087    _mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a)
4088}
4089
4090/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4091/// with truncation, and store the result in dst.
4092///
4093/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi64&ig_expand=2426)
4094#[inline]
4095#[target_feature(enable = "avx512dq")]
4096#[cfg_attr(test, assert_instr(vcvttps2qq))]
4097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4098pub fn _mm512_cvttps_epi64(a: __m256) -> __m512i {
4099    _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a)
4100}
4101
4102/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4103/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4104/// corresponding bit is not set).
4105///
4106/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi64&ig_expand=2427)
4107#[inline]
4108#[target_feature(enable = "avx512dq")]
4109#[cfg_attr(test, assert_instr(vcvttps2qq))]
4110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4111pub fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
4112    unsafe {
4113        transmute(vcvttps2qq_512(
4114            a.as_f32x8(),
4115            src.as_i64x8(),
4116            k,
4117            _MM_FROUND_CUR_DIRECTION,
4118        ))
4119    }
4120}
4121
4122/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4123/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4124/// bit is not set).
4125///
4126/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi64&ig_expand=2428)
4127#[inline]
4128#[target_feature(enable = "avx512dq")]
4129#[cfg_attr(test, assert_instr(vcvttps2qq))]
4130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4131pub fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i {
4132    _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a)
4133}
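
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): write-masked widening truncation of four f32 lanes to four i64
// lanes; lanes with a clear mask bit are copied through from `src`. The
// helper name is hypothetical; `avx512dq` and `avx512vl` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn example_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvttps_epi64(src, k, a)
}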
4134
4135/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4136/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
4137/// to the sae parameter.
4138///
4139/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu64&ig_expand=1965)
4140#[inline]
4141#[target_feature(enable = "avx512dq")]
4142#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
4143#[rustc_legacy_const_generics(1)]
4144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4145pub fn _mm512_cvtt_roundpd_epu64<const SAE: i32>(a: __m512d) -> __m512i {
4146    static_assert_sae!(SAE);
4147    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
4148}
4149
4150/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4151/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4152/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4153///
4154/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu64&ig_expand=1966)
4155#[inline]
4156#[target_feature(enable = "avx512dq")]
4157#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
4158#[rustc_legacy_const_generics(3)]
4159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4160pub fn _mm512_mask_cvtt_roundpd_epu64<const SAE: i32>(
4161    src: __m512i,
4162    k: __mmask8,
4163    a: __m512d,
4164) -> __m512i {
4165    unsafe {
4166        static_assert_sae!(SAE);
4167        transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE))
4168    }
4169}
4170
4171/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4172/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4173/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4174///
4175/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu64&ig_expand=1967)
4176#[inline]
4177#[target_feature(enable = "avx512dq")]
4178#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
4179#[rustc_legacy_const_generics(2)]
4180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4181pub fn _mm512_maskz_cvtt_roundpd_epu64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
4182    static_assert_sae!(SAE);
4183    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_setzero_si512(), k, a)
4184}
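
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): unsigned truncating conversion with exceptions suppressed via the
// SAE parameter. The helper name is hypothetical; `avx512dq` support is
// assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn example_cvtt_roundpd_epu64(a: __m512d) -> __m512i {
    _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a)
}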
4185
4186/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4187/// with truncation, and store the result in dst.
4188///
4189/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu64&ig_expand=2347)
4190#[inline]
4191#[target_feature(enable = "avx512dq,avx512vl")]
4192#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4194pub fn _mm_cvttpd_epu64(a: __m128d) -> __m128i {
4195    _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a)
4196}
4197
4198/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4199/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
4200/// bit is not set).
4201///
4202/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu64&ig_expand=2348)
4203#[inline]
4204#[target_feature(enable = "avx512dq,avx512vl")]
4205#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4207pub fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
4208    unsafe { transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
4209}
4210
4211/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4212/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4213/// bit is not set).
4214///
4215/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu64&ig_expand=2349)
4216#[inline]
4217#[target_feature(enable = "avx512dq,avx512vl")]
4218#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4220pub fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
4221    _mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a)
4222}
4223
4224/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4225/// with truncation, and store the result in dst.
4226///
4227/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu64&ig_expand=2350)
4228#[inline]
4229#[target_feature(enable = "avx512dq,avx512vl")]
4230#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4232pub fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i {
4233    _mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a)
4234}
4235
4236/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4237/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
4238/// bit is not set).
4239///
4240/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu64&ig_expand=2351)
4241#[inline]
4242#[target_feature(enable = "avx512dq,avx512vl")]
4243#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4245pub fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
4246    unsafe { transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
4247}
4248
4249/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4250/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4251/// bit is not set).
4252///
4253/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu64&ig_expand=2352)
4254#[inline]
4255#[target_feature(enable = "avx512dq,avx512vl")]
4256#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4258pub fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
4259    _mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a)
4260}
4261
4262/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4263/// with truncation, and store the result in dst.
4264///
4265/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu64&ig_expand=2353)
4266#[inline]
4267#[target_feature(enable = "avx512dq")]
4268#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4270pub fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i {
4271    _mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a)
4272}
4273
4274/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4275/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
4276/// bit is not set).
4277///
4278/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu64&ig_expand=2354)
4279#[inline]
4280#[target_feature(enable = "avx512dq")]
4281#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4283pub fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
4284    unsafe {
4285        transmute(vcvttpd2uqq_512(
4286            a.as_f64x8(),
4287            src.as_u64x8(),
4288            k,
4289            _MM_FROUND_CUR_DIRECTION,
4290        ))
4291    }
4292}
4293
4294/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4295/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4296/// bit is not set).
4297///
4298/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu64&ig_expand=2355)
4299#[inline]
4300#[target_feature(enable = "avx512dq")]
4301#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4303pub fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
4304    _mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a)
4305}
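
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): zero-masked unsigned truncation at full width; the rounding mode
// is irrelevant here because truncation always chops toward zero. The helper
// name is hypothetical; `avx512dq` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn example_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_maskz_cvttpd_epu64(k, a)
}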
4306
4307/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4308/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
4309/// to the sae parameter.
4310///
4311/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu64&ig_expand=2300)
4312#[inline]
4313#[target_feature(enable = "avx512dq")]
4314#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
4315#[rustc_legacy_const_generics(1)]
4316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4317pub fn _mm512_cvtt_roundps_epu64<const SAE: i32>(a: __m256) -> __m512i {
4318    static_assert_sae!(SAE);
4319    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
4320}
4321
4322/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4323/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4324/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4325///
4326/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu64&ig_expand=2301)
4327#[inline]
4328#[target_feature(enable = "avx512dq")]
4329#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
4330#[rustc_legacy_const_generics(3)]
4331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4332pub fn _mm512_mask_cvtt_roundps_epu64<const SAE: i32>(
4333    src: __m512i,
4334    k: __mmask8,
4335    a: __m256,
4336) -> __m512i {
4337    unsafe {
4338        static_assert_sae!(SAE);
4339        transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE))
4340    }
4341}
4342
4343/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4344/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4345/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4346///
4347/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu64&ig_expand=2302)
4348#[inline]
4349#[target_feature(enable = "avx512dq")]
4350#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
4351#[rustc_legacy_const_generics(2)]
4352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4353pub fn _mm512_maskz_cvtt_roundps_epu64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
4354    static_assert_sae!(SAE);
4355    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_setzero_si512(), k, a)
4356}
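
// Illustrative usage sketch (editorial addition, not part of the upstream
// source): write-masked unsigned truncating conversion with exceptions
// suppressed. The helper name is hypothetical; `avx512dq` support is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn example_mask_cvtt_roundps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(src, k, a)
}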
4357
4358/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4359/// with truncation, and store the result in dst.
4360///
4361/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu64&ig_expand=2438)
4362#[inline]
4363#[target_feature(enable = "avx512dq,avx512vl")]
4364#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4366pub fn _mm_cvttps_epu64(a: __m128) -> __m128i {
4367    _mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a)
4368}
4369
4370/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4371/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4372/// corresponding bit is not set).
4373///
4374/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu64&ig_expand=2439)
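///
/// A minimal sketch of the writemask behavior, assuming `avx512dq` and `avx512vl`
/// are available:
///
/// ```ignore
/// let src = _mm_set_epi64x(100, 200);
/// let a = _mm_set_ps(0.0, 0.0, 9.5, 2.5);
/// // Only bit 0 of k is set: element 0 is converted (2.5 -> 2), while
/// // element 1 is copied from src (100).
/// let r = _mm_mask_cvttps_epu64(src, 0b01, a);
/// // r == _mm_set_epi64x(100, 2)
/// ```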
4375#[inline]
4376#[target_feature(enable = "avx512dq,avx512vl")]
4377#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4379pub fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
4380    unsafe { transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
4381}
4382
4383/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4384/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4385/// bit is not set).
4386///
4387/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu64&ig_expand=2440)
4388#[inline]
4389#[target_feature(enable = "avx512dq,avx512vl")]
4390#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4392pub fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i {
4393    _mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a)
4394}
4395
4396/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4397/// with truncation, and store the result in dst.
4398///
4399/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu64&ig_expand=2441)
4400#[inline]
4401#[target_feature(enable = "avx512dq,avx512vl")]
4402#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4404pub fn _mm256_cvttps_epu64(a: __m128) -> __m256i {
4405    _mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a)
4406}
4407
4408/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4409/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4410/// corresponding bit is not set).
4411///
4412/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu64&ig_expand=2442)
4413#[inline]
4414#[target_feature(enable = "avx512dq,avx512vl")]
4415#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4417pub fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
4418    unsafe { transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
4419}
4420
4421/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4422/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4423/// bit is not set).
4424///
4425/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu64&ig_expand=2443)
4426#[inline]
4427#[target_feature(enable = "avx512dq,avx512vl")]
4428#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4430pub fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i {
4431    _mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a)
4432}
4433
4434/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4435/// with truncation, and store the result in dst.
4436///
4437/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu64&ig_expand=2444)
4438#[inline]
4439#[target_feature(enable = "avx512dq")]
4440#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4442pub fn _mm512_cvttps_epu64(a: __m256) -> __m512i {
4443    _mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a)
4444}
4445
4446/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4447/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4448/// corresponding bit is not set).
4449///
4450/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu64&ig_expand=2445)
4451#[inline]
4452#[target_feature(enable = "avx512dq")]
4453#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4455pub fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
4456    unsafe {
4457        transmute(vcvttps2uqq_512(
4458            a.as_f32x8(),
4459            src.as_u64x8(),
4460            k,
4461            _MM_FROUND_CUR_DIRECTION,
4462        ))
4463    }
4464}
4465
4466/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4467/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4468/// bit is not set).
4469///
4470/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu64&ig_expand=2446)
4471#[inline]
4472#[target_feature(enable = "avx512dq")]
4473#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4475pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i {
4476    _mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a)
4477}
4478
4479// Multiply-Low
4480
4481/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4482/// the low 64 bits of the intermediate integers in `dst`.
4483///
4484/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi64&ig_expand=4778)
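///
/// A minimal sketch of the low-half semantics, assuming `avx512dq` and `avx512vl`
/// are available: the full 128-bit product is computed, but only its low 64 bits
/// are kept, so overflow wraps.
///
/// ```ignore
/// let a = _mm_set_epi64x(i64::MAX, 3);
/// let b = _mm_set_epi64x(2, -4);
/// let r = _mm_mullo_epi64(a, b);
/// // Low 64 bits of each product: 3 * -4 = -12, and i64::MAX * 2 wraps to -2.
/// // r == _mm_set_epi64x(-2, -12)
/// ```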
4485#[inline]
4486#[target_feature(enable = "avx512dq,avx512vl")]
4487#[cfg_attr(test, assert_instr(vpmullq))]
4488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4489#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4490pub const fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i {
4491    unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) }
4492}
4493
4494/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4495/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4496/// `src` if the corresponding bit is not set).
4497///
4498/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi64&ig_expand=4776)
4499#[inline]
4500#[target_feature(enable = "avx512dq,avx512vl")]
4501#[cfg_attr(test, assert_instr(vpmullq))]
4502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4503#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4504pub const fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4505    unsafe {
4506        let b = _mm_mullo_epi64(a, b).as_i64x2();
4507        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
4508    }
4509}
4510
4511/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4512/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4513/// the corresponding bit is not set).
4514///
4515/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi64&ig_expand=4777)
4516#[inline]
4517#[target_feature(enable = "avx512dq,avx512vl")]
4518#[cfg_attr(test, assert_instr(vpmullq))]
4519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4521pub const fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4522    unsafe {
4523        let b = _mm_mullo_epi64(a, b).as_i64x2();
4524        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
4525    }
4526}
4527
4528/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4529/// the low 64 bits of the intermediate integers in `dst`.
4530///
4531/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi64&ig_expand=4781)
4532#[inline]
4533#[target_feature(enable = "avx512dq,avx512vl")]
4534#[cfg_attr(test, assert_instr(vpmullq))]
4535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4537pub const fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i {
4538    unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) }
4539}
4540
4541/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4542/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4543/// `src` if the corresponding bit is not set).
4544///
4545/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi64&ig_expand=4779)
4546#[inline]
4547#[target_feature(enable = "avx512dq,avx512vl")]
4548#[cfg_attr(test, assert_instr(vpmullq))]
4549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4551pub const fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
4552    unsafe {
4553        let b = _mm256_mullo_epi64(a, b).as_i64x4();
4554        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
4555    }
4556}
4557
4558/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4559/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4560/// the corresponding bit is not set).
4561///
4562/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi64&ig_expand=4780)
4563#[inline]
4564#[target_feature(enable = "avx512dq,avx512vl")]
4565#[cfg_attr(test, assert_instr(vpmullq))]
4566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4568pub const fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
4569    unsafe {
4570        let b = _mm256_mullo_epi64(a, b).as_i64x4();
4571        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
4572    }
4573}
4574
4575/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4576/// the low 64 bits of the intermediate integers in `dst`.
4577///
4578/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi64&ig_expand=4784)
4579#[inline]
4580#[target_feature(enable = "avx512dq")]
4581#[cfg_attr(test, assert_instr(vpmullq))]
4582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4584pub const fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i {
4585    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
4586}
4587
4588/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4589/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4590/// `src` if the corresponding bit is not set).
4591///
4592/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi64&ig_expand=4782)
4593#[inline]
4594#[target_feature(enable = "avx512dq")]
4595#[cfg_attr(test, assert_instr(vpmullq))]
4596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4597#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4598pub const fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
4599    unsafe {
4600        let b = _mm512_mullo_epi64(a, b).as_i64x8();
4601        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
4602    }
4603}
4604
4605/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4606/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4607/// the corresponding bit is not set).
4608///
4609/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi64&ig_expand=4783)
4610#[inline]
4611#[target_feature(enable = "avx512dq")]
4612#[cfg_attr(test, assert_instr(vpmullq))]
4613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4614#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4615pub const fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
4616    unsafe {
4617        let b = _mm512_mullo_epi64(a, b).as_i64x8();
4618        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
4619    }
4620}
4621
4622// Mask Registers
4623
4624/// Convert 8-bit mask a to a 32-bit integer value and store the result in dst.
4625///
4626/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask8_u32&ig_expand=1891)
4627#[inline]
4628#[target_feature(enable = "avx512dq")]
4629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4631pub const fn _cvtmask8_u32(a: __mmask8) -> u32 {
4632    a as u32
4633}
4634
4635/// Convert 32-bit integer value a to an 8-bit mask and store the result in dst.
4636///
4637/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask8&ig_expand=2467)
4638#[inline]
4639#[target_feature(enable = "avx512dq")]
4640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4642pub const fn _cvtu32_mask8(a: u32) -> __mmask8 {
4643    a as __mmask8
4644}
4645
4646/// Add 16-bit masks a and b, and store the result in dst.
4647///
4648/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask16&ig_expand=3903)
4649#[inline]
4650#[target_feature(enable = "avx512dq")]
4651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4653pub const fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
4654    a.wrapping_add(b)
4655}
4656
4657/// Add 8-bit masks a and b, and store the result in dst.
4658///
4659/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask8&ig_expand=3906)
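///
/// A minimal sketch, assuming `avx512dq` is available: the addition wraps on
/// overflow, matching the KADDB instruction.
///
/// ```ignore
/// assert_eq!(_kadd_mask8(0b1111_1111, 0b0000_0001), 0b0000_0000); // 0xFF + 1 wraps
/// assert_eq!(_kadd_mask8(0b0000_0011, 0b0000_0100), 0b0000_0111);
/// ```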
4660#[inline]
4661#[target_feature(enable = "avx512dq")]
4662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4663#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4664pub const fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4665    a.wrapping_add(b)
4666}
4667
4668/// Bitwise AND of 8-bit masks a and b, and store the result in dst.
4669///
4670/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask8&ig_expand=3911)
4671#[inline]
4672#[target_feature(enable = "avx512dq")]
4673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4674#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4675pub const fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4676    a & b
4677}
4678
4679/// Compute the bitwise NOT of 8-bit mask a, then AND with b, and store the result in dst.
4680///
4681/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask8&ig_expand=3916)
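///
/// A minimal sketch, assuming `avx512dq` is available; note that it is the first
/// operand that is negated:
///
/// ```ignore
/// let r = _kandn_mask8(0b1100_0011, 0b1010_1010);
/// assert_eq!(r, 0b0010_1000); // (!a) & b
/// ```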
4682#[inline]
4683#[target_feature(enable = "avx512dq")]
4684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4685#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4686pub const fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4687    _knot_mask8(a) & b
4688}
4689
4690/// Bitwise NOT of 8-bit mask a, and store the result in dst.
4691///
4692/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask8&ig_expand=3922)
4693#[inline]
4694#[target_feature(enable = "avx512dq")]
4695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4697pub const fn _knot_mask8(a: __mmask8) -> __mmask8 {
4698    a ^ 0b11111111
4699}
4700
4701/// Bitwise OR of 8-bit masks a and b, and store the result in dst.
4702///
4703/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask8&ig_expand=3927)
4704#[inline]
4705#[target_feature(enable = "avx512dq")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4708pub const fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4709    a | b
4710}
4711
4712/// Bitwise XNOR of 8-bit masks a and b, and store the result in dst.
4713///
4714/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask8&ig_expand=3969)
4715#[inline]
4716#[target_feature(enable = "avx512dq")]
4717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4718#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4719pub const fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4720    _knot_mask8(_kxor_mask8(a, b))
4721}
4722
4723/// Bitwise XOR of 8-bit masks a and b, and store the result in dst.
4724///
4725/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask8&ig_expand=3974)
4726#[inline]
4727#[target_feature(enable = "avx512dq")]
4728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4729#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4730pub const fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4731    a ^ b
4732}
4733
4734/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
4735/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
4736///
4737/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask8_u8&ig_expand=3931)
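///
/// A minimal sketch, assuming `avx512dq` is available: the two masks OR to 0xFF,
/// so `all_ones` is set to 1 while the all-zeros result (the return value) is 0.
///
/// ```ignore
/// let mut all_ones = 0u8;
/// let zf = unsafe { _kortest_mask8_u8(0b1111_0000, 0b0000_1111, &mut all_ones) };
/// assert_eq!((zf, all_ones), (0, 1));
/// ```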
4738#[inline]
4739#[target_feature(enable = "avx512dq")]
4740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4742pub const unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 {
4743    let tmp = _kor_mask8(a, b);
4744    *all_ones = (tmp == 0xff) as u8;
4745    (tmp == 0) as u8
4746}
4747
4748/// Compute the bitwise OR of 8-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
4749/// store 0 in dst.
4750///
4751/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask8_u8&ig_expand=3936)
4752#[inline]
4753#[target_feature(enable = "avx512dq")]
4754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4755#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4756pub const fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4757    (_kor_mask8(a, b) == 0xff) as u8
4758}
4759
4760/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
4761/// store 0 in dst.
4762///
4763/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask8_u8&ig_expand=3941)
4764#[inline]
4765#[target_feature(enable = "avx512dq")]
4766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4768pub const fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4769    (_kor_mask8(a, b) == 0) as u8
4770}
4771
4772/// Shift 8-bit mask a left by count bits while shifting in zeros, and store the result in dst.
4773///
4774/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask8&ig_expand=3945)
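///
/// A minimal sketch, assuming `avx512dq` is available: shift counts of 8 or more
/// yield an all-zero mask rather than being undefined.
///
/// ```ignore
/// assert_eq!(_kshiftli_mask8::<2>(0b0001_1000), 0b0110_0000);
/// assert_eq!(_kshiftli_mask8::<9>(0b1111_1111), 0b0000_0000);
/// ```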
4775#[inline]
4776#[target_feature(enable = "avx512dq")]
4777#[rustc_legacy_const_generics(1)]
4778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4779#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4780pub const fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
4781    a.unbounded_shl(COUNT)
4782}
4783
4784/// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst.
4785///
4786/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask8&ig_expand=3949)
4787#[inline]
4788#[target_feature(enable = "avx512dq")]
4789#[rustc_legacy_const_generics(1)]
4790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4791#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4792pub const fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
4793    a.unbounded_shr(COUNT)
4794}
4795
4796/// Compute the bitwise AND of 16-bit masks a and b; if the result is all zeros, store 1 in dst,
4797/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if that result is all
4798/// zeros, store 1 in and_not, otherwise store 0 in and_not.
4799///
4800/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask16_u8&ig_expand=3950)
4801#[inline]
4802#[target_feature(enable = "avx512dq")]
4803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4805pub const unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 {
4806    *and_not = (_kandn_mask16(a, b) == 0) as u8;
4807    (_kand_mask16(a, b) == 0) as u8
4808}
4809
4810/// Compute the bitwise AND of 8-bit masks a and b; if the result is all zeros, store 1 in dst,
4811/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if that result is all
4812/// zeros, store 1 in and_not, otherwise store 0 in and_not.
4813///
4814/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask8_u8&ig_expand=3953)
4815#[inline]
4816#[target_feature(enable = "avx512dq")]
4817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4818#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4819pub const unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 {
4820    *and_not = (_kandn_mask8(a, b) == 0) as u8;
4821    (_kand_mask8(a, b) == 0) as u8
4822}
4823
4824/// Compute the bitwise NOT of 16-bit mask a and then AND with 16-bit mask b; if the result is all
4825/// zeros, store 1 in dst, otherwise store 0 in dst.
4826///
4827/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask16_u8&ig_expand=3954)
4828#[inline]
4829#[target_feature(enable = "avx512dq")]
4830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4832pub const fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
4833    (_kandn_mask16(a, b) == 0) as u8
4834}
4835
4836/// Compute the bitwise NOT of 8-bit mask a and then AND with 8-bit mask b; if the result is all
4837/// zeros, store 1 in dst, otherwise store 0 in dst.
4838///
4839/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask8_u8&ig_expand=3957)
4840#[inline]
4841#[target_feature(enable = "avx512dq")]
4842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4843#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4844pub const fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4845    (_kandn_mask8(a, b) == 0) as u8
4846}
4847
4848/// Compute the bitwise AND of 16-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
4849/// store 0 in dst.
4850///
4851/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask16_u8&ig_expand=3958)
4852#[inline]
4853#[target_feature(enable = "avx512dq")]
4854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4855#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4856pub const fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
4857    (_kand_mask16(a, b) == 0) as u8
4858}
4859
4860/// Compute the bitwise AND of 8-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
4861/// store 0 in dst.
4862///
4863/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask8_u8&ig_expand=3961)
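///
/// A minimal sketch, assuming `avx512dq` is available: the first pair of masks
/// shares no set bits, so their AND is zero and the result is 1.
///
/// ```ignore
/// assert_eq!(_ktestz_mask8_u8(0b1111_0000, 0b0000_1111), 1);
/// assert_eq!(_ktestz_mask8_u8(0b1111_0000, 0b0011_0000), 0);
/// ```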
4864#[inline]
4865#[target_feature(enable = "avx512dq")]
4866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4868pub const fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4869    (_kand_mask8(a, b) == 0) as u8
4870}
4871
4872/// Load 8-bit mask from memory into dst.
4873///
4874/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask8&ig_expand=3999)
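///
/// A minimal round-trip sketch, assuming `avx512dq` is available; `mem_addr` must
/// point to a valid, initialized `__mmask8`:
///
/// ```ignore
/// let m: __mmask8 = 0b1010_0101;
/// let r = unsafe { _load_mask8(&m) };
/// assert_eq!(r, m);
/// ```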
4875#[inline]
4876#[target_feature(enable = "avx512dq")]
4877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4878#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4879pub const unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 {
4880    *mem_addr
4881}
4882
4883/// Store 8-bit mask a to memory.
4884///
4885/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask8&ig_expand=6468)
4886#[inline]
4887#[target_feature(enable = "avx512dq")]
4888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4889#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4890pub const unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) {
4891    *mem_addr = a;
4892}
4893
4894/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4895/// integer in a.
4896///
4897/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi32_mask&ig_expand=4612)
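///
/// A minimal sketch, assuming `avx512dq` and `avx512vl` are available: bit i of
/// the result is the sign bit of element i.
///
/// ```ignore
/// let a = _mm_set_epi32(-1, 2, -3, 4); // sign bits (high to low): 1, 0, 1, 0
/// assert_eq!(_mm_movepi32_mask(a), 0b1010);
/// ```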
4898#[inline]
4899#[target_feature(enable = "avx512dq,avx512vl")]
4900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4901#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4902pub const fn _mm_movepi32_mask(a: __m128i) -> __mmask8 {
4903    let zero = _mm_setzero_si128();
4904    _mm_cmplt_epi32_mask(a, zero)
4905}
4906
4907/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4908/// integer in a.
4909///
4910/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi32_mask&ig_expand=4613)
4911#[inline]
4912#[target_feature(enable = "avx512dq,avx512vl")]
4913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4915pub const fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 {
4916    let zero = _mm256_setzero_si256();
4917    _mm256_cmplt_epi32_mask(a, zero)
4918}
4919
4920/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4921/// integer in a.
4922///
4923/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi32_mask&ig_expand=4614)
4924#[inline]
4925#[target_feature(enable = "avx512dq")]
4926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4927#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4928pub const fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 {
4929    let zero = _mm512_setzero_si512();
4930    _mm512_cmplt_epi32_mask(a, zero)
4931}
4932
4933/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4934/// integer in a.
4935///
4936/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_mask&ig_expand=4615)
4937#[inline]
4938#[target_feature(enable = "avx512dq,avx512vl")]
4939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4941pub const fn _mm_movepi64_mask(a: __m128i) -> __mmask8 {
4942    let zero = _mm_setzero_si128();
4943    _mm_cmplt_epi64_mask(a, zero)
4944}
4945
4946/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4947/// integer in a.
4948///
4949/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi64_mask&ig_expand=4616)
4950#[inline]
4951#[target_feature(enable = "avx512dq,avx512vl")]
4952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4953#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4954pub const fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 {
4955    let zero = _mm256_setzero_si256();
4956    _mm256_cmplt_epi64_mask(a, zero)
4957}
4958
4959/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4960/// integer in a.
4961///
4962/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi64_mask&ig_expand=4617)
4963#[inline]
4964#[target_feature(enable = "avx512dq")]
4965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4967pub const fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 {
4968    let zero = _mm512_setzero_si512();
4969    _mm512_cmplt_epi64_mask(a, zero)
4970}
4971
4972/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4973/// bit in k.
4974///
4975/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi32&ig_expand=4625)
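///
/// A minimal sketch, assuming `avx512dq` and `avx512vl` are available: each set
/// bit of k expands to an all-ones 32-bit lane.
///
/// ```ignore
/// let r = _mm_movm_epi32(0b0101);
/// // Elements 0 and 2 are all ones (-1), elements 1 and 3 are zero:
/// // r == _mm_set_epi32(0, -1, 0, -1)
/// ```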
4976#[inline]
4977#[target_feature(enable = "avx512dq,avx512vl")]
4978#[cfg_attr(test, assert_instr(vpmovm2d))]
4979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4981pub const fn _mm_movm_epi32(k: __mmask8) -> __m128i {
4982    let ones = _mm_set1_epi32(-1);
4983    _mm_maskz_mov_epi32(k, ones)
4984}
4985
4986/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4987/// bit in k.
4988///
4989/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi32&ig_expand=4626)
4990#[inline]
4991#[target_feature(enable = "avx512dq,avx512vl")]
4992#[cfg_attr(test, assert_instr(vpmovm2d))]
4993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4995pub const fn _mm256_movm_epi32(k: __mmask8) -> __m256i {
4996    let ones = _mm256_set1_epi32(-1);
4997    _mm256_maskz_mov_epi32(k, ones)
4998}
4999
5000/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
5001/// bit in k.
5002///
5003/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi32&ig_expand=4627)
5004#[inline]
5005#[target_feature(enable = "avx512dq")]
5006#[cfg_attr(test, assert_instr(vpmovm2d))]
5007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5009pub const fn _mm512_movm_epi32(k: __mmask16) -> __m512i {
5010    let ones = _mm512_set1_epi32(-1);
5011    _mm512_maskz_mov_epi32(k, ones)
5012}
5013
5014/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5015/// bit in k.
5016///
5017/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi64&ig_expand=4628)
5018#[inline]
5019#[target_feature(enable = "avx512dq,avx512vl")]
5020#[cfg_attr(test, assert_instr(vpmovm2q))]
5021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5022#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5023pub const fn _mm_movm_epi64(k: __mmask8) -> __m128i {
5024    let ones = _mm_set1_epi64x(-1);
5025    _mm_maskz_mov_epi64(k, ones)
5026}
5027
5028/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5029/// bit in k.
5030///
5031/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi64&ig_expand=4629)
5032#[inline]
5033#[target_feature(enable = "avx512dq,avx512vl")]
5034#[cfg_attr(test, assert_instr(vpmovm2q))]
5035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5037pub const fn _mm256_movm_epi64(k: __mmask8) -> __m256i {
5038    let ones = _mm256_set1_epi64x(-1);
5039    _mm256_maskz_mov_epi64(k, ones)
5040}
5041
5042/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5043/// bit in k.
5044///
5045/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi64&ig_expand=4630)
5046#[inline]
5047#[target_feature(enable = "avx512dq")]
5048#[cfg_attr(test, assert_instr(vpmovm2q))]
5049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5051pub const fn _mm512_movm_epi64(k: __mmask8) -> __m512i {
5052    let ones = _mm512_set1_epi64(-1);
5053    _mm512_maskz_mov_epi64(k, ones)
5054}
5055
5056// Range
5057
5058/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5059/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5060/// Lower 2 bits of IMM8 specify the operation control:
5061///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5062/// Upper 2 bits of IMM8 specify the sign control:
5063///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5064/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5065///
5066/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_pd&ig_expand=5210)
5067#[inline]
5068#[target_feature(enable = "avx512dq")]
5069#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5070#[rustc_legacy_const_generics(2, 3)]
5071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5072pub fn _mm512_range_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
5073    static_assert_uimm_bits!(IMM8, 4);
5074    static_assert_sae!(SAE);
5075    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), 0xff, a, b)
5076}
5077
5078/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5079/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5080/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5081/// Lower 2 bits of IMM8 specify the operation control:
5082///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5083/// Upper 2 bits of IMM8 specify the sign control:
5084///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5085/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5086///
5087/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_pd&ig_expand=5208)
5088#[inline]
5089#[target_feature(enable = "avx512dq")]
5090#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5091#[rustc_legacy_const_generics(4, 5)]
5092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5093pub fn _mm512_mask_range_round_pd<const IMM8: i32, const SAE: i32>(
5094    src: __m512d,
5095    k: __mmask8,
5096    a: __m512d,
5097    b: __m512d,
5098) -> __m512d {
5099    unsafe {
5100        static_assert_uimm_bits!(IMM8, 4);
5101        static_assert_sae!(SAE);
5102        transmute(vrangepd_512(
5103            a.as_f64x8(),
5104            b.as_f64x8(),
5105            IMM8,
5106            src.as_f64x8(),
5107            k,
5108            SAE,
5109        ))
5110    }
5111}
5112
5113/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5114/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5115/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5116/// Lower 2 bits of IMM8 specify the operation control:
5117///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5118/// Upper 2 bits of IMM8 specify the sign control:
5119///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5120/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5121///
5122/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_pd&ig_expand=5209)
5123#[inline]
5124#[target_feature(enable = "avx512dq")]
5125#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5126#[rustc_legacy_const_generics(3, 4)]
5127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5128pub fn _mm512_maskz_range_round_pd<const IMM8: i32, const SAE: i32>(
5129    k: __mmask8,
5130    a: __m512d,
5131    b: __m512d,
5132) -> __m512d {
5133    static_assert_uimm_bits!(IMM8, 4);
5134    static_assert_sae!(SAE);
5135    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a, b)
5136}
5137
5138/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5139/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5140/// Lower 2 bits of IMM8 specify the operation control:
5141///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5142/// Upper 2 bits of IMM8 specify the sign control:
5143///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5144///
5145/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_pd&ig_expand=5192)
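///
/// A minimal sketch of the IMM8 encoding, assuming `avx512dq` and `avx512vl` are
/// available: IMM8 = 0b0101 selects max (bits 1:0 = 01) and takes the sign from
/// the compare result (bits 3:2 = 01), i.e. each selected element keeps its own sign.
///
/// ```ignore
/// let a = _mm_set_pd(-3.0, 2.0);
/// let b = _mm_set_pd(-1.0, 5.0);
/// let r = _mm_range_pd::<0b0101>(a, b);
/// // Per element: max(2.0, 5.0) = 5.0 and max(-3.0, -1.0) = -1.0, so
/// // r == _mm_set_pd(-1.0, 5.0)
/// ```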
5146#[inline]
5147#[target_feature(enable = "avx512dq,avx512vl")]
5148#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5149#[rustc_legacy_const_generics(2)]
5150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5151pub fn _mm_range_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
5152    static_assert_uimm_bits!(IMM8, 4);
5153    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), 0xff, a, b)
5154}
5155
5156/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5157/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5158/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5159/// Lower 2 bits of IMM8 specify the operation control:
5160///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5161/// Upper 2 bits of IMM8 specify the sign control:
5162///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5163///
5164/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_pd&ig_expand=5190)
5165#[inline]
5166#[target_feature(enable = "avx512dq,avx512vl")]
5167#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5168#[rustc_legacy_const_generics(4)]
5169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5170pub fn _mm_mask_range_pd<const IMM8: i32>(
5171    src: __m128d,
5172    k: __mmask8,
5173    a: __m128d,
5174    b: __m128d,
5175) -> __m128d {
5176    unsafe {
5177        static_assert_uimm_bits!(IMM8, 4);
5178        transmute(vrangepd_128(
5179            a.as_f64x2(),
5180            b.as_f64x2(),
5181            IMM8,
5182            src.as_f64x2(),
5183            k,
5184        ))
5185    }
5186}
5187
5188/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5189/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5190/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5191/// Lower 2 bits of IMM8 specify the operation control:
5192///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5193/// Upper 2 bits of IMM8 specify the sign control:
5194///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5195///
5196/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_pd&ig_expand=5191)
5197#[inline]
5198#[target_feature(enable = "avx512dq,avx512vl")]
5199#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5200#[rustc_legacy_const_generics(3)]
5201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5202pub fn _mm_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5203    static_assert_uimm_bits!(IMM8, 4);
5204    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), k, a, b)
5205}
5206
5207/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5208/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5209/// Lower 2 bits of IMM8 specify the operation control:
5210///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5211/// Upper 2 bits of IMM8 specify the sign control:
5212///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5213///
5214/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_pd&ig_expand=5195)
5215#[inline]
5216#[target_feature(enable = "avx512dq,avx512vl")]
5217#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5218#[rustc_legacy_const_generics(2)]
5219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5220pub fn _mm256_range_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
5221    static_assert_uimm_bits!(IMM8, 4);
5222    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), 0xff, a, b)
5223}
5224
5225/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5226/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5227/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5228/// Lower 2 bits of IMM8 specify the operation control:
5229///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5230/// Upper 2 bits of IMM8 specify the sign control:
5231///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5232///
5233/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_pd&ig_expand=5193)
5234#[inline]
5235#[target_feature(enable = "avx512dq,avx512vl")]
5236#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5237#[rustc_legacy_const_generics(4)]
5238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5239pub fn _mm256_mask_range_pd<const IMM8: i32>(
5240    src: __m256d,
5241    k: __mmask8,
5242    a: __m256d,
5243    b: __m256d,
5244) -> __m256d {
5245    unsafe {
5246        static_assert_uimm_bits!(IMM8, 4);
5247        transmute(vrangepd_256(
5248            a.as_f64x4(),
5249            b.as_f64x4(),
5250            IMM8,
5251            src.as_f64x4(),
5252            k,
5253        ))
5254    }
5255}
5256
5257/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5258/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5259/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5260/// Lower 2 bits of IMM8 specify the operation control:
5261///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5262/// Upper 2 bits of IMM8 specify the sign control:
5263///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5264///
5265/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_pd&ig_expand=5194)
5266#[inline]
5267#[target_feature(enable = "avx512dq,avx512vl")]
5268#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5269#[rustc_legacy_const_generics(3)]
5270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5271pub fn _mm256_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
5272    static_assert_uimm_bits!(IMM8, 4);
5273    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), k, a, b)
5274}
5275
5276/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5277/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5278/// Lower 2 bits of IMM8 specify the operation control:
5279///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5280/// Upper 2 bits of IMM8 specify the sign control:
5281///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5282///
5283/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_pd&ig_expand=5198)
5284#[inline]
5285#[target_feature(enable = "avx512dq")]
5286#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5287#[rustc_legacy_const_generics(2)]
5288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5289pub fn _mm512_range_pd<const IMM8: i32>(a: __m512d, b: __m512d) -> __m512d {
5290    static_assert_uimm_bits!(IMM8, 4);
5291    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), 0xff, a, b)
5292}
5293
5294/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5295/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5296/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5297/// Lower 2 bits of IMM8 specify the operation control:
5298///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5299/// Upper 2 bits of IMM8 specify the sign control:
5300///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5301///
5302/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_pd&ig_expand=5196)
5303#[inline]
5304#[target_feature(enable = "avx512dq")]
5305#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5306#[rustc_legacy_const_generics(4)]
5307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5308pub fn _mm512_mask_range_pd<const IMM8: i32>(
5309    src: __m512d,
5310    k: __mmask8,
5311    a: __m512d,
5312    b: __m512d,
5313) -> __m512d {
5314    unsafe {
5315        static_assert_uimm_bits!(IMM8, 4);
5316        transmute(vrangepd_512(
5317            a.as_f64x8(),
5318            b.as_f64x8(),
5319            IMM8,
5320            src.as_f64x8(),
5321            k,
5322            _MM_FROUND_CUR_DIRECTION,
5323        ))
5324    }
5325}
5326
5327/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5328/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5329/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5330/// Lower 2 bits of IMM8 specify the operation control:
5331///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5332/// Upper 2 bits of IMM8 specify the sign control:
5333///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5334///
5335/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_pd&ig_expand=5197)
5336#[inline]
5337#[target_feature(enable = "avx512dq")]
5338#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5339#[rustc_legacy_const_generics(3)]
5340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5341pub fn _mm512_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
5342    static_assert_uimm_bits!(IMM8, 4);
5343    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), k, a, b)
5344}
5345
5346/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5347/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5348/// Lower 2 bits of IMM8 specify the operation control:
5349///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5350/// Upper 2 bits of IMM8 specify the sign control:
5351///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5352/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5353///
5354/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ps&ig_expand=5213)
5355#[inline]
5356#[target_feature(enable = "avx512dq")]
5357#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
5358#[rustc_legacy_const_generics(2, 3)]
5359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5360pub fn _mm512_range_round_ps<const IMM8: i32, const SAE: i32>(a: __m512, b: __m512) -> __m512 {
5361    static_assert_uimm_bits!(IMM8, 4);
5362    static_assert_sae!(SAE);
5363    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), 0xffff, a, b)
5364}
5365
5366/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5367/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5368/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5369/// Lower 2 bits of IMM8 specify the operation control:
5370///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5371/// Upper 2 bits of IMM8 specify the sign control:
5372///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5373///
5374/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_ps&ig_expand=5211)
5375#[inline]
5376#[target_feature(enable = "avx512dq")]
5377#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
5378#[rustc_legacy_const_generics(4, 5)]
5379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5380pub fn _mm512_mask_range_round_ps<const IMM8: i32, const SAE: i32>(
5381    src: __m512,
5382    k: __mmask16,
5383    a: __m512,
5384    b: __m512,
5385) -> __m512 {
5386    unsafe {
5387        static_assert_uimm_bits!(IMM8, 4);
5388        static_assert_sae!(SAE);
5389        transmute(vrangeps_512(
5390            a.as_f32x16(),
5391            b.as_f32x16(),
5392            IMM8,
5393            src.as_f32x16(),
5394            k,
5395            SAE,
5396        ))
5397    }
5398}
5399
5400/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5401/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5402/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5403/// Lower 2 bits of IMM8 specify the operation control:
5404///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5405/// Upper 2 bits of IMM8 specify the sign control:
5406///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5407///
5408/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_ps&ig_expand=5212)
5409#[inline]
5410#[target_feature(enable = "avx512dq")]
5411#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
5412#[rustc_legacy_const_generics(3, 4)]
5413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5414pub fn _mm512_maskz_range_round_ps<const IMM8: i32, const SAE: i32>(
5415    k: __mmask16,
5416    a: __m512,
5417    b: __m512,
5418) -> __m512 {
5419    static_assert_uimm_bits!(IMM8, 4);
5420    static_assert_sae!(SAE);
5421    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a, b)
5422}
5423
5424/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5425/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5426/// Lower 2 bits of IMM8 specify the operation control:
5427///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5428/// Upper 2 bits of IMM8 specify the sign control:
5429///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5430///
5431/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_ps&ig_expand=5201)
5432#[inline]
5433#[target_feature(enable = "avx512dq,avx512vl")]
5434#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5435#[rustc_legacy_const_generics(2)]
5436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5437pub fn _mm_range_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
5438    static_assert_uimm_bits!(IMM8, 4);
5439    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), 0xff, a, b)
5440}
5441
5442/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5443/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5444/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5445/// Lower 2 bits of IMM8 specify the operation control:
5446///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5447/// Upper 2 bits of IMM8 specify the sign control:
5448///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5449///
5450/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ps&ig_expand=5199)
5451#[inline]
5452#[target_feature(enable = "avx512dq,avx512vl")]
5453#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5454#[rustc_legacy_const_generics(4)]
5455#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5456pub fn _mm_mask_range_ps<const IMM8: i32>(
5457    src: __m128,
5458    k: __mmask8,
5459    a: __m128,
5460    b: __m128,
5461) -> __m128 {
5462    unsafe {
5463        static_assert_uimm_bits!(IMM8, 4);
5464        transmute(vrangeps_128(
5465            a.as_f32x4(),
5466            b.as_f32x4(),
5467            IMM8,
5468            src.as_f32x4(),
5469            k,
5470        ))
5471    }
5472}
5473
5474/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5475/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5476/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5477/// Lower 2 bits of IMM8 specify the operation control:
5478///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5479/// Upper 2 bits of IMM8 specify the sign control:
5480///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5481///
5482/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ps&ig_expand=5200)
5483#[inline]
5484#[target_feature(enable = "avx512dq,avx512vl")]
5485#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5486#[rustc_legacy_const_generics(3)]
5487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5488pub fn _mm_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5489    static_assert_uimm_bits!(IMM8, 4);
5490    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), k, a, b)
5491}
5492
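// Illustrative sketch (hypothetical helper, not part of the upstream API): the
// zeromask form behaves like the writemask form with an all-zero `src`, so
// lanes whose mask bit is clear come back as 0.0 rather than being preserved.
#[target_feature(enable = "avx512dq,avx512vl")]
fn _example_maskz_range_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    // IMM8 = 0b1010: absolute min (bits 1:0 = 10) with the sign bit cleared.
    _mm_maskz_range_ps::<0b1010>(k, a, b)
}
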
5493/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5494/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5495/// Lower 2 bits of IMM8 specify the operation control:
5496///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5497/// Upper 2 bits of IMM8 specify the sign control:
5498///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5499///
5500/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_ps&ig_expand=5204)
5501#[inline]
5502#[target_feature(enable = "avx512dq,avx512vl")]
5503#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5504#[rustc_legacy_const_generics(2)]
5505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5506pub fn _mm256_range_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
5507    static_assert_uimm_bits!(IMM8, 4);
5508    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), 0xff, a, b)
5509}
5510
5511/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5512/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5513/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5514/// Lower 2 bits of IMM8 specify the operation control:
5515///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5516/// Upper 2 bits of IMM8 specify the sign control:
5517///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5518///
5519/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_ps&ig_expand=5202)
5520#[inline]
5521#[target_feature(enable = "avx512dq,avx512vl")]
5522#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5523#[rustc_legacy_const_generics(4)]
5524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5525pub fn _mm256_mask_range_ps<const IMM8: i32>(
5526    src: __m256,
5527    k: __mmask8,
5528    a: __m256,
5529    b: __m256,
5530) -> __m256 {
5531    unsafe {
5532        static_assert_uimm_bits!(IMM8, 4);
5533        transmute(vrangeps_256(
5534            a.as_f32x8(),
5535            b.as_f32x8(),
5536            IMM8,
5537            src.as_f32x8(),
5538            k,
5539        ))
5540    }
5541}
5542
5543/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5544/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5545/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5546/// Lower 2 bits of IMM8 specify the operation control:
5547///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5548/// Upper 2 bits of IMM8 specify the sign control:
5549///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5550///
5551/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_ps&ig_expand=5203)
5552#[inline]
5553#[target_feature(enable = "avx512dq,avx512vl")]
5554#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5555#[rustc_legacy_const_generics(3)]
5556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5557pub fn _mm256_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
5558    static_assert_uimm_bits!(IMM8, 4);
5559    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), k, a, b)
5560}
5561
5562/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5563/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5564/// Lower 2 bits of IMM8 specify the operation control:
5565///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5566/// Upper 2 bits of IMM8 specify the sign control:
5567///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5568///
5569/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_ps&ig_expand=5207)
5570#[inline]
5571#[target_feature(enable = "avx512dq")]
5572#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5573#[rustc_legacy_const_generics(2)]
5574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5575pub fn _mm512_range_ps<const IMM8: i32>(a: __m512, b: __m512) -> __m512 {
5576    static_assert_uimm_bits!(IMM8, 4);
5577    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), 0xffff, a, b)
5578}
5579
5580/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5581/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5582/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5583/// Lower 2 bits of IMM8 specify the operation control:
5584///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5585/// Upper 2 bits of IMM8 specify the sign control:
5586///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5587///
5588/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_ps&ig_expand=5205)
5589#[inline]
5590#[target_feature(enable = "avx512dq")]
5591#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5592#[rustc_legacy_const_generics(4)]
5593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5594pub fn _mm512_mask_range_ps<const IMM8: i32>(
5595    src: __m512,
5596    k: __mmask16,
5597    a: __m512,
5598    b: __m512,
5599) -> __m512 {
5600    unsafe {
5601        static_assert_uimm_bits!(IMM8, 4);
5602        transmute(vrangeps_512(
5603            a.as_f32x16(),
5604            b.as_f32x16(),
5605            IMM8,
5606            src.as_f32x16(),
5607            k,
5608            _MM_FROUND_CUR_DIRECTION,
5609        ))
5610    }
5611}
5612
5613/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5614/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5615/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5616/// Lower 2 bits of IMM8 specify the operation control:
5617///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5618/// Upper 2 bits of IMM8 specify the sign control:
5619///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5620///
5621/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_ps&ig_expand=5206)
5622#[inline]
5623#[target_feature(enable = "avx512dq")]
5624#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5625#[rustc_legacy_const_generics(3)]
5626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5627pub fn _mm512_maskz_range_ps<const IMM8: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
5628    static_assert_uimm_bits!(IMM8, 4);
5629    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), k, a, b)
5630}
5631
5632/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5633/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5634/// of dst, and copy the upper element from a to the upper element of dst.
5635/// Lower 2 bits of IMM8 specify the operation control:
5636///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5637/// Upper 2 bits of IMM8 specify the sign control:
5638///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5639/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5640///
5641/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_sd&ig_expand=5216)
5642#[inline]
5643#[target_feature(enable = "avx512dq")]
5644#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5645#[rustc_legacy_const_generics(2, 3)]
5646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5647pub fn _mm_range_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
5648    static_assert_uimm_bits!(IMM8, 4);
5649    static_assert_sae!(SAE);
5650    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), 0xff, a, b)
5651}
5652
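// Illustrative sketch (hypothetical helper, not part of the upstream API): in
// the scalar `sd` forms only lane 0 is computed; lane 1 of the result is
// always copied from `a`, which is why both operands are full __m128d vectors.
#[target_feature(enable = "avx512dq")]
fn _example_range_round_sd(a: __m128d, b: __m128d) -> __m128d {
    // Lower lane: minimum of a[0] and b[0], with the result's sign taken
    // from `a`; upper lane: a[1]. Exceptions are suppressed.
    _mm_range_round_sd::<0b0000, { _MM_FROUND_NO_EXC }>(a, b)
}
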
5653/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5654/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5655/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5656/// upper element from a to the upper element of dst.
5657/// Lower 2 bits of IMM8 specify the operation control:
5658///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5659/// Upper 2 bits of IMM8 specify the sign control:
5660///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5661/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5662///
5663/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214)
5664#[inline]
5665#[target_feature(enable = "avx512dq")]
5666#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5667#[rustc_legacy_const_generics(4, 5)]
5668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5669pub fn _mm_mask_range_round_sd<const IMM8: i32, const SAE: i32>(
5670    src: __m128d,
5671    k: __mmask8,
5672    a: __m128d,
5673    b: __m128d,
5674) -> __m128d {
5675    unsafe {
5676        static_assert_uimm_bits!(IMM8, 4);
5677        static_assert_sae!(SAE);
5678        transmute(vrangesd(
5679            a.as_f64x2(),
5680            b.as_f64x2(),
5681            src.as_f64x2(),
5682            k,
5683            IMM8,
5684            SAE,
5685        ))
5686    }
5687}
5688
5689/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5690/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5691/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5692/// element from a to the upper element of dst.
5693/// Lower 2 bits of IMM8 specify the operation control:
5694///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5695/// Upper 2 bits of IMM8 specify the sign control:
5696///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5697/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5698///
5699/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215)
5700#[inline]
5701#[target_feature(enable = "avx512dq")]
5702#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5703#[rustc_legacy_const_generics(3, 4)]
5704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5705pub fn _mm_maskz_range_round_sd<const IMM8: i32, const SAE: i32>(
5706    k: __mmask8,
5707    a: __m128d,
5708    b: __m128d,
5709) -> __m128d {
5710    static_assert_uimm_bits!(IMM8, 4);
5711    static_assert_sae!(SAE);
5712    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
5713}
5714
5715/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5716/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5717/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5718/// upper element from a to the upper element of dst.
5719/// Lower 2 bits of IMM8 specify the operation control:
5720///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5721/// Upper 2 bits of IMM8 specify the sign control:
5722///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5723///
5724/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220)
5725#[inline]
5726#[target_feature(enable = "avx512dq")]
5727#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5728#[rustc_legacy_const_generics(4)]
5729#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5730pub fn _mm_mask_range_sd<const IMM8: i32>(
5731    src: __m128d,
5732    k: __mmask8,
5733    a: __m128d,
5734    b: __m128d,
5735) -> __m128d {
5736    unsafe {
5737        static_assert_uimm_bits!(IMM8, 4);
5738        transmute(vrangesd(
5739            a.as_f64x2(),
5740            b.as_f64x2(),
5741            src.as_f64x2(),
5742            k,
5743            IMM8,
5744            _MM_FROUND_CUR_DIRECTION,
5745        ))
5746    }
5747}
5748
5749/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5750/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5751/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5752/// element from a to the upper element of dst.
5753/// Lower 2 bits of IMM8 specify the operation control:
5754///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5755/// Upper 2 bits of IMM8 specify the sign control:
5756///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5757///
5758/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221)
5759#[inline]
5760#[target_feature(enable = "avx512dq")]
5761#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5762#[rustc_legacy_const_generics(3)]
5763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5764pub fn _mm_maskz_range_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5765    static_assert_uimm_bits!(IMM8, 4);
5766    _mm_mask_range_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
5767}
5768
5769/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5770/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5771/// of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
5772/// Lower 2 bits of IMM8 specify the operation control:
5773///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5774/// Upper 2 bits of IMM8 specify the sign control:
5775///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5776/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5777///
5778/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_ss&ig_expand=5219)
5779#[inline]
5780#[target_feature(enable = "avx512dq")]
5781#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5782#[rustc_legacy_const_generics(2, 3)]
5783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5784pub fn _mm_range_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
5785    static_assert_uimm_bits!(IMM8, 4);
5786    static_assert_sae!(SAE);
5787    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), 0xff, a, b)
5788}
5789
5790/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5791/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5792/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5793/// upper 3 packed elements from a to the upper elements of dst.
5794/// Lower 2 bits of IMM8 specify the operation control:
5795///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5796/// Upper 2 bits of IMM8 specify the sign control:
5797///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5798/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5799///
5800/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217)
5801#[inline]
5802#[target_feature(enable = "avx512dq")]
5803#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5804#[rustc_legacy_const_generics(4, 5)]
5805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5806pub fn _mm_mask_range_round_ss<const IMM8: i32, const SAE: i32>(
5807    src: __m128,
5808    k: __mmask8,
5809    a: __m128,
5810    b: __m128,
5811) -> __m128 {
5812    unsafe {
5813        static_assert_uimm_bits!(IMM8, 4);
5814        static_assert_sae!(SAE);
5815        transmute(vrangess(
5816            a.as_f32x4(),
5817            b.as_f32x4(),
5818            src.as_f32x4(),
5819            k,
5820            IMM8,
5821            SAE,
5822        ))
5823    }
5824}
5825
5826/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5827/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5828/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5829/// 3 packed elements from a to the upper elements of dst.
5830/// Lower 2 bits of IMM8 specify the operation control:
5831///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5832/// Upper 2 bits of IMM8 specify the sign control:
5833///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5834/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5835///
5836/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218)
5837#[inline]
5838#[target_feature(enable = "avx512dq")]
5839#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5840#[rustc_legacy_const_generics(3, 4)]
5841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5842pub fn _mm_maskz_range_round_ss<const IMM8: i32, const SAE: i32>(
5843    k: __mmask8,
5844    a: __m128,
5845    b: __m128,
5846) -> __m128 {
5847    static_assert_uimm_bits!(IMM8, 4);
5848    static_assert_sae!(SAE);
5849    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
5850}
5851
5852/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5853/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5854/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5855/// upper 3 packed elements from a to the upper elements of dst.
5856/// Lower 2 bits of IMM8 specify the operation control:
5857///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5858/// Upper 2 bits of IMM8 specify the sign control:
5859///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5860///
5861/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222)
5862#[inline]
5863#[target_feature(enable = "avx512dq")]
5864#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
5865#[rustc_legacy_const_generics(4)]
5866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5867pub fn _mm_mask_range_ss<const IMM8: i32>(
5868    src: __m128,
5869    k: __mmask8,
5870    a: __m128,
5871    b: __m128,
5872) -> __m128 {
5873    unsafe {
5874        static_assert_uimm_bits!(IMM8, 4);
5875        transmute(vrangess(
5876            a.as_f32x4(),
5877            b.as_f32x4(),
5878            src.as_f32x4(),
5879            k,
5880            IMM8,
5881            _MM_FROUND_CUR_DIRECTION,
5882        ))
5883    }
5884}
5885
5886/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5887/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5888/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5889/// 3 packed elements from a to the upper elements of dst.
5890/// Lower 2 bits of IMM8 specify the operation control:
5891///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5892/// Upper 2 bits of IMM8 specify the sign control:
5893///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5894///
5895/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223)
5896#[inline]
5897#[target_feature(enable = "avx512dq")]
5898#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
5899#[rustc_legacy_const_generics(3)]
5900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5901pub fn _mm_maskz_range_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5902    static_assert_uimm_bits!(IMM8, 4);
5903    _mm_mask_range_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
5904}
5905
5906// Reduce //
5907
5908/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5909/// the number of bits specified by imm8, and store the results in dst.
5910/// Rounding is done according to the imm8 parameter, which can be one of:
5911///
5912/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5913/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5914/// * [`_MM_FROUND_TO_POS_INF`] : round up
5915/// * [`_MM_FROUND_TO_ZERO`] : truncate
5916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5917///
5918/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5919///
5920/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_pd&ig_expand=5438)
5921#[inline]
5922#[target_feature(enable = "avx512dq")]
5923#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5924#[rustc_legacy_const_generics(1, 2)]
5925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5926pub fn _mm512_reduce_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
5927    static_assert_uimm_bits!(IMM8, 8);
5928    static_assert_sae!(SAE);
5929    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_undefined_pd(), 0xff, a)
5930}
5931
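// Illustrative sketch (hypothetical helper, not part of the upstream API): the
// reduce operation computes `a - round(a)`, where rounding happens at a
// granularity of 2^-M for M = IMM8[7:4], using the rounding mode in the low
// bits of IMM8. With M = 0 and truncation, 2.75 reduces to 0.75 (its
// fractional part); with M = 1 the granularity is 0.5, so 2.75 reduces to 0.25.
#[target_feature(enable = "avx512dq")]
fn _example_reduce_round_pd(a: __m512d) -> __m512d {
    // IMM8 = (M << 4) | rounding mode, here M = 1 with truncation toward zero.
    _mm512_reduce_round_pd::<{ (1 << 4) | _MM_FROUND_TO_ZERO }, { _MM_FROUND_NO_EXC }>(a)
}
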
5932/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5933/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5934/// copied from src to dst if the corresponding mask bit is not set).
5935/// Rounding is done according to the imm8 parameter, which can be one of:
5936///
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5944///
5945/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_pd&ig_expand=5436)
5946#[inline]
5947#[target_feature(enable = "avx512dq")]
5948#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5949#[rustc_legacy_const_generics(3, 4)]
5950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5951pub fn _mm512_mask_reduce_round_pd<const IMM8: i32, const SAE: i32>(
5952    src: __m512d,
5953    k: __mmask8,
5954    a: __m512d,
5955) -> __m512d {
5956    unsafe {
5957        static_assert_uimm_bits!(IMM8, 8);
5958        static_assert_sae!(SAE);
5959        transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE))
5960    }
5961}
5962
5963/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5964/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5965/// zeroed out if the corresponding mask bit is not set).
5966/// Rounding is done according to the imm8 parameter, which can be one of:
5967///
5968/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5969/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5970/// * [`_MM_FROUND_TO_POS_INF`] : round up
5971/// * [`_MM_FROUND_TO_ZERO`] : truncate
5972/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5973///
5974/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5975///
5976/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_pd&ig_expand=5437)
5977#[inline]
5978#[target_feature(enable = "avx512dq")]
5979#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5980#[rustc_legacy_const_generics(2, 3)]
5981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5982pub fn _mm512_maskz_reduce_round_pd<const IMM8: i32, const SAE: i32>(
5983    k: __mmask8,
5984    a: __m512d,
5985) -> __m512d {
5986    static_assert_uimm_bits!(IMM8, 8);
5987    static_assert_sae!(SAE);
5988    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a)
5989}
5990
5991/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5992/// the number of bits specified by imm8, and store the results in dst.
5993/// Rounding is done according to the imm8 parameter, which can be one of:
5994///
5995/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5996/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5997/// * [`_MM_FROUND_TO_POS_INF`] : round up
5998/// * [`_MM_FROUND_TO_ZERO`] : truncate
5999/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6000///
6001/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_pd&ig_expand=5411)
6002#[inline]
6003#[target_feature(enable = "avx512dq,avx512vl")]
6004#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6005#[rustc_legacy_const_generics(1)]
6006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6007pub fn _mm_reduce_pd<const IMM8: i32>(a: __m128d) -> __m128d {
6008    static_assert_uimm_bits!(IMM8, 8);
6009    _mm_mask_reduce_pd::<IMM8>(_mm_undefined_pd(), 0xff, a)
6010}
6011
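// Illustrative sketch (hypothetical helper, not part of the upstream API):
// with M = 0 and truncation, reduce yields the sign-preserving fractional
// part of each element, e.g. 2.75 -> 0.75 and -2.75 -> -0.75.
#[target_feature(enable = "avx512dq,avx512vl")]
fn _example_fract_pd(a: __m128d) -> __m128d {
    _mm_reduce_pd::<{ _MM_FROUND_TO_ZERO }>(a)
}
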
6012/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6013/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6014/// copied from src to dst if the corresponding mask bit is not set).
6015/// Rounding is done according to the imm8 parameter, which can be one of:
6016///
6017/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6018/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6019/// * [`_MM_FROUND_TO_POS_INF`] : round up
6020/// * [`_MM_FROUND_TO_ZERO`] : truncate
6021/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6022///
6023/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_pd&ig_expand=5409)
6024#[inline]
6025#[target_feature(enable = "avx512dq,avx512vl")]
6026#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6027#[rustc_legacy_const_generics(3)]
6028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6029pub fn _mm_mask_reduce_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
6030    unsafe {
6031        static_assert_uimm_bits!(IMM8, 8);
6032        transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k))
6033    }
6034}
6035
6036/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6037/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6038/// zeroed out if the corresponding mask bit is not set).
6039/// Rounding is done according to the imm8 parameter, which can be one of:
6040///
6041/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6042/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6043/// * [`_MM_FROUND_TO_POS_INF`] : round up
6044/// * [`_MM_FROUND_TO_ZERO`] : truncate
6045/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6046///
6047/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_pd&ig_expand=5410)
6048#[inline]
6049#[target_feature(enable = "avx512dq,avx512vl")]
6050#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6051#[rustc_legacy_const_generics(2)]
6052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6053pub fn _mm_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
6054    static_assert_uimm_bits!(IMM8, 8);
6055    _mm_mask_reduce_pd::<IMM8>(_mm_setzero_pd(), k, a)
6056}
6057
6058/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6059/// the number of bits specified by imm8, and store the results in dst.
6060/// Rounding is done according to the imm8 parameter, which can be one of:
6061///
6062/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6063/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6064/// * [`_MM_FROUND_TO_POS_INF`] : round up
6065/// * [`_MM_FROUND_TO_ZERO`] : truncate
6066/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6067///
6068/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_pd&ig_expand=5414)
6069#[inline]
6070#[target_feature(enable = "avx512dq,avx512vl")]
6071#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6072#[rustc_legacy_const_generics(1)]
6073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6074pub fn _mm256_reduce_pd<const IMM8: i32>(a: __m256d) -> __m256d {
6075    static_assert_uimm_bits!(IMM8, 8);
6076    _mm256_mask_reduce_pd::<IMM8>(_mm256_undefined_pd(), 0xff, a)
6077}
6078
6079/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6080/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6081/// copied from src to dst if the corresponding mask bit is not set).
6082/// Rounding is done according to the imm8 parameter, which can be one of:
6083///
6084/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6085/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6086/// * [`_MM_FROUND_TO_POS_INF`] : round up
6087/// * [`_MM_FROUND_TO_ZERO`] : truncate
6088/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6089///
6090/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_pd&ig_expand=5412)
6091#[inline]
6092#[target_feature(enable = "avx512dq,avx512vl")]
6093#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6094#[rustc_legacy_const_generics(3)]
6095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6096pub fn _mm256_mask_reduce_pd<const IMM8: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
6097    unsafe {
6098        static_assert_uimm_bits!(IMM8, 8);
6099        transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k))
6100    }
6101}
6102
6103/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6104/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6105/// zeroed out if the corresponding mask bit is not set).
6106/// Rounding is done according to the imm8 parameter, which can be one of:
6107///
6108/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6109/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6110/// * [`_MM_FROUND_TO_POS_INF`] : round up
6111/// * [`_MM_FROUND_TO_ZERO`] : truncate
6112/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6113///
6114/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_pd&ig_expand=5413)
6115#[inline]
6116#[target_feature(enable = "avx512dq,avx512vl")]
6117#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6118#[rustc_legacy_const_generics(2)]
6119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6120pub fn _mm256_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
6121    static_assert_uimm_bits!(IMM8, 8);
6122    _mm256_mask_reduce_pd::<IMM8>(_mm256_setzero_pd(), k, a)
6123}
6124
6125/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6126/// the number of bits specified by imm8, and store the results in dst.
6127/// Rounding is done according to the imm8 parameter, which can be one of:
6128///
6129/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6130/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6131/// * [`_MM_FROUND_TO_POS_INF`] : round up
6132/// * [`_MM_FROUND_TO_ZERO`] : truncate
6133/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6134///
6135/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_pd&ig_expand=5417)
6136#[inline]
6137#[target_feature(enable = "avx512dq")]
6138#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6139#[rustc_legacy_const_generics(1)]
6140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6141pub fn _mm512_reduce_pd<const IMM8: i32>(a: __m512d) -> __m512d {
6142    static_assert_uimm_bits!(IMM8, 8);
6143    _mm512_mask_reduce_pd::<IMM8>(_mm512_undefined_pd(), 0xff, a)
6144}
6145
6146/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6147/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6148/// copied from src to dst if the corresponding mask bit is not set).
6149/// Rounding is done according to the imm8 parameter, which can be one of:
6150///
6151/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6152/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6153/// * [`_MM_FROUND_TO_POS_INF`] : round up
6154/// * [`_MM_FROUND_TO_ZERO`] : truncate
6155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6156///
6157/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_pd&ig_expand=5415)
6158#[inline]
6159#[target_feature(enable = "avx512dq")]
6160#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6161#[rustc_legacy_const_generics(3)]
6162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6163pub fn _mm512_mask_reduce_pd<const IMM8: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
6164    unsafe {
6165        static_assert_uimm_bits!(IMM8, 8);
6166        transmute(vreducepd_512(
6167            a.as_f64x8(),
6168            IMM8,
6169            src.as_f64x8(),
6170            k,
6171            _MM_FROUND_CUR_DIRECTION,
6172        ))
6173    }
6174}
6175
6176/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6177/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6178/// zeroed out if the corresponding mask bit is not set).
6179/// Rounding is done according to the imm8 parameter, which can be one of:
6180///
6181/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6182/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6183/// * [`_MM_FROUND_TO_POS_INF`] : round up
6184/// * [`_MM_FROUND_TO_ZERO`] : truncate
6185/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6186///
6187/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_pd&ig_expand=5416)
6188#[inline]
6189#[target_feature(enable = "avx512dq")]
6190#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6191#[rustc_legacy_const_generics(2)]
6192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6193pub fn _mm512_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
6194    static_assert_uimm_bits!(IMM8, 8);
6195    _mm512_mask_reduce_pd::<IMM8>(_mm512_setzero_pd(), k, a)
6196}
6197
6198/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6199/// the number of bits specified by imm8, and store the results in dst.
6200/// Rounding is done according to the imm8 parameter, which can be one of:
6201///
6202/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6203/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6204/// * [`_MM_FROUND_TO_POS_INF`] : round up
6205/// * [`_MM_FROUND_TO_ZERO`] : truncate
6206/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6207///
6208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6209///
6210/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_ps&ig_expand=5444)
6211#[inline]
6212#[target_feature(enable = "avx512dq")]
6213#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6214#[rustc_legacy_const_generics(1, 2)]
6215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6216pub fn _mm512_reduce_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
6217    static_assert_uimm_bits!(IMM8, 8);
6218    static_assert_sae!(SAE);
6219    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_undefined_ps(), 0xffff, a)
6220}
6221
6222/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6223/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6224/// copied from src to dst if the corresponding mask bit is not set).
6225/// Rounding is done according to the imm8 parameter, which can be one of:
6226///
6227/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6228/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6229/// * [`_MM_FROUND_TO_POS_INF`] : round up
6230/// * [`_MM_FROUND_TO_ZERO`] : truncate
6231/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6232///
6233/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6234///
6235/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_ps&ig_expand=5442)
6236#[inline]
6237#[target_feature(enable = "avx512dq")]
6238#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6239#[rustc_legacy_const_generics(3, 4)]
6240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6241pub fn _mm512_mask_reduce_round_ps<const IMM8: i32, const SAE: i32>(
6242    src: __m512,
6243    k: __mmask16,
6244    a: __m512,
6245) -> __m512 {
6246    unsafe {
6247        static_assert_uimm_bits!(IMM8, 8);
6248        static_assert_sae!(SAE);
6249        transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE))
6250    }
6251}
6252
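// Illustrative sketch (hypothetical helper, not part of the upstream API): the
// writemask and SAE controls compose independently, so a masked reduce with
// exception suppression simply combines the two.
#[target_feature(enable = "avx512dq")]
fn _example_mask_reduce_round_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    // M = 2 (granularity 2^-2 = 0.25), rounding toward negative infinity.
    _mm512_mask_reduce_round_ps::<{ (2 << 4) | _MM_FROUND_TO_NEG_INF }, { _MM_FROUND_NO_EXC }>(
        src, k, a,
    )
}
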
6253/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6254/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6255/// zeroed out if the corresponding mask bit is not set).
6256/// Rounding is done according to the imm8 parameter, which can be one of:
6257///
6258/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6259/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6260/// * [`_MM_FROUND_TO_POS_INF`] : round up
6261/// * [`_MM_FROUND_TO_ZERO`] : truncate
6262/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6263///
6264/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6265///
6266/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_ps&ig_expand=5443)
6267#[inline]
6268#[target_feature(enable = "avx512dq")]
6269#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6270#[rustc_legacy_const_generics(2, 3)]
6271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6272pub fn _mm512_maskz_reduce_round_ps<const IMM8: i32, const SAE: i32>(
6273    k: __mmask16,
6274    a: __m512,
6275) -> __m512 {
6276    static_assert_uimm_bits!(IMM8, 8);
6277    static_assert_sae!(SAE);
6278    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a)
6279}
6280
6281/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6282/// the number of bits specified by imm8, and store the results in dst.
6283/// Rounding is done according to the imm8 parameter, which can be one of:
6284///
6285/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6286/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6287/// * [`_MM_FROUND_TO_POS_INF`] : round up
6288/// * [`_MM_FROUND_TO_ZERO`] : truncate
6289/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6290///
6291/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ps&ig_expand=5429)
6292#[inline]
6293#[target_feature(enable = "avx512dq,avx512vl")]
6294#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6295#[rustc_legacy_const_generics(1)]
6296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6297pub fn _mm_reduce_ps<const IMM8: i32>(a: __m128) -> __m128 {
6298    static_assert_uimm_bits!(IMM8, 8);
6299    _mm_mask_reduce_ps::<IMM8>(_mm_undefined_ps(), 0xff, a)
6300}
6301
6302/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6303/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6304/// copied from src to dst if the corresponding mask bit is not set).
6305/// Rounding is done according to the imm8 parameter, which can be one of:
6306///
6307/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6308/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6309/// * [`_MM_FROUND_TO_POS_INF`] : round up
6310/// * [`_MM_FROUND_TO_ZERO`] : truncate
6311/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6312///
6313/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ps&ig_expand=5427)
6314#[inline]
6315#[target_feature(enable = "avx512dq,avx512vl")]
6316#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6317#[rustc_legacy_const_generics(3)]
6318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6319pub fn _mm_mask_reduce_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
6320    unsafe {
6321        static_assert_uimm_bits!(IMM8, 8);
6322        transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k))
6323    }
6324}
6325
6326/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6327/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6328/// zeroed out if the corresponding mask bit is not set).
6329/// Rounding is done according to the imm8 parameter, which can be one of:
6330///
6331/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6332/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6333/// * [`_MM_FROUND_TO_POS_INF`] : round up
6334/// * [`_MM_FROUND_TO_ZERO`] : truncate
6335/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6336///
6337/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ps&ig_expand=5428)
6338#[inline]
6339#[target_feature(enable = "avx512dq,avx512vl")]
6340#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6341#[rustc_legacy_const_generics(2)]
6342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6343pub fn _mm_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
6344    static_assert_uimm_bits!(IMM8, 8);
6345    _mm_mask_reduce_ps::<IMM8>(_mm_setzero_ps(), k, a)
6346}
6347
6348/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6349/// the number of bits specified by imm8, and store the results in dst.
6350/// Rounding is done according to the imm8 parameter, which can be one of:
6351///
6352/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6353/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6354/// * [`_MM_FROUND_TO_POS_INF`] : round up
6355/// * [`_MM_FROUND_TO_ZERO`] : truncate
6356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6357///
6358/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_ps&ig_expand=5432)
6359#[inline]
6360#[target_feature(enable = "avx512dq,avx512vl")]
6361#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6362#[rustc_legacy_const_generics(1)]
6363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6364pub fn _mm256_reduce_ps<const IMM8: i32>(a: __m256) -> __m256 {
6365    static_assert_uimm_bits!(IMM8, 8);
6366    _mm256_mask_reduce_ps::<IMM8>(_mm256_undefined_ps(), 0xff, a)
6367}
6368
6369/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6370/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6371/// copied from src to dst if the corresponding mask bit is not set).
6372/// Rounding is done according to the imm8 parameter, which can be one of:
6373///
6374/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6375/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6376/// * [`_MM_FROUND_TO_POS_INF`] : round up
6377/// * [`_MM_FROUND_TO_ZERO`] : truncate
6378/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6379///
6380/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_ps&ig_expand=5430)
6381#[inline]
6382#[target_feature(enable = "avx512dq,avx512vl")]
6383#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6384#[rustc_legacy_const_generics(3)]
6385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6386pub fn _mm256_mask_reduce_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
6387    unsafe {
6388        static_assert_uimm_bits!(IMM8, 8);
6389        transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k))
6390    }
6391}
6392
6393/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6394/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6395/// zeroed out if the corresponding mask bit is not set).
6396/// Rounding is done according to the imm8 parameter, which can be one of:
6397///
6398/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6399/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6400/// * [`_MM_FROUND_TO_POS_INF`] : round up
6401/// * [`_MM_FROUND_TO_ZERO`] : truncate
6402/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6403///
6404/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_ps&ig_expand=5431)
6405#[inline]
6406#[target_feature(enable = "avx512dq,avx512vl")]
6407#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6408#[rustc_legacy_const_generics(2)]
6409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6410pub fn _mm256_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
6411    static_assert_uimm_bits!(IMM8, 8);
6412    _mm256_mask_reduce_ps::<IMM8>(_mm256_setzero_ps(), k, a)
6413}
6414
6415/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6416/// the number of bits specified by imm8, and store the results in dst.
6417/// Rounding is done according to the imm8 parameter, which can be one of:
6418///
6419/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6420/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6421/// * [`_MM_FROUND_TO_POS_INF`] : round up
6422/// * [`_MM_FROUND_TO_ZERO`] : truncate
6423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6424///
6425/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_ps&ig_expand=5435)
6426#[inline]
6427#[target_feature(enable = "avx512dq")]
6428#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6429#[rustc_legacy_const_generics(1)]
6430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6431pub fn _mm512_reduce_ps<const IMM8: i32>(a: __m512) -> __m512 {
6432    static_assert_uimm_bits!(IMM8, 8);
6433    _mm512_mask_reduce_ps::<IMM8>(_mm512_undefined_ps(), 0xffff, a)
6434}
6435
6436/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6437/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6438/// copied from src to dst if the corresponding mask bit is not set).
6439/// Rounding is done according to the imm8 parameter, which can be one of:
6440///
6441/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6442/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6443/// * [`_MM_FROUND_TO_POS_INF`] : round up
6444/// * [`_MM_FROUND_TO_ZERO`] : truncate
6445/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6446///
6447/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_ps&ig_expand=5433)
6448#[inline]
6449#[target_feature(enable = "avx512dq")]
6450#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6451#[rustc_legacy_const_generics(3)]
6452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6453pub fn _mm512_mask_reduce_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
6454    unsafe {
6455        static_assert_uimm_bits!(IMM8, 8);
6456        transmute(vreduceps_512(
6457            a.as_f32x16(),
6458            IMM8,
6459            src.as_f32x16(),
6460            k,
6461            _MM_FROUND_CUR_DIRECTION,
6462        ))
6463    }
6464}
6465
6466/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6467/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6468/// zeroed out if the corresponding mask bit is not set).
6469/// Rounding is done according to the imm8 parameter, which can be one of:
6470///
6471/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6472/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6473/// * [`_MM_FROUND_TO_POS_INF`] : round up
6474/// * [`_MM_FROUND_TO_ZERO`] : truncate
6475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6476///
6477/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_ps&ig_expand=5434)
6478#[inline]
6479#[target_feature(enable = "avx512dq")]
6480#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6481#[rustc_legacy_const_generics(2)]
6482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6483pub fn _mm512_maskz_reduce_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
6484    static_assert_uimm_bits!(IMM8, 8);
6485    _mm512_mask_reduce_ps::<IMM8>(_mm512_setzero_ps(), k, a)
6486}
6487
6488/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6489/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6490/// the upper element from a to the upper element of dst.
6491/// Rounding is done according to the imm8 parameter, which can be one of:
6492///
6493/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6494/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6495/// * [`_MM_FROUND_TO_POS_INF`] : round up
6496/// * [`_MM_FROUND_TO_ZERO`] : truncate
6497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6498///
6499/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6500///
6501/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447)
6502#[inline]
6503#[target_feature(enable = "avx512dq")]
6504#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6505#[rustc_legacy_const_generics(2, 3)]
6506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6507pub fn _mm_reduce_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
6508    static_assert_uimm_bits!(IMM8, 8);
6509    static_assert_sae!(SAE);
6510    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_undefined_pd(), 0xff, a, b)
6511}
6512
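// Illustrative sketch (hypothetical helper, not part of the upstream API): as
// with the scalar range forms, only lane 0 of `b` is reduced; the upper lane
// of the result is copied from `a`.
#[target_feature(enable = "avx512dq")]
fn _example_reduce_round_sd(a: __m128d, b: __m128d) -> __m128d {
    // M = 0 with round-to-nearest; exceptions suppressed.
    _mm_reduce_round_sd::<{ _MM_FROUND_TO_NEAREST_INT }, { _MM_FROUND_NO_EXC }>(a, b)
}
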
6513/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6514/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6515/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6516/// to the upper element of dst.
6517/// Rounding is done according to the imm8 parameter, which can be one of:
6518///
6519/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6520/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6521/// * [`_MM_FROUND_TO_POS_INF`] : round up
6522/// * [`_MM_FROUND_TO_ZERO`] : truncate
6523/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6524///
6525/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6526///
6527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445)
6528#[inline]
6529#[target_feature(enable = "avx512dq")]
6530#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6531#[rustc_legacy_const_generics(4, 5)]
6532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6533pub fn _mm_mask_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6534    src: __m128d,
6535    k: __mmask8,
6536    a: __m128d,
6537    b: __m128d,
6538) -> __m128d {
6539    unsafe {
6540        static_assert_uimm_bits!(IMM8, 8);
6541        static_assert_sae!(SAE);
6542        transmute(vreducesd(
6543            a.as_f64x2(),
6544            b.as_f64x2(),
6545            src.as_f64x2(),
6546            k,
6547            IMM8,
6548            SAE,
6549        ))
6550    }
6551}
6552
6553/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6554/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6555/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6556/// to the upper element of dst.
6557/// Rounding is done according to the imm8 parameter, which can be one of:
6558///
6559/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6560/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6561/// * [`_MM_FROUND_TO_POS_INF`] : round up
6562/// * [`_MM_FROUND_TO_ZERO`] : truncate
6563/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6564///
6565/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6566///
6567/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446)
6568#[inline]
6569#[target_feature(enable = "avx512dq")]
6570#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6571#[rustc_legacy_const_generics(3, 4)]
6572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6573pub fn _mm_maskz_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6574    k: __mmask8,
6575    a: __m128d,
6576    b: __m128d,
6577) -> __m128d {
6578    static_assert_uimm_bits!(IMM8, 8);
6579    static_assert_sae!(SAE);
6580    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
6581}
6582
6583/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6584/// by the number of bits specified by imm8, store the result in the lower element of dst, and
6585/// copy the upper element from a to the upper element of dst.
6587/// Rounding is done according to the imm8 parameter, which can be one of:
6588///
6589/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6590/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6591/// * [`_MM_FROUND_TO_POS_INF`] : round up
6592/// * [`_MM_FROUND_TO_ZERO`] : truncate
6593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6594///
6595/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456)
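///
/// # Example
///
/// A minimal sketch (illustrative, not part of Intel's documentation). Bits
/// `imm8[7:4]` select how many fraction bits survive the rounding step, so
/// keeping one bit and truncating reduces modulo 0.5; assumes `avx512dq` has
/// been verified by the caller:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_pd(9.0, 9.0);
/// let b = _mm_set_sd(1.75);
/// // trunc(1.75 * 2) / 2 = 1.5, so the reduced argument is 1.75 - 1.5 = 0.25.
/// // SAFETY: requires avx512dq (assumed verified above)
/// let r = unsafe { _mm_reduce_sd::<{ (1 << 4) | _MM_FROUND_TO_ZERO }>(a, b) };
/// // r = [0.25, 9.0]
/// ```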
6596#[inline]
6597#[target_feature(enable = "avx512dq")]
6598#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6599#[rustc_legacy_const_generics(2)]
6600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6601pub fn _mm_reduce_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
6602    static_assert_uimm_bits!(IMM8, 8);
6603    _mm_mask_reduce_sd::<IMM8>(_mm_undefined_pd(), 0xff, a, b)
6604}
6605
6606/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6607/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6608/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6609/// to the upper element of dst.
6610/// Rounding is done according to the imm8 parameter, which can be one of:
6611///
6612/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6613/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6614/// * [`_MM_FROUND_TO_POS_INF`] : round up
6615/// * [`_MM_FROUND_TO_ZERO`] : truncate
6616/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6617///
6618/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454)
6619#[inline]
6620#[target_feature(enable = "avx512dq")]
6621#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6622#[rustc_legacy_const_generics(4)]
6623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6624pub fn _mm_mask_reduce_sd<const IMM8: i32>(
6625    src: __m128d,
6626    k: __mmask8,
6627    a: __m128d,
6628    b: __m128d,
6629) -> __m128d {
6630    unsafe {
6631        static_assert_uimm_bits!(IMM8, 8);
6632        transmute(vreducesd(
6633            a.as_f64x2(),
6634            b.as_f64x2(),
6635            src.as_f64x2(),
6636            k,
6637            IMM8,
6638            _MM_FROUND_CUR_DIRECTION,
6639        ))
6640    }
6641}
6642
6643/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6644/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6645/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6646/// to the upper element of dst.
6647/// Rounding is done according to the imm8 parameter, which can be one of:
6648///
6649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6651/// * [`_MM_FROUND_TO_POS_INF`] : round up
6652/// * [`_MM_FROUND_TO_ZERO`] : truncate
6653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6654///
6655/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455)
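///
/// # Example
///
/// A minimal sketch (illustrative, not part of Intel's documentation) of the
/// zeromask behavior, assuming `avx512dq` has been verified by the caller:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_pd(9.0, 9.0);
/// let b = _mm_set_sd(1.25);
/// // Mask bit 0 is clear, so the lower element is zeroed rather than reduced.
/// // SAFETY: requires avx512dq (assumed verified above)
/// let r = unsafe { _mm_maskz_reduce_sd::<0>(0, a, b) };
/// // r = [0.0, 9.0]
/// ```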
6656#[inline]
6657#[target_feature(enable = "avx512dq")]
6658#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6659#[rustc_legacy_const_generics(3)]
6660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6661pub fn _mm_maskz_reduce_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6662    static_assert_uimm_bits!(IMM8, 8);
6663    _mm_mask_reduce_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
6664}
6665
6666/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6667/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6668/// the upper 3 packed elements from a to the upper elements of dst.
6670/// Rounding is done according to the imm8 parameter, which can be one of:
6671///
6672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6674/// * [`_MM_FROUND_TO_POS_INF`] : round up
6675/// * [`_MM_FROUND_TO_ZERO`] : truncate
6676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6677///
6678/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the `sae` parameter.
6679///
6680/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453)
6681#[inline]
6682#[target_feature(enable = "avx512dq")]
6683#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6684#[rustc_legacy_const_generics(2, 3)]
6685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6686pub fn _mm_reduce_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
6687    static_assert_uimm_bits!(IMM8, 8);
6688    static_assert_sae!(SAE);
6689    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_undefined_ps(), 0xff, a, b)
6690}
6691
6692/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6693/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6694/// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed
6695/// elements from a to the upper elements of dst.
6696/// Rounding is done according to the imm8 parameter, which can be one of:
6697///
6698/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6699/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6700/// * [`_MM_FROUND_TO_POS_INF`] : round up
6701/// * [`_MM_FROUND_TO_ZERO`] : truncate
6702/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6703///
6704/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the `sae` parameter.
6705///
6706/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451)
6707#[inline]
6708#[target_feature(enable = "avx512dq")]
6709#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6710#[rustc_legacy_const_generics(4, 5)]
6711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6712pub fn _mm_mask_reduce_round_ss<const IMM8: i32, const SAE: i32>(
6713    src: __m128,
6714    k: __mmask8,
6715    a: __m128,
6716    b: __m128,
6717) -> __m128 {
6718    unsafe {
6719        static_assert_uimm_bits!(IMM8, 8);
6720        static_assert_sae!(SAE);
6721        transmute(vreducess(
6722            a.as_f32x4(),
6723            b.as_f32x4(),
6724            src.as_f32x4(),
6725            k,
6726            IMM8,
6727            SAE,
6728        ))
6729    }
6730}
6731
6732/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6733/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6734/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed
6735/// elements from a to the upper elements of dst.
6736/// Rounding is done according to the imm8 parameter, which can be one of:
6737///
6738/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6739/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6740/// * [`_MM_FROUND_TO_POS_INF`] : round up
6741/// * [`_MM_FROUND_TO_ZERO`] : truncate
6742/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6743///
6744/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the `sae` parameter.
6745///
6746/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452)
6747#[inline]
6748#[target_feature(enable = "avx512dq")]
6749#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6750#[rustc_legacy_const_generics(3, 4)]
6751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6752pub fn _mm_maskz_reduce_round_ss<const IMM8: i32, const SAE: i32>(
6753    k: __mmask8,
6754    a: __m128,
6755    b: __m128,
6756) -> __m128 {
6757    static_assert_uimm_bits!(IMM8, 8);
6758    static_assert_sae!(SAE);
6759    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
6760}
6761
6762/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6763/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6764/// the upper 3 packed elements from a to the upper elements of dst.
6766/// Rounding is done according to the imm8 parameter, which can be one of:
6767///
6768/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6769/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6770/// * [`_MM_FROUND_TO_POS_INF`] : round up
6771/// * [`_MM_FROUND_TO_ZERO`] : truncate
6772/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6773///
6774/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462)
6775#[inline]
6776#[target_feature(enable = "avx512dq")]
6777#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6778#[rustc_legacy_const_generics(2)]
6779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6780pub fn _mm_reduce_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
6781    static_assert_uimm_bits!(IMM8, 8);
6782    _mm_mask_reduce_ss::<IMM8>(_mm_undefined_ps(), 0xff, a, b)
6783}
6784
6785/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6786/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6787/// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed
6788/// elements from a to the upper elements of dst.
6789/// Rounding is done according to the imm8 parameter, which can be one of:
6790///
6791/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6792/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6793/// * [`_MM_FROUND_TO_POS_INF`] : round up
6794/// * [`_MM_FROUND_TO_ZERO`] : truncate
6795/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6796///
6797/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460)
6798#[inline]
6799#[target_feature(enable = "avx512dq")]
6800#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6801#[rustc_legacy_const_generics(4)]
6802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6803pub fn _mm_mask_reduce_ss<const IMM8: i32>(
6804    src: __m128,
6805    k: __mmask8,
6806    a: __m128,
6807    b: __m128,
6808) -> __m128 {
6809    unsafe {
6810        static_assert_uimm_bits!(IMM8, 8);
6811        transmute(vreducess(
6812            a.as_f32x4(),
6813            b.as_f32x4(),
6814            src.as_f32x4(),
6815            k,
6816            IMM8,
6817            _MM_FROUND_CUR_DIRECTION,
6818        ))
6819    }
6820}
6821
6822/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6823/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6824/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed
6825/// elements from a to the upper elements of dst.
6826/// Rounding is done according to the imm8 parameter, which can be one of:
6827///
6828/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6829/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6830/// * [`_MM_FROUND_TO_POS_INF`] : round up
6831/// * [`_MM_FROUND_TO_ZERO`] : truncate
6832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6833///
6834/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461)
6835#[inline]
6836#[target_feature(enable = "avx512dq")]
6837#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6838#[rustc_legacy_const_generics(3)]
6839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6840pub fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6841    static_assert_uimm_bits!(IMM8, 8);
6842    _mm_mask_reduce_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
6843}
6844
6845// FP-Class
6846
6847/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6848/// by imm8, and store the results in mask vector k.
6849/// imm8 can be a combination of:
6850///
6851///     - 0x01 // QNaN
6852///     - 0x02 // Positive Zero
6853///     - 0x04 // Negative Zero
6854///     - 0x08 // Positive Infinity
6855///     - 0x10 // Negative Infinity
6856///     - 0x20 // Denormal
6857///     - 0x40 // Negative
6858///     - 0x80 // SNaN
6859///
6860/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_pd_mask&ig_expand=3493)
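///
/// # Example
///
/// A minimal sketch (illustrative, not part of Intel's documentation); the
/// category bits may be OR-ed, e.g. `0x08 | 0x10 = 0x18` tests each element
/// for either infinity. Assumes `avx512dq` and `avx512vl` have been verified
/// by the caller:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_pd(f64::INFINITY, 1.0);
/// // SAFETY: requires avx512dq and avx512vl (assumed verified above)
/// let k = unsafe { _mm_fpclass_pd_mask::<0x18>(a) };
/// assert_eq!(k, 0b10); // only the upper element is infinite
/// ```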
6861#[inline]
6862#[target_feature(enable = "avx512dq,avx512vl")]
6863#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6864#[rustc_legacy_const_generics(1)]
6865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6866pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
6867    static_assert_uimm_bits!(IMM8, 8);
6868    unsafe { vfpclasspd_128(a.as_f64x2(), IMM8) }
6869}
6870
6871/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6872/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6873/// corresponding mask bit is not set).
6874/// imm8 can be a combination of:
6875///
6876///     - 0x01 // QNaN
6877///     - 0x02 // Positive Zero
6878///     - 0x04 // Negative Zero
6879///     - 0x08 // Positive Infinity
6880///     - 0x10 // Negative Infinity
6881///     - 0x20 // Denormal
6882///     - 0x40 // Negative
6883///     - 0x80 // SNaN
6884///
6885/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_pd_mask&ig_expand=3494)
6886#[inline]
6887#[target_feature(enable = "avx512dq,avx512vl")]
6888#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6889#[rustc_legacy_const_generics(2)]
6890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6891pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
6892    _mm_fpclass_pd_mask::<IMM8>(a) & k1
6893}
6894
6895/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6896/// by imm8, and store the results in mask vector k.
6897/// imm8 can be a combination of:
6898///
6899///     - 0x01 // QNaN
6900///     - 0x02 // Positive Zero
6901///     - 0x04 // Negative Zero
6902///     - 0x08 // Positive Infinity
6903///     - 0x10 // Negative Infinity
6904///     - 0x20 // Denormal
6905///     - 0x40 // Negative
6906///     - 0x80 // SNaN
6907///
6908/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_pd_mask&ig_expand=3495)
6909#[inline]
6910#[target_feature(enable = "avx512dq,avx512vl")]
6911#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6912#[rustc_legacy_const_generics(1)]
6913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6914pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 {
6915    static_assert_uimm_bits!(IMM8, 8);
6916    unsafe { vfpclasspd_256(a.as_f64x4(), IMM8) }
6917}
6918
6919/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6920/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6921/// corresponding mask bit is not set).
6922/// imm8 can be a combination of:
6923///
6924///     - 0x01 // QNaN
6925///     - 0x02 // Positive Zero
6926///     - 0x04 // Negative Zero
6927///     - 0x08 // Positive Infinity
6928///     - 0x10 // Negative Infinity
6929///     - 0x20 // Denormal
6930///     - 0x40 // Negative
6931///     - 0x80 // SNaN
6932///
6933/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_pd_mask&ig_expand=3496)
6934#[inline]
6935#[target_feature(enable = "avx512dq,avx512vl")]
6936#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6937#[rustc_legacy_const_generics(2)]
6938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6939pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 {
6940    _mm256_fpclass_pd_mask::<IMM8>(a) & k1
6941}
6942
6943/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6944/// by imm8, and store the results in mask vector k.
6945/// imm8 can be a combination of:
6946///
6947///     - 0x01 // QNaN
6948///     - 0x02 // Positive Zero
6949///     - 0x04 // Negative Zero
6950///     - 0x08 // Positive Infinity
6951///     - 0x10 // Negative Infinity
6952///     - 0x20 // Denormal
6953///     - 0x40 // Negative
6954///     - 0x80 // SNaN
6955///
6956/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_pd_mask&ig_expand=3497)
6957#[inline]
6958#[target_feature(enable = "avx512dq")]
6959#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6960#[rustc_legacy_const_generics(1)]
6961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6962pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 {
6963    static_assert_uimm_bits!(IMM8, 8);
6964    unsafe { vfpclasspd_512(a.as_f64x8(), IMM8) }
6965}
6966
6967/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6968/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6969/// corresponding mask bit is not set).
6970/// imm8 can be a combination of:
6971///
6972///     - 0x01 // QNaN
6973///     - 0x02 // Positive Zero
6974///     - 0x04 // Negative Zero
6975///     - 0x08 // Positive Infinity
6976///     - 0x10 // Negative Infinity
6977///     - 0x20 // Denormal
6978///     - 0x40 // Negative
6979///     - 0x80 // SNaN
6980///
6981/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_pd_mask&ig_expand=3498)
6982#[inline]
6983#[target_feature(enable = "avx512dq")]
6984#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6985#[rustc_legacy_const_generics(2)]
6986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6987pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 {
6988    _mm512_fpclass_pd_mask::<IMM8>(a) & k1
6989}
6990
6991/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6992/// by imm8, and store the results in mask vector k.
6993/// imm8 can be a combination of:
6994///
6995///     - 0x01 // QNaN
6996///     - 0x02 // Positive Zero
6997///     - 0x04 // Negative Zero
6998///     - 0x08 // Positive Infinity
6999///     - 0x10 // Negative Infinity
7000///     - 0x20 // Denormal
7001///     - 0x40 // Negative
7002///     - 0x80 // SNaN
7003///
7004/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ps_mask&ig_expand=3505)
7005#[inline]
7006#[target_feature(enable = "avx512dq,avx512vl")]
7007#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7008#[rustc_legacy_const_generics(1)]
7009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7010pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
7011    static_assert_uimm_bits!(IMM8, 8);
7012    unsafe { vfpclassps_128(a.as_f32x4(), IMM8) }
7013}
7014
7015/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7016/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7017/// corresponding mask bit is not set).
7018/// imm8 can be a combination of:
7019///
7020///     - 0x01 // QNaN
7021///     - 0x02 // Positive Zero
7022///     - 0x04 // Negative Zero
7023///     - 0x08 // Positive Infinity
7024///     - 0x10 // Negative Infinity
7025///     - 0x20 // Denormal
7026///     - 0x40 // Negative
7027///     - 0x80 // SNaN
7028///
7029/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ps_mask&ig_expand=3506)
7030#[inline]
7031#[target_feature(enable = "avx512dq,avx512vl")]
7032#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7033#[rustc_legacy_const_generics(2)]
7034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7035pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
7036    _mm_fpclass_ps_mask::<IMM8>(a) & k1
7037}
7038
7039/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7040/// by imm8, and store the results in mask vector k.
7041/// imm8 can be a combination of:
7042///
7043///     - 0x01 // QNaN
7044///     - 0x02 // Positive Zero
7045///     - 0x04 // Negative Zero
7046///     - 0x08 // Positive Infinity
7047///     - 0x10 // Negative Infinity
7048///     - 0x20 // Denormal
7049///     - 0x40 // Negative
7050///     - 0x80 // SNaN
7051///
7052/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_ps_mask&ig_expand=3507)
7053#[inline]
7054#[target_feature(enable = "avx512dq,avx512vl")]
7055#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7056#[rustc_legacy_const_generics(1)]
7057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7058pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 {
7059    static_assert_uimm_bits!(IMM8, 8);
7060    unsafe { vfpclassps_256(a.as_f32x8(), IMM8) }
7061}
7062
7063/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7064/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7065/// corresponding mask bit is not set).
7066/// imm8 can be a combination of:
7067///
7068///     - 0x01 // QNaN
7069///     - 0x02 // Positive Zero
7070///     - 0x04 // Negative Zero
7071///     - 0x08 // Positive Infinity
7072///     - 0x10 // Negative Infinity
7073///     - 0x20 // Denormal
7074///     - 0x40 // Negative
7075///     - 0x80 // SNaN
7076///
7077/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_ps_mask&ig_expand=3508)
7078#[inline]
7079#[target_feature(enable = "avx512dq,avx512vl")]
7080#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7081#[rustc_legacy_const_generics(2)]
7082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7083pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 {
7084    _mm256_fpclass_ps_mask::<IMM8>(a) & k1
7085}
7086
7087/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7088/// by imm8, and store the results in mask vector k.
7089/// imm8 can be a combination of:
7090///
7091///     - 0x01 // QNaN
7092///     - 0x02 // Positive Zero
7093///     - 0x04 // Negative Zero
7094///     - 0x08 // Positive Infinity
7095///     - 0x10 // Negative Infinity
7096///     - 0x20 // Denormal
7097///     - 0x40 // Negative
7098///     - 0x80 // SNaN
7099///
7100/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_ps_mask&ig_expand=3509)
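///
/// # Example
///
/// A minimal sketch (illustrative, not part of Intel's documentation); all 16
/// lanes are classified, so the result is a 16-bit mask. Assumes `avx512dq`
/// has been verified by the caller:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_ps(1.0e-40); // subnormal as an f32
/// // SAFETY: requires avx512dq (assumed verified above)
/// let k = unsafe { _mm512_fpclass_ps_mask::<0x20>(a) }; // 0x20 = Denormal
/// assert_eq!(k, 0xffff); // every lane is denormal
/// ```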
7101#[inline]
7102#[target_feature(enable = "avx512dq")]
7103#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7104#[rustc_legacy_const_generics(1)]
7105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7106pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 {
7107    static_assert_uimm_bits!(IMM8, 8);
7108    unsafe { vfpclassps_512(a.as_f32x16(), IMM8) }
7109}
7110
7111/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7112/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7113/// corresponding mask bit is not set).
7114/// imm8 can be a combination of:
7115///
7116///     - 0x01 // QNaN
7117///     - 0x02 // Positive Zero
7118///     - 0x04 // Negative Zero
7119///     - 0x08 // Positive Infinity
7120///     - 0x10 // Negative Infinity
7121///     - 0x20 // Denormal
7122///     - 0x40 // Negative
7123///     - 0x80 // SNaN
7124///
7125/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_ps_mask&ig_expand=3510)
7126#[inline]
7127#[target_feature(enable = "avx512dq")]
7128#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7129#[rustc_legacy_const_generics(2)]
7130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7131pub fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 {
7132    _mm512_fpclass_ps_mask::<IMM8>(a) & k1
7133}
7134
7135/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
7136/// by imm8, and store the results in mask vector k.
7137/// imm8 can be a combination of:
7138///
7139///     - 0x01 // QNaN
7140///     - 0x02 // Positive Zero
7141///     - 0x04 // Negative Zero
7142///     - 0x08 // Positive Infinity
7143///     - 0x10 // Negative Infinity
7144///     - 0x20 // Denormal
7145///     - 0x40 // Negative
7146///     - 0x80 // SNaN
7147///
7148/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_sd_mask&ig_expand=3511)
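///
/// # Example
///
/// A minimal sketch (illustrative, not part of Intel's documentation); only
/// the lower element is classified, so only mask bit 0 is meaningful. Assumes
/// `avx512dq` has been verified by the caller:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_sd(f64::NAN); // `f64::NAN` is a quiet NaN
/// // SAFETY: requires avx512dq (assumed verified above)
/// let k = unsafe { _mm_fpclass_sd_mask::<0x01>(a) }; // 0x01 = QNaN
/// assert_eq!(k, 1);
/// ```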
7149#[inline]
7150#[target_feature(enable = "avx512dq")]
7151#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
7152#[rustc_legacy_const_generics(1)]
7153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7154pub fn _mm_fpclass_sd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
7155    static_assert_uimm_bits!(IMM8, 8);
7156    _mm_mask_fpclass_sd_mask::<IMM8>(0xff, a)
7157}
7158
7159/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
7160/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7161/// corresponding mask bit is not set).
7162/// imm8 can be a combination of:
7163///
7164///     - 0x01 // QNaN
7165///     - 0x02 // Positive Zero
7166///     - 0x04 // Negative Zero
7167///     - 0x08 // Positive Infinity
7168///     - 0x10 // Negative Infinity
7169///     - 0x20 // Denormal
7170///     - 0x40 // Negative
7171///     - 0x80 // SNaN
7172///
7173/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512)
7174#[inline]
7175#[target_feature(enable = "avx512dq")]
7176#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
7177#[rustc_legacy_const_generics(2)]
7178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7179pub fn _mm_mask_fpclass_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
7180    unsafe {
7181        static_assert_uimm_bits!(IMM8, 8);
7182        vfpclasssd(a.as_f64x2(), IMM8, k1)
7183    }
7184}
7185
7186/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
7187/// by imm8, and store the results in mask vector k.
7188/// imm8 can be a combination of:
7189///
7190///     - 0x01 // QNaN
7191///     - 0x02 // Positive Zero
7192///     - 0x04 // Negative Zero
7193///     - 0x08 // Positive Infinity
7194///     - 0x10 // Negative Infinity
7195///     - 0x20 // Denormal
7196///     - 0x40 // Negative
7197///     - 0x80 // SNaN
7198///
7199/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ss_mask&ig_expand=3515)
7200#[inline]
7201#[target_feature(enable = "avx512dq")]
7202#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
7203#[rustc_legacy_const_generics(1)]
7204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7205pub fn _mm_fpclass_ss_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
7206    static_assert_uimm_bits!(IMM8, 8);
7207    _mm_mask_fpclass_ss_mask::<IMM8>(0xff, a)
7208}
7209
7210/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
7211/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7212/// corresponding mask bit is not set).
7213/// imm8 can be a combination of:
7214///
7215///     - 0x01 // QNaN
7216///     - 0x02 // Positive Zero
7217///     - 0x04 // Negative Zero
7218///     - 0x08 // Positive Infinity
7219///     - 0x10 // Negative Infinity
7220///     - 0x20 // Denormal
7221///     - 0x40 // Negative
7222///     - 0x80 // SNaN
7223///
7224/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516)
7225#[inline]
7226#[target_feature(enable = "avx512dq")]
7227#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
7228#[rustc_legacy_const_generics(2)]
7229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7230pub fn _mm_mask_fpclass_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
7231    unsafe {
7232        static_assert_uimm_bits!(IMM8, 8);
7233        vfpclassss(a.as_f32x4(), IMM8, k1)
7234    }
7235}
7236
7237#[allow(improper_ctypes)]
7238unsafe extern "C" {
7239    #[link_name = "llvm.x86.avx512.sitofp.round.v2f64.v2i64"]
7240    fn vcvtqq2pd_128(a: i64x2, rounding: i32) -> f64x2;
7241    #[link_name = "llvm.x86.avx512.sitofp.round.v4f64.v4i64"]
7242    fn vcvtqq2pd_256(a: i64x4, rounding: i32) -> f64x4;
7243    #[link_name = "llvm.x86.avx512.sitofp.round.v8f64.v8i64"]
7244    fn vcvtqq2pd_512(a: i64x8, rounding: i32) -> f64x8;
7245
7246    #[link_name = "llvm.x86.avx512.mask.cvtqq2ps.128"]
7247    fn vcvtqq2ps_128(a: i64x2, src: f32x4, k: __mmask8) -> f32x4;
7248    #[link_name = "llvm.x86.avx512.sitofp.round.v4f32.v4i64"]
7249    fn vcvtqq2ps_256(a: i64x4, rounding: i32) -> f32x4;
7250    #[link_name = "llvm.x86.avx512.sitofp.round.v8f32.v8i64"]
7251    fn vcvtqq2ps_512(a: i64x8, rounding: i32) -> f32x8;
7252
7253    #[link_name = "llvm.x86.avx512.uitofp.round.v2f64.v2i64"]
7254    fn vcvtuqq2pd_128(a: u64x2, rounding: i32) -> f64x2;
7255    #[link_name = "llvm.x86.avx512.uitofp.round.v4f64.v4i64"]
7256    fn vcvtuqq2pd_256(a: u64x4, rounding: i32) -> f64x4;
7257    #[link_name = "llvm.x86.avx512.uitofp.round.v8f64.v8i64"]
7258    fn vcvtuqq2pd_512(a: u64x8, rounding: i32) -> f64x8;
7259
7260    #[link_name = "llvm.x86.avx512.mask.cvtuqq2ps.128"]
7261    fn vcvtuqq2ps_128(a: u64x2, src: f32x4, k: __mmask8) -> f32x4;
7262    #[link_name = "llvm.x86.avx512.uitofp.round.v4f32.v4i64"]
7263    fn vcvtuqq2ps_256(a: u64x4, rounding: i32) -> f32x4;
7264    #[link_name = "llvm.x86.avx512.uitofp.round.v8f32.v8i64"]
7265    fn vcvtuqq2ps_512(a: u64x8, rounding: i32) -> f32x8;
7266
7267    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.128"]
7268    fn vcvtpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
7269    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.256"]
7270    fn vcvtpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
7271    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.512"]
7272    fn vcvtpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
7273
7274    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.128"]
7275    fn vcvtps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
7276    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.256"]
7277    fn vcvtps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
7278    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.512"]
7279    fn vcvtps2qq_512(a: f32x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
7280
7281    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.128"]
7282    fn vcvtpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
7283    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.256"]
7284    fn vcvtpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
7285    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.512"]
7286    fn vcvtpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
7287
7288    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.128"]
7289    fn vcvtps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
7290    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.256"]
7291    fn vcvtps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
7292    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.512"]
7293    fn vcvtps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
7294
7295    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.128"]
7296    fn vcvttpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
7297    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.256"]
7298    fn vcvttpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
7299    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.512"]
7300    fn vcvttpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
7301
7302    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.128"]
7303    fn vcvttps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
7304    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.256"]
7305    fn vcvttps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
7306    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.512"]
7307    fn vcvttps2qq_512(a: f32x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
7308
7309    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.128"]
7310    fn vcvttpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
7311    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.256"]
7312    fn vcvttpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
7313    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.512"]
7314    fn vcvttpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
7315
7316    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.128"]
7317    fn vcvttps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
7318    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.256"]
7319    fn vcvttps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
7320    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.512"]
7321    fn vcvttps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
7322
7323    #[link_name = "llvm.x86.avx512.mask.range.pd.128"]
7324    fn vrangepd_128(a: f64x2, b: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
7325    #[link_name = "llvm.x86.avx512.mask.range.pd.256"]
7326    fn vrangepd_256(a: f64x4, b: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
7327    #[link_name = "llvm.x86.avx512.mask.range.pd.512"]
7328    fn vrangepd_512(a: f64x8, b: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;
7329
7330    #[link_name = "llvm.x86.avx512.mask.range.ps.128"]
7331    fn vrangeps_128(a: f32x4, b: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
7332    #[link_name = "llvm.x86.avx512.mask.range.ps.256"]
7333    fn vrangeps_256(a: f32x8, b: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
7334    #[link_name = "llvm.x86.avx512.mask.range.ps.512"]
7335    fn vrangeps_512(a: f32x16, b: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32)
7336    -> f32x16;
7337
7338    #[link_name = "llvm.x86.avx512.mask.range.sd"]
7339    fn vrangesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
7340    #[link_name = "llvm.x86.avx512.mask.range.ss"]
7341    fn vrangess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
7342
7343    #[link_name = "llvm.x86.avx512.mask.reduce.pd.128"]
7344    fn vreducepd_128(a: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
7345    #[link_name = "llvm.x86.avx512.mask.reduce.pd.256"]
7346    fn vreducepd_256(a: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
7347    #[link_name = "llvm.x86.avx512.mask.reduce.pd.512"]
7348    fn vreducepd_512(a: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;
7349
7350    #[link_name = "llvm.x86.avx512.mask.reduce.ps.128"]
7351    fn vreduceps_128(a: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
7352    #[link_name = "llvm.x86.avx512.mask.reduce.ps.256"]
7353    fn vreduceps_256(a: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
7354    #[link_name = "llvm.x86.avx512.mask.reduce.ps.512"]
7355    fn vreduceps_512(a: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) -> f32x16;
7356
7357    #[link_name = "llvm.x86.avx512.mask.reduce.sd"]
7358    fn vreducesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
7359    #[link_name = "llvm.x86.avx512.mask.reduce.ss"]
7360    fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
7361
7362    #[link_name = "llvm.x86.avx512.fpclass.pd.128"]
7363    fn vfpclasspd_128(a: f64x2, imm8: i32) -> __mmask8;
7364    #[link_name = "llvm.x86.avx512.fpclass.pd.256"]
7365    fn vfpclasspd_256(a: f64x4, imm8: i32) -> __mmask8;
7366    #[link_name = "llvm.x86.avx512.fpclass.pd.512"]
7367    fn vfpclasspd_512(a: f64x8, imm8: i32) -> __mmask8;
7368
7369    #[link_name = "llvm.x86.avx512.fpclass.ps.128"]
7370    fn vfpclassps_128(a: f32x4, imm8: i32) -> __mmask8;
7371    #[link_name = "llvm.x86.avx512.fpclass.ps.256"]
7372    fn vfpclassps_256(a: f32x8, imm8: i32) -> __mmask8;
7373    #[link_name = "llvm.x86.avx512.fpclass.ps.512"]
7374    fn vfpclassps_512(a: f32x16, imm8: i32) -> __mmask16;
7375
7376    #[link_name = "llvm.x86.avx512.mask.fpclass.sd"]
7377    fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
7378    #[link_name = "llvm.x86.avx512.mask.fpclass.ss"]
7379    fn vfpclassss(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
7380}
7381
7382#[cfg(test)]
7383mod tests {
7384    use super::*;
7385    use crate::core_arch::assert_eq_const as assert_eq;
7386    use crate::core_arch::x86::*;
7387
7388    use stdarch_test::simd_test;
7389
7390    const OPRND1_64: f64 = f64::from_bits(0x3333333333333333);
7391    const OPRND2_64: f64 = f64::from_bits(0x5555555555555555);
7392
7393    const AND_64: f64 = f64::from_bits(0x1111111111111111);
7394    const ANDN_64: f64 = f64::from_bits(0x4444444444444444);
7395    const OR_64: f64 = f64::from_bits(0x7777777777777777);
7396    const XOR_64: f64 = f64::from_bits(0x6666666666666666);
7397
7398    const OPRND1_32: f32 = f32::from_bits(0x33333333);
7399    const OPRND2_32: f32 = f32::from_bits(0x55555555);
7400
7401    const AND_32: f32 = f32::from_bits(0x11111111);
7402    const ANDN_32: f32 = f32::from_bits(0x44444444);
7403    const OR_32: f32 = f32::from_bits(0x77777777);
7404    const XOR_32: f32 = f32::from_bits(0x66666666);
7405
7406    #[simd_test(enable = "avx512dq,avx512vl")]
7407    const fn test_mm_mask_and_pd() {
7408        let a = _mm_set1_pd(OPRND1_64);
7409        let b = _mm_set1_pd(OPRND2_64);
7410        let src = _mm_set_pd(1., 2.);
7411        let r = _mm_mask_and_pd(src, 0b01, a, b);
7412        let e = _mm_set_pd(1., AND_64);
7413        assert_eq_m128d(r, e);
7414    }
7415
7416    #[simd_test(enable = "avx512dq,avx512vl")]
7417    const fn test_mm_maskz_and_pd() {
7418        let a = _mm_set1_pd(OPRND1_64);
7419        let b = _mm_set1_pd(OPRND2_64);
7420        let r = _mm_maskz_and_pd(0b01, a, b);
7421        let e = _mm_set_pd(0.0, AND_64);
7422        assert_eq_m128d(r, e);
7423    }
7424
7425    #[simd_test(enable = "avx512dq,avx512vl")]
7426    const fn test_mm256_mask_and_pd() {
7427        let a = _mm256_set1_pd(OPRND1_64);
7428        let b = _mm256_set1_pd(OPRND2_64);
7429        let src = _mm256_set_pd(1., 2., 3., 4.);
7430        let r = _mm256_mask_and_pd(src, 0b0101, a, b);
7431        let e = _mm256_set_pd(1., AND_64, 3., AND_64);
7432        assert_eq_m256d(r, e);
7433    }
7434
7435    #[simd_test(enable = "avx512dq,avx512vl")]
7436    const fn test_mm256_maskz_and_pd() {
7437        let a = _mm256_set1_pd(OPRND1_64);
7438        let b = _mm256_set1_pd(OPRND2_64);
7439        let r = _mm256_maskz_and_pd(0b0101, a, b);
7440        let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64);
7441        assert_eq_m256d(r, e);
7442    }
7443
7444    #[simd_test(enable = "avx512dq")]
7445    const fn test_mm512_and_pd() {
7446        let a = _mm512_set1_pd(OPRND1_64);
7447        let b = _mm512_set1_pd(OPRND2_64);
7448        let r = _mm512_and_pd(a, b);
7449        let e = _mm512_set1_pd(AND_64);
7450        assert_eq_m512d(r, e);
7451    }
7452
7453    #[simd_test(enable = "avx512dq")]
7454    const fn test_mm512_mask_and_pd() {
7455        let a = _mm512_set1_pd(OPRND1_64);
7456        let b = _mm512_set1_pd(OPRND2_64);
7457        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7458        let r = _mm512_mask_and_pd(src, 0b01010101, a, b);
7459        let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64);
7460        assert_eq_m512d(r, e);
7461    }
7462
7463    #[simd_test(enable = "avx512dq")]
7464    const fn test_mm512_maskz_and_pd() {
7465        let a = _mm512_set1_pd(OPRND1_64);
7466        let b = _mm512_set1_pd(OPRND2_64);
7467        let r = _mm512_maskz_and_pd(0b01010101, a, b);
7468        let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64);
7469        assert_eq_m512d(r, e);
7470    }
7471
7472    #[simd_test(enable = "avx512dq,avx512vl")]
7473    const fn test_mm_mask_and_ps() {
7474        let a = _mm_set1_ps(OPRND1_32);
7475        let b = _mm_set1_ps(OPRND2_32);
7476        let src = _mm_set_ps(1., 2., 3., 4.);
7477        let r = _mm_mask_and_ps(src, 0b0101, a, b);
7478        let e = _mm_set_ps(1., AND_32, 3., AND_32);
7479        assert_eq_m128(r, e);
7480    }
7481
7482    #[simd_test(enable = "avx512dq,avx512vl")]
7483    const fn test_mm_maskz_and_ps() {
7484        let a = _mm_set1_ps(OPRND1_32);
7485        let b = _mm_set1_ps(OPRND2_32);
7486        let r = _mm_maskz_and_ps(0b0101, a, b);
7487        let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32);
7488        assert_eq_m128(r, e);
7489    }
7490
7491    #[simd_test(enable = "avx512dq,avx512vl")]
7492    const fn test_mm256_mask_and_ps() {
7493        let a = _mm256_set1_ps(OPRND1_32);
7494        let b = _mm256_set1_ps(OPRND2_32);
7495        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7496        let r = _mm256_mask_and_ps(src, 0b01010101, a, b);
7497        let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32);
7498        assert_eq_m256(r, e);
7499    }
7500
7501    #[simd_test(enable = "avx512dq,avx512vl")]
7502    const fn test_mm256_maskz_and_ps() {
7503        let a = _mm256_set1_ps(OPRND1_32);
7504        let b = _mm256_set1_ps(OPRND2_32);
7505        let r = _mm256_maskz_and_ps(0b01010101, a, b);
7506        let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32);
7507        assert_eq_m256(r, e);
7508    }
7509
7510    #[simd_test(enable = "avx512dq")]
7511    const fn test_mm512_and_ps() {
7512        let a = _mm512_set1_ps(OPRND1_32);
7513        let b = _mm512_set1_ps(OPRND2_32);
7514        let r = _mm512_and_ps(a, b);
7515        let e = _mm512_set1_ps(AND_32);
7516        assert_eq_m512(r, e);
7517    }
7518
7519    #[simd_test(enable = "avx512dq")]
7520    const fn test_mm512_mask_and_ps() {
7521        let a = _mm512_set1_ps(OPRND1_32);
7522        let b = _mm512_set1_ps(OPRND2_32);
7523        let src = _mm512_set_ps(
7524            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7525        );
7526        let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b);
7527        let e = _mm512_set_ps(
7528            1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32,
7529            15., AND_32,
7530        );
7531        assert_eq_m512(r, e);
7532    }
7533
7534    #[simd_test(enable = "avx512dq")]
7535    const fn test_mm512_maskz_and_ps() {
7536        let a = _mm512_set1_ps(OPRND1_32);
7537        let b = _mm512_set1_ps(OPRND2_32);
7538        let r = _mm512_maskz_and_ps(0b0101010101010101, a, b);
7539        let e = _mm512_set_ps(
7540            0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0.,
7541            AND_32,
7542        );
7543        assert_eq_m512(r, e);
7544    }
7545
7546    #[simd_test(enable = "avx512dq,avx512vl")]
7547    const fn test_mm_mask_andnot_pd() {
7548        let a = _mm_set1_pd(OPRND1_64);
7549        let b = _mm_set1_pd(OPRND2_64);
7550        let src = _mm_set_pd(1., 2.);
7551        let r = _mm_mask_andnot_pd(src, 0b01, a, b);
7552        let e = _mm_set_pd(1., ANDN_64);
7553        assert_eq_m128d(r, e);
7554    }
7555
7556    #[simd_test(enable = "avx512dq,avx512vl")]
7557    const fn test_mm_maskz_andnot_pd() {
7558        let a = _mm_set1_pd(OPRND1_64);
7559        let b = _mm_set1_pd(OPRND2_64);
7560        let r = _mm_maskz_andnot_pd(0b01, a, b);
7561        let e = _mm_set_pd(0.0, ANDN_64);
7562        assert_eq_m128d(r, e);
7563    }
7564
7565    #[simd_test(enable = "avx512dq,avx512vl")]
7566    const fn test_mm256_mask_andnot_pd() {
7567        let a = _mm256_set1_pd(OPRND1_64);
7568        let b = _mm256_set1_pd(OPRND2_64);
7569        let src = _mm256_set_pd(1., 2., 3., 4.);
7570        let r = _mm256_mask_andnot_pd(src, 0b0101, a, b);
7571        let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64);
7572        assert_eq_m256d(r, e);
7573    }
7574
7575    #[simd_test(enable = "avx512dq,avx512vl")]
7576    const fn test_mm256_maskz_andnot_pd() {
7577        let a = _mm256_set1_pd(OPRND1_64);
7578        let b = _mm256_set1_pd(OPRND2_64);
7579        let r = _mm256_maskz_andnot_pd(0b0101, a, b);
7580        let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64);
7581        assert_eq_m256d(r, e);
7582    }
7583
7584    #[simd_test(enable = "avx512dq")]
7585    const fn test_mm512_andnot_pd() {
7586        let a = _mm512_set1_pd(OPRND1_64);
7587        let b = _mm512_set1_pd(OPRND2_64);
7588        let r = _mm512_andnot_pd(a, b);
7589        let e = _mm512_set1_pd(ANDN_64);
7590        assert_eq_m512d(r, e);
7591    }
7592
7593    #[simd_test(enable = "avx512dq")]
7594    const fn test_mm512_mask_andnot_pd() {
7595        let a = _mm512_set1_pd(OPRND1_64);
7596        let b = _mm512_set1_pd(OPRND2_64);
7597        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7598        let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b);
7599        let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64);
7600        assert_eq_m512d(r, e);
7601    }
7602
7603    #[simd_test(enable = "avx512dq")]
7604    const fn test_mm512_maskz_andnot_pd() {
7605        let a = _mm512_set1_pd(OPRND1_64);
7606        let b = _mm512_set1_pd(OPRND2_64);
7607        let r = _mm512_maskz_andnot_pd(0b01010101, a, b);
7608        let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64);
7609        assert_eq_m512d(r, e);
7610    }
7611
7612    #[simd_test(enable = "avx512dq,avx512vl")]
7613    const fn test_mm_mask_andnot_ps() {
7614        let a = _mm_set1_ps(OPRND1_32);
7615        let b = _mm_set1_ps(OPRND2_32);
7616        let src = _mm_set_ps(1., 2., 3., 4.);
7617        let r = _mm_mask_andnot_ps(src, 0b0101, a, b);
7618        let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32);
7619        assert_eq_m128(r, e);
7620    }
7621
7622    #[simd_test(enable = "avx512dq,avx512vl")]
7623    const fn test_mm_maskz_andnot_ps() {
7624        let a = _mm_set1_ps(OPRND1_32);
7625        let b = _mm_set1_ps(OPRND2_32);
7626        let r = _mm_maskz_andnot_ps(0b0101, a, b);
7627        let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32);
7628        assert_eq_m128(r, e);
7629    }
7630
7631    #[simd_test(enable = "avx512dq,avx512vl")]
7632    const fn test_mm256_mask_andnot_ps() {
7633        let a = _mm256_set1_ps(OPRND1_32);
7634        let b = _mm256_set1_ps(OPRND2_32);
7635        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7636        let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b);
7637        let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32);
7638        assert_eq_m256(r, e);
7639    }
7640
7641    #[simd_test(enable = "avx512dq,avx512vl")]
7642    const fn test_mm256_maskz_andnot_ps() {
7643        let a = _mm256_set1_ps(OPRND1_32);
7644        let b = _mm256_set1_ps(OPRND2_32);
7645        let r = _mm256_maskz_andnot_ps(0b01010101, a, b);
7646        let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32);
7647        assert_eq_m256(r, e);
7648    }
7649
7650    #[simd_test(enable = "avx512dq")]
7651    const fn test_mm512_andnot_ps() {
7652        let a = _mm512_set1_ps(OPRND1_32);
7653        let b = _mm512_set1_ps(OPRND2_32);
7654        let r = _mm512_andnot_ps(a, b);
7655        let e = _mm512_set1_ps(ANDN_32);
7656        assert_eq_m512(r, e);
7657    }
7658
7659    #[simd_test(enable = "avx512dq")]
7660    const fn test_mm512_mask_andnot_ps() {
7661        let a = _mm512_set1_ps(OPRND1_32);
7662        let b = _mm512_set1_ps(OPRND2_32);
7663        let src = _mm512_set_ps(
7664            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7665        );
7666        let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b);
7667        let e = _mm512_set_ps(
7668            1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13.,
7669            ANDN_32, 15., ANDN_32,
7670        );
7671        assert_eq_m512(r, e);
7672    }
7673
7674    #[simd_test(enable = "avx512dq")]
7675    const fn test_mm512_maskz_andnot_ps() {
7676        let a = _mm512_set1_ps(OPRND1_32);
7677        let b = _mm512_set1_ps(OPRND2_32);
7678        let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b);
7679        let e = _mm512_set_ps(
7680            0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0.,
7681            ANDN_32, 0., ANDN_32,
7682        );
7683        assert_eq_m512(r, e);
7684    }
7685
7686    #[simd_test(enable = "avx512dq,avx512vl")]
7687    const fn test_mm_mask_or_pd() {
7688        let a = _mm_set1_pd(OPRND1_64);
7689        let b = _mm_set1_pd(OPRND2_64);
7690        let src = _mm_set_pd(1., 2.);
7691        let r = _mm_mask_or_pd(src, 0b01, a, b);
7692        let e = _mm_set_pd(1., OR_64);
7693        assert_eq_m128d(r, e);
7694    }
7695
7696    #[simd_test(enable = "avx512dq,avx512vl")]
7697    const fn test_mm_maskz_or_pd() {
7698        let a = _mm_set1_pd(OPRND1_64);
7699        let b = _mm_set1_pd(OPRND2_64);
7700        let r = _mm_maskz_or_pd(0b01, a, b);
7701        let e = _mm_set_pd(0.0, OR_64);
7702        assert_eq_m128d(r, e);
7703    }
7704
7705    #[simd_test(enable = "avx512dq,avx512vl")]
7706    const fn test_mm256_mask_or_pd() {
7707        let a = _mm256_set1_pd(OPRND1_64);
7708        let b = _mm256_set1_pd(OPRND2_64);
7709        let src = _mm256_set_pd(1., 2., 3., 4.);
7710        let r = _mm256_mask_or_pd(src, 0b0101, a, b);
7711        let e = _mm256_set_pd(1., OR_64, 3., OR_64);
7712        assert_eq_m256d(r, e);
7713    }
7714
7715    #[simd_test(enable = "avx512dq,avx512vl")]
7716    const fn test_mm256_maskz_or_pd() {
7717        let a = _mm256_set1_pd(OPRND1_64);
7718        let b = _mm256_set1_pd(OPRND2_64);
7719        let r = _mm256_maskz_or_pd(0b0101, a, b);
7720        let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64);
7721        assert_eq_m256d(r, e);
7722    }
7723
7724    #[simd_test(enable = "avx512dq")]
7725    const fn test_mm512_or_pd() {
7726        let a = _mm512_set1_pd(OPRND1_64);
7727        let b = _mm512_set1_pd(OPRND2_64);
7728        let r = _mm512_or_pd(a, b);
7729        let e = _mm512_set1_pd(OR_64);
7730        assert_eq_m512d(r, e);
7731    }
7732
7733    #[simd_test(enable = "avx512dq")]
7734    const fn test_mm512_mask_or_pd() {
7735        let a = _mm512_set1_pd(OPRND1_64);
7736        let b = _mm512_set1_pd(OPRND2_64);
7737        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7738        let r = _mm512_mask_or_pd(src, 0b01010101, a, b);
7739        let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64);
7740        assert_eq_m512d(r, e);
7741    }
7742
7743    #[simd_test(enable = "avx512dq")]
7744    const fn test_mm512_maskz_or_pd() {
7745        let a = _mm512_set1_pd(OPRND1_64);
7746        let b = _mm512_set1_pd(OPRND2_64);
7747        let r = _mm512_maskz_or_pd(0b01010101, a, b);
7748        let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64);
7749        assert_eq_m512d(r, e);
7750    }
7751
7752    #[simd_test(enable = "avx512dq,avx512vl")]
7753    const fn test_mm_mask_or_ps() {
7754        let a = _mm_set1_ps(OPRND1_32);
7755        let b = _mm_set1_ps(OPRND2_32);
7756        let src = _mm_set_ps(1., 2., 3., 4.);
7757        let r = _mm_mask_or_ps(src, 0b0101, a, b);
7758        let e = _mm_set_ps(1., OR_32, 3., OR_32);
7759        assert_eq_m128(r, e);
7760    }
7761
7762    #[simd_test(enable = "avx512dq,avx512vl")]
7763    const fn test_mm_maskz_or_ps() {
7764        let a = _mm_set1_ps(OPRND1_32);
7765        let b = _mm_set1_ps(OPRND2_32);
7766        let r = _mm_maskz_or_ps(0b0101, a, b);
7767        let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32);
7768        assert_eq_m128(r, e);
7769    }
7770
7771    #[simd_test(enable = "avx512dq,avx512vl")]
7772    const fn test_mm256_mask_or_ps() {
7773        let a = _mm256_set1_ps(OPRND1_32);
7774        let b = _mm256_set1_ps(OPRND2_32);
7775        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7776        let r = _mm256_mask_or_ps(src, 0b01010101, a, b);
7777        let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32);
7778        assert_eq_m256(r, e);
7779    }
7780
7781    #[simd_test(enable = "avx512dq,avx512vl")]
7782    const fn test_mm256_maskz_or_ps() {
7783        let a = _mm256_set1_ps(OPRND1_32);
7784        let b = _mm256_set1_ps(OPRND2_32);
7785        let r = _mm256_maskz_or_ps(0b01010101, a, b);
7786        let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32);
7787        assert_eq_m256(r, e);
7788    }
7789
7790    #[simd_test(enable = "avx512dq")]
7791    const fn test_mm512_or_ps() {
7792        let a = _mm512_set1_ps(OPRND1_32);
7793        let b = _mm512_set1_ps(OPRND2_32);
7794        let r = _mm512_or_ps(a, b);
7795        let e = _mm512_set1_ps(OR_32);
7796        assert_eq_m512(r, e);
7797    }
7798
7799    #[simd_test(enable = "avx512dq")]
7800    const fn test_mm512_mask_or_ps() {
7801        let a = _mm512_set1_ps(OPRND1_32);
7802        let b = _mm512_set1_ps(OPRND2_32);
7803        let src = _mm512_set_ps(
7804            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7805        );
7806        let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b);
7807        let e = _mm512_set_ps(
7808            1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15.,
7809            OR_32,
7810        );
7811        assert_eq_m512(r, e);
7812    }
7813
7814    #[simd_test(enable = "avx512dq")]
7815    const fn test_mm512_maskz_or_ps() {
7816        let a = _mm512_set1_ps(OPRND1_32);
7817        let b = _mm512_set1_ps(OPRND2_32);
7818        let r = _mm512_maskz_or_ps(0b0101010101010101, a, b);
7819        let e = _mm512_set_ps(
7820            0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32,
7821        );
7822        assert_eq_m512(r, e);
7823    }
7824
7825    #[simd_test(enable = "avx512dq,avx512vl")]
7826    const fn test_mm_mask_xor_pd() {
7827        let a = _mm_set1_pd(OPRND1_64);
7828        let b = _mm_set1_pd(OPRND2_64);
7829        let src = _mm_set_pd(1., 2.);
7830        let r = _mm_mask_xor_pd(src, 0b01, a, b);
7831        let e = _mm_set_pd(1., XOR_64);
7832        assert_eq_m128d(r, e);
7833    }
7834
7835    #[simd_test(enable = "avx512dq,avx512vl")]
7836    const fn test_mm_maskz_xor_pd() {
7837        let a = _mm_set1_pd(OPRND1_64);
7838        let b = _mm_set1_pd(OPRND2_64);
7839        let r = _mm_maskz_xor_pd(0b01, a, b);
7840        let e = _mm_set_pd(0., XOR_64);
7841        assert_eq_m128d(r, e);
7842    }
7843
7844    #[simd_test(enable = "avx512dq,avx512vl")]
7845    const fn test_mm256_mask_xor_pd() {
7846        let a = _mm256_set1_pd(OPRND1_64);
7847        let b = _mm256_set1_pd(OPRND2_64);
7848        let src = _mm256_set_pd(1., 2., 3., 4.);
7849        let r = _mm256_mask_xor_pd(src, 0b0101, a, b);
7850        let e = _mm256_set_pd(1., XOR_64, 3., XOR_64);
7851        assert_eq_m256d(r, e);
7852    }
7853
7854    #[simd_test(enable = "avx512dq,avx512vl")]
7855    const fn test_mm256_maskz_xor_pd() {
7856        let a = _mm256_set1_pd(OPRND1_64);
7857        let b = _mm256_set1_pd(OPRND2_64);
7858        let r = _mm256_maskz_xor_pd(0b0101, a, b);
7859        let e = _mm256_set_pd(0., XOR_64, 0., XOR_64);
7860        assert_eq_m256d(r, e);
7861    }
7862
7863    #[simd_test(enable = "avx512dq")]
7864    const fn test_mm512_xor_pd() {
7865        let a = _mm512_set1_pd(OPRND1_64);
7866        let b = _mm512_set1_pd(OPRND2_64);
7867        let r = _mm512_xor_pd(a, b);
7868        let e = _mm512_set1_pd(XOR_64);
7869        assert_eq_m512d(r, e);
7870    }
7871
7872    #[simd_test(enable = "avx512dq")]
7873    const fn test_mm512_mask_xor_pd() {
7874        let a = _mm512_set1_pd(OPRND1_64);
7875        let b = _mm512_set1_pd(OPRND2_64);
7876        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7877        let r = _mm512_mask_xor_pd(src, 0b01010101, a, b);
7878        let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64);
7879        assert_eq_m512d(r, e);
7880    }
7881
7882    #[simd_test(enable = "avx512dq")]
7883    const fn test_mm512_maskz_xor_pd() {
7884        let a = _mm512_set1_pd(OPRND1_64);
7885        let b = _mm512_set1_pd(OPRND2_64);
7886        let r = _mm512_maskz_xor_pd(0b01010101, a, b);
7887        let e = _mm512_set_pd(0., XOR_64, 0., XOR_64, 0., XOR_64, 0., XOR_64);
7888        assert_eq_m512d(r, e);
7889    }
7890
7891    #[simd_test(enable = "avx512dq,avx512vl")]
7892    const fn test_mm_mask_xor_ps() {
7893        let a = _mm_set1_ps(OPRND1_32);
7894        let b = _mm_set1_ps(OPRND2_32);
7895        let src = _mm_set_ps(1., 2., 3., 4.);
7896        let r = _mm_mask_xor_ps(src, 0b0101, a, b);
7897        let e = _mm_set_ps(1., XOR_32, 3., XOR_32);
7898        assert_eq_m128(r, e);
7899    }
7900
7901    #[simd_test(enable = "avx512dq,avx512vl")]
7902    const fn test_mm_maskz_xor_ps() {
7903        let a = _mm_set1_ps(OPRND1_32);
7904        let b = _mm_set1_ps(OPRND2_32);
7905        let r = _mm_maskz_xor_ps(0b0101, a, b);
7906        let e = _mm_set_ps(0., XOR_32, 0., XOR_32);
7907        assert_eq_m128(r, e);
7908    }
7909
7910    #[simd_test(enable = "avx512dq,avx512vl")]
7911    const fn test_mm256_mask_xor_ps() {
7912        let a = _mm256_set1_ps(OPRND1_32);
7913        let b = _mm256_set1_ps(OPRND2_32);
7914        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7915        let r = _mm256_mask_xor_ps(src, 0b01010101, a, b);
7916        let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32);
7917        assert_eq_m256(r, e);
7918    }
7919
7920    #[simd_test(enable = "avx512dq,avx512vl")]
7921    const fn test_mm256_maskz_xor_ps() {
7922        let a = _mm256_set1_ps(OPRND1_32);
7923        let b = _mm256_set1_ps(OPRND2_32);
7924        let r = _mm256_maskz_xor_ps(0b01010101, a, b);
7925        let e = _mm256_set_ps(0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32);
7926        assert_eq_m256(r, e);
7927    }
7928
7929    #[simd_test(enable = "avx512dq")]
7930    const fn test_mm512_xor_ps() {
7931        let a = _mm512_set1_ps(OPRND1_32);
7932        let b = _mm512_set1_ps(OPRND2_32);
7933        let r = _mm512_xor_ps(a, b);
7934        let e = _mm512_set1_ps(XOR_32);
7935        assert_eq_m512(r, e);
7936    }
7937
7938    #[simd_test(enable = "avx512dq")]
7939    const fn test_mm512_mask_xor_ps() {
7940        let a = _mm512_set1_ps(OPRND1_32);
7941        let b = _mm512_set1_ps(OPRND2_32);
7942        let src = _mm512_set_ps(
7943            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7944        );
7945        let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b);
7946        let e = _mm512_set_ps(
7947            1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32,
7948            15., XOR_32,
7949        );
7950        assert_eq_m512(r, e);
7951    }
7952
7953    #[simd_test(enable = "avx512dq")]
7954    const fn test_mm512_maskz_xor_ps() {
7955        let a = _mm512_set1_ps(OPRND1_32);
7956        let b = _mm512_set1_ps(OPRND2_32);
7957        let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b);
7958        let e = _mm512_set_ps(
7959            0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0.,
7960            XOR_32,
7961        );
7962        assert_eq_m512(r, e);
7963    }
7964
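    // `broadcast_f32x2` repeats the two lowest f32 lanes of the source across
    // the destination; with `_mm_set_ps(1., 2., 3., 4.)` those lanes hold
    // 4. (lane 0) and 3. (lane 1), giving the alternating 4./3. pattern below.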
7965    #[simd_test(enable = "avx512dq,avx512vl")]
7966    const fn test_mm256_broadcast_f32x2() {
7967        let a = _mm_set_ps(1., 2., 3., 4.);
7968        let r = _mm256_broadcast_f32x2(a);
7969        let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.);
7970        assert_eq_m256(r, e);
7971    }
7972
7973    #[simd_test(enable = "avx512dq,avx512vl")]
7974    const fn test_mm256_mask_broadcast_f32x2() {
7975        let a = _mm_set_ps(1., 2., 3., 4.);
7976        let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.);
7977        let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a);
7978        let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.);
7979        assert_eq_m256(r, e);
7980    }
7981
7982    #[simd_test(enable = "avx512dq,avx512vl")]
7983    const fn test_mm256_maskz_broadcast_f32x2() {
7984        let a = _mm_set_ps(1., 2., 3., 4.);
7985        let r = _mm256_maskz_broadcast_f32x2(0b01101001, a);
7986        let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.);
7987        assert_eq_m256(r, e);
7988    }
7989
7990    #[simd_test(enable = "avx512dq")]
7991    const fn test_mm512_broadcast_f32x2() {
7992        let a = _mm_set_ps(1., 2., 3., 4.);
7993        let r = _mm512_broadcast_f32x2(a);
7994        let e = _mm512_set_ps(
7995            3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4.,
7996        );
7997        assert_eq_m512(r, e);
7998    }
7999
8000    #[simd_test(enable = "avx512dq")]
8001    const fn test_mm512_mask_broadcast_f32x2() {
8002        let a = _mm_set_ps(1., 2., 3., 4.);
8003        let b = _mm512_set_ps(
8004            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
8005        );
8006        let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a);
8007        let e = _mm512_set_ps(
8008            5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20.,
8009        );
8010        assert_eq_m512(r, e);
8011    }
8012
8013    #[simd_test(enable = "avx512dq")]
8014    const fn test_mm512_maskz_broadcast_f32x2() {
8015        let a = _mm_set_ps(1., 2., 3., 4.);
8016        let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a);
8017        let e = _mm512_set_ps(
8018            0., 4., 3., 0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0.,
8019        );
8020        assert_eq_m512(r, e);
8021    }
8022
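    // `broadcast_f32x8` copies the entire 256-bit source into both halves of
    // the 512-bit destination.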
8023    #[simd_test(enable = "avx512dq")]
8024    const fn test_mm512_broadcast_f32x8() {
8025        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8026        let r = _mm512_broadcast_f32x8(a);
8027        let e = _mm512_set_ps(
8028            1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8.,
8029        );
8030        assert_eq_m512(r, e);
8031    }
8032
8033    #[simd_test(enable = "avx512dq")]
8034    const fn test_mm512_mask_broadcast_f32x8() {
8035        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8036        let b = _mm512_set_ps(
8037            9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,
8038        );
8039        let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a);
8040        let e = _mm512_set_ps(
8041            9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24.,
8042        );
8043        assert_eq_m512(r, e);
8044    }
8045
8046    #[simd_test(enable = "avx512dq")]
8047    const fn test_mm512_maskz_broadcast_f32x8() {
8048        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8049        let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a);
8050        let e = _mm512_set_ps(
8051            0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0.,
8052        );
8053        assert_eq_m512(r, e);
8054    }
8055
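    // `broadcast_f64x2` repeats the full 128-bit source (both f64 lanes) in
    // every 128-bit lane of the destination.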
8056    #[simd_test(enable = "avx512dq,avx512vl")]
8057    const fn test_mm256_broadcast_f64x2() {
8058        let a = _mm_set_pd(1., 2.);
8059        let r = _mm256_broadcast_f64x2(a);
8060        let e = _mm256_set_pd(1., 2., 1., 2.);
8061        assert_eq_m256d(r, e);
8062    }
8063
8064    #[simd_test(enable = "avx512dq,avx512vl")]
8065    const fn test_mm256_mask_broadcast_f64x2() {
8066        let a = _mm_set_pd(1., 2.);
8067        let b = _mm256_set_pd(3., 4., 5., 6.);
8068        let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a);
8069        let e = _mm256_set_pd(3., 2., 1., 6.);
8070        assert_eq_m256d(r, e);
8071    }
8072
8073    #[simd_test(enable = "avx512dq,avx512vl")]
8074    const fn test_mm256_maskz_broadcast_f64x2() {
8075        let a = _mm_set_pd(1., 2.);
8076        let r = _mm256_maskz_broadcast_f64x2(0b0110, a);
8077        let e = _mm256_set_pd(0., 2., 1., 0.);
8078        assert_eq_m256d(r, e);
8079    }
8080
8081    #[simd_test(enable = "avx512dq")]
8082    const fn test_mm512_broadcast_f64x2() {
8083        let a = _mm_set_pd(1., 2.);
8084        let r = _mm512_broadcast_f64x2(a);
8085        let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.);
8086        assert_eq_m512d(r, e);
8087    }
8088
8089    #[simd_test(enable = "avx512dq")]
8090    const fn test_mm512_mask_broadcast_f64x2() {
8091        let a = _mm_set_pd(1., 2.);
8092        let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
8093        let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a);
8094        let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.);
8095        assert_eq_m512d(r, e);
8096    }
8097
8098    #[simd_test(enable = "avx512dq")]
8099    const fn test_mm512_maskz_broadcast_f64x2() {
8100        let a = _mm_set_pd(1., 2.);
8101        let r = _mm512_maskz_broadcast_f64x2(0b01101001, a);
8102        let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.);
8103        assert_eq_m512d(r, e);
8104    }
8105
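    // Integer counterpart of `broadcast_f32x2`: the two lowest i32 lanes
    // (4 in lane 0, 3 in lane 1) are repeated across the destination.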
8106    #[simd_test(enable = "avx512dq,avx512vl")]
8107    const fn test_mm_broadcast_i32x2() {
8108        let a = _mm_set_epi32(1, 2, 3, 4);
8109        let r = _mm_broadcast_i32x2(a);
8110        let e = _mm_set_epi32(3, 4, 3, 4);
8111        assert_eq_m128i(r, e);
8112    }
8113
8114    #[simd_test(enable = "avx512dq,avx512vl")]
8115    const fn test_mm_mask_broadcast_i32x2() {
8116        let a = _mm_set_epi32(1, 2, 3, 4);
8117        let b = _mm_set_epi32(5, 6, 7, 8);
8118        let r = _mm_mask_broadcast_i32x2(b, 0b0110, a);
8119        let e = _mm_set_epi32(5, 4, 3, 8);
8120        assert_eq_m128i(r, e);
8121    }
8122
8123    #[simd_test(enable = "avx512dq,avx512vl")]
8124    const fn test_mm_maskz_broadcast_i32x2() {
8125        let a = _mm_set_epi32(1, 2, 3, 4);
8126        let r = _mm_maskz_broadcast_i32x2(0b0110, a);
8127        let e = _mm_set_epi32(0, 4, 3, 0);
8128        assert_eq_m128i(r, e);
8129    }
8130
8131    #[simd_test(enable = "avx512dq,avx512vl")]
8132    const fn test_mm256_broadcast_i32x2() {
8133        let a = _mm_set_epi32(1, 2, 3, 4);
8134        let r = _mm256_broadcast_i32x2(a);
8135        let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4);
8136        assert_eq_m256i(r, e);
8137    }
8138
8139    #[simd_test(enable = "avx512dq,avx512vl")]
8140    const fn test_mm256_mask_broadcast_i32x2() {
8141        let a = _mm_set_epi32(1, 2, 3, 4);
8142        let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
8143        let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a);
8144        let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4);
8145        assert_eq_m256i(r, e);
8146    }
8147
8148    #[simd_test(enable = "avx512dq,avx512vl")]
8149    const fn test_mm256_maskz_broadcast_i32x2() {
8150        let a = _mm_set_epi32(1, 2, 3, 4);
8151        let r = _mm256_maskz_broadcast_i32x2(0b01101001, a);
8152        let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4);
8153        assert_eq_m256i(r, e);
8154    }
8155
8156    #[simd_test(enable = "avx512dq")]
8157    const fn test_mm512_broadcast_i32x2() {
8158        let a = _mm_set_epi32(1, 2, 3, 4);
8159        let r = _mm512_broadcast_i32x2(a);
8160        let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4);
8161        assert_eq_m512i(r, e);
8162    }
8163
8164    #[simd_test(enable = "avx512dq")]
8165    const fn test_mm512_mask_broadcast_i32x2() {
8166        let a = _mm_set_epi32(1, 2, 3, 4);
8167        let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
8168        let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a);
8169        let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20);
8170        assert_eq_m512i(r, e);
8171    }
8172
8173    #[simd_test(enable = "avx512dq")]
8174    const fn test_mm512_maskz_broadcast_i32x2() {
8175        let a = _mm_set_epi32(1, 2, 3, 4);
8176        let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a);
8177        let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0);
8178        assert_eq_m512i(r, e);
8179    }
8180
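    // `broadcast_i32x8` mirrors `broadcast_f32x8` for 32-bit integers.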
8181    #[simd_test(enable = "avx512dq")]
8182    const fn test_mm512_broadcast_i32x8() {
8183        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8184        let r = _mm512_broadcast_i32x8(a);
8185        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
8186        assert_eq_m512i(r, e);
8187    }
8188
8189    #[simd_test(enable = "avx512dq")]
8190    const fn test_mm512_mask_broadcast_i32x8() {
8191        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8192        let b = _mm512_set_epi32(
8193            9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
8194        );
8195        let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a);
8196        let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24);
8197        assert_eq_m512i(r, e);
8198    }
8199
8200    #[simd_test(enable = "avx512dq")]
8201    const fn test_mm512_maskz_broadcast_i32x8() {
8202        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8203        let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a);
8204        let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0);
8205        assert_eq_m512i(r, e);
8206    }
8207
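    // `broadcast_i64x2` mirrors `broadcast_f64x2` for 64-bit integers.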
8208    #[simd_test(enable = "avx512dq,avx512vl")]
8209    const fn test_mm256_broadcast_i64x2() {
8210        let a = _mm_set_epi64x(1, 2);
8211        let r = _mm256_broadcast_i64x2(a);
8212        let e = _mm256_set_epi64x(1, 2, 1, 2);
8213        assert_eq_m256i(r, e);
8214    }
8215
8216    #[simd_test(enable = "avx512dq,avx512vl")]
8217    const fn test_mm256_mask_broadcast_i64x2() {
8218        let a = _mm_set_epi64x(1, 2);
8219        let b = _mm256_set_epi64x(3, 4, 5, 6);
8220        let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a);
8221        let e = _mm256_set_epi64x(3, 2, 1, 6);
8222        assert_eq_m256i(r, e);
8223    }
8224
8225    #[simd_test(enable = "avx512dq,avx512vl")]
8226    const fn test_mm256_maskz_broadcast_i64x2() {
8227        let a = _mm_set_epi64x(1, 2);
8228        let r = _mm256_maskz_broadcast_i64x2(0b0110, a);
8229        let e = _mm256_set_epi64x(0, 2, 1, 0);
8230        assert_eq_m256i(r, e);
8231    }
8232
8233    #[simd_test(enable = "avx512dq")]
8234    const fn test_mm512_broadcast_i64x2() {
8235        let a = _mm_set_epi64x(1, 2);
8236        let r = _mm512_broadcast_i64x2(a);
8237        let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2);
8238        assert_eq_m512i(r, e);
8239    }
8240
8241    #[simd_test(enable = "avx512dq")]
8242    const fn test_mm512_mask_broadcast_i64x2() {
8243        let a = _mm_set_epi64x(1, 2);
8244        let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10);
8245        let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a);
8246        let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2);
8247        assert_eq_m512i(r, e);
8248    }
8249
8250    #[simd_test(enable = "avx512dq")]
8251    const fn test_mm512_maskz_broadcast_i64x2() {
8252        let a = _mm_set_epi64x(1, 2);
8253        let r = _mm512_maskz_broadcast_i64x2(0b01101001, a);
8254        let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2);
8255        assert_eq_m512i(r, e);
8256    }
8257
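    // Extract tests: the const generic index selects which 256-bit (x8) or
    // 128-bit (x2) chunk is extracted, counting up from the least significant
    // chunk. Index 1 of `a` below is its upper half, which holds 1.0..=8.0.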
8258    #[simd_test(enable = "avx512dq")]
8259    const fn test_mm512_extractf32x8_ps() {
8260        let a = _mm512_set_ps(
8261            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8262        );
8263        let r = _mm512_extractf32x8_ps::<1>(a);
8264        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8265        assert_eq_m256(r, e);
8266    }
8267
8268    #[simd_test(enable = "avx512dq")]
8269    const fn test_mm512_mask_extractf32x8_ps() {
8270        let a = _mm512_set_ps(
8271            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8272        );
8273        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8274        let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a);
8275        let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.);
8276        assert_eq_m256(r, e);
8277    }
8278
8279    #[simd_test(enable = "avx512dq")]
8280    const fn test_mm512_maskz_extractf32x8_ps() {
8281        let a = _mm512_set_ps(
8282            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8283        );
8284        let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a);
8285        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8286        assert_eq_m256(r, e);
8287    }
8288
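    // For the 256-bit x2 extracts, index 1 is the upper 128-bit lane.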
8289    #[simd_test(enable = "avx512dq,avx512vl")]
8290    const fn test_mm256_extractf64x2_pd() {
8291        let a = _mm256_set_pd(1., 2., 3., 4.);
8292        let r = _mm256_extractf64x2_pd::<1>(a);
8293        let e = _mm_set_pd(1., 2.);
8294        assert_eq_m128d(r, e);
8295    }
8296
8297    #[simd_test(enable = "avx512dq,avx512vl")]
8298    const fn test_mm256_mask_extractf64x2_pd() {
8299        let a = _mm256_set_pd(1., 2., 3., 4.);
8300        let b = _mm_set_pd(5., 6.);
8301        let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a);
8302        let e = _mm_set_pd(5., 2.);
8303        assert_eq_m128d(r, e);
8304    }
8305
8306    #[simd_test(enable = "avx512dq,avx512vl")]
8307    const fn test_mm256_maskz_extractf64x2_pd() {
8308        let a = _mm256_set_pd(1., 2., 3., 4.);
8309        let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a);
8310        let e = _mm_set_pd(0., 2.);
8311        assert_eq_m128d(r, e);
8312    }
8313
8314    #[simd_test(enable = "avx512dq")]
8315    const fn test_mm512_extractf64x2_pd() {
8316        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8317        let r = _mm512_extractf64x2_pd::<2>(a);
8318        let e = _mm_set_pd(3., 4.);
8319        assert_eq_m128d(r, e);
8320    }
8321
8322    #[simd_test(enable = "avx512dq")]
8323    const fn test_mm512_mask_extractf64x2_pd() {
8324        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8325        let b = _mm_set_pd(9., 10.);
8326        let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a);
8327        let e = _mm_set_pd(9., 4.);
8328        assert_eq_m128d(r, e);
8329    }
8330
8331    #[simd_test(enable = "avx512dq")]
8332    const fn test_mm512_maskz_extractf64x2_pd() {
8333        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8334        let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a);
8335        let e = _mm_set_pd(0., 4.);
8336        assert_eq_m128d(r, e);
8337    }
8338
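    // Integer (`epi32`/`epi64`) counterparts of the float extract tests above.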
8339    #[simd_test(enable = "avx512dq")]
8340    const fn test_mm512_extracti32x8_epi32() {
8341        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8342        let r = _mm512_extracti32x8_epi32::<1>(a);
8343        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8344        assert_eq_m256i(r, e);
8345    }
8346
8347    #[simd_test(enable = "avx512dq")]
8348    const fn test_mm512_mask_extracti32x8_epi32() {
8349        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8350        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8351        let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a);
8352        let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8);
8353        assert_eq_m256i(r, e);
8354    }
8355
8356    #[simd_test(enable = "avx512dq")]
8357    const fn test_mm512_maskz_extracti32x8_epi32() {
8358        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8359        let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a);
8360        let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8);
8361        assert_eq_m256i(r, e);
8362    }
8363
8364    #[simd_test(enable = "avx512dq,avx512vl")]
8365    const fn test_mm256_extracti64x2_epi64() {
8366        let a = _mm256_set_epi64x(1, 2, 3, 4);
8367        let r = _mm256_extracti64x2_epi64::<1>(a);
8368        let e = _mm_set_epi64x(1, 2);
8369        assert_eq_m128i(r, e);
8370    }
8371
8372    #[simd_test(enable = "avx512dq,avx512vl")]
8373    const fn test_mm256_mask_extracti64x2_epi64() {
8374        let a = _mm256_set_epi64x(1, 2, 3, 4);
8375        let b = _mm_set_epi64x(5, 6);
8376        let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a);
8377        let e = _mm_set_epi64x(5, 2);
8378        assert_eq_m128i(r, e);
8379    }
8380
8381    #[simd_test(enable = "avx512dq,avx512vl")]
8382    const fn test_mm256_maskz_extracti64x2_epi64() {
8383        let a = _mm256_set_epi64x(1, 2, 3, 4);
8384        let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a);
8385        let e = _mm_set_epi64x(0, 2);
8386        assert_eq_m128i(r, e);
8387    }
8388
8389    #[simd_test(enable = "avx512dq")]
8390    const fn test_mm512_extracti64x2_epi64() {
8391        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8392        let r = _mm512_extracti64x2_epi64::<2>(a);
8393        let e = _mm_set_epi64x(3, 4);
8394        assert_eq_m128i(r, e);
8395    }
8396
8397    #[simd_test(enable = "avx512dq")]
8398    const fn test_mm512_mask_extracti64x2_epi64() {
8399        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8400        let b = _mm_set_epi64x(9, 10);
8401        let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a);
8402        let e = _mm_set_epi64x(9, 4);
8403        assert_eq_m128i(r, e);
8404    }
8405
8406    #[simd_test(enable = "avx512dq")]
8407    const fn test_mm512_maskz_extracti64x2_epi64() {
8408        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8409        let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a);
8410        let e = _mm_set_epi64x(0, 4);
8411        assert_eq_m128i(r, e);
8412    }
8413
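    // Insert tests: the const generic index selects the 256-bit (x8) or
    // 128-bit (x2) chunk of `a` that is replaced by `b`; all other lanes pass
    // through unchanged before the write/zero mask is applied.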
8414    #[simd_test(enable = "avx512dq")]
8415    const fn test_mm512_insertf32x8() {
8416        let a = _mm512_set_ps(
8417            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8418        );
8419        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8420        let r = _mm512_insertf32x8::<1>(a, b);
8421        let e = _mm512_set_ps(
8422            17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16.,
8423        );
8424        assert_eq_m512(r, e);
8425    }
8426
8427    #[simd_test(enable = "avx512dq")]
8428    const fn test_mm512_mask_insertf32x8() {
8429        let a = _mm512_set_ps(
8430            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8431        );
8432        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8433        let src = _mm512_set_ps(
8434            25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40.,
8435        );
8436        let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b);
8437        let e = _mm512_set_ps(
8438            25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40.,
8439        );
8440        assert_eq_m512(r, e);
8441    }
8442
8443    #[simd_test(enable = "avx512dq")]
8444    const fn test_mm512_maskz_insertf32x8() {
8445        let a = _mm512_set_ps(
8446            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8447        );
8448        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8449        let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b);
8450        let e = _mm512_set_ps(
8451            0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0.,
8452        );
8453        assert_eq_m512(r, e);
8454    }
8455
8456    #[simd_test(enable = "avx512dq,avx512vl")]
8457    const fn test_mm256_insertf64x2() {
8458        let a = _mm256_set_pd(1., 2., 3., 4.);
8459        let b = _mm_set_pd(5., 6.);
8460        let r = _mm256_insertf64x2::<1>(a, b);
8461        let e = _mm256_set_pd(5., 6., 3., 4.);
8462        assert_eq_m256d(r, e);
8463    }
8464
8465    #[simd_test(enable = "avx512dq,avx512vl")]
8466    const fn test_mm256_mask_insertf64x2() {
8467        let a = _mm256_set_pd(1., 2., 3., 4.);
8468        let b = _mm_set_pd(5., 6.);
8469        let src = _mm256_set_pd(7., 8., 9., 10.);
8470        let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b);
8471        let e = _mm256_set_pd(7., 6., 3., 10.);
8472        assert_eq_m256d(r, e);
8473    }
8474
8475    #[simd_test(enable = "avx512dq,avx512vl")]
8476    const fn test_mm256_maskz_insertf64x2() {
8477        let a = _mm256_set_pd(1., 2., 3., 4.);
8478        let b = _mm_set_pd(5., 6.);
8479        let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b);
8480        let e = _mm256_set_pd(0., 6., 3., 0.);
8481        assert_eq_m256d(r, e);
8482    }
8483
8484    #[simd_test(enable = "avx512dq")]
8485    const fn test_mm512_insertf64x2() {
8486        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8487        let b = _mm_set_pd(9., 10.);
8488        let r = _mm512_insertf64x2::<2>(a, b);
8489        let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.);
8490        assert_eq_m512d(r, e);
8491    }
8492
8493    #[simd_test(enable = "avx512dq")]
8494    const fn test_mm512_mask_insertf64x2() {
8495        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8496        let b = _mm_set_pd(9., 10.);
8497        let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.);
8498        let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b);
8499        let e = _mm512_set_pd(11., 2., 9., 14., 5., 16., 17., 8.);
8500        assert_eq_m512d(r, e);
8501    }
8502
8503    #[simd_test(enable = "avx512dq")]
8504    const fn test_mm512_maskz_insertf64x2() {
8505        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8506        let b = _mm_set_pd(9., 10.);
8507        let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b);
8508        let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.);
8509        assert_eq_m512d(r, e);
8510    }
8511
8512    #[simd_test(enable = "avx512dq")]
8513    const fn test_mm512_inserti32x8() {
8514        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8515        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8516        let r = _mm512_inserti32x8::<1>(a, b);
8517        let e = _mm512_set_epi32(
8518            17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16,
8519        );
8520        assert_eq_m512i(r, e);
8521    }
8522
8523    #[simd_test(enable = "avx512dq")]
8524    const fn test_mm512_mask_inserti32x8() {
8525        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8526        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8527        let src = _mm512_set_epi32(
8528            25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
8529        );
8530        let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b);
8531        let e = _mm512_set_epi32(
8532            25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40,
8533        );
8534        assert_eq_m512i(r, e);
8535    }
8536
8537    #[simd_test(enable = "avx512dq")]
8538    const fn test_mm512_maskz_inserti32x8() {
8539        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8540        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8541        let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b);
8542        let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0);
8543        assert_eq_m512i(r, e);
8544    }
8545
8546    #[simd_test(enable = "avx512dq,avx512vl")]
8547    const fn test_mm256_inserti64x2() {
8548        let a = _mm256_set_epi64x(1, 2, 3, 4);
8549        let b = _mm_set_epi64x(5, 6);
8550        let r = _mm256_inserti64x2::<1>(a, b);
8551        let e = _mm256_set_epi64x(5, 6, 3, 4);
8552        assert_eq_m256i(r, e);
8553    }
8554
8555    #[simd_test(enable = "avx512dq,avx512vl")]
8556    const fn test_mm256_mask_inserti64x2() {
8557        let a = _mm256_set_epi64x(1, 2, 3, 4);
8558        let b = _mm_set_epi64x(5, 6);
8559        let src = _mm256_set_epi64x(7, 8, 9, 10);
8560        let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b);
8561        let e = _mm256_set_epi64x(7, 6, 3, 10);
8562        assert_eq_m256i(r, e);
8563    }
8564
8565    #[simd_test(enable = "avx512dq,avx512vl")]
8566    const fn test_mm256_maskz_inserti64x2() {
8567        let a = _mm256_set_epi64x(1, 2, 3, 4);
8568        let b = _mm_set_epi64x(5, 6);
8569        let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b);
8570        let e = _mm256_set_epi64x(0, 6, 3, 0);
8571        assert_eq_m256i(r, e);
8572    }
8573
8574    #[simd_test(enable = "avx512dq")]
8575    const fn test_mm512_inserti64x2() {
8576        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8577        let b = _mm_set_epi64x(9, 10);
8578        let r = _mm512_inserti64x2::<2>(a, b);
8579        let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8);
8580        assert_eq_m512i(r, e);
8581    }
8582
8583    #[simd_test(enable = "avx512dq")]
8584    const fn test_mm512_mask_inserti64x2() {
8585        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8586        let b = _mm_set_epi64x(9, 10);
8587        let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18);
8588        let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b);
8589        let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8);
8590        assert_eq_m512i(r, e);
8591    }
8592
8593    #[simd_test(enable = "avx512dq")]
8594    const fn test_mm512_maskz_inserti64x2() {
8595        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8596        let b = _mm_set_epi64x(9, 10);
8597        let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b);
8598        let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8);
8599        assert_eq_m512i(r, e);
8600    }
8601
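    // Conversion tests. Unlike the tests above these are plain `fn`s,
    // matching the non-`const` conversion intrinsics they exercise.
    // `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` requests
    // round-to-nearest-even with exceptions suppressed; the integral inputs
    // used here convert exactly, so the rounding mode never changes a result.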
8602    #[simd_test(enable = "avx512dq")]
8603    fn test_mm512_cvt_roundepi64_pd() {
8604        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8605        let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8606        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8607        assert_eq_m512d(r, e);
8608    }
8609
8610    #[simd_test(enable = "avx512dq")]
8611    fn test_mm512_mask_cvt_roundepi64_pd() {
8612        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8613        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8614        let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8615            b, 0b01101001, a,
8616        );
8617        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8618        assert_eq_m512d(r, e);
8619    }
8620
8621    #[simd_test(enable = "avx512dq")]
8622    fn test_mm512_maskz_cvt_roundepi64_pd() {
8623        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8624        let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8625            0b01101001, a,
8626        );
8627        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8628        assert_eq_m512d(r, e);
8629    }
8630
8631    #[simd_test(enable = "avx512dq,avx512vl")]
8632    fn test_mm_cvtepi64_pd() {
8633        let a = _mm_set_epi64x(1, 2);
8634        let r = _mm_cvtepi64_pd(a);
8635        let e = _mm_set_pd(1., 2.);
8636        assert_eq_m128d(r, e);
8637    }
8638
8639    #[simd_test(enable = "avx512dq,avx512vl")]
8640    fn test_mm_mask_cvtepi64_pd() {
8641        let a = _mm_set_epi64x(1, 2);
8642        let b = _mm_set_pd(3., 4.);
8643        let r = _mm_mask_cvtepi64_pd(b, 0b01, a);
8644        let e = _mm_set_pd(3., 2.);
8645        assert_eq_m128d(r, e);
8646    }
8647
8648    #[simd_test(enable = "avx512dq,avx512vl")]
8649    fn test_mm_maskz_cvtepi64_pd() {
8650        let a = _mm_set_epi64x(1, 2);
8651        let r = _mm_maskz_cvtepi64_pd(0b01, a);
8652        let e = _mm_set_pd(0., 2.);
8653        assert_eq_m128d(r, e);
8654    }
8655
8656    #[simd_test(enable = "avx512dq,avx512vl")]
8657    fn test_mm256_cvtepi64_pd() {
8658        let a = _mm256_set_epi64x(1, 2, 3, 4);
8659        let r = _mm256_cvtepi64_pd(a);
8660        let e = _mm256_set_pd(1., 2., 3., 4.);
8661        assert_eq_m256d(r, e);
8662    }
8663
8664    #[simd_test(enable = "avx512dq,avx512vl")]
8665    fn test_mm256_mask_cvtepi64_pd() {
8666        let a = _mm256_set_epi64x(1, 2, 3, 4);
8667        let b = _mm256_set_pd(5., 6., 7., 8.);
8668        let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a);
8669        let e = _mm256_set_pd(5., 2., 3., 8.);
8670        assert_eq_m256d(r, e);
8671    }
8672
8673    #[simd_test(enable = "avx512dq,avx512vl")]
8674    fn test_mm256_maskz_cvtepi64_pd() {
8675        let a = _mm256_set_epi64x(1, 2, 3, 4);
8676        let r = _mm256_maskz_cvtepi64_pd(0b0110, a);
8677        let e = _mm256_set_pd(0., 2., 3., 0.);
8678        assert_eq_m256d(r, e);
8679    }
8680
8681    #[simd_test(enable = "avx512dq")]
8682    fn test_mm512_cvtepi64_pd() {
8683        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8684        let r = _mm512_cvtepi64_pd(a);
8685        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8686        assert_eq_m512d(r, e);
8687    }
8688
8689    #[simd_test(enable = "avx512dq")]
8690    fn test_mm512_mask_cvtepi64_pd() {
8691        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8692        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8693        let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a);
8694        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8695        assert_eq_m512d(r, e);
8696    }
8697
8698    #[simd_test(enable = "avx512dq")]
8699    fn test_mm512_maskz_cvtepi64_pd() {
8700        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8701        let r = _mm512_maskz_cvtepi64_pd(0b01101001, a);
8702        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8703        assert_eq_m512d(r, e);
8704    }
8705
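    // i64 -> f32 narrows: eight 64-bit lanes produce a __m256 of eight f32s.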
8706    #[simd_test(enable = "avx512dq")]
8707    fn test_mm512_cvt_roundepi64_ps() {
8708        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8709        let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8710        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8711        assert_eq_m256(r, e);
8712    }
8713
8714    #[simd_test(enable = "avx512dq")]
8715    fn test_mm512_mask_cvt_roundepi64_ps() {
8716        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8717        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8718        let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8719            b, 0b01101001, a,
8720        );
8721        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8722        assert_eq_m256(r, e);
8723    }
8724
8725    #[simd_test(enable = "avx512dq")]
8726    fn test_mm512_maskz_cvt_roundepi64_ps() {
8727        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8728        let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8729            0b01101001, a,
8730        );
8731        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8732        assert_eq_m256(r, e);
8733    }
8734
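    // The 128-bit variant yields only two f32 results; the upper two lanes of
    // the destination are zeroed, as the expected values below reflect.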
8735    #[simd_test(enable = "avx512dq,avx512vl")]
8736    fn test_mm_cvtepi64_ps() {
8737        let a = _mm_set_epi64x(1, 2);
8738        let r = _mm_cvtepi64_ps(a);
8739        let e = _mm_set_ps(0., 0., 1., 2.);
8740        assert_eq_m128(r, e);
8741    }
8742
8743    #[simd_test(enable = "avx512dq,avx512vl")]
8744    fn test_mm_mask_cvtepi64_ps() {
8745        let a = _mm_set_epi64x(1, 2);
8746        let b = _mm_set_ps(3., 4., 5., 6.);
8747        let r = _mm_mask_cvtepi64_ps(b, 0b01, a);
8748        let e = _mm_set_ps(0., 0., 5., 2.);
8749        assert_eq_m128(r, e);
8750    }
8751
8752    #[simd_test(enable = "avx512dq,avx512vl")]
8753    fn test_mm_maskz_cvtepi64_ps() {
8754        let a = _mm_set_epi64x(1, 2);
8755        let r = _mm_maskz_cvtepi64_ps(0b01, a);
8756        let e = _mm_set_ps(0., 0., 0., 2.);
8757        assert_eq_m128(r, e);
8758    }
8759
8760    #[simd_test(enable = "avx512dq,avx512vl")]
8761    fn test_mm256_cvtepi64_ps() {
8762        let a = _mm256_set_epi64x(1, 2, 3, 4);
8763        let r = _mm256_cvtepi64_ps(a);
8764        let e = _mm_set_ps(1., 2., 3., 4.);
8765        assert_eq_m128(r, e);
8766    }
8767
8768    #[simd_test(enable = "avx512dq,avx512vl")]
8769    fn test_mm256_mask_cvtepi64_ps() {
8770        let a = _mm256_set_epi64x(1, 2, 3, 4);
8771        let b = _mm_set_ps(5., 6., 7., 8.);
8772        let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a);
8773        let e = _mm_set_ps(5., 2., 3., 8.);
8774        assert_eq_m128(r, e);
8775    }
8776
8777    #[simd_test(enable = "avx512dq,avx512vl")]
8778    fn test_mm256_maskz_cvtepi64_ps() {
8779        let a = _mm256_set_epi64x(1, 2, 3, 4);
8780        let r = _mm256_maskz_cvtepi64_ps(0b0110, a);
8781        let e = _mm_set_ps(0., 2., 3., 0.);
8782        assert_eq_m128(r, e);
8783    }
8784
8785    #[simd_test(enable = "avx512dq")]
8786    fn test_mm512_cvtepi64_ps() {
8787        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8788        let r = _mm512_cvtepi64_ps(a);
8789        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8790        assert_eq_m256(r, e);
8791    }
8792
8793    #[simd_test(enable = "avx512dq")]
8794    fn test_mm512_mask_cvtepi64_ps() {
8795        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8796        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8797        let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a);
8798        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8799        assert_eq_m256(r, e);
8800    }
8801
8802    #[simd_test(enable = "avx512dq")]
8803    fn test_mm512_maskz_cvtepi64_ps() {
8804        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8805        let r = _mm512_maskz_cvtepi64_ps(0b01101001, a);
8806        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8807        assert_eq_m256(r, e);
8808    }
8809
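    // Unsigned u64 -> f64/f32 conversions; for the small positive inputs used
    // here they behave exactly like the signed conversions above.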
8810    #[simd_test(enable = "avx512dq")]
8811    fn test_mm512_cvt_roundepu64_pd() {
8812        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8813        let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8814        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8815        assert_eq_m512d(r, e);
8816    }
8817
8818    #[simd_test(enable = "avx512dq")]
8819    fn test_mm512_mask_cvt_roundepu64_pd() {
8820        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8821        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8822        let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8823            b, 0b01101001, a,
8824        );
8825        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8826        assert_eq_m512d(r, e);
8827    }
8828
8829    #[simd_test(enable = "avx512dq")]
8830    fn test_mm512_maskz_cvt_roundepu64_pd() {
8831        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8832        let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8833            0b01101001, a,
8834        );
8835        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8836        assert_eq_m512d(r, e);
8837    }
8838
8839    #[simd_test(enable = "avx512dq,avx512vl")]
8840    fn test_mm_cvtepu64_pd() {
8841        let a = _mm_set_epi64x(1, 2);
8842        let r = _mm_cvtepu64_pd(a);
8843        let e = _mm_set_pd(1., 2.);
8844        assert_eq_m128d(r, e);
8845    }
8846
8847    #[simd_test(enable = "avx512dq,avx512vl")]
8848    fn test_mm_mask_cvtepu64_pd() {
8849        let a = _mm_set_epi64x(1, 2);
8850        let b = _mm_set_pd(3., 4.);
8851        let r = _mm_mask_cvtepu64_pd(b, 0b01, a);
8852        let e = _mm_set_pd(3., 2.);
8853        assert_eq_m128d(r, e);
8854    }
8855
8856    #[simd_test(enable = "avx512dq,avx512vl")]
8857    fn test_mm_maskz_cvtepu64_pd() {
8858        let a = _mm_set_epi64x(1, 2);
8859        let r = _mm_maskz_cvtepu64_pd(0b01, a);
8860        let e = _mm_set_pd(0., 2.);
8861        assert_eq_m128d(r, e);
8862    }
8863
8864    #[simd_test(enable = "avx512dq,avx512vl")]
8865    fn test_mm256_cvtepu64_pd() {
8866        let a = _mm256_set_epi64x(1, 2, 3, 4);
8867        let r = _mm256_cvtepu64_pd(a);
8868        let e = _mm256_set_pd(1., 2., 3., 4.);
8869        assert_eq_m256d(r, e);
8870    }
8871
8872    #[simd_test(enable = "avx512dq,avx512vl")]
8873    fn test_mm256_mask_cvtepu64_pd() {
8874        let a = _mm256_set_epi64x(1, 2, 3, 4);
8875        let b = _mm256_set_pd(5., 6., 7., 8.);
8876        let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a);
8877        let e = _mm256_set_pd(5., 2., 3., 8.);
8878        assert_eq_m256d(r, e);
8879    }
8880
8881    #[simd_test(enable = "avx512dq,avx512vl")]
8882    fn test_mm256_maskz_cvtepu64_pd() {
8883        let a = _mm256_set_epi64x(1, 2, 3, 4);
8884        let r = _mm256_maskz_cvtepu64_pd(0b0110, a);
8885        let e = _mm256_set_pd(0., 2., 3., 0.);
8886        assert_eq_m256d(r, e);
8887    }
8888
8889    #[simd_test(enable = "avx512dq")]
8890    fn test_mm512_cvtepu64_pd() {
8891        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8892        let r = _mm512_cvtepu64_pd(a);
8893        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8894        assert_eq_m512d(r, e);
8895    }
8896
8897    #[simd_test(enable = "avx512dq")]
8898    fn test_mm512_mask_cvtepu64_pd() {
8899        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8900        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8901        let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a);
8902        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8903        assert_eq_m512d(r, e);
8904    }
8905
8906    #[simd_test(enable = "avx512dq")]
8907    fn test_mm512_maskz_cvtepu64_pd() {
8908        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8909        let r = _mm512_maskz_cvtepu64_pd(0b01101001, a);
8910        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8911        assert_eq_m512d(r, e);
8912    }
8913
8914    #[simd_test(enable = "avx512dq")]
8915    fn test_mm512_cvt_roundepu64_ps() {
8916        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8917        let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8918        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8919        assert_eq_m256(r, e);
8920    }
8921
8922    #[simd_test(enable = "avx512dq")]
8923    fn test_mm512_mask_cvt_roundepu64_ps() {
8924        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8925        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8926        let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8927            b, 0b01101001, a,
8928        );
8929        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8930        assert_eq_m256(r, e);
8931    }
8932
8933    #[simd_test(enable = "avx512dq")]
8934    fn test_mm512_maskz_cvt_roundepu64_ps() {
8935        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8936        let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8937            0b01101001, a,
8938        );
8939        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8940        assert_eq_m256(r, e);
8941    }
8942
8943    #[simd_test(enable = "avx512dq,avx512vl")]
8944    fn test_mm_cvtepu64_ps() {
8945        let a = _mm_set_epi64x(1, 2);
8946        let r = _mm_cvtepu64_ps(a);
8947        let e = _mm_set_ps(0., 0., 1., 2.);
8948        assert_eq_m128(r, e);
8949    }
8950
8951    #[simd_test(enable = "avx512dq,avx512vl")]
8952    fn test_mm_mask_cvtepu64_ps() {
8953        let a = _mm_set_epi64x(1, 2);
8954        let b = _mm_set_ps(3., 4., 5., 6.);
8955        let r = _mm_mask_cvtepu64_ps(b, 0b01, a);
8956        let e = _mm_set_ps(0., 0., 5., 2.);
8957        assert_eq_m128(r, e);
8958    }
8959
8960    #[simd_test(enable = "avx512dq,avx512vl")]
8961    fn test_mm_maskz_cvtepu64_ps() {
8962        let a = _mm_set_epi64x(1, 2);
8963        let r = _mm_maskz_cvtepu64_ps(0b01, a);
8964        let e = _mm_set_ps(0., 0., 0., 2.);
8965        assert_eq_m128(r, e);
8966    }
8967
8968    #[simd_test(enable = "avx512dq,avx512vl")]
8969    fn test_mm256_cvtepu64_ps() {
8970        let a = _mm256_set_epi64x(1, 2, 3, 4);
8971        let r = _mm256_cvtepu64_ps(a);
8972        let e = _mm_set_ps(1., 2., 3., 4.);
8973        assert_eq_m128(r, e);
8974    }
8975
8976    #[simd_test(enable = "avx512dq,avx512vl")]
8977    fn test_mm256_mask_cvtepu64_ps() {
8978        let a = _mm256_set_epi64x(1, 2, 3, 4);
8979        let b = _mm_set_ps(5., 6., 7., 8.);
8980        let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a);
8981        let e = _mm_set_ps(5., 2., 3., 8.);
8982        assert_eq_m128(r, e);
8983    }
8984
8985    #[simd_test(enable = "avx512dq,avx512vl")]
8986    fn test_mm256_maskz_cvtepu64_ps() {
8987        let a = _mm256_set_epi64x(1, 2, 3, 4);
8988        let r = _mm256_maskz_cvtepu64_ps(0b0110, a);
8989        let e = _mm_set_ps(0., 2., 3., 0.);
8990        assert_eq_m128(r, e);
8991    }
8992
8993    #[simd_test(enable = "avx512dq")]
8994    fn test_mm512_cvtepu64_ps() {
8995        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8996        let r = _mm512_cvtepu64_ps(a);
8997        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8998        assert_eq_m256(r, e);
8999    }
9000
9001    #[simd_test(enable = "avx512dq")]
9002    fn test_mm512_mask_cvtepu64_ps() {
9003        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9004        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
9005        let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a);
9006        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
9007        assert_eq_m256(r, e);
9008    }
9009
9010    #[simd_test(enable = "avx512dq")]
9011    fn test_mm512_maskz_cvtepu64_ps() {
9012        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9013        let r = _mm512_maskz_cvtepu64_ps(0b01101001, a);
9014        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
9015        assert_eq_m256(r, e);
9016    }
9017
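    // f64 -> i64 conversions; with round-to-nearest and integral inputs the
    // results are exact.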
9018    #[simd_test(enable = "avx512dq")]
9019    fn test_mm512_cvt_roundpd_epi64() {
9020        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9021        let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
9022        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9023        assert_eq_m512i(r, e);
9024    }
9025
9026    #[simd_test(enable = "avx512dq")]
9027    fn test_mm512_mask_cvt_roundpd_epi64() {
9028        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9029        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9030        let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9031            b, 0b01101001, a,
9032        );
9033        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9034        assert_eq_m512i(r, e);
9035    }
9036
9037    #[simd_test(enable = "avx512dq")]
9038    fn test_mm512_maskz_cvt_roundpd_epi64() {
9039        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9040        let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9041            0b01101001, a,
9042        );
9043        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9044        assert_eq_m512i(r, e);
9045    }
9046
9047    #[simd_test(enable = "avx512dq,avx512vl")]
9048    fn test_mm_cvtpd_epi64() {
9049        let a = _mm_set_pd(1., 2.);
9050        let r = _mm_cvtpd_epi64(a);
9051        let e = _mm_set_epi64x(1, 2);
9052        assert_eq_m128i(r, e);
9053    }
9054
9055    #[simd_test(enable = "avx512dq,avx512vl")]
9056    fn test_mm_mask_cvtpd_epi64() {
9057        let a = _mm_set_pd(1., 2.);
9058        let b = _mm_set_epi64x(3, 4);
9059        let r = _mm_mask_cvtpd_epi64(b, 0b01, a);
9060        let e = _mm_set_epi64x(3, 2);
9061        assert_eq_m128i(r, e);
9062    }
9063
9064    #[simd_test(enable = "avx512dq,avx512vl")]
9065    fn test_mm_maskz_cvtpd_epi64() {
9066        let a = _mm_set_pd(1., 2.);
9067        let r = _mm_maskz_cvtpd_epi64(0b01, a);
9068        let e = _mm_set_epi64x(0, 2);
9069        assert_eq_m128i(r, e);
9070    }
9071
9072    #[simd_test(enable = "avx512dq,avx512vl")]
9073    fn test_mm256_cvtpd_epi64() {
9074        let a = _mm256_set_pd(1., 2., 3., 4.);
9075        let r = _mm256_cvtpd_epi64(a);
9076        let e = _mm256_set_epi64x(1, 2, 3, 4);
9077        assert_eq_m256i(r, e);
9078    }
9079
9080    #[simd_test(enable = "avx512dq,avx512vl")]
9081    fn test_mm256_mask_cvtpd_epi64() {
9082        let a = _mm256_set_pd(1., 2., 3., 4.);
9083        let b = _mm256_set_epi64x(5, 6, 7, 8);
9084        let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a);
9085        let e = _mm256_set_epi64x(5, 2, 3, 8);
9086        assert_eq_m256i(r, e);
9087    }
9088
9089    #[simd_test(enable = "avx512dq,avx512vl")]
9090    fn test_mm256_maskz_cvtpd_epi64() {
9091        let a = _mm256_set_pd(1., 2., 3., 4.);
9092        let r = _mm256_maskz_cvtpd_epi64(0b0110, a);
9093        let e = _mm256_set_epi64x(0, 2, 3, 0);
9094        assert_eq_m256i(r, e);
9095    }
9096
9097    #[simd_test(enable = "avx512dq")]
9098    fn test_mm512_cvtpd_epi64() {
9099        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9100        let r = _mm512_cvtpd_epi64(a);
9101        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9102        assert_eq_m512i(r, e);
9103    }
9104
9105    #[simd_test(enable = "avx512dq")]
9106    fn test_mm512_mask_cvtpd_epi64() {
9107        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9108        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9109        let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a);
9110        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9111        assert_eq_m512i(r, e);
9112    }
9113
9114    #[simd_test(enable = "avx512dq")]
9115    fn test_mm512_maskz_cvtpd_epi64() {
9116        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9117        let r = _mm512_maskz_cvtpd_epi64(0b01101001, a);
9118        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9119        assert_eq_m512i(r, e);
9120    }
9121
9122    #[simd_test(enable = "avx512dq")]
9123    fn test_mm512_cvt_roundps_epi64() {
9124        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9125        let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
9126        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9127        assert_eq_m512i(r, e);
9128    }
9129
9130    #[simd_test(enable = "avx512dq")]
9131    fn test_mm512_mask_cvt_roundps_epi64() {
9132        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9133        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9134        let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9135            b, 0b01101001, a,
9136        );
9137        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9138        assert_eq_m512i(r, e);
9139    }
9140
9141    #[simd_test(enable = "avx512dq")]
9142    fn test_mm512_maskz_cvt_roundps_epi64() {
9143        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9144        let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9145            0b01101001, a,
9146        );
9147        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9148        assert_eq_m512i(r, e);
9149    }
9150
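    // `_mm_cvtps_epi64` widens, so only the two lowest f32 lanes of `a`
    // (4. and 3. here) are converted.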
9151    #[simd_test(enable = "avx512dq,avx512vl")]
9152    fn test_mm_cvtps_epi64() {
9153        let a = _mm_set_ps(1., 2., 3., 4.);
9154        let r = _mm_cvtps_epi64(a);
9155        let e = _mm_set_epi64x(3, 4);
9156        assert_eq_m128i(r, e);
9157    }
9158
9159    #[simd_test(enable = "avx512dq,avx512vl")]
9160    fn test_mm_mask_cvtps_epi64() {
9161        let a = _mm_set_ps(1., 2., 3., 4.);
9162        let b = _mm_set_epi64x(5, 6);
9163        let r = _mm_mask_cvtps_epi64(b, 0b01, a);
9164        let e = _mm_set_epi64x(5, 4);
9165        assert_eq_m128i(r, e);
9166    }
9167
9168    #[simd_test(enable = "avx512dq,avx512vl")]
9169    fn test_mm_maskz_cvtps_epi64() {
9170        let a = _mm_set_ps(1., 2., 3., 4.);
9171        let r = _mm_maskz_cvtps_epi64(0b01, a);
9172        let e = _mm_set_epi64x(0, 4);
9173        assert_eq_m128i(r, e);
9174    }
9175
9176    #[simd_test(enable = "avx512dq,avx512vl")]
9177    fn test_mm256_cvtps_epi64() {
9178        let a = _mm_set_ps(1., 2., 3., 4.);
9179        let r = _mm256_cvtps_epi64(a);
9180        let e = _mm256_set_epi64x(1, 2, 3, 4);
9181        assert_eq_m256i(r, e);
9182    }
9183
9184    #[simd_test(enable = "avx512dq,avx512vl")]
9185    fn test_mm256_mask_cvtps_epi64() {
9186        let a = _mm_set_ps(1., 2., 3., 4.);
9187        let b = _mm256_set_epi64x(5, 6, 7, 8);
9188        let r = _mm256_mask_cvtps_epi64(b, 0b0110, a);
9189        let e = _mm256_set_epi64x(5, 2, 3, 8);
9190        assert_eq_m256i(r, e);
9191    }
9192
9193    #[simd_test(enable = "avx512dq,avx512vl")]
9194    fn test_mm256_maskz_cvtps_epi64() {
9195        let a = _mm_set_ps(1., 2., 3., 4.);
9196        let r = _mm256_maskz_cvtps_epi64(0b0110, a);
9197        let e = _mm256_set_epi64x(0, 2, 3, 0);
9198        assert_eq_m256i(r, e);
9199    }
9200
9201    #[simd_test(enable = "avx512dq")]
9202    fn test_mm512_cvtps_epi64() {
9203        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9204        let r = _mm512_cvtps_epi64(a);
9205        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9206        assert_eq_m512i(r, e);
9207    }
9208
9209    #[simd_test(enable = "avx512dq")]
9210    fn test_mm512_mask_cvtps_epi64() {
9211        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9212        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9213        let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a);
9214        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9215        assert_eq_m512i(r, e);
9216    }
9217
9218    #[simd_test(enable = "avx512dq")]
9219    fn test_mm512_maskz_cvtps_epi64() {
9220        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9221        let r = _mm512_maskz_cvtps_epi64(0b01101001, a);
9222        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9223        assert_eq_m512i(r, e);
9224    }
9225
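    // f64 -> u64 conversions, mirroring the signed pd -> epi64 tests above.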
    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_cvtpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_cvtpd_epu64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_cvtpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mask_cvtpd_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_maskz_cvtpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_maskz_cvtpd_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_cvtpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_cvtpd_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_cvtpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_maskz_cvtpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_cvtpd_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvtpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtpd_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvtpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvtpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtpd_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvtps_epu64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvtps_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_maskz_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvtps_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvtps_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvtps_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_maskz_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvtps_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvtps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtps_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvtps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvtps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtps_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

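    // The `cvtt*` ("convert with truncation") tests below mirror the suites
    // above. Truncation always rounds toward zero, so only _MM_FROUND_NO_EXC
    // is meaningful in the SAE const; with the whole-number inputs used here
    // the expected values match the rounding variants.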
    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvtt_roundpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvtt_roundpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvtt_roundpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_cvttpd_epi64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_cvttpd_epi64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_cvttpd_epi64() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mask_cvttpd_epi64(b, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_maskz_cvttpd_epi64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_maskz_cvttpd_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_cvttpd_epi64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_cvttpd_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_cvttpd_epi64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_maskz_cvttpd_epi64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttpd_epi64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvttpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvttpd_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvttpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvttpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvttpd_epi64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvtt_roundps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvtt_roundps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvtt_roundps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvttps_epi64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvttps_epi64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_maskz_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvttps_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvttps_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttps_epi64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_maskz_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttps_epi64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvttps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvttps_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvttps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvttps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvttps_epi64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

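    // Illustrative sketch (not part of the original suite): with a fractional
    // input the plain conversion rounds to nearest even (assuming the default
    // MXCSR rounding mode) while the truncating one chops toward zero, which
    // is the only behavioral difference the two suites above exercise.
    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_cvtps_epi64_rounding_sketch() {
        let a = _mm_set_ps(0., 0., 1.5, 2.5);
        // Round to nearest even: 1.5 -> 2 and 2.5 -> 2.
        assert_eq_m128i(_mm_cvtps_epi64(a), _mm_set_epi64x(2, 2));
        // Truncate toward zero: 1.5 -> 1 and 2.5 -> 2.
        assert_eq_m128i(_mm_cvttps_epi64(a), _mm_set_epi64x(1, 2));
    }
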
    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvtt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvtt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvtt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_cvttpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_cvttpd_epu64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_cvttpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mask_cvttpd_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_maskz_cvttpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_maskz_cvttpd_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_cvttpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_cvttpd_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_cvttpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_maskz_cvttpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttpd_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvttpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvttpd_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvttpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvttpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvttpd_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvtt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvtt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvtt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvttps_epu64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvttps_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_maskz_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvttps_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvttps_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttps_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_maskz_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttps_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_cvttps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvttps_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_cvttps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_cvttps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvttps_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

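    // `mullo_epi64` multiplies packed 64-bit integers and keeps the low 64
    // bits of each product (wrapping on overflow). The tests below are
    // `const fn`, matching the const-unstable intrinsics they exercise.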
    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm_mullo_epi64() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mullo_epi64(a, b);
        let e = _mm_set_epi64x(3, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm_mask_mullo_epi64() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_epi64x(3, 4);
        let c = _mm_set_epi64x(5, 6);
        let r = _mm_mask_mullo_epi64(c, 0b01, a, b);
        let e = _mm_set_epi64x(5, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm_maskz_mullo_epi64() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_maskz_mullo_epi64(0b01, a, b);
        let e = _mm_set_epi64x(0, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm256_mullo_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mullo_epi64(a, b);
        let e = _mm256_set_epi64x(5, 12, 21, 32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm256_mask_mullo_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let c = _mm256_set_epi64x(9, 10, 11, 12);
        let r = _mm256_mask_mullo_epi64(c, 0b0110, a, b);
        let e = _mm256_set_epi64x(9, 12, 21, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm256_maskz_mullo_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_maskz_mullo_epi64(0b0110, a, b);
        let e = _mm256_set_epi64x(0, 12, 21, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_mm512_mullo_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mullo_epi64(a, b);
        let e = _mm512_set_epi64(9, 20, 33, 48, 65, 84, 105, 128);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_mm512_mask_mullo_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_mullo_epi64(c, 0b01101001, a, b);
        let e = _mm512_set_epi64(17, 20, 33, 20, 65, 22, 23, 128);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_mm512_maskz_mullo_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_mullo_epi64(0b01101001, a, b);
        let e = _mm512_set_epi64(0, 20, 33, 0, 65, 0, 0, 128);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_cvtmask8_u32() {
        let a: __mmask8 = 0b01101001;
        let r = _cvtmask8_u32(a);
        let e: u32 = 0b01101001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_cvtu32_mask8() {
        let a: u32 = 0b01101001;
        let r = _cvtu32_mask8(a);
        let e: __mmask8 = 0b01101001;
        assert_eq!(r, e);
    }

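    // The `_k*_mask8`/`_k*_mask16` intrinsics below are plain bitwise and
    // arithmetic ops on opmask values, e.g.
    //   0b01101001 & 0b10110011 == 0b00100001   (kand)
    //   27549 + 23434 == 50983                  (kadd, no overflow here)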
    #[simd_test(enable = "avx512dq")]
    const fn test_kadd_mask16() {
        let a: __mmask16 = 27549;
        let b: __mmask16 = 23434;
        let r = _kadd_mask16(a, b);
        let e: __mmask16 = 50983;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_kadd_mask8() {
        let a: __mmask8 = 98;
        let b: __mmask8 = 117;
        let r = _kadd_mask8(a, b);
        let e: __mmask8 = 215;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_kand_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kand_mask8(a, b);
        let e: __mmask8 = 0b00100001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_kandn_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kandn_mask8(a, b);
        let e: __mmask8 = 0b10010010;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_knot_mask8() {
        let a: __mmask8 = 0b01101001;
        let r = _knot_mask8(a);
        let e: __mmask8 = 0b10010110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_kor_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kor_mask8(a, b);
        let e: __mmask8 = 0b11111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_kxnor_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kxnor_mask8(a, b);
        let e: __mmask8 = 0b00100101;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_kxor_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kxor_mask8(a, b);
        let e: __mmask8 = 0b11011010;
        assert_eq!(r, e);
    }

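    // Per Intel's flag semantics, `kortest` returns ZF (1 iff a | b == 0) and
    // writes CF (1 iff a | b is all ones) through the out-pointer; `ktest`
    // does the same for a & b (ZF) and !a & b (CF).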
    #[simd_test(enable = "avx512dq")]
    const fn test_kortest_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110110;
        let mut all_ones: u8 = 0;
        let r = unsafe { _kortest_mask8_u8(a, b, &mut all_ones) };
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_kortestc_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110110;
        let r = _kortestc_mask8_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_kortestz_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110110;
        let r = _kortestz_mask8_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_kshiftli_mask8() {
        let a: __mmask8 = 0b01101001;
        let r = _kshiftli_mask8::<3>(a);
        let e: __mmask8 = 0b01001000;
        assert_eq!(r, e);

        let r = _kshiftli_mask8::<7>(a);
        let e: __mmask8 = 0b10000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask8::<8>(a);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask8::<9>(a);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_kshiftri_mask8() {
        let a: __mmask8 = 0b10101001;
        let r = _kshiftri_mask8::<3>(a);
        let e: __mmask8 = 0b00010101;
        assert_eq!(r, e);

        let r = _kshiftri_mask8::<7>(a);
        let e: __mmask8 = 0b00000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask8::<8>(a);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask8::<9>(a);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_ktest_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10010110;
        let mut and_not: u8 = 0;
        let r = unsafe { _ktest_mask8_u8(a, b, &mut and_not) };
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_ktestc_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10010110;
        let r = _ktestc_mask8_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_ktestz_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10010110;
        let r = _ktestz_mask8_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_ktest_mask16_u8() {
        let a: __mmask16 = 0b0110100100111100;
        let b: __mmask16 = 0b1001011011000011;
        let mut and_not: u8 = 0;
        let r = unsafe { _ktest_mask16_u8(a, b, &mut and_not) };
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_ktestc_mask16_u8() {
        let a: __mmask16 = 0b0110100100111100;
        let b: __mmask16 = 0b1001011011000011;
        let r = _ktestc_mask16_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_ktestz_mask16_u8() {
        let a: __mmask16 = 0b0110100100111100;
        let b: __mmask16 = 0b1001011011000011;
        let r = _ktestz_mask16_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_load_mask8() {
        let a: __mmask8 = 0b01101001;
        let r = unsafe { _load_mask8(&a) };
        let e: __mmask8 = 0b01101001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_store_mask8() {
        let a: __mmask8 = 0b01101001;
        let mut r = 0;
        unsafe {
            _store_mask8(&mut r, a);
        }
        let e: __mmask8 = 0b01101001;
        assert_eq!(r, e);
    }

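    // `movepi*_mask` packs the sign bit of each lane into a mask;
    // `movm_epi*` is the inverse, broadcasting each mask bit to an all-ones
    // or all-zeros lane.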
    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm_movepi32_mask() {
        let a = _mm_set_epi32(0, -2, -3, 4);
        let r = _mm_movepi32_mask(a);
        let e = 0b0110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm256_movepi32_mask() {
        let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8);
        let r = _mm256_movepi32_mask(a);
        let e = 0b01101001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_mm512_movepi32_mask() {
        let a = _mm512_set_epi32(
            0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16,
        );
        let r = _mm512_movepi32_mask(a);
        let e = 0b0110100100111100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm_movepi64_mask() {
        let a = _mm_set_epi64x(0, -2);
        let r = _mm_movepi64_mask(a);
        let e = 0b01;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm256_movepi64_mask() {
        let a = _mm256_set_epi64x(0, -2, -3, 4);
        let r = _mm256_movepi64_mask(a);
        let e = 0b0110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_mm512_movepi64_mask() {
        let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8);
        let r = _mm512_movepi64_mask(a);
        let e = 0b01101001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm_movm_epi32() {
        let a = 0b0110;
        let r = _mm_movm_epi32(a);
        let e = _mm_set_epi32(0, -1, -1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm256_movm_epi32() {
        let a = 0b01101001;
        let r = _mm256_movm_epi32(a);
        let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_mm512_movm_epi32() {
        let a = 0b0110100100111100;
        let r = _mm512_movm_epi32(a);
        let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm_movm_epi64() {
        let a = 0b01;
        let r = _mm_movm_epi64(a);
        let e = _mm_set_epi64x(0, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const fn test_mm256_movm_epi64() {
        let a = 0b0110;
        let r = _mm256_movm_epi64(a);
        let e = _mm256_set_epi64x(0, -1, -1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const fn test_mm512_movm_epi64() {
        let a = 0b01101001;
        let r = _mm512_movm_epi64(a);
        let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1);
        assert_eq_m512i(r, e);
    }

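    // For the `range` tests, IMM8 = 0b0101 selects max (bits 1:0 = 01) with
    // the sign taken from the compare result (bits 3:2 = 01); since all
    // inputs here are positive, each lane is simply max(a, b).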
    #[simd_test(enable = "avx512dq")]
    fn test_mm512_range_round_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_range_round_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b01101001, a, b);
        let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_range_round_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b);
        let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_range_pd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(2., 1.);
        let r = _mm_range_pd::<0b0101>(a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_range_pd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(2., 1.);
        let c = _mm_set_pd(3., 4.);
        let r = _mm_mask_range_pd::<0b0101>(c, 0b01, a, b);
        let e = _mm_set_pd(3., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_maskz_range_pd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(2., 1.);
        let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b);
        let e = _mm_set_pd(0., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_range_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_pd(2., 1., 4., 3.);
        let r = _mm256_range_pd::<0b0101>(a, b);
        let e = _mm256_set_pd(2., 2., 4., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_range_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_pd(2., 1., 4., 3.);
        let c = _mm256_set_pd(5., 6., 7., 8.);
        let r = _mm256_mask_range_pd::<0b0101>(c, 0b0110, a, b);
        let e = _mm256_set_pd(5., 2., 4., 8.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_maskz_range_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_pd(2., 1., 4., 3.);
        let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b);
        let e = _mm256_set_pd(0., 2., 4., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_range_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_range_pd::<0b0101>(a, b);
        let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_range_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_range_pd::<0b0101>(c, 0b01101001, a, b);
        let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_range_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b);
        let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_range_round_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm512_set_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_range_round_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let c = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r =
            _mm512_mask_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_range_round_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_maskz_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_range_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(2., 1., 4., 3.);
        let r = _mm_range_ps::<0b0101>(a, b);
        let e = _mm_set_ps(2., 2., 4., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_range_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(2., 1., 4., 3.);
        let c = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm_mask_range_ps::<0b0101>(c, 0b0110, a, b);
        let e = _mm_set_ps(5., 2., 4., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_maskz_range_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(2., 1., 4., 3.);
        let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b);
        let e = _mm_set_ps(0., 2., 4., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_range_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm256_range_ps::<0b0101>(a, b);
        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_range_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
        let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_mask_range_ps::<0b0101>(c, 0b01101001, a, b);
        let e = _mm256_set_ps(9., 2., 4., 12., 6., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_maskz_range_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b);
        let e = _mm256_set_ps(0., 2., 4., 0., 6., 0., 0., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_range_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_range_ps::<0b0101>(a, b);
        let e = _mm512_set_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_range_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let c = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_range_ps::<0b0101>(c, 0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_range_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_maskz_range_ps::<0b0101>(0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

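    // The scalar `sd`/`ss` variants below operate on the low element only and
    // copy the upper elements from `a` (all zero in these tests); with a mask
    // bit of 0 the low element comes from `src` or is zeroed.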
    #[simd_test(enable = "avx512dq")]
    fn test_mm_range_round_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm_set_sd(2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_mask_range_round_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let c = _mm_set_sd(3.);
        let r = _mm_mask_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
        let e = _mm_set_sd(3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_maskz_range_round_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let r = _mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
        let e = _mm_set_sd(0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_mask_range_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let c = _mm_set_sd(3.);
        let r = _mm_mask_range_sd::<0b0101>(c, 0b0, a, b);
        let e = _mm_set_sd(3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_maskz_range_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b);
        let e = _mm_set_sd(0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_range_round_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm_set_ss(2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_mask_range_round_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let c = _mm_set_ss(3.);
        let r = _mm_mask_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
        let e = _mm_set_ss(3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_maskz_range_round_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
        let e = _mm_set_ss(0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_mask_range_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let c = _mm_set_ss(3.);
        let r = _mm_mask_range_ss::<0b0101>(c, 0b0, a, b);
        let e = _mm_set_ss(3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_maskz_range_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b);
        let e = _mm_set_ss(0.);
        assert_eq_m128(r, e);
    }

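    // For the `reduce` tests, IMM8 = 16 | _MM_FROUND_TO_ZERO keeps one
    // fraction bit (imm[7:4] = 1) and truncates, i.e. each lane becomes
    // a - trunc(a * 2) / 2; e.g. 0.75 -> 0.75 - 0.5 = 0.25.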
10599    #[simd_test(enable = "avx512dq")]
10600    fn test_mm512_reduce_round_pd() {
10601        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10602        let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
10603        let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
10604        assert_eq_m512d(r, e);
10605    }
10606
10607    #[simd_test(enable = "avx512dq")]
10608    fn test_mm512_mask_reduce_round_pd() {
10609        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10610        let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
10611        let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10612            src, 0b01101001, a,
10613        );
10614        let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
10615        assert_eq_m512d(r, e);
10616    }
10617
    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_reduce_round_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            0b01101001, a,
        );
        let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_reduce_pd() {
        let a = _mm_set_pd(0.25, 0.50);
        let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm_set_pd(0.25, 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_reduce_pd() {
        let a = _mm_set_pd(0.25, 0.50);
        let src = _mm_set_pd(3., 4.);
        let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a);
        let e = _mm_set_pd(3., 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_maskz_reduce_pd() {
        let a = _mm_set_pd(0.25, 0.50);
        let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_reduce_pd() {
        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
        let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm256_set_pd(0.25, 0., 0.25, 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_reduce_pd() {
        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
        let src = _mm256_set_pd(3., 4., 5., 6.);
        let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
        let e = _mm256_set_pd(3., 0., 0.25, 6.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_maskz_reduce_pd() {
        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
        let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
        let e = _mm256_set_pd(0., 0., 0.25, 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_reduce_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_reduce_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
        let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
        let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_reduce_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
        let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_reduce_round_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_ps(
            0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_reduce_round_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let src = _mm512_set_ps(
            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
        );
        let r = _mm512_mask_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            src,
            0b0110100100111100,
            a,
        );
        let e = _mm512_set_ps(
            5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_reduce_round_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_maskz_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            0b0110100100111100,
            a,
        );
        let e = _mm512_set_ps(
            0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_reduce_ps() {
        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
        let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm_set_ps(0.25, 0., 0.25, 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_reduce_ps() {
        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
        let src = _mm_set_ps(2., 3., 4., 5.);
        let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
        let e = _mm_set_ps(2., 0., 0.25, 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_maskz_reduce_ps() {
        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
        let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
        let e = _mm_set_ps(0., 0., 0.25, 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_reduce_ps() {
        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_reduce_ps() {
        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.);
        let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
        let e = _mm256_set_ps(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_maskz_reduce_ps() {
        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
        let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_reduce_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm512_set_ps(
            0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_reduce_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let src = _mm512_set_ps(
            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
        );
        let r = _mm512_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110100100111100, a);
        let e = _mm512_set_ps(
            5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_maskz_reduce_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110100100111100, a);
        let e = _mm512_set_ps(
            0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

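    // The scalar `_sd`/`_ss` reduce variants operate on the low element of `b` only; the
    // upper element(s) of the result are copied from `a`, and when a writemask bit is clear
    // the low element comes from the mask source (or zero) instead.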
    #[simd_test(enable = "avx512dq")]
    fn test_mm_reduce_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_mask_reduce_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let c = _mm_set_pd(3., 4.);
        let r = _mm_mask_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            c, 0b0, a, b,
        );
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_maskz_reduce_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let r =
            _mm_maskz_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_reduce_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_mask_reduce_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let c = _mm_set_pd(3., 4.);
        let r = _mm_mask_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_maskz_reduce_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_reduce_round_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm_set_ps(1., 2., 3., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_mask_reduce_round_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let c = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm_mask_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            c, 0b0, a, b,
        );
        let e = _mm_set_ps(1., 2., 3., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_maskz_reduce_round_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let r =
            _mm_maskz_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
        let e = _mm_set_ps(1., 2., 3., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_reduce_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
        let e = _mm_set_ps(1., 2., 3., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_mask_reduce_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let c = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm_mask_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
        let e = _mm_set_ps(1., 2., 3., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_maskz_reduce_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
        let e = _mm_set_ps(1., 2., 3., 0.);
        assert_eq_m128(r, e);
    }

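    // The fpclass tests all use imm8 = 0x18, which selects the positive-infinity (0x08)
    // and negative-infinity (0x10) categories, so a result bit is set only where the
    // element is +/- infinity; zeros, NaNs, and denormals fall outside these categories.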
    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_fpclass_pd_mask() {
        let a = _mm_set_pd(1., f64::INFINITY);
        let r = _mm_fpclass_pd_mask::<0x18>(a);
        let e = 0b01;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_fpclass_pd_mask() {
        let a = _mm_set_pd(1., f64::INFINITY);
        let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a);
        let e = 0b00;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_fpclass_pd_mask() {
        let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
        let r = _mm256_fpclass_pd_mask::<0x18>(a);
        let e = 0b0110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_fpclass_pd_mask() {
        let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
        let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a);
        let e = 0b0010;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_fpclass_pd_mask() {
        let a = _mm512_set_pd(
            1.,
            f64::INFINITY,
            f64::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f64::NAN,
            1.0e-308,
        );
        let r = _mm512_fpclass_pd_mask::<0x18>(a);
        let e = 0b01100000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_fpclass_pd_mask() {
        let a = _mm512_set_pd(
            1.,
            f64::INFINITY,
            f64::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f64::NAN,
            1.0e-308,
        );
        let r = _mm512_mask_fpclass_pd_mask::<0x18>(0b10101010, a);
        let e = 0b00100000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_fpclass_ps_mask() {
        let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
        let r = _mm_fpclass_ps_mask::<0x18>(a);
        let e = 0b0110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm_mask_fpclass_ps_mask() {
        let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
        let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a);
        let e = 0b0010;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_fpclass_ps_mask() {
        let a = _mm256_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
        );
        let r = _mm256_fpclass_ps_mask::<0x18>(a);
        let e = 0b01100000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    fn test_mm256_mask_fpclass_ps_mask() {
        let a = _mm256_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
        );
        let r = _mm256_mask_fpclass_ps_mask::<0x18>(0b10101010, a);
        let e = 0b00100000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_fpclass_ps_mask() {
        let a = _mm512_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
            -1.,
            f32::NEG_INFINITY,
            f32::INFINITY,
            -0.0,
            0.0,
            2.0,
            f32::NAN,
            -1.0e-38,
        );
        let r = _mm512_fpclass_ps_mask::<0x18>(a);
        let e = 0b0110000001100000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm512_mask_fpclass_ps_mask() {
        let a = _mm512_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
            -1.,
            f32::NEG_INFINITY,
            f32::INFINITY,
            -0.0,
            0.0,
            2.0,
            f32::NAN,
            -1.0e-38,
        );
        let r = _mm512_mask_fpclass_ps_mask::<0x18>(0b1010101010101010, a);
        let e = 0b0010000000100000;
        assert_eq!(r, e);
    }

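    // The scalar `_sd`/`_ss` fpclass variants classify only the low element of the vector,
    // producing a one-bit result mask.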
    #[simd_test(enable = "avx512dq")]
    fn test_mm_fpclass_sd_mask() {
        let a = _mm_set_pd(1., f64::INFINITY);
        let r = _mm_fpclass_sd_mask::<0x18>(a);
        let e = 0b1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_mask_fpclass_sd_mask() {
        let a = _mm_set_sd(f64::INFINITY);
        let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a);
        let e = 0b0;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_fpclass_ss_mask() {
        let a = _mm_set_ss(f32::INFINITY);
        let r = _mm_fpclass_ss_mask::<0x18>(a);
        let e = 0b1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    fn test_mm_mask_fpclass_ss_mask() {
        let a = _mm_set_ss(f32::INFINITY);
        let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a);
        let e = 0b0;
        assert_eq!(r, e);
    }
}