core/stdarch/crates/core_arch/src/x86/avx512dq.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem::transmute,
};

// And

/// Compute the bitwise AND of packed double-precision (64-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288)
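///
/// # Examples
///
/// An illustrative sketch, not part of the original source; it is marked
/// `ignore` because it assumes the `avx512dq` and `avx512vl` target features
/// are available. Lanes whose mask bit is clear keep the value from src:
///
/// ```ignore
/// let src = _mm_set1_pd(9.0);
/// let a = _mm_set1_pd(1.0);
/// let b = _mm_set1_pd(1.0);
/// // Mask 0b01: lane 0 receives a AND b, lane 1 is copied from src.
/// let r = _mm_mask_and_pd(src, 0b01, a, b);
/// // r == _mm_set_pd(9.0, 1.0), i.e. lane 0 = 1.0, lane 1 = 9.0
/// ```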
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let and = _mm_and_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, and, src.as_f64x2()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289)
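///
/// # Examples
///
/// An illustrative sketch, not part of the original source (`ignore`d for the
/// same target-feature reasons as above). Lanes whose mask bit is clear are
/// zeroed instead of being taken from a source vector:
///
/// ```ignore
/// let a = _mm_set1_pd(1.0);
/// // Mask 0b10: lane 0 is zeroed, lane 1 receives 1.0 AND 1.0.
/// let r = _mm_maskz_and_pd(0b10, a, a);
/// // r == _mm_set_pd(1.0, 0.0)
/// ```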
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let and = _mm_and_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, and, f64x2::ZERO))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let and = _mm256_and_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, and, src.as_f64x4()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let and = _mm256_and_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, and, f64x4::ZERO))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating-point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293)
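///
/// # Examples
///
/// An illustrative sketch, not part of the original source (`ignore`d because
/// it assumes the `avx512dq` target feature). The AND operates on the raw bit
/// patterns: 1.0 (0x3FF0_0000_0000_0000) and 2.0 (0x4000_0000_0000_0000)
/// share no set bits, so their AND is +0.0:
///
/// ```ignore
/// let r = _mm512_and_pd(_mm512_set1_pd(1.0), _mm512_set1_pd(2.0));
/// // every lane of r is 0.0
/// ```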
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let and = _mm512_and_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, and, src.as_f64x8()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let and = _mm512_and_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, and, f64x8::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let and = _mm_and_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, and, src.as_f32x4()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let and = _mm_and_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, and, f32x4::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let and = _mm256_and_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, and, src.as_f32x8()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let and = _mm256_and_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, and, f32x8::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating-point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_and(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let and = _mm512_and_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, and, src.as_f32x16()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let and = _mm512_and_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, and, f32x16::ZERO))
    }
}

// Andnot

/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let andnot = _mm_andnot_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x2()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let andnot = _mm_andnot_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, andnot, f64x2::ZERO))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let andnot = _mm256_andnot_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x4()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let andnot = _mm256_andnot_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, andnot, f64x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331)
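///
/// # Examples
///
/// An illustrative sketch, not part of the original source (`ignore`d because
/// it assumes the `avx512dq` target feature). This is the classic
/// absolute-value idiom: NOT of -0.0 keeps every bit except the sign bit, so
/// the AND clears the sign:
///
/// ```ignore
/// let x = _mm512_set1_pd(-2.5);
/// let r = _mm512_andnot_pd(_mm512_set1_pd(-0.0), x);
/// // every lane of r is 2.5
/// ```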
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let andnot = _mm512_andnot_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x8()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let andnot = _mm512_andnot_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, andnot, f64x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let andnot = _mm_andnot_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x4()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let andnot = _mm_andnot_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, andnot, f32x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let andnot = _mm256_andnot_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x8()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let andnot = _mm256_andnot_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, andnot, f32x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let andnot = _mm512_andnot_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x16()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating-point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let andnot = _mm512_andnot_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, andnot, f32x16::ZERO))
    }
}

// Or

/// Compute the bitwise OR of packed double-precision (64-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let or = _mm_or_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, or, src.as_f64x2()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let or = _mm_or_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, or, f64x2::ZERO))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let or = _mm256_or_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, or, src.as_f64x4()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let or = _mm256_or_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, or, f64x4::ZERO))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating-point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829)
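///
/// # Examples
///
/// An illustrative sketch, not part of the original source (`ignore`d because
/// it assumes the `avx512dq` target feature). ORing with -0.0 sets the sign
/// bit, forcing every lane negative:
///
/// ```ignore
/// let r = _mm512_or_pd(_mm512_set1_pd(2.5), _mm512_set1_pd(-0.0));
/// // every lane of r is -2.5
/// ```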
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let or = _mm512_or_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, or, src.as_f64x8()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let or = _mm512_or_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, or, f64x8::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let or = _mm_or_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, or, src.as_f32x4()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let or = _mm_or_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, or, f32x4::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let or = _mm256_or_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, or, src.as_f32x8()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let or = _mm256_or_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, or, f32x8::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating-point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_or(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let or = _mm512_or_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, or, src.as_f32x16()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let or = _mm512_or_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, or, f32x16::ZERO))
    }
}

// Xor

/// Compute the bitwise XOR of packed double-precision (64-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let xor = _mm_xor_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, xor, src.as_f64x2()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let xor = _mm_xor_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, xor, f64x2::ZERO))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let xor = _mm256_xor_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, xor, src.as_f64x4()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let xor = _mm256_xor_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, xor, f64x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating-point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102)
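///
/// # Examples
///
/// An illustrative sketch, not part of the original source (`ignore`d because
/// it assumes the `avx512dq` target feature). XORing with -0.0 flips the sign
/// bit, negating every lane:
///
/// ```ignore
/// let r = _mm512_xor_pd(_mm512_set1_pd(2.5), _mm512_set1_pd(-0.0));
/// // every lane of r is -2.5
/// ```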
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let xor = _mm512_xor_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, xor, src.as_f64x8()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let xor = _mm512_xor_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, xor, f64x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let xor = _mm_xor_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, xor, src.as_f32x4()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let xor = _mm_xor_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, xor, f32x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating-point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let xor = _mm256_xor_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, xor, src.as_f32x8()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let xor = _mm256_xor_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, xor, f32x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating-point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_xor(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let xor = _mm512_xor_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, xor, src.as_f32x16()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating-point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let xor = _mm512_xor_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, xor, f32x16::ZERO))
    }
}

// Broadcast

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509)
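///
/// # Examples
///
/// An illustrative sketch, not part of the original source (`ignore`d because
/// it assumes the `avx512dq` and `avx512vl` target features). Only the two
/// lowest lanes of a are read:
///
/// ```ignore
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let r = _mm256_broadcast_f32x2(a);
/// // r == [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
/// ```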
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
    unsafe {
        let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let b = _mm256_broadcast_f32x2(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let b = _mm256_broadcast_f32x2(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
    unsafe {
        let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let b = _mm512_broadcast_f32x2(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, src.as_f32x16()))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let b = _mm512_broadcast_f32x2(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, f32x16::ZERO))
    }
}

/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521)
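///
/// # Examples
///
/// An illustrative sketch, not part of the original source (`ignore`d because
/// it assumes the `avx512dq` target feature). The whole 256-bit source is
/// repeated in both halves of the result:
///
/// ```ignore
/// let a = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let r = _mm512_broadcast_f32x8(a);
/// // r == [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
/// //       0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
/// ```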
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
    unsafe {
        let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        transmute(b)
    }
}

/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 {
    unsafe {
        let b = _mm512_broadcast_f32x8(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, src.as_f32x16()))
    }
}

/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
    unsafe {
        let b = _mm512_broadcast_f32x8(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, f32x16::ZERO))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524)
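///
/// # Examples
///
/// An illustrative sketch, not part of the original source (`ignore`d because
/// it assumes the `avx512dq` and `avx512vl` target features):
///
/// ```ignore
/// let a = _mm_setr_pd(1.0, 2.0);
/// let r = _mm256_broadcast_f64x2(a);
/// // r == [1.0, 2.0, 1.0, 2.0]
/// ```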
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
    unsafe {
        let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let b = _mm256_broadcast_f64x2(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let b = _mm256_broadcast_f64x2(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d {
    unsafe {
        let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let b = _mm512_broadcast_f64x2(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89.0")]
pub fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let b = _mm512_broadcast_f64x2(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533)
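///
/// # Examples
///
/// An illustrative sketch, not part of the original source (`ignore`d because
/// it assumes the `avx512dq` and `avx512vl` target features). Only the two
/// lowest 32-bit lanes of a are read:
///
/// ```ignore
/// let a = _mm_setr_epi32(10, 20, 30, 40);
/// let r = _mm_broadcast_i32x2(a);
/// // r == [10, 20, 10, 20]
/// ```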
1095#[inline]
1096#[target_feature(enable = "avx512dq,avx512vl")]
1097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1098pub fn _mm_broadcast_i32x2(a: __m128i) -> __m128i {
1099    unsafe {
1100        let a = a.as_i32x4();
1101        let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
1102        transmute(b)
1103    }
1104}
1105
1106/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
1107/// (elements are copied from src if the corresponding bit is not set).
1108///
1109/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534)
1110#[inline]
1111#[target_feature(enable = "avx512dq,avx512vl")]
1112#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1114pub fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
1115    unsafe {
1116        let b = _mm_broadcast_i32x2(a).as_i32x4();
1117        transmute(simd_select_bitmask(k, b, src.as_i32x4()))
1118    }
1119}
1120
1121/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
1122/// (elements are zeroed out if the corresponding bit is not set).
1123///
1124/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535)
1125#[inline]
1126#[target_feature(enable = "avx512dq,avx512vl")]
1127#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1129pub fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i {
1130    unsafe {
1131        let b = _mm_broadcast_i32x2(a).as_i32x4();
1132        transmute(simd_select_bitmask(k, b, i32x4::ZERO))
1133    }
1134}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i32x2(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i32x2(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x2(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x2(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
    }
}

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i32x8();
        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        transmute(b)
    }
}

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x8(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
    }
}

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x8(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
    }
}
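
// Illustrative sketch (assumes avx512dq): `_mm512_broadcast_i32x8` tiles the
// full 256-bit source into both halves of the 512-bit destination.
#[cfg(test)]
fn _sketch_mm512_broadcast_i32x8() {
    // SAFETY: illustrative only; requires avx512dq at runtime.
    unsafe {
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm512_broadcast_i32x8(a);
        let mut out = [0i32; 16];
        _mm512_storeu_epi32(out.as_mut_ptr(), r);
        assert_eq!(out, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
    }
}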

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i64x2();
        let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i64x2(a).as_i64x4();
        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i64x2(a).as_i64x4();
        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i64x2();
        let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i64x2(a).as_i64x8();
        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i64x2(a).as_i64x8();
        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
    }
}
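
// Illustrative sketch (assumes avx512dq): the 128-bit pair [lane0, lane1]
// repeats four times across the 512-bit destination.
#[cfg(test)]
fn _sketch_mm512_broadcast_i64x2() {
    // SAFETY: illustrative only; requires avx512dq at runtime.
    unsafe {
        let a = _mm_set_epi64x(2, 1); // lane 0 = 1, lane 1 = 2
        let r = _mm512_broadcast_i64x2(a);
        let mut out = [0i64; 8];
        _mm512_storeu_epi64(out.as_mut_ptr(), r);
        assert_eq!(out, [1, 2, 1, 2, 1, 2, 1, 2]);
    }
}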

// Extract

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        }
    }
}

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m512) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extractf32x8_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8()))
    }
}

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extractf32x8_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO))
    }
}
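
// Illustrative sketch (assumes avx512dq): IMM8 = 0 extracts the low 256 bits
// and IMM8 = 1 the high 256 bits.
#[cfg(test)]
fn _sketch_mm512_extractf32x8_ps() {
    // SAFETY: illustrative only; requires avx512dq at runtime.
    unsafe {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let hi = _mm512_extractf32x8_ps::<1>(a);
        let mut out = [0f32; 8];
        _mm256_storeu_ps(out.as_mut_ptr(), hi);
        assert_eq!(out, [8., 9., 10., 11., 12., 13., 14., 15.]);
    }
}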

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1]),
            _ => simd_shuffle!(a, a, [2, 3]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m256d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extractf64x2_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extractf64x2_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        match IMM8 & 3 {
            0 => simd_shuffle!(a, a, [0, 1]),
            1 => simd_shuffle!(a, a, [2, 3]),
            2 => simd_shuffle!(a, a, [4, 5]),
            _ => simd_shuffle!(a, a, [6, 7]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m512d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
    }
}
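
// Illustrative sketch (assumes avx512dq): for the 512-bit source IMM8 selects
// one of four 128-bit chunks, so IMM8 = 2 yields doubles 4 and 5.
#[cfg(test)]
fn _sketch_mm512_extractf64x2_pd() {
    // SAFETY: illustrative only; requires avx512dq at runtime.
    unsafe {
        let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm512_extractf64x2_pd::<2>(a);
        let mut out = [0f64; 2];
        _mm_storeu_pd(out.as_mut_ptr(), r);
        assert_eq!(out, [4., 5.]);
    }
}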

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x16();
        let b: i32x8 = match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        };
        transmute(b)
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
    }
}
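
// Illustrative sketch (assumes avx512dq): the integer extract mirrors the
// float form; IMM8 = 1 returns elements 8..16.
#[cfg(test)]
fn _sketch_mm512_extracti32x8_epi32() {
    // SAFETY: illustrative only; requires avx512dq at runtime.
    unsafe {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_extracti32x8_epi32::<1>(a);
        let mut out = [0i32; 8];
        _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
        assert_eq!(out, [8, 9, 10, 11, 12, 13, 14, 15]);
    }
}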

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i64x4();
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1]),
            _ => simd_shuffle!(a, a, [2, 3]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i64x8();
        match IMM8 & 3 {
            0 => simd_shuffle!(a, a, [0, 1]),
            1 => simd_shuffle!(a, a, [2, 3]),
            2 => simd_shuffle!(a, a, [4, 5]),
            _ => simd_shuffle!(a, a, [6, 7]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m512i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
}

// Insert

/// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_castps256_ps512(b);
        match IMM8 & 1 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
                )
            }
            _ => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
                )
            }
        }
    }
}
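
// Illustrative sketch (assumes avx512dq): with IMM8 = 1 the upper 256 bits of
// `a` are replaced by `b` and the lower half is kept unchanged.
#[cfg(test)]
fn _sketch_mm512_insertf32x8() {
    // SAFETY: illustrative only; requires avx512dq at runtime.
    unsafe {
        let a = _mm512_set1_ps(1.0);
        let b = _mm256_set1_ps(2.0);
        let r = _mm512_insertf32x8::<1>(a, b);
        let mut out = [0f32; 16];
        _mm512_storeu_ps(out.as_mut_ptr(), r);
        assert_eq!(out[..8], [1.0f32; 8]);
        assert_eq!(out[8..], [2.0f32; 8]);
    }
}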

/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_insertf32x8<const IMM8: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m256,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_insertf32x8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_insertf32x8<const IMM8: i32>(k: __mmask16, a: __m512, b: __m256) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, c, f32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_castpd128_pd256(b);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_insertf64x2<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m128d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_insertf64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m128d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, c, f64x4::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_castpd128_pd512(b);
        match IMM8 & 3 {
            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
        }
    }
}
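
// Illustrative sketch (assumes avx512dq): IMM8 = 2 overwrites doubles 4 and 5
// with `b`, leaving the remaining lanes of `a` intact.
#[cfg(test)]
fn _sketch_mm512_insertf64x2() {
    // SAFETY: illustrative only; requires avx512dq at runtime.
    unsafe {
        let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm_set_pd(9., 8.); // lane 0 = 8, lane 1 = 9
        let r = _mm512_insertf64x2::<2>(a, b);
        let mut out = [0f64; 8];
        _mm512_storeu_pd(out.as_mut_ptr(), r);
        assert_eq!(out, [0., 1., 2., 3., 8., 9., 6., 7.]);
    }
}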

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_insertf64x2<const IMM8: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m128d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_insertf64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m128d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, c, f64x8::ZERO))
    }
}

/// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x16();
        let b = _mm512_castsi256_si512(b).as_i32x16();
        let r: i32x16 = match IMM8 & 1 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
                )
            }
            _ => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
                )
            }
        };
        transmute(r)
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_inserti32x8<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m256i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_inserti32x8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_inserti32x8<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, c, i32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        match IMM8 & 1 {
            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_inserti64x2<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m128i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_inserti64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, c, i64x4::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i64x8();
        let b = _mm512_castsi128_si512(b).as_i64x8();
        match IMM8 & 3 {
            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_inserti64x2<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m128i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_inserti64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, c, i64x8::ZERO))
    }
}

// Convert

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_pd&ig_expand=1437)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundepi64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING))
    }
}
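
// Illustrative sketch (assumes avx512dq): i64 -> f64 conversion is inexact
// once a value exceeds 2^53, which makes the ROUNDING parameter observable.
#[cfg(test)]
fn _sketch_mm512_cvt_roundepi64_pd() {
    // SAFETY: illustrative only; requires avx512dq at runtime.
    unsafe {
        let a = _mm512_set1_epi64((1 << 53) + 1); // not representable as f64
        let down = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
        let up = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
        let (mut lo, mut hi) = ([0f64; 8], [0f64; 8]);
        _mm512_storeu_pd(lo.as_mut_ptr(), down);
        _mm512_storeu_pd(hi.as_mut_ptr(), up);
        assert_eq!(lo, [9007199254740992.0; 8]); // 2^53, rounded down
        assert_eq!(hi, [9007199254740994.0; 8]); // next f64 above 2^53
    }
}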

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_pd&ig_expand=1438)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundepi64_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512i,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_pd&ig_expand=1439)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundepi64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_pd&ig_expand=1705)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtepi64_pd(a: __m128i) -> __m128d {
    unsafe { transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_pd&ig_expand=1706)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let b = _mm_cvtepi64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_pd&ig_expand=1707)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let b = _mm_cvtepi64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
    }
}
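
// Illustrative sketch (assumes avx512dq and avx512vl): a lane-wise i64 -> f64
// conversion performed under the current MXCSR rounding mode.
#[cfg(test)]
fn _sketch_mm_cvtepi64_pd() {
    // SAFETY: illustrative only; requires avx512dq and avx512vl at runtime.
    unsafe {
        let a = _mm_set_epi64x(-3, 2); // lane 0 = 2, lane 1 = -3
        let r = _mm_cvtepi64_pd(a);
        let mut out = [0f64; 2];
        _mm_storeu_pd(out.as_mut_ptr(), r);
        assert_eq!(out, [2.0, -3.0]);
    }
}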

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_pd&ig_expand=1708)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d {
    unsafe { transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_pd&ig_expand=1709)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
    unsafe {
        let b = _mm256_cvtepi64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_pd&ig_expand=1710)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d {
    unsafe {
        let b = _mm256_cvtepi64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd&ig_expand=1711)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d {
    unsafe { transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_pd&ig_expand=1712)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        let b = _mm512_cvtepi64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_pd&ig_expand=1713)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        let b = _mm512_cvtepi64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_ps&ig_expand=1443)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundepi64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_ps&ig_expand=1444)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundepi64_ps<const ROUNDING: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512i,
) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_ps&ig_expand=1445)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundepi64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_ps&ig_expand=1723)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtepi64_ps(a: __m128i) -> __m128 {
    _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a)
}
2382
2383/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2384/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2385/// not set).
2386///
2387/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_ps&ig_expand=1724)
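///
/// A minimal usage sketch (illustrative values; assumes the `avx512dq` and
/// `avx512vl` target features are available at runtime):
///
/// ```ignore
/// let src = _mm_set1_ps(-1.0);
/// let a = _mm_set1_epi64x(5);
/// // Bit 0 of the writemask is set, so lane 0 becomes 5.0;
/// // lane 1 is copied from `src` and stays -1.0.
/// let r = _mm_mask_cvtepi64_ps(src, 0b01, a);
/// ```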
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe { transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_ps&ig_expand=1725)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 {
    _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a)
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_ps&ig_expand=1726)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 {
    unsafe { transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_ps&ig_expand=1727)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
    unsafe {
        let b = _mm256_cvtepi64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_ps&ig_expand=1728)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 {
    unsafe {
        let b = _mm256_cvtepi64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_ps&ig_expand=1729)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 {
    unsafe { transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_ps&ig_expand=1730)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        let b = _mm512_cvtepi64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_ps&ig_expand=1731)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        let b = _mm512_cvtepi64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_pd&ig_expand=1455)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundepu64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_pd&ig_expand=1456)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundepu64_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512i,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_pd&ig_expand=1457)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundepu64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_pd&ig_expand=1827)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtepu64_pd(a: __m128i) -> __m128d {
    unsafe { transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_pd&ig_expand=1828)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let b = _mm_cvtepu64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_pd&ig_expand=1829)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let b = _mm_cvtepu64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_pd&ig_expand=1830)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d {
    unsafe { transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_pd&ig_expand=1831)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
    unsafe {
        let b = _mm256_cvtepu64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_pd&ig_expand=1832)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d {
    unsafe {
        let b = _mm256_cvtepu64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_pd&ig_expand=1833)
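///
/// A minimal usage sketch (illustrative values; assumes the `avx512dq`
/// target feature is available at runtime). Unlike the `epi64` variant, the
/// source lanes are read as unsigned:
///
/// ```ignore
/// let a = _mm512_set1_epi64(u64::MAX as i64);
/// // Each lane holds 2^64 - 1 when read as unsigned, so the conversion
/// // yields approximately 1.8446744e19 rather than -1.0.
/// let r = _mm512_cvtepu64_pd(a);
/// ```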
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d {
    unsafe { transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_pd&ig_expand=1834)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        let b = _mm512_cvtepu64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_pd&ig_expand=1835)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d {
    unsafe {
        let b = _mm512_cvtepu64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_ps&ig_expand=1461)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundepu64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_ps&ig_expand=1462)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundepu64_ps<const ROUNDING: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512i,
) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_ps&ig_expand=1463)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundepu64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_ps&ig_expand=1845)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtepu64_ps(a: __m128i) -> __m128 {
    _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a)
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_ps&ig_expand=1846)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe { transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_ps&ig_expand=1847)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 {
    _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a)
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_ps&ig_expand=1848)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 {
    unsafe { transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_ps&ig_expand=1849)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
    unsafe {
        let b = _mm256_cvtepu64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_ps&ig_expand=1850)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 {
    unsafe {
        let b = _mm256_cvtepu64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_ps&ig_expand=1851)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 {
    unsafe { transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_ps&ig_expand=1852)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        let b = _mm512_cvtepu64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_ps&ig_expand=1853)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 {
    unsafe {
        let b = _mm512_cvtepu64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64&ig_expand=1472)
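///
/// A minimal usage sketch (illustrative values; assumes the `avx512dq`
/// target feature is available at runtime):
///
/// ```ignore
/// let a = _mm512_set1_pd(2.5);
/// // Round-to-nearest ties to even, so 2.5 converts to 2 in every lane.
/// let nearest = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// // Rounding up instead yields 3 in every lane.
/// let up = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
/// ```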
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundpd_epi64<const ROUNDING: i32>(a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi64&ig_expand=1473)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundpd_epi64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi64&ig_expand=1474)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundpd_epi64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi64&ig_expand=1941)
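///
/// A minimal usage sketch (illustrative values; assumes the `avx512dq` and
/// `avx512vl` target features are available at runtime). Rounding follows
/// the current `MXCSR.RC` setting, which defaults to round-to-nearest-even:
///
/// ```ignore
/// let a = _mm_set_pd(3.7, -1.5);
/// // With the default rounding mode, 3.7 -> 4 and -1.5 -> -2.
/// let r = _mm_cvtpd_epi64(a);
/// ```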
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtpd_epi64(a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi64&ig_expand=1942)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi64&ig_expand=1943)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi64&ig_expand=1944)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi64&ig_expand=1945)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi64&ig_expand=1946)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi64&ig_expand=1947)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi64&ig_expand=1948)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvtpd2qq_512(
            a.as_f64x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi64&ig_expand=1949)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi64&ig_expand=1514)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundps_epi64<const ROUNDING: i32>(a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi64&ig_expand=1515)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundps_epi64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi64&ig_expand=1516)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundps_epi64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi64&ig_expand=2075)
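///
/// A minimal usage sketch (illustrative values; assumes the `avx512dq` and
/// `avx512vl` target features are available at runtime):
///
/// ```ignore
/// let a = _mm_set_ps(0.0, 0.0, 8.0, 4.0);
/// // Only the two lowest f32 lanes are converted, widening to two i64 lanes.
/// let r = _mm_cvtps_epi64(a);
/// // `r` holds the 64-bit integers 4 and 8.
/// ```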
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtps_epi64(a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi64&ig_expand=2076)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi64&ig_expand=2077)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi64&ig_expand=2078)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtps_epi64(a: __m128) -> __m256i {
    _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi64&ig_expand=2079)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi64&ig_expand=2080)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi64&ig_expand=2081)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtps_epi64(a: __m256) -> __m512i {
    _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi64&ig_expand=2082)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvtps2qq_512(
            a.as_f32x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi64&ig_expand=2083)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu64&ig_expand=1478)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundpd_epu64<const ROUNDING: i32>(a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu64&ig_expand=1479)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundpd_epu64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu64&ig_expand=1480)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundpd_epu64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu64&ig_expand=1959)
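///
/// A minimal usage sketch (illustrative values; assumes the `avx512dq` and
/// `avx512vl` target features are available at runtime):
///
/// ```ignore
/// let a = _mm_set1_pd(9.5);
/// // With the default round-to-nearest-even mode, 9.5 converts to the
/// // unsigned value 10 in both lanes.
/// let r = _mm_cvtpd_epu64(a);
/// ```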
3338#[inline]
3339#[target_feature(enable = "avx512dq,avx512vl")]
3340#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3342pub fn _mm_cvtpd_epu64(a: __m128d) -> __m128i {
3343    _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a)
3344}
3345
3346/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3347/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3348/// not set).
3349///
3350/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu64&ig_expand=1960)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu64&ig_expand=1961)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu64&ig_expand=1962)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu64&ig_expand=1963)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu64&ig_expand=1964)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu64&ig_expand=1965)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu64&ig_expand=1966)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvtpd2uqq_512(
            a.as_f64x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu64&ig_expand=1967)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu64&ig_expand=1520)
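///
/// # Example
///
/// A short sketch (illustrative only; assumes an `avx512dq`-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm256_set1_ps(2.5);
/// // All eight lanes round to nearest-even (2.5 -> 2); exceptions suppressed.
/// let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// ```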
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu64&ig_expand=1521)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu64&ig_expand=1522)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu64&ig_expand=2093)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtps_epu64(a: __m128) -> __m128i {
    _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu64&ig_expand=2094)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu64&ig_expand=2095)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu64&ig_expand=2096)
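///
/// # Example
///
/// An illustrative sketch (assumes `avx512dq` and `avx512vl`); the four f32
/// lanes of the 128-bit input widen into four u64 lanes of the 256-bit result:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes [0..4] = [1.0, 2.0, 3.0, 4.0]
/// let r = _mm256_cvtps_epu64(a); // u64 lanes [1, 2, 3, 4]
/// ```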
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtps_epu64(a: __m128) -> __m256i {
    _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu64&ig_expand=2097)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu64&ig_expand=2098)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu64&ig_expand=2099)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtps_epu64(a: __m256) -> __m512i {
    _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu64&ig_expand=2100)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvtps2uqq_512(
            a.as_f32x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu64&ig_expand=2101)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi64&ig_expand=2264)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundpd_epi64<const SAE: i32>(a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi64&ig_expand=2265)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundpd_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi64&ig_expand=2266)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundpd_epi64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi64&ig_expand=2329)
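///
/// # Example
///
/// A minimal sketch (illustrative only; assumes `avx512dq` and `avx512vl`).
/// Truncation always rounds toward zero, regardless of MXCSR:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_pd(-1.9, 2.9); // lanes [0, 1] = [2.9, -1.9]
/// let r = _mm_cvttpd_epi64(a); // lanes [0, 1] = [2, -1]
/// ```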
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttpd_epi64(a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi64&ig_expand=2330)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi64&ig_expand=2331)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi64&ig_expand=2332)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi64&ig_expand=2333)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi64&ig_expand=2334)
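///
/// # Example
///
/// An illustrative sketch (assumes `avx512dq` and `avx512vl`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm256_set_pd(4.7, 3.7, 2.7, 1.7); // lanes [0..4] = [1.7, 2.7, 3.7, 4.7]
/// // Mask 0b0101: lanes 0 and 2 are truncated; lanes 1 and 3 are zeroed.
/// let r = _mm256_maskz_cvttpd_epi64(0b0101, a); // lanes = [1, 0, 3, 0]
/// ```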
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi64&ig_expand=2335)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi64&ig_expand=2336)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvttpd2qq_512(
            a.as_f64x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi64&ig_expand=2337)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi64&ig_expand=2294)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundps_epi64<const SAE: i32>(a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi64&ig_expand=2295)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundps_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi64&ig_expand=2296)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundps_epi64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi64&ig_expand=2420)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttps_epi64(a: __m128) -> __m128i {
    _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi64&ig_expand=2421)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi64&ig_expand=2422)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi64&ig_expand=2423)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttps_epi64(a: __m128) -> __m256i {
    _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi64&ig_expand=2424)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi64&ig_expand=2425)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi64&ig_expand=2426)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttps_epi64(a: __m256) -> __m512i {
    _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi64&ig_expand=2427)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvttps2qq_512(
            a.as_f32x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi64&ig_expand=2428)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu64&ig_expand=1965)
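///
/// # Example
///
/// A minimal sketch (illustrative only; assumes an `avx512dq`-capable CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_pd(7.9);
/// // Truncation drops the fraction; _MM_FROUND_NO_EXC suppresses exception flags.
/// let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a); // all lanes = 7
/// ```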
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundpd_epu64<const SAE: i32>(a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu64&ig_expand=1966)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundpd_epu64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu64&ig_expand=1967)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundpd_epu64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_setzero_si512(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu64&ig_expand=2347)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttpd_epu64(a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu64&ig_expand=2348)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu64&ig_expand=2349)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu64&ig_expand=2350)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu64&ig_expand=2351)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the results in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu64&ig_expand=2352)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu64&ig_expand=2353)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu64&ig_expand=2354)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvttpd2uqq_512(
            a.as_f64x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu64&ig_expand=2355)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu64&ig_expand=2300)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundps_epu64<const SAE: i32>(a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu64&ig_expand=2301)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundps_epu64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu64&ig_expand=2302)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundps_epu64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_setzero_si512(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu64&ig_expand=2438)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttps_epu64(a: __m128) -> __m128i {
    _mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu64&ig_expand=2439)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu64&ig_expand=2440)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu64&ig_expand=2441)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttps_epu64(a: __m128) -> __m256i {
    _mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu64&ig_expand=2442)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu64&ig_expand=2443)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu64&ig_expand=2444)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttps_epu64(a: __m256) -> __m512i {
    _mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu64&ig_expand=2445)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvttps2uqq_512(
            a.as_f32x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu64&ig_expand=2446)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a)
}

// Multiply-Low

/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
/// the low 64 bits of the intermediate integers in `dst`.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi64&ig_expand=4778)
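///
/// # Example
///
/// An illustrative sketch (assumes `avx512dq` and `avx512vl`); only the low
/// 64 bits of each 128-bit product are kept, so large products wrap modulo 2^64:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_epi64x(1i64 << 40, -3);
/// let b = _mm_set_epi64x(1i64 << 30, 5);
/// // lane 0: -3 * 5 = -15; lane 1: 2^70 wraps to 2^6 = 64.
/// let r = _mm_mullo_epi64(a, b);
/// ```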
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vpmullq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) }
}

/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
/// `src` if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi64&ig_expand=4776)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vpmullq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let b = _mm_mullo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
}

/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi64&ig_expand=4777)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vpmullq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let b = _mm_mullo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
}

/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
/// the low 64 bits of the intermediate integers in `dst`.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi64&ig_expand=4781)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vpmullq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) }
}

/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
/// `src` if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi64&ig_expand=4779)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vpmullq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let b = _mm256_mullo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
    }
}

/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi64&ig_expand=4780)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vpmullq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let b = _mm256_mullo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
    }
}

/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
/// the low 64 bits of the intermediate integers in `dst`.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi64&ig_expand=4784)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vpmullq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
}

/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
/// `src` if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi64&ig_expand=4782)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vpmullq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let b = _mm512_mullo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
    }
}

/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi64&ig_expand=4783)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vpmullq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let b = _mm512_mullo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
    }
}

// Mask Registers

/// Convert 8-bit mask a to a 32-bit integer value and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask8_u32&ig_expand=1891)
4465#[inline]
4466#[target_feature(enable = "avx512dq")]
4467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4468pub fn _cvtmask8_u32(a: __mmask8) -> u32 {
4469    a as u32
4470}
4471
4472/// Convert 32-bit integer value a to an 8-bit mask and store the result in dst.
4473///
4474/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask8&ig_expand=2467)
4475#[inline]
4476#[target_feature(enable = "avx512dq")]
4477#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4478pub fn _cvtu32_mask8(a: u32) -> __mmask8 {
4479    a as __mmask8
4480}
4481
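// Round-trip sketch (arbitrary example value): both conversions are plain
// integer casts, so the only caveat is truncation to 8 bits on the way in.
//
//     let k: __mmask8 = 0b1010_0001;
//     let x = _cvtmask8_u32(k);  // x == 0xa1u32
//     let k2 = _cvtu32_mask8(x); // k2 == k
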
/// Add 16-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask16&ig_expand=3903)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Like the `kaddw` instruction, the addition wraps on overflow.
    a.wrapping_add(b)
}

/// Add 8-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask8&ig_expand=3906)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
    // Like the `kaddb` instruction, the addition wraps on overflow.
    a.wrapping_add(b)
}

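// Wrap-around sketch (example values): like the hardware `kaddb`, the sum is
// taken modulo 2^8, so an overflowing add comes back around to zero.
//
//     let s = _kadd_mask8(0xff, 0x01); // s == 0x00, not a widened 0x100
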
/// Bitwise AND of 8-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask8&ig_expand=3911)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
    a & b
}

/// Bitwise AND NOT of 8-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask8&ig_expand=3916)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
    _knot_mask8(a) & b
}

/// Bitwise NOT of 8-bit mask a, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask8&ig_expand=3922)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _knot_mask8(a: __mmask8) -> __mmask8 {
    a ^ 0b11111111
}

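// Boolean-algebra sketch over example masks (illustrative only):
//
//     let a: __mmask8 = 0b1100;
//     let b: __mmask8 = 0b1010;
//     let and = _kand_mask8(a, b);   // 0b1000
//     let andn = _kandn_mask8(a, b); // !a & b == 0b0010
//     let not = _knot_mask8(a);      // 0b1111_0011
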
/// Bitwise OR of 8-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask8&ig_expand=3927)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
    a | b
}

/// Bitwise XNOR of 8-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask8&ig_expand=3969)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
    _knot_mask8(_kxor_mask8(a, b))
}

/// Bitwise XOR of 8-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask8&ig_expand=3974)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
    a ^ b
}

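// Sketch (example values): XNOR is defined above as NOT(XOR), so the two
// results are bitwise complements of each other.
//
//     let x = _kxor_mask8(0b1100, 0b1010);   // 0b0000_0110
//     let xn = _kxnor_mask8(0b1100, 0b1010); // 0b1111_1001
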
/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask8_u8&ig_expand=3931)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask8(a, b);
    *all_ones = (tmp == 0xff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 8-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask8_u8&ig_expand=3936)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
    (_kor_mask8(a, b) == 0xff) as u8
}

/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask8_u8&ig_expand=3941)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
    (_kor_mask8(a, b) == 0) as u8
}

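// Sketch of the OR-test helpers (example masks): the pointer variant reports
// the all-ones flag through `all_ones` and returns the all-zeros flag.
//
//     let mut ones = 0u8;
//     let z = unsafe { _kortest_mask8_u8(0b0000_1111, 0b1111_0000, &mut ones) };
//     // z == 0 (the OR is not zero) and ones == 1 (the OR is 0xff)
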
/// Shift 8-bit mask a left by count bits while shifting in zeros, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask8&ig_expand=3945)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
    // Like `kshiftlb`, a count past bit 7 yields an all-zeros mask; a plain
    // `a << COUNT` would be an overflowing shift instead.
    a.checked_shl(COUNT).unwrap_or(0)
}

/// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask8&ig_expand=3949)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
    // Like `kshiftrb`, a count past bit 7 yields an all-zeros mask.
    a.checked_shr(COUNT).unwrap_or(0)
}

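// Shift sketch (example values): the count is a const generic, and counts
// past bit 7 produce an all-zeros mask rather than an overflowing shift.
//
//     let l = _kshiftli_mask8::<2>(0b0000_0111); // 0b0001_1100
//     let r = _kshiftri_mask8::<2>(0b0001_1100); // 0b0000_0111
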
/// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask16_u8&ig_expand=3950)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 {
    *and_not = (_kandn_mask16(a, b) == 0) as u8;
    (_kand_mask16(a, b) == 0) as u8
}

/// Compute the bitwise AND of 8-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask8_u8&ig_expand=3953)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 {
    *and_not = (_kandn_mask8(a, b) == 0) as u8;
    (_kand_mask8(a, b) == 0) as u8
}

/// Compute the bitwise NOT of 16-bit mask a and then AND with 16-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask16_u8&ig_expand=3954)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
    (_kandn_mask16(a, b) == 0) as u8
}

/// Compute the bitwise NOT of 8-bit mask a and then AND with 8-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask8_u8&ig_expand=3957)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
    (_kandn_mask8(a, b) == 0) as u8
}

/// Compute the bitwise AND of 16-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask16_u8&ig_expand=3958)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
    (_kand_mask16(a, b) == 0) as u8
}

/// Compute the bitwise AND of 8-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask8_u8&ig_expand=3961)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
    (_kand_mask8(a, b) == 0) as u8
}

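// Sketch tying the test helpers together (example masks): here `a & b` is
// nonzero while `!a & b` is zero, so only the "carry"-style check fires.
//
//     let a: __mmask8 = 0b1111;
//     let b: __mmask8 = 0b0101;
//     let zf = _ktestz_mask8_u8(a, b); // 0: a & b == 0b0101 != 0
//     let cf = _ktestc_mask8_u8(a, b); // 1: !a & b == 0
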
/// Load 8-bit mask from memory.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask8&ig_expand=3999)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 {
    *mem_addr
}

/// Store 8-bit mask to memory.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask8&ig_expand=6468)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) {
    *mem_addr = a;
}

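// Store/load round-trip sketch (the raw pointers must be valid, which is why
// both intrinsics are `unsafe`):
//
//     let mut slot: __mmask8 = 0;
//     unsafe { _store_mask8(&mut slot, 0b1001_0110) };
//     let k = unsafe { _load_mask8(&slot) }; // k == 0b1001_0110
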
/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
/// integer in a.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi32_mask&ig_expand=4612)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_movepi32_mask(a: __m128i) -> __mmask8 {
    let zero = _mm_setzero_si128();
    _mm_cmplt_epi32_mask(a, zero)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
/// integer in a.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi32_mask&ig_expand=4613)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 {
    let zero = _mm256_setzero_si256();
    _mm256_cmplt_epi32_mask(a, zero)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
/// integer in a.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi32_mask&ig_expand=4614)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 {
    let zero = _mm512_setzero_si512();
    _mm512_cmplt_epi32_mask(a, zero)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
/// integer in a.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_mask&ig_expand=4615)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_movepi64_mask(a: __m128i) -> __mmask8 {
    let zero = _mm_setzero_si128();
    _mm_cmplt_epi64_mask(a, zero)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
/// integer in a.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi64_mask&ig_expand=4616)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 {
    let zero = _mm256_setzero_si256();
    _mm256_cmplt_epi64_mask(a, zero)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
/// integer in a.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi64_mask&ig_expand=4617)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 {
    let zero = _mm512_setzero_si512();
    _mm512_cmplt_epi64_mask(a, zero)
}

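// Sign-bit extraction sketch (example values): mask bit i is the most
// significant bit of lane i, which the implementations above read as `x < 0`.
//
//     let v = _mm256_set_epi64x(-1, 2, -3, 4); // lanes 3..0
//     let k = _mm256_movepi64_mask(v);         // 0b1010: lanes 1 and 3 are negative
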
/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
/// bit in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi32&ig_expand=4625)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovm2d))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_movm_epi32(k: __mmask8) -> __m128i {
    let ones = _mm_set1_epi32(-1);
    _mm_maskz_mov_epi32(k, ones)
}

/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
/// bit in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi32&ig_expand=4626)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovm2d))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_movm_epi32(k: __mmask8) -> __m256i {
    let ones = _mm256_set1_epi32(-1);
    _mm256_maskz_mov_epi32(k, ones)
}

/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
/// bit in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi32&ig_expand=4627)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vpmovm2d))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_movm_epi32(k: __mmask16) -> __m512i {
    let ones = _mm512_set1_epi32(-1);
    _mm512_maskz_mov_epi32(k, ones)
}

/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
/// bit in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi64&ig_expand=4628)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovm2q))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_movm_epi64(k: __mmask8) -> __m128i {
    let ones = _mm_set1_epi64x(-1);
    _mm_maskz_mov_epi64(k, ones)
}

/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
/// bit in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi64&ig_expand=4629)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovm2q))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_movm_epi64(k: __mmask8) -> __m256i {
    let ones = _mm256_set1_epi64x(-1);
    _mm256_maskz_mov_epi64(k, ones)
}

/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
/// bit in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi64&ig_expand=4630)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vpmovm2q))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_movm_epi64(k: __mmask8) -> __m512i {
    let ones = _mm512_set1_epi64(-1);
    _mm512_maskz_mov_epi64(k, ones)
}

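// Mask-to-vector sketch (example value): each set bit of k expands to an
// all-ones 64-bit lane, so _mm512_movepi64_mask recovers the original mask.
//
//     let v = _mm512_movm_epi64(0b0000_0101); // lanes 0 and 2 become -1
//     let k = _mm512_movepi64_mask(v);        // k == 0b0000_0101
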
// Range

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_pd&ig_expand=5210)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_range_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
    static_assert_uimm_bits!(IMM8, 4);
    static_assert_sae!(SAE);
    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), 0xff, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_pd&ig_expand=5208)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_range_round_pd<const IMM8: i32, const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        static_assert_sae!(SAE);
        transmute(vrangepd_512(
            a.as_f64x8(),
            b.as_f64x8(),
            IMM8,
            src.as_f64x8(),
            k,
            SAE,
        ))
    }
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_pd&ig_expand=5209)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_range_round_pd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    static_assert_uimm_bits!(IMM8, 4);
    static_assert_sae!(SAE);
    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_pd&ig_expand=5192)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_range_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), 0xff, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_pd&ig_expand=5190)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_range_pd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangepd_128(
            a.as_f64x2(),
            b.as_f64x2(),
            IMM8,
            src.as_f64x2(),
            k,
        ))
    }
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_pd&ig_expand=5191)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), k, a, b)
}

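// IMM8 encoding sketch (example: 0b0101, i.e. bits 1:0 = 01 select max and
// bits 3:2 = 01 keep the sign of the selected value):
//
//     let a = _mm_set_pd(-2.0, 1.0); // lanes: [1.0, -2.0]
//     let b = _mm_set_pd(-3.0, 4.0); // lanes: [4.0, -3.0]
//     let r = _mm_range_pd::<0b0101>(a, b); // lanes: [4.0, -2.0]
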
/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_pd&ig_expand=5195)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_range_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), 0xff, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_pd&ig_expand=5193)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_range_pd<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangepd_256(
            a.as_f64x4(),
            b.as_f64x4(),
            IMM8,
            src.as_f64x4(),
            k,
        ))
    }
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_pd&ig_expand=5194)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), k, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_pd&ig_expand=5198)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_range_pd<const IMM8: i32>(a: __m512d, b: __m512d) -> __m512d {
    static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), 0xff, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_pd&ig_expand=5196)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_range_pd<const IMM8: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangepd_512(
            a.as_f64x8(),
            b.as_f64x8(),
            IMM8,
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_pd&ig_expand=5197)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), k, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ps&ig_expand=5213)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_range_round_ps<const IMM8: i32, const SAE: i32>(a: __m512, b: __m512) -> __m512 {
    static_assert_uimm_bits!(IMM8, 4);
    static_assert_sae!(SAE);
    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), 0xffff, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_ps&ig_expand=5211)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_range_round_ps<const IMM8: i32, const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        static_assert_sae!(SAE);
        transmute(vrangeps_512(
            a.as_f32x16(),
            b.as_f32x16(),
            IMM8,
            src.as_f32x16(),
            k,
            SAE,
        ))
    }
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_ps&ig_expand=5212)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_range_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    static_assert_uimm_bits!(IMM8, 4);
    static_assert_sae!(SAE);
    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_ps&ig_expand=5201)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_range_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), 0xff, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ps&ig_expand=5199)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_range_ps<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangeps_128(
            a.as_f32x4(),
            b.as_f32x4(),
            IMM8,
            src.as_f32x4(),
            k,
        ))
    }
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ps&ig_expand=5200)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), k, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_ps&ig_expand=5204)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_range_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), 0xff, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_ps&ig_expand=5202)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_range_ps<const IMM8: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
    b: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangeps_256(
            a.as_f32x8(),
            b.as_f32x8(),
            IMM8,
            src.as_f32x8(),
            k,
        ))
    }
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_ps&ig_expand=5203)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), k, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_ps&ig_expand=5207)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_range_ps<const IMM8: i32>(a: __m512, b: __m512) -> __m512 {
    static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), 0xffff, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_ps&ig_expand=5205)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_range_ps<const IMM8: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangeps_512(
            a.as_f32x16(),
            b.as_f32x16(),
            IMM8,
            src.as_f32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
/// The lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// The upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_ps&ig_expand=5206)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_range_ps<const IMM8: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), k, a, b)
}

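// Absolute-max sketch (example values): IMM8 = 0b1011 combines op 11
// (absolute max) with sign control 10 (clear sign bit), yielding the
// magnitude of whichever input is larger in absolute value.
//
//     let a = _mm512_set1_ps(-8.0);
//     let b = _mm512_set1_ps(3.0);
//     let r = _mm512_range_ps::<0b1011>(a, b); // every lane == 8.0
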
5435/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5436/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5437/// of dst, and copy the upper element from a to the upper element of dst.
5438/// Lower 2 bits of IMM8 specifies the operation control:
5439///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5440/// Upper 2 bits of IMM8 specifies the sign control:
5441///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5442/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5443///
5444/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_sd&ig_expand=5216)
5445#[inline]
5446#[target_feature(enable = "avx512dq")]
5447#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5448#[rustc_legacy_const_generics(2, 3)]
5449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5450pub fn _mm_range_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
5451    static_assert_uimm_bits!(IMM8, 4);
5452    static_assert_sae!(SAE);
5453    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), 0xff, a, b)
5454}
5455
5456/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5457/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5458/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5459/// upper element from a to the upper element of dst.
5460/// Lower 2 bits of IMM8 specifies the operation control:
5461///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5462/// Upper 2 bits of IMM8 specifies the sign control:
5463///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5464/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5465///
5466/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214)
5467#[inline]
5468#[target_feature(enable = "avx512dq")]
5469#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5470#[rustc_legacy_const_generics(4, 5)]
5471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5472pub fn _mm_mask_range_round_sd<const IMM8: i32, const SAE: i32>(
5473    src: __m128d,
5474    k: __mmask8,
5475    a: __m128d,
5476    b: __m128d,
5477) -> __m128d {
5478    unsafe {
5479        static_assert_uimm_bits!(IMM8, 4);
5480        static_assert_sae!(SAE);
5481        transmute(vrangesd(
5482            a.as_f64x2(),
5483            b.as_f64x2(),
5484            src.as_f64x2(),
5485            k,
5486            IMM8,
5487            SAE,
5488        ))
5489    }
5490}
5491
5492/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5493/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5494/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5495/// element from a to the upper element of dst.
5496/// Lower 2 bits of IMM8 specifies the operation control:
5497///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5498/// Upper 2 bits of IMM8 specifies the sign control:
5499///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5500/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5501///
5502/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215)
5503#[inline]
5504#[target_feature(enable = "avx512dq")]
5505#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5506#[rustc_legacy_const_generics(3, 4)]
5507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5508pub fn _mm_maskz_range_round_sd<const IMM8: i32, const SAE: i32>(
5509    k: __mmask8,
5510    a: __m128d,
5511    b: __m128d,
5512) -> __m128d {
5513    static_assert_uimm_bits!(IMM8, 4);
5514    static_assert_sae!(SAE);
5515    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
5516}
5517
5518/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5519/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5520/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5521/// upper element from a to the upper element of dst.
5522/// Lower 2 bits of IMM8 specifies the operation control:
5523///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5524/// Upper 2 bits of IMM8 specifies the sign control:
5525///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5526///
5527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220)
5528#[inline]
5529#[target_feature(enable = "avx512dq")]
5530#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5531#[rustc_legacy_const_generics(4)]
5532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5533pub fn _mm_mask_range_sd<const IMM8: i32>(
5534    src: __m128d,
5535    k: __mmask8,
5536    a: __m128d,
5537    b: __m128d,
5538) -> __m128d {
5539    unsafe {
5540        static_assert_uimm_bits!(IMM8, 4);
5541        transmute(vrangesd(
5542            a.as_f64x2(),
5543            b.as_f64x2(),
5544            src.as_f64x2(),
5545            k,
5546            IMM8,
5547            _MM_FROUND_CUR_DIRECTION,
5548        ))
5549    }
5550}
5551
/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
/// element from a to the upper element of dst.
/// Lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_range_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
}

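// The writemask/zeromask distinction above is easiest to see in code. The
// sketch below is not part of the original source: a hypothetical, test-only
// helper exercising `_mm_mask_range_sd` and `_mm_maskz_range_sd` with
// IMM8 = 0b0101, i.e. operation control 01 (max) and sign control 01
// (sign from the compare result).
#[cfg(test)]
#[target_feature(enable = "avx512dq")]
fn _demo_range_sd_masking() {
    let src = _mm_set_pd(0.0, 42.0); // fallback for the lower element
    let a = _mm_set_pd(7.0, -3.0); // the upper element 7.0 is always copied to dst
    let b = _mm_set_pd(1.0, 2.0); // the upper element of b is ignored
    // Mask bit 0 set: the lower element is max(-3.0, 2.0) = 2.0.
    let r = _mm_mask_range_sd::<0b0101>(src, 0b1, a, b);
    assert_eq!(_mm_cvtsd_f64(r), 2.0);
    // Mask bit 0 clear: the writemask variant keeps src's lower element...
    let r = _mm_mask_range_sd::<0b0101>(src, 0b0, a, b);
    assert_eq!(_mm_cvtsd_f64(r), 42.0);
    // ...while the zeromask variant zeroes it instead.
    let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b);
    assert_eq!(_mm_cvtsd_f64(r), 0.0);
}
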
/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
/// of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
/// Lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_ss&ig_expand=5219)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_range_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 4);
    static_assert_sae!(SAE);
    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), 0xff, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
/// upper 3 packed elements from a to the upper elements of dst.
/// Lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_range_round_ss<const IMM8: i32, const SAE: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        static_assert_sae!(SAE);
        transmute(vrangess(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            IMM8,
            SAE,
        ))
    }
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
/// 3 packed elements from a to the upper elements of dst.
/// Lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_range_round_ss<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    static_assert_uimm_bits!(IMM8, 4);
    static_assert_sae!(SAE);
    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
/// upper 3 packed elements from a to the upper elements of dst.
/// Lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_range_ss<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangess(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
/// 3 packed elements from a to the upper elements of dst.
/// Lower 2 bits of IMM8 specify the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_range_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
}

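// A minimal sketch (not from the original source) of the SAE parameter on the
// `*_round_*` variants: `_MM_FROUND_NO_EXC` performs the same range operation
// but suppresses floating-point exception reporting, which matters when an
// input may be SNaN or denormal. The `SAE = 8` in the `assert_instr`
// attributes above is exactly the value of `_MM_FROUND_NO_EXC`.
#[cfg(test)]
#[target_feature(enable = "avx512dq")]
fn _demo_range_round_ss_sae() {
    let a = _mm_set_ps(4.0, 3.0, 2.0, -1.0);
    let b = _mm_set_ps(0.0, 0.0, 0.0, 0.5);
    // IMM8 = 0b0101 (max, sign from compare result); no exceptions are raised.
    let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 0.5);
    // The upper 3 elements come from a: dst = [0.5, 2.0, 3.0, 4.0], low to high.
}
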
// Reduce

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_pd&ig_expand=5438)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_undefined_pd(), 0xff, a)
}

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
/// copied from src to dst if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_pd&ig_expand=5436)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_round_pd<const IMM8: i32, const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_sae!(SAE);
        transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE))
    }
}

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
/// zeroed out if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_pd&ig_expand=5437)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_reduce_round_pd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a)
}

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_pd&ig_expand=5411)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_pd<const IMM8: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_pd::<IMM8>(_mm_undefined_pd(), 0xff, a)
}

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
/// copied from src to dst if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_pd&ig_expand=5409)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k))
    }
}

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
/// zeroed out if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_pd&ig_expand=5410)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_pd::<IMM8>(_mm_setzero_pd(), k, a)
}

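// What "reduced argument" means, as a sketch (not part of the original
// source): with M = imm8[7:4] fraction bits and the rounding mode taken from
// imm8[1:0], the operation computes a - round(a * 2^M) / 2^M per element.
// With M = 0 and truncation this is simply the signed fractional part.
#[cfg(test)]
#[target_feature(enable = "avx512dq,avx512vl")]
fn _demo_reduce_pd_fraction() {
    let a = _mm_set_pd(-2.25, 2.75);
    // IMM8 = _MM_FROUND_TO_ZERO (0x03): zero fraction bits kept, truncate.
    let r = _mm_reduce_pd::<_MM_FROUND_TO_ZERO>(a);
    // 2.75 - trunc(2.75) = 0.75 in the lower lane; the upper lane holds
    // -2.25 - trunc(-2.25) = -0.25.
    assert_eq!(_mm_cvtsd_f64(r), 0.75);
}
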
/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_pd&ig_expand=5414)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_pd<const IMM8: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_pd::<IMM8>(_mm256_undefined_pd(), 0xff, a)
}

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
/// copied from src to dst if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_pd&ig_expand=5412)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_pd<const IMM8: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k))
    }
}

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
/// zeroed out if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_pd&ig_expand=5413)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_pd::<IMM8>(_mm256_setzero_pd(), k, a)
}

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_pd&ig_expand=5417)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_pd<const IMM8: i32>(a: __m512d) -> __m512d {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_pd::<IMM8>(_mm512_undefined_pd(), 0xff, a)
}

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
/// copied from src to dst if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_pd&ig_expand=5415)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_pd<const IMM8: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducepd_512(
            a.as_f64x8(),
            IMM8,
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
/// zeroed out if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_pd&ig_expand=5416)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_pd::<IMM8>(_mm512_setzero_pd(), k, a)
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_ps&ig_expand=5444)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_undefined_ps(), 0xffff, a)
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
/// copied from src to dst if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_ps&ig_expand=5442)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_round_ps<const IMM8: i32, const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_sae!(SAE);
        transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE))
    }
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
/// zeroed out if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_ps&ig_expand=5443)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_reduce_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a)
}

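// A sketch (not from the original source) of the imm8[7:4] field: it selects
// how many fraction bits survive the rounding step. With imm8 = (1 << 4) |
// _MM_FROUND_TO_ZERO, values are truncated to multiples of 0.5 and the
// remainder is returned; _MM_FROUND_NO_EXC suppresses exception reporting.
#[cfg(test)]
#[target_feature(enable = "avx512dq")]
fn _demo_reduce_round_ps_bits() {
    let a = _mm512_set1_ps(2.75);
    let r = _mm512_reduce_round_ps::<{ (1 << 4) | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
    // 2.75 - trunc(2.75 * 2) / 2 = 2.75 - 2.5 = 0.25 in every lane.
    assert_eq!(_mm512_cvtss_f32(r), 0.25);
}
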
/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ps&ig_expand=5429)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_ps<const IMM8: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ps::<IMM8>(_mm_undefined_ps(), 0xff, a)
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
/// copied from src to dst if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ps&ig_expand=5427)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k))
    }
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
/// zeroed out if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ps&ig_expand=5428)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ps::<IMM8>(_mm_setzero_ps(), k, a)
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_ps&ig_expand=5432)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_reduce_ps<const IMM8: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_ps::<IMM8>(_mm256_undefined_ps(), 0xff, a)
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
/// copied from src to dst if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_ps&ig_expand=5430)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_reduce_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k))
    }
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
/// zeroed out if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_ps&ig_expand=5431)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_ps::<IMM8>(_mm256_setzero_ps(), k, a)
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_ps&ig_expand=5435)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_ps<const IMM8: i32>(a: __m512) -> __m512 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_ps::<IMM8>(_mm512_undefined_ps(), 0xffff, a)
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
/// copied from src to dst if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_ps&ig_expand=5433)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vreduceps_512(
            a.as_f32x16(),
            IMM8,
            src.as_f32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
/// zeroed out if the corresponding mask bit is not set).
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_ps&ig_expand=5434)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_reduce_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_ps::<IMM8>(_mm512_setzero_ps(), k, a)
}

/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
/// the upper element from a to the upper element of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_undefined_pd(), 0xff, a, b)
}

/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
/// to the upper element of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_round_sd<const IMM8: i32, const SAE: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_sae!(SAE);
        transmute(vreducesd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            IMM8,
            SAE,
        ))
    }
}

/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
/// to the upper element of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_reduce_round_sd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
}

/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
/// the upper element from a to the upper element of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_sd::<IMM8>(_mm_undefined_pd(), 0xff, a, b)
}

/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
/// to the upper element of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_sd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducesd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
/// to the upper element of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_reduce_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
}

/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
/// the upper 3 packed elements from a to the upper elements of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_undefined_ps(), 0xff, a, b)
}

/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
/// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements
/// from a to the upper elements of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_round_ss<const IMM8: i32, const SAE: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_sae!(SAE);
        transmute(vreducess(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            IMM8,
            SAE,
        ))
    }
}

/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements
/// from a to the upper elements of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_reduce_round_ss<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    static_assert_sae!(SAE);
    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
}

/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
/// the upper 3 packed elements from a to the upper elements of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_reduce_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ss::<IMM8>(_mm_undefined_ps(), 0xff, a, b)
}

/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
/// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements
/// from a to the upper elements of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_ss<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducess(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements
/// from a to the upper elements of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
}

// FP-Class

/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm8 can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_pd_mask&ig_expand=3493)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_fpclass_pd_mask::<IMM8>(0xff, a)
}

6674/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6675/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6676/// corresponding mask bit is not set).
6677/// imm8 can be a combination of:
6678///
6679///     - 0x01 // QNaN
6680///     - 0x02 // Positive Zero
6681///     - 0x04 // Negative Zero
6682///     - 0x08 // Positive Infinity
6683///     - 0x10 // Negative Infinity
6684///     - 0x20 // Denormal
6685///     - 0x40 // Negative
6686///     - 0x80 // SNaN
6687///
6688/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_pd_mask&ig_expand=3494)
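///
/// A minimal sketch of the zeromask behaviour, not part of Intel's documentation (assumes
/// a CPU with AVX512DQ and AVX512VL): lanes whose `k1` bit is clear report 0 regardless of
/// their category.
///
/// ```ignore
/// // Illustrative only; requires runtime AVX512DQ + AVX512VL support.
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_pd(f64::NAN);
///     // Both lanes are QNaN (0x01), but k1 = 0b01 keeps only lane 0.
///     assert_eq!(_mm_mask_fpclass_pd_mask::<0x01>(0b01, a), 0b01);
/// }
/// ```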
6689#[inline]
6690#[target_feature(enable = "avx512dq,avx512vl")]
6691#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6692#[rustc_legacy_const_generics(2)]
6693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6694pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
6695    unsafe {
6696        static_assert_uimm_bits!(IMM8, 8);
6697        transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1))
6698    }
6699}
6700
6701/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6702/// by imm8, and store the results in mask vector k.
6703/// imm8 can be a combination of:
6704///
6705///     - 0x01 // QNaN
6706///     - 0x02 // Positive Zero
6707///     - 0x04 // Negative Zero
6708///     - 0x08 // Positive Infinity
6709///     - 0x10 // Negative Infinity
6710///     - 0x20 // Denormal
6711///     - 0x40 // Negative
6712///     - 0x80 // SNaN
6713///
6714/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_pd_mask&ig_expand=3495)
6715#[inline]
6716#[target_feature(enable = "avx512dq,avx512vl")]
6717#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6718#[rustc_legacy_const_generics(1)]
6719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6720pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 {
6721    static_assert_uimm_bits!(IMM8, 8);
6722    _mm256_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6723}
6724
6725/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6726/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6727/// corresponding mask bit is not set).
6728/// imm8 can be a combination of:
6729///
6730///     - 0x01 // QNaN
6731///     - 0x02 // Positive Zero
6732///     - 0x04 // Negative Zero
6733///     - 0x08 // Positive Infinity
6734///     - 0x10 // Negative Infinity
6735///     - 0x20 // Denormal
6736///     - 0x40 // Negative
6737///     - 0x80 // SNaN
6738///
6739/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_pd_mask&ig_expand=3496)
6740#[inline]
6741#[target_feature(enable = "avx512dq,avx512vl")]
6742#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6743#[rustc_legacy_const_generics(2)]
6744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6745pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 {
6746    unsafe {
6747        static_assert_uimm_bits!(IMM8, 8);
6748        transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1))
6749    }
6750}
6751
6752/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6753/// by imm8, and store the results in mask vector k.
6754/// imm8 can be a combination of:
6755///
6756///     - 0x01 // QNaN
6757///     - 0x02 // Positive Zero
6758///     - 0x04 // Negative Zero
6759///     - 0x08 // Positive Infinity
6760///     - 0x10 // Negative Infinity
6761///     - 0x20 // Denormal
6762///     - 0x40 // Negative
6763///     - 0x80 // SNaN
6764///
6765/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_pd_mask&ig_expand=3497)
6766#[inline]
6767#[target_feature(enable = "avx512dq")]
6768#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6769#[rustc_legacy_const_generics(1)]
6770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6771pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 {
6772    static_assert_uimm_bits!(IMM8, 8);
6773    _mm512_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6774}
6775
6776/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6777/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6778/// corresponding mask bit is not set).
6779/// imm8 can be a combination of:
6780///
6781///     - 0x01 // QNaN
6782///     - 0x02 // Positive Zero
6783///     - 0x04 // Negative Zero
6784///     - 0x08 // Positive Infinity
6785///     - 0x10 // Negative Infinity
6786///     - 0x20 // Denormal
6787///     - 0x40 // Negative
6788///     - 0x80 // SNaN
6789///
6790/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_pd_mask&ig_expand=3498)
6791#[inline]
6792#[target_feature(enable = "avx512dq")]
6793#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6794#[rustc_legacy_const_generics(2)]
6795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6796pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 {
6797    unsafe {
6798        static_assert_uimm_bits!(IMM8, 8);
6799        transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1))
6800    }
6801}
6802
6803/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6804/// by imm8, and store the results in mask vector k.
6805/// imm8 can be a combination of:
6806///
6807///     - 0x01 // QNaN
6808///     - 0x02 // Positive Zero
6809///     - 0x04 // Negative Zero
6810///     - 0x08 // Positive Infinity
6811///     - 0x10 // Negative Infinity
6812///     - 0x20 // Denormal
6813///     - 0x40 // Negative
6814///     - 0x80 // SNaN
6815///
6816/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ps_mask&ig_expand=3505)
6817#[inline]
6818#[target_feature(enable = "avx512dq,avx512vl")]
6819#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6820#[rustc_legacy_const_generics(1)]
6821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6822pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
6823    static_assert_uimm_bits!(IMM8, 8);
6824    _mm_mask_fpclass_ps_mask::<IMM8>(0xff, a)
6825}
6826
6827/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6828/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6829/// corresponding mask bit is not set).
6830/// imm8 can be a combination of:
6831///
6832///     - 0x01 // QNaN
6833///     - 0x02 // Positive Zero
6834///     - 0x04 // Negative Zero
6835///     - 0x08 // Positive Infinity
6836///     - 0x10 // Negative Infinity
6837///     - 0x20 // Denormal
6838///     - 0x40 // Negative
6839///     - 0x80 // SNaN
6840///
6841/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ps_mask&ig_expand=3506)
6842#[inline]
6843#[target_feature(enable = "avx512dq,avx512vl")]
6844#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6845#[rustc_legacy_const_generics(2)]
6846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6847pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
6848    unsafe {
6849        static_assert_uimm_bits!(IMM8, 8);
6850        transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1))
6851    }
6852}
6853
6854/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6855/// by imm8, and store the results in mask vector k.
6856/// imm8 can be a combination of:
6857///
6858///     - 0x01 // QNaN
6859///     - 0x02 // Positive Zero
6860///     - 0x04 // Negative Zero
6861///     - 0x08 // Positive Infinity
6862///     - 0x10 // Negative Infinity
6863///     - 0x20 // Denormal
6864///     - 0x40 // Negative
6865///     - 0x80 // SNaN
6866///
6867/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_ps_mask&ig_expand=3507)
6868#[inline]
6869#[target_feature(enable = "avx512dq,avx512vl")]
6870#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6871#[rustc_legacy_const_generics(1)]
6872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6873pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 {
6874    static_assert_uimm_bits!(IMM8, 8);
6875    _mm256_mask_fpclass_ps_mask::<IMM8>(0xff, a)
6876}
6877
6878/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6879/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6880/// corresponding mask bit is not set).
6881/// imm8 can be a combination of:
6882///
6883///     - 0x01 // QNaN
6884///     - 0x02 // Positive Zero
6885///     - 0x04 // Negative Zero
6886///     - 0x08 // Positive Infinity
6887///     - 0x10 // Negative Infinity
6888///     - 0x20 // Denormal
6889///     - 0x40 // Negative
6890///     - 0x80 // SNaN
6891///
6892/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_ps_mask&ig_expand=3508)
6893#[inline]
6894#[target_feature(enable = "avx512dq,avx512vl")]
6895#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6896#[rustc_legacy_const_generics(2)]
6897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6898pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 {
6899    unsafe {
6900        static_assert_uimm_bits!(IMM8, 8);
6901        transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1))
6902    }
6903}
6904
6905/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6906/// by imm8, and store the results in mask vector k.
6907/// imm8 can be a combination of:
6908///
6909///     - 0x01 // QNaN
6910///     - 0x02 // Positive Zero
6911///     - 0x04 // Negative Zero
6912///     - 0x08 // Positive Infinity
6913///     - 0x10 // Negative Infinity
6914///     - 0x20 // Denormal
6915///     - 0x40 // Negative
6916///     - 0x80 // SNaN
6917///
6918/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_ps_mask&ig_expand=3509)
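///
/// A minimal illustrative sketch, not part of Intel's documentation (assumes a CPU with
/// AVX512DQ): with 16 lanes the result is a full `__mmask16`, one bit per element.
///
/// ```ignore
/// // Illustrative only; requires runtime AVX512DQ support.
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(f32::NEG_INFINITY);
///     // Every lane is -Inf (category 0x10), so all 16 mask bits are set.
///     assert_eq!(_mm512_fpclass_ps_mask::<0x10>(a), 0xffff);
/// }
/// ```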
6919#[inline]
6920#[target_feature(enable = "avx512dq")]
6921#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6922#[rustc_legacy_const_generics(1)]
6923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6924pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 {
6925    static_assert_uimm_bits!(IMM8, 8);
6926    _mm512_mask_fpclass_ps_mask::<IMM8>(0xffff, a)
6927}
6928
6929/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6930/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6931/// corresponding mask bit is not set).
6932/// imm8 can be a combination of:
6933///
6934///     - 0x01 // QNaN
6935///     - 0x02 // Positive Zero
6936///     - 0x04 // Negative Zero
6937///     - 0x08 // Positive Infinity
6938///     - 0x10 // Negative Infinity
6939///     - 0x20 // Denormal
6940///     - 0x40 // Negative
6941///     - 0x80 // SNaN
6942///
6943/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_ps_mask&ig_expand=3510)
6944#[inline]
6945#[target_feature(enable = "avx512dq")]
6946#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6947#[rustc_legacy_const_generics(2)]
6948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6949pub fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 {
6950    unsafe {
6951        static_assert_uimm_bits!(IMM8, 8);
6952        transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1))
6953    }
6954}
6955
6956/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
6957/// by imm8, and store the results in mask vector k.
6958/// imm8 can be a combination of:
6959///
6960///     - 0x01 // QNaN
6961///     - 0x02 // Positive Zero
6962///     - 0x04 // Negative Zero
6963///     - 0x08 // Positive Infinity
6964///     - 0x10 // Negative Infinity
6965///     - 0x20 // Denormal
6966///     - 0x40 // Negative
6967///     - 0x80 // SNaN
6968///
6969/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_sd_mask&ig_expand=3511)
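///
/// A minimal illustrative sketch, not part of Intel's documentation (assumes a CPU with
/// AVX512DQ): only the lower element is classified, so the result is either 0 or 1.
///
/// ```ignore
/// // Illustrative only; requires runtime AVX512DQ support.
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_sd(-1.5);
///     // -1.5 is a negative finite value (category 0x40).
///     assert_eq!(_mm_fpclass_sd_mask::<0x40>(a), 1);
/// }
/// ```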
6970#[inline]
6971#[target_feature(enable = "avx512dq")]
6972#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
6973#[rustc_legacy_const_generics(1)]
6974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6975pub fn _mm_fpclass_sd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
6976    static_assert_uimm_bits!(IMM8, 8);
6977    _mm_mask_fpclass_sd_mask::<IMM8>(0xff, a)
6978}
6979
6980/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
6981/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6982/// corresponding mask bit is not set).
6983/// imm8 can be a combination of:
6984///
6985///     - 0x01 // QNaN
6986///     - 0x02 // Positive Zero
6987///     - 0x04 // Negative Zero
6988///     - 0x08 // Positive Infinity
6989///     - 0x10 // Negative Infinity
6990///     - 0x20 // Denormal
6991///     - 0x40 // Negative
6992///     - 0x80 // SNaN
6993///
6994/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512)
6995#[inline]
6996#[target_feature(enable = "avx512dq")]
6997#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
6998#[rustc_legacy_const_generics(2)]
6999#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7000pub fn _mm_mask_fpclass_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
7001    unsafe {
7002        static_assert_uimm_bits!(IMM8, 8);
7003        vfpclasssd(a.as_f64x2(), IMM8, k1)
7004    }
7005}
7006
7007/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
7008/// by imm8, and store the results in mask vector k.
7009/// imm8 can be a combination of:
7010///
7011///     - 0x01 // QNaN
7012///     - 0x02 // Positive Zero
7013///     - 0x04 // Negative Zero
7014///     - 0x08 // Positive Infinity
7015///     - 0x10 // Negative Infinity
7016///     - 0x20 // Denormal
7017///     - 0x40 // Negative
7018///     - 0x80 // SNaN
7019///
7020/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ss_mask&ig_expand=3515)
7021#[inline]
7022#[target_feature(enable = "avx512dq")]
7023#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
7024#[rustc_legacy_const_generics(1)]
7025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7026pub fn _mm_fpclass_ss_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
7027    static_assert_uimm_bits!(IMM8, 8);
7028    _mm_mask_fpclass_ss_mask::<IMM8>(0xff, a)
7029}
7030
7031/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
7032/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7033/// corresponding mask bit is not set).
7034/// imm8 can be a combination of:
7035///
7036///     - 0x01 // QNaN
7037///     - 0x02 // Positive Zero
7038///     - 0x04 // Negative Zero
7039///     - 0x08 // Positive Infinity
7040///     - 0x10 // Negative Infinity
7041///     - 0x20 // Denormal
7042///     - 0x40 // Negative
7043///     - 0x80 // SNaN
7044///
7045/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516)
7046#[inline]
7047#[target_feature(enable = "avx512dq")]
7048#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
7049#[rustc_legacy_const_generics(2)]
7050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7051pub fn _mm_mask_fpclass_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
7052    unsafe {
7053        static_assert_uimm_bits!(IMM8, 8);
7054        vfpclassss(a.as_f32x4(), IMM8, k1)
7055    }
7056}
7057
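// Bindings to the LLVM builtins that back the conversion, range, reduce, and fpclass
// intrinsics above; each link name is LLVM's internal x86 AVX-512 intrinsic name.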
7058#[allow(improper_ctypes)]
7059unsafe extern "C" {
7060    #[link_name = "llvm.x86.avx512.sitofp.round.v2f64.v2i64"]
7061    fn vcvtqq2pd_128(a: i64x2, rounding: i32) -> f64x2;
7062    #[link_name = "llvm.x86.avx512.sitofp.round.v4f64.v4i64"]
7063    fn vcvtqq2pd_256(a: i64x4, rounding: i32) -> f64x4;
7064    #[link_name = "llvm.x86.avx512.sitofp.round.v8f64.v8i64"]
7065    fn vcvtqq2pd_512(a: i64x8, rounding: i32) -> f64x8;
7066
7067    #[link_name = "llvm.x86.avx512.mask.cvtqq2ps.128"]
7068    fn vcvtqq2ps_128(a: i64x2, src: f32x4, k: __mmask8) -> f32x4;
7069    #[link_name = "llvm.x86.avx512.sitofp.round.v4f32.v4i64"]
7070    fn vcvtqq2ps_256(a: i64x4, rounding: i32) -> f32x4;
7071    #[link_name = "llvm.x86.avx512.sitofp.round.v8f32.v8i64"]
7072    fn vcvtqq2ps_512(a: i64x8, rounding: i32) -> f32x8;
7073
7074    #[link_name = "llvm.x86.avx512.uitofp.round.v2f64.v2i64"]
7075    fn vcvtuqq2pd_128(a: u64x2, rounding: i32) -> f64x2;
7076    #[link_name = "llvm.x86.avx512.uitofp.round.v4f64.v4i64"]
7077    fn vcvtuqq2pd_256(a: u64x4, rounding: i32) -> f64x4;
7078    #[link_name = "llvm.x86.avx512.uitofp.round.v8f64.v8i64"]
7079    fn vcvtuqq2pd_512(a: u64x8, rounding: i32) -> f64x8;
7080
7081    #[link_name = "llvm.x86.avx512.mask.cvtuqq2ps.128"]
7082    fn vcvtuqq2ps_128(a: u64x2, src: f32x4, k: __mmask8) -> f32x4;
7083    #[link_name = "llvm.x86.avx512.uitofp.round.v4f32.v4i64"]
7084    fn vcvtuqq2ps_256(a: u64x4, rounding: i32) -> f32x4;
7085    #[link_name = "llvm.x86.avx512.uitofp.round.v8f32.v8i64"]
7086    fn vcvtuqq2ps_512(a: u64x8, rounding: i32) -> f32x8;
7087
7088    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.128"]
7089    fn vcvtpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
7090    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.256"]
7091    fn vcvtpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
7092    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.512"]
7093    fn vcvtpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
7094
7095    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.128"]
7096    fn vcvtps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
7097    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.256"]
7098    fn vcvtps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
7099    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.512"]
7100    fn vcvtps2qq_512(a: f32x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
7101
7102    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.128"]
7103    fn vcvtpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
7104    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.256"]
7105    fn vcvtpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
7106    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.512"]
7107    fn vcvtpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
7108
7109    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.128"]
7110    fn vcvtps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
7111    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.256"]
7112    fn vcvtps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
7113    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.512"]
7114    fn vcvtps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
7115
7116    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.128"]
7117    fn vcvttpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
7118    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.256"]
7119    fn vcvttpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
7120    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.512"]
7121    fn vcvttpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
7122
7123    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.128"]
7124    fn vcvttps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
7125    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.256"]
7126    fn vcvttps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
7127    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.512"]
7128    fn vcvttps2qq_512(a: f32x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
7129
7130    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.128"]
7131    fn vcvttpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
7132    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.256"]
7133    fn vcvttpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
7134    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.512"]
7135    fn vcvttpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
7136
7137    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.128"]
7138    fn vcvttps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
7139    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.256"]
7140    fn vcvttps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
7141    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.512"]
7142    fn vcvttps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
7143
7144    #[link_name = "llvm.x86.avx512.mask.range.pd.128"]
7145    fn vrangepd_128(a: f64x2, b: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
7146    #[link_name = "llvm.x86.avx512.mask.range.pd.256"]
7147    fn vrangepd_256(a: f64x4, b: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
7148    #[link_name = "llvm.x86.avx512.mask.range.pd.512"]
7149    fn vrangepd_512(a: f64x8, b: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;
7150
7151    #[link_name = "llvm.x86.avx512.mask.range.ps.128"]
7152    fn vrangeps_128(a: f32x4, b: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
7153    #[link_name = "llvm.x86.avx512.mask.range.ps.256"]
7154    fn vrangeps_256(a: f32x8, b: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
7155    #[link_name = "llvm.x86.avx512.mask.range.ps.512"]
7156    fn vrangeps_512(a: f32x16, b: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32)
7157    -> f32x16;
7158
7159    #[link_name = "llvm.x86.avx512.mask.range.sd"]
7160    fn vrangesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
7161    #[link_name = "llvm.x86.avx512.mask.range.ss"]
7162    fn vrangess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
7163
7164    #[link_name = "llvm.x86.avx512.mask.reduce.pd.128"]
7165    fn vreducepd_128(a: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
7166    #[link_name = "llvm.x86.avx512.mask.reduce.pd.256"]
7167    fn vreducepd_256(a: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
7168    #[link_name = "llvm.x86.avx512.mask.reduce.pd.512"]
7169    fn vreducepd_512(a: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;
7170
7171    #[link_name = "llvm.x86.avx512.mask.reduce.ps.128"]
7172    fn vreduceps_128(a: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
7173    #[link_name = "llvm.x86.avx512.mask.reduce.ps.256"]
7174    fn vreduceps_256(a: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
7175    #[link_name = "llvm.x86.avx512.mask.reduce.ps.512"]
7176    fn vreduceps_512(a: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) -> f32x16;
7177
7178    #[link_name = "llvm.x86.avx512.mask.reduce.sd"]
7179    fn vreducesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
7180    #[link_name = "llvm.x86.avx512.mask.reduce.ss"]
7181    fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
7182
7183    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128"]
7184    fn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
7185    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256"]
7186    fn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8;
7187    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512"]
7188    fn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8;
7189
7190    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128"]
7191    fn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
7192    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256"]
7193    fn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8;
7194    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512"]
7195    fn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16;
7196
7197    #[link_name = "llvm.x86.avx512.mask.fpclass.sd"]
7198    fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
7199    #[link_name = "llvm.x86.avx512.mask.fpclass.ss"]
7200    fn vfpclassss(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
7201}
7202
7203#[cfg(test)]
7204mod tests {
7205    use super::*;
7206
7207    use stdarch_test::simd_test;
7208
7209    use crate::core_arch::x86::*;
7210    use crate::mem::transmute;
7211
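    // Operand bit patterns for the bitwise-logic tests, reinterpreted as f64/f32 lanes:
    // 0x33.. & 0x55.. = 0x11.. (AND), !0x33.. & 0x55.. = 0x44.. (ANDN),
    // 0x33.. | 0x55.. = 0x77.. (OR), 0x33.. ^ 0x55.. = 0x66.. (XOR).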
7212    const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) };
7213    const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) };
7214
7215    const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) };
7216    const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) };
7217    const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) };
7218    const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) };
7219
7220    const OPRND1_32: f32 = unsafe { transmute(0x33333333_u32) };
7221    const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) };
7222
7223    const AND_32: f32 = unsafe { transmute(0x11111111_u32) };
7224    const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) };
7225    const OR_32: f32 = unsafe { transmute(0x77777777_u32) };
7226    const XOR_32: f32 = unsafe { transmute(0x66666666_u32) };
7227
7228    #[simd_test(enable = "avx512dq,avx512vl")]
7229    unsafe fn test_mm_mask_and_pd() {
7230        let a = _mm_set1_pd(OPRND1_64);
7231        let b = _mm_set1_pd(OPRND2_64);
7232        let src = _mm_set_pd(1., 2.);
7233        let r = _mm_mask_and_pd(src, 0b01, a, b);
7234        let e = _mm_set_pd(1., AND_64);
7235        assert_eq_m128d(r, e);
7236    }
7237
7238    #[simd_test(enable = "avx512dq,avx512vl")]
7239    unsafe fn test_mm_maskz_and_pd() {
7240        let a = _mm_set1_pd(OPRND1_64);
7241        let b = _mm_set1_pd(OPRND2_64);
7242        let r = _mm_maskz_and_pd(0b01, a, b);
7243        let e = _mm_set_pd(0.0, AND_64);
7244        assert_eq_m128d(r, e);
7245    }
7246
7247    #[simd_test(enable = "avx512dq,avx512vl")]
7248    unsafe fn test_mm256_mask_and_pd() {
7249        let a = _mm256_set1_pd(OPRND1_64);
7250        let b = _mm256_set1_pd(OPRND2_64);
7251        let src = _mm256_set_pd(1., 2., 3., 4.);
7252        let r = _mm256_mask_and_pd(src, 0b0101, a, b);
7253        let e = _mm256_set_pd(1., AND_64, 3., AND_64);
7254        assert_eq_m256d(r, e);
7255    }
7256
7257    #[simd_test(enable = "avx512dq,avx512vl")]
7258    unsafe fn test_mm256_maskz_and_pd() {
7259        let a = _mm256_set1_pd(OPRND1_64);
7260        let b = _mm256_set1_pd(OPRND2_64);
7261        let r = _mm256_maskz_and_pd(0b0101, a, b);
7262        let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64);
7263        assert_eq_m256d(r, e);
7264    }
7265
7266    #[simd_test(enable = "avx512dq")]
7267    unsafe fn test_mm512_and_pd() {
7268        let a = _mm512_set1_pd(OPRND1_64);
7269        let b = _mm512_set1_pd(OPRND2_64);
7270        let r = _mm512_and_pd(a, b);
7271        let e = _mm512_set1_pd(AND_64);
7272        assert_eq_m512d(r, e);
7273    }
7274
7275    #[simd_test(enable = "avx512dq")]
7276    unsafe fn test_mm512_mask_and_pd() {
7277        let a = _mm512_set1_pd(OPRND1_64);
7278        let b = _mm512_set1_pd(OPRND2_64);
7279        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7280        let r = _mm512_mask_and_pd(src, 0b01010101, a, b);
7281        let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64);
7282        assert_eq_m512d(r, e);
7283    }
7284
7285    #[simd_test(enable = "avx512dq")]
7286    unsafe fn test_mm512_maskz_and_pd() {
7287        let a = _mm512_set1_pd(OPRND1_64);
7288        let b = _mm512_set1_pd(OPRND2_64);
7289        let r = _mm512_maskz_and_pd(0b01010101, a, b);
7290        let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64);
7291        assert_eq_m512d(r, e);
7292    }
7293
7294    #[simd_test(enable = "avx512dq,avx512vl")]
7295    unsafe fn test_mm_mask_and_ps() {
7296        let a = _mm_set1_ps(OPRND1_32);
7297        let b = _mm_set1_ps(OPRND2_32);
7298        let src = _mm_set_ps(1., 2., 3., 4.);
7299        let r = _mm_mask_and_ps(src, 0b0101, a, b);
7300        let e = _mm_set_ps(1., AND_32, 3., AND_32);
7301        assert_eq_m128(r, e);
7302    }
7303
7304    #[simd_test(enable = "avx512dq,avx512vl")]
7305    unsafe fn test_mm_maskz_and_ps() {
7306        let a = _mm_set1_ps(OPRND1_32);
7307        let b = _mm_set1_ps(OPRND2_32);
7308        let r = _mm_maskz_and_ps(0b0101, a, b);
7309        let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32);
7310        assert_eq_m128(r, e);
7311    }
7312
7313    #[simd_test(enable = "avx512dq,avx512vl")]
7314    unsafe fn test_mm256_mask_and_ps() {
7315        let a = _mm256_set1_ps(OPRND1_32);
7316        let b = _mm256_set1_ps(OPRND2_32);
7317        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7318        let r = _mm256_mask_and_ps(src, 0b01010101, a, b);
7319        let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32);
7320        assert_eq_m256(r, e);
7321    }
7322
7323    #[simd_test(enable = "avx512dq,avx512vl")]
7324    unsafe fn test_mm256_maskz_and_ps() {
7325        let a = _mm256_set1_ps(OPRND1_32);
7326        let b = _mm256_set1_ps(OPRND2_32);
7327        let r = _mm256_maskz_and_ps(0b01010101, a, b);
7328        let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32);
7329        assert_eq_m256(r, e);
7330    }
7331
7332    #[simd_test(enable = "avx512dq")]
7333    unsafe fn test_mm512_and_ps() {
7334        let a = _mm512_set1_ps(OPRND1_32);
7335        let b = _mm512_set1_ps(OPRND2_32);
7336        let r = _mm512_and_ps(a, b);
7337        let e = _mm512_set1_ps(AND_32);
7338        assert_eq_m512(r, e);
7339    }
7340
7341    #[simd_test(enable = "avx512dq")]
7342    unsafe fn test_mm512_mask_and_ps() {
7343        let a = _mm512_set1_ps(OPRND1_32);
7344        let b = _mm512_set1_ps(OPRND2_32);
7345        let src = _mm512_set_ps(
7346            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7347        );
7348        let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b);
7349        let e = _mm512_set_ps(
7350            1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32,
7351            15., AND_32,
7352        );
7353        assert_eq_m512(r, e);
7354    }
7355
7356    #[simd_test(enable = "avx512dq")]
7357    unsafe fn test_mm512_maskz_and_ps() {
7358        let a = _mm512_set1_ps(OPRND1_32);
7359        let b = _mm512_set1_ps(OPRND2_32);
7360        let r = _mm512_maskz_and_ps(0b0101010101010101, a, b);
7361        let e = _mm512_set_ps(
7362            0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0.,
7363            AND_32,
7364        );
7365        assert_eq_m512(r, e);
7366    }
7367
7368    #[simd_test(enable = "avx512dq,avx512vl")]
7369    unsafe fn test_mm_mask_andnot_pd() {
7370        let a = _mm_set1_pd(OPRND1_64);
7371        let b = _mm_set1_pd(OPRND2_64);
7372        let src = _mm_set_pd(1., 2.);
7373        let r = _mm_mask_andnot_pd(src, 0b01, a, b);
7374        let e = _mm_set_pd(1., ANDN_64);
7375        assert_eq_m128d(r, e);
7376    }
7377
7378    #[simd_test(enable = "avx512dq,avx512vl")]
7379    unsafe fn test_mm_maskz_andnot_pd() {
7380        let a = _mm_set1_pd(OPRND1_64);
7381        let b = _mm_set1_pd(OPRND2_64);
7382        let r = _mm_maskz_andnot_pd(0b01, a, b);
7383        let e = _mm_set_pd(0.0, ANDN_64);
7384        assert_eq_m128d(r, e);
7385    }
7386
7387    #[simd_test(enable = "avx512dq,avx512vl")]
7388    unsafe fn test_mm256_mask_andnot_pd() {
7389        let a = _mm256_set1_pd(OPRND1_64);
7390        let b = _mm256_set1_pd(OPRND2_64);
7391        let src = _mm256_set_pd(1., 2., 3., 4.);
7392        let r = _mm256_mask_andnot_pd(src, 0b0101, a, b);
7393        let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64);
7394        assert_eq_m256d(r, e);
7395    }
7396
7397    #[simd_test(enable = "avx512dq,avx512vl")]
7398    unsafe fn test_mm256_maskz_andnot_pd() {
7399        let a = _mm256_set1_pd(OPRND1_64);
7400        let b = _mm256_set1_pd(OPRND2_64);
7401        let r = _mm256_maskz_andnot_pd(0b0101, a, b);
7402        let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64);
7403        assert_eq_m256d(r, e);
7404    }
7405
7406    #[simd_test(enable = "avx512dq")]
7407    unsafe fn test_mm512_andnot_pd() {
7408        let a = _mm512_set1_pd(OPRND1_64);
7409        let b = _mm512_set1_pd(OPRND2_64);
7410        let r = _mm512_andnot_pd(a, b);
7411        let e = _mm512_set1_pd(ANDN_64);
7412        assert_eq_m512d(r, e);
7413    }
7414
7415    #[simd_test(enable = "avx512dq")]
7416    unsafe fn test_mm512_mask_andnot_pd() {
7417        let a = _mm512_set1_pd(OPRND1_64);
7418        let b = _mm512_set1_pd(OPRND2_64);
7419        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7420        let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b);
7421        let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64);
7422        assert_eq_m512d(r, e);
7423    }
7424
7425    #[simd_test(enable = "avx512dq")]
7426    unsafe fn test_mm512_maskz_andnot_pd() {
7427        let a = _mm512_set1_pd(OPRND1_64);
7428        let b = _mm512_set1_pd(OPRND2_64);
7429        let r = _mm512_maskz_andnot_pd(0b01010101, a, b);
7430        let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64);
7431        assert_eq_m512d(r, e);
7432    }
7433
7434    #[simd_test(enable = "avx512dq,avx512vl")]
7435    unsafe fn test_mm_mask_andnot_ps() {
7436        let a = _mm_set1_ps(OPRND1_32);
7437        let b = _mm_set1_ps(OPRND2_32);
7438        let src = _mm_set_ps(1., 2., 3., 4.);
7439        let r = _mm_mask_andnot_ps(src, 0b0101, a, b);
7440        let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32);
7441        assert_eq_m128(r, e);
7442    }
7443
7444    #[simd_test(enable = "avx512dq,avx512vl")]
7445    unsafe fn test_mm_maskz_andnot_ps() {
7446        let a = _mm_set1_ps(OPRND1_32);
7447        let b = _mm_set1_ps(OPRND2_32);
7448        let r = _mm_maskz_andnot_ps(0b0101, a, b);
7449        let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32);
7450        assert_eq_m128(r, e);
7451    }
7452
7453    #[simd_test(enable = "avx512dq,avx512vl")]
7454    unsafe fn test_mm256_mask_andnot_ps() {
7455        let a = _mm256_set1_ps(OPRND1_32);
7456        let b = _mm256_set1_ps(OPRND2_32);
7457        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7458        let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b);
7459        let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32);
7460        assert_eq_m256(r, e);
7461    }
7462
7463    #[simd_test(enable = "avx512dq,avx512vl")]
7464    unsafe fn test_mm256_maskz_andnot_ps() {
7465        let a = _mm256_set1_ps(OPRND1_32);
7466        let b = _mm256_set1_ps(OPRND2_32);
7467        let r = _mm256_maskz_andnot_ps(0b01010101, a, b);
7468        let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32);
7469        assert_eq_m256(r, e);
7470    }
7471
7472    #[simd_test(enable = "avx512dq")]
7473    unsafe fn test_mm512_andnot_ps() {
7474        let a = _mm512_set1_ps(OPRND1_32);
7475        let b = _mm512_set1_ps(OPRND2_32);
7476        let r = _mm512_andnot_ps(a, b);
7477        let e = _mm512_set1_ps(ANDN_32);
7478        assert_eq_m512(r, e);
7479    }
7480
7481    #[simd_test(enable = "avx512dq")]
7482    unsafe fn test_mm512_mask_andnot_ps() {
7483        let a = _mm512_set1_ps(OPRND1_32);
7484        let b = _mm512_set1_ps(OPRND2_32);
7485        let src = _mm512_set_ps(
7486            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7487        );
7488        let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b);
7489        let e = _mm512_set_ps(
7490            1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13.,
7491            ANDN_32, 15., ANDN_32,
7492        );
7493        assert_eq_m512(r, e);
7494    }
7495
7496    #[simd_test(enable = "avx512dq")]
7497    unsafe fn test_mm512_maskz_andnot_ps() {
7498        let a = _mm512_set1_ps(OPRND1_32);
7499        let b = _mm512_set1_ps(OPRND2_32);
7500        let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b);
7501        let e = _mm512_set_ps(
7502            0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0.,
7503            ANDN_32, 0., ANDN_32,
7504        );
7505        assert_eq_m512(r, e);
7506    }
7507
7508    #[simd_test(enable = "avx512dq,avx512vl")]
7509    unsafe fn test_mm_mask_or_pd() {
7510        let a = _mm_set1_pd(OPRND1_64);
7511        let b = _mm_set1_pd(OPRND2_64);
7512        let src = _mm_set_pd(1., 2.);
7513        let r = _mm_mask_or_pd(src, 0b01, a, b);
7514        let e = _mm_set_pd(1., OR_64);
7515        assert_eq_m128d(r, e);
7516    }
7517
7518    #[simd_test(enable = "avx512dq,avx512vl")]
7519    unsafe fn test_mm_maskz_or_pd() {
7520        let a = _mm_set1_pd(OPRND1_64);
7521        let b = _mm_set1_pd(OPRND2_64);
7522        let r = _mm_maskz_or_pd(0b01, a, b);
7523        let e = _mm_set_pd(0.0, OR_64);
7524        assert_eq_m128d(r, e);
7525    }
7526
7527    #[simd_test(enable = "avx512dq,avx512vl")]
7528    unsafe fn test_mm256_mask_or_pd() {
7529        let a = _mm256_set1_pd(OPRND1_64);
7530        let b = _mm256_set1_pd(OPRND2_64);
7531        let src = _mm256_set_pd(1., 2., 3., 4.);
7532        let r = _mm256_mask_or_pd(src, 0b0101, a, b);
7533        let e = _mm256_set_pd(1., OR_64, 3., OR_64);
7534        assert_eq_m256d(r, e);
7535    }
7536
7537    #[simd_test(enable = "avx512dq,avx512vl")]
7538    unsafe fn test_mm256_maskz_or_pd() {
7539        let a = _mm256_set1_pd(OPRND1_64);
7540        let b = _mm256_set1_pd(OPRND2_64);
7541        let r = _mm256_maskz_or_pd(0b0101, a, b);
7542        let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64);
7543        assert_eq_m256d(r, e);
7544    }
7545
7546    #[simd_test(enable = "avx512dq")]
7547    unsafe fn test_mm512_or_pd() {
7548        let a = _mm512_set1_pd(OPRND1_64);
7549        let b = _mm512_set1_pd(OPRND2_64);
7550        let r = _mm512_or_pd(a, b);
7551        let e = _mm512_set1_pd(OR_64);
7552        assert_eq_m512d(r, e);
7553    }
7554
7555    #[simd_test(enable = "avx512dq")]
7556    unsafe fn test_mm512_mask_or_pd() {
7557        let a = _mm512_set1_pd(OPRND1_64);
7558        let b = _mm512_set1_pd(OPRND2_64);
7559        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7560        let r = _mm512_mask_or_pd(src, 0b01010101, a, b);
7561        let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64);
7562        assert_eq_m512d(r, e);
7563    }
7564
7565    #[simd_test(enable = "avx512dq")]
7566    unsafe fn test_mm512_maskz_or_pd() {
7567        let a = _mm512_set1_pd(OPRND1_64);
7568        let b = _mm512_set1_pd(OPRND2_64);
7569        let r = _mm512_maskz_or_pd(0b01010101, a, b);
7570        let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64);
7571        assert_eq_m512d(r, e);
7572    }
7573
7574    #[simd_test(enable = "avx512dq,avx512vl")]
7575    unsafe fn test_mm_mask_or_ps() {
7576        let a = _mm_set1_ps(OPRND1_32);
7577        let b = _mm_set1_ps(OPRND2_32);
7578        let src = _mm_set_ps(1., 2., 3., 4.);
7579        let r = _mm_mask_or_ps(src, 0b0101, a, b);
7580        let e = _mm_set_ps(1., OR_32, 3., OR_32);
7581        assert_eq_m128(r, e);
7582    }
7583
7584    #[simd_test(enable = "avx512dq,avx512vl")]
7585    unsafe fn test_mm_maskz_or_ps() {
7586        let a = _mm_set1_ps(OPRND1_32);
7587        let b = _mm_set1_ps(OPRND2_32);
7588        let r = _mm_maskz_or_ps(0b0101, a, b);
7589        let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32);
7590        assert_eq_m128(r, e);
7591    }
7592
7593    #[simd_test(enable = "avx512dq,avx512vl")]
7594    unsafe fn test_mm256_mask_or_ps() {
7595        let a = _mm256_set1_ps(OPRND1_32);
7596        let b = _mm256_set1_ps(OPRND2_32);
7597        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7598        let r = _mm256_mask_or_ps(src, 0b01010101, a, b);
7599        let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32);
7600        assert_eq_m256(r, e);
7601    }
7602
7603    #[simd_test(enable = "avx512dq,avx512vl")]
7604    unsafe fn test_mm256_maskz_or_ps() {
7605        let a = _mm256_set1_ps(OPRND1_32);
7606        let b = _mm256_set1_ps(OPRND2_32);
7607        let r = _mm256_maskz_or_ps(0b01010101, a, b);
7608        let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32);
7609        assert_eq_m256(r, e);
7610    }
7611
7612    #[simd_test(enable = "avx512dq")]
7613    unsafe fn test_mm512_or_ps() {
7614        let a = _mm512_set1_ps(OPRND1_32);
7615        let b = _mm512_set1_ps(OPRND2_32);
7616        let r = _mm512_or_ps(a, b);
7617        let e = _mm512_set1_ps(OR_32);
7618        assert_eq_m512(r, e);
7619    }
7620
7621    #[simd_test(enable = "avx512dq")]
7622    unsafe fn test_mm512_mask_or_ps() {
7623        let a = _mm512_set1_ps(OPRND1_32);
7624        let b = _mm512_set1_ps(OPRND2_32);
7625        let src = _mm512_set_ps(
7626            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7627        );
7628        let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b);
7629        let e = _mm512_set_ps(
7630            1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15.,
7631            OR_32,
7632        );
7633        assert_eq_m512(r, e);
7634    }
7635
7636    #[simd_test(enable = "avx512dq")]
7637    unsafe fn test_mm512_maskz_or_ps() {
7638        let a = _mm512_set1_ps(OPRND1_32);
7639        let b = _mm512_set1_ps(OPRND2_32);
7640        let r = _mm512_maskz_or_ps(0b0101010101010101, a, b);
7641        let e = _mm512_set_ps(
7642            0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32,
7643        );
7644        assert_eq_m512(r, e);
7645    }
7646
7647    #[simd_test(enable = "avx512dq,avx512vl")]
7648    unsafe fn test_mm_mask_xor_pd() {
7649        let a = _mm_set1_pd(OPRND1_64);
7650        let b = _mm_set1_pd(OPRND2_64);
7651        let src = _mm_set_pd(1., 2.);
7652        let r = _mm_mask_xor_pd(src, 0b01, a, b);
7653        let e = _mm_set_pd(1., XOR_64);
7654        assert_eq_m128d(r, e);
7655    }
7656
7657    #[simd_test(enable = "avx512dq,avx512vl")]
7658    unsafe fn test_mm_maskz_xor_pd() {
7659        let a = _mm_set1_pd(OPRND1_64);
7660        let b = _mm_set1_pd(OPRND2_64);
7661        let r = _mm_maskz_xor_pd(0b01, a, b);
7662        let e = _mm_set_pd(0.0, XOR_64);
7663        assert_eq_m128d(r, e);
7664    }
7665
7666    #[simd_test(enable = "avx512dq,avx512vl")]
7667    unsafe fn test_mm256_mask_xor_pd() {
7668        let a = _mm256_set1_pd(OPRND1_64);
7669        let b = _mm256_set1_pd(OPRND2_64);
7670        let src = _mm256_set_pd(1., 2., 3., 4.);
7671        let r = _mm256_mask_xor_pd(src, 0b0101, a, b);
7672        let e = _mm256_set_pd(1., XOR_64, 3., XOR_64);
7673        assert_eq_m256d(r, e);
7674    }
7675
7676    #[simd_test(enable = "avx512dq,avx512vl")]
7677    unsafe fn test_mm256_maskz_xor_pd() {
7678        let a = _mm256_set1_pd(OPRND1_64);
7679        let b = _mm256_set1_pd(OPRND2_64);
7680        let r = _mm256_maskz_xor_pd(0b0101, a, b);
7681        let e = _mm256_set_pd(0.0, XOR_64, 0.0, XOR_64);
7682        assert_eq_m256d(r, e);
7683    }
7684
7685    #[simd_test(enable = "avx512dq")]
7686    unsafe fn test_mm512_xor_pd() {
7687        let a = _mm512_set1_pd(OPRND1_64);
7688        let b = _mm512_set1_pd(OPRND2_64);
7689        let r = _mm512_xor_pd(a, b);
7690        let e = _mm512_set1_pd(XOR_64);
7691        assert_eq_m512d(r, e);
7692    }
7693
7694    #[simd_test(enable = "avx512dq")]
7695    unsafe fn test_mm512_mask_xor_pd() {
7696        let a = _mm512_set1_pd(OPRND1_64);
7697        let b = _mm512_set1_pd(OPRND2_64);
7698        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7699        let r = _mm512_mask_xor_pd(src, 0b01010101, a, b);
7700        let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64);
7701        assert_eq_m512d(r, e);
7702    }
7703
7704    #[simd_test(enable = "avx512dq")]
7705    unsafe fn test_mm512_maskz_xor_pd() {
7706        let a = _mm512_set1_pd(OPRND1_64);
7707        let b = _mm512_set1_pd(OPRND2_64);
7708        let r = _mm512_maskz_xor_pd(0b01010101, a, b);
7709        let e = _mm512_set_pd(0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64);
7710        assert_eq_m512d(r, e);
7711    }
7712
7713    #[simd_test(enable = "avx512dq,avx512vl")]
7714    unsafe fn test_mm_mask_xor_ps() {
7715        let a = _mm_set1_ps(OPRND1_32);
7716        let b = _mm_set1_ps(OPRND2_32);
7717        let src = _mm_set_ps(1., 2., 3., 4.);
7718        let r = _mm_mask_xor_ps(src, 0b0101, a, b);
7719        let e = _mm_set_ps(1., XOR_32, 3., XOR_32);
7720        assert_eq_m128(r, e);
7721    }
7722
7723    #[simd_test(enable = "avx512dq,avx512vl")]
7724    unsafe fn test_mm_maskz_xor_ps() {
7725        let a = _mm_set1_ps(OPRND1_32);
7726        let b = _mm_set1_ps(OPRND2_32);
7727        let r = _mm_maskz_xor_ps(0b0101, a, b);
7728        let e = _mm_set_ps(0.0, XOR_32, 0.0, XOR_32);
7729        assert_eq_m128(r, e);
7730    }
7731
7732    #[simd_test(enable = "avx512dq,avx512vl")]
7733    unsafe fn test_mm256_mask_xor_ps() {
7734        let a = _mm256_set1_ps(OPRND1_32);
7735        let b = _mm256_set1_ps(OPRND2_32);
7736        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7737        let r = _mm256_mask_xor_ps(src, 0b01010101, a, b);
7738        let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32);
7739        assert_eq_m256(r, e);
7740    }
7741
7742    #[simd_test(enable = "avx512dq,avx512vl")]
7743    unsafe fn test_mm256_maskz_xor_ps() {
7744        let a = _mm256_set1_ps(OPRND1_32);
7745        let b = _mm256_set1_ps(OPRND2_32);
7746        let r = _mm256_maskz_xor_ps(0b01010101, a, b);
7747        let e = _mm256_set_ps(0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32);
7748        assert_eq_m256(r, e);
7749    }
7750
7751    #[simd_test(enable = "avx512dq")]
7752    unsafe fn test_mm512_xor_ps() {
7753        let a = _mm512_set1_ps(OPRND1_32);
7754        let b = _mm512_set1_ps(OPRND2_32);
7755        let r = _mm512_xor_ps(a, b);
7756        let e = _mm512_set1_ps(XOR_32);
7757        assert_eq_m512(r, e);
7758    }
7759
7760    #[simd_test(enable = "avx512dq")]
7761    unsafe fn test_mm512_mask_xor_ps() {
7762        let a = _mm512_set1_ps(OPRND1_32);
7763        let b = _mm512_set1_ps(OPRND2_32);
7764        let src = _mm512_set_ps(
7765            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7766        );
7767        let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b);
7768        let e = _mm512_set_ps(
7769            1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32,
7770            15., XOR_32,
7771        );
7772        assert_eq_m512(r, e);
7773    }
7774
7775    #[simd_test(enable = "avx512dq")]
7776    unsafe fn test_mm512_maskz_xor_ps() {
7777        let a = _mm512_set1_ps(OPRND1_32);
7778        let b = _mm512_set1_ps(OPRND2_32);
7779        let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b);
7780        let e = _mm512_set_ps(
7781            0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0.,
7782            XOR_32,
7783        );
7784        assert_eq_m512(r, e);
7785    }
7786
7787    #[simd_test(enable = "avx512dq,avx512vl")]
7788    unsafe fn test_mm256_broadcast_f32x2() {
7789        let a = _mm_set_ps(1., 2., 3., 4.);
7790        let r = _mm256_broadcast_f32x2(a);
7791        let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.);
7792        assert_eq_m256(r, e);
7793    }
7794
7795    #[simd_test(enable = "avx512dq,avx512vl")]
7796    unsafe fn test_mm256_mask_broadcast_f32x2() {
7797        let a = _mm_set_ps(1., 2., 3., 4.);
7798        let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.);
7799        let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a);
7800        let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.);
7801        assert_eq_m256(r, e);
7802    }
7803
7804    #[simd_test(enable = "avx512dq,avx512vl")]
7805    unsafe fn test_mm256_maskz_broadcast_f32x2() {
7806        let a = _mm_set_ps(1., 2., 3., 4.);
7807        let r = _mm256_maskz_broadcast_f32x2(0b01101001, a);
7808        let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.);
7809        assert_eq_m256(r, e);
7810    }
7811
7812    #[simd_test(enable = "avx512dq")]
7813    unsafe fn test_mm512_broadcast_f32x2() {
7814        let a = _mm_set_ps(1., 2., 3., 4.);
7815        let r = _mm512_broadcast_f32x2(a);
7816        let e = _mm512_set_ps(
7817            3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4.,
7818        );
7819        assert_eq_m512(r, e);
7820    }
7821
7822    #[simd_test(enable = "avx512dq")]
7823    unsafe fn test_mm512_mask_broadcast_f32x2() {
7824        let a = _mm_set_ps(1., 2., 3., 4.);
7825        let b = _mm512_set_ps(
7826            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
7827        );
7828        let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a);
7829        let e = _mm512_set_ps(
7830            5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20.,
7831        );
7832        assert_eq_m512(r, e);
7833    }
7834
7835    #[simd_test(enable = "avx512dq")]
7836    unsafe fn test_mm512_maskz_broadcast_f32x2() {
7837        let a = _mm_set_ps(1., 2., 3., 4.);
7838        let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a);
7839        let e = _mm512_set_ps(
7840            0., 4., 3., 0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0.,
7841        );
7842        assert_eq_m512(r, e);
7843    }
7844
7845    #[simd_test(enable = "avx512dq")]
7846    unsafe fn test_mm512_broadcast_f32x8() {
7847        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7848        let r = _mm512_broadcast_f32x8(a);
7849        let e = _mm512_set_ps(
7850            1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8.,
7851        );
7852        assert_eq_m512(r, e);
7853    }
7854
7855    #[simd_test(enable = "avx512dq")]
7856    unsafe fn test_mm512_mask_broadcast_f32x8() {
7857        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7858        let b = _mm512_set_ps(
7859            9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,
7860        );
7861        let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a);
7862        let e = _mm512_set_ps(
7863            9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24.,
7864        );
7865        assert_eq_m512(r, e);
7866    }
7867
7868    #[simd_test(enable = "avx512dq")]
7869    unsafe fn test_mm512_maskz_broadcast_f32x8() {
7870        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7871        let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a);
7872        let e = _mm512_set_ps(
7873            0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0.,
7874        );
7875        assert_eq_m512(r, e);
7876    }
7877
7878    #[simd_test(enable = "avx512dq,avx512vl")]
7879    unsafe fn test_mm256_broadcast_f64x2() {
7880        let a = _mm_set_pd(1., 2.);
7881        let r = _mm256_broadcast_f64x2(a);
7882        let e = _mm256_set_pd(1., 2., 1., 2.);
7883        assert_eq_m256d(r, e);
7884    }
7885
7886    #[simd_test(enable = "avx512dq,avx512vl")]
7887    unsafe fn test_mm256_mask_broadcast_f64x2() {
7888        let a = _mm_set_pd(1., 2.);
7889        let b = _mm256_set_pd(3., 4., 5., 6.);
7890        let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a);
7891        let e = _mm256_set_pd(3., 2., 1., 6.);
7892        assert_eq_m256d(r, e);
7893    }
7894
7895    #[simd_test(enable = "avx512dq,avx512vl")]
7896    unsafe fn test_mm256_maskz_broadcast_f64x2() {
7897        let a = _mm_set_pd(1., 2.);
7898        let r = _mm256_maskz_broadcast_f64x2(0b0110, a);
7899        let e = _mm256_set_pd(0., 2., 1., 0.);
7900        assert_eq_m256d(r, e);
7901    }
7902
7903    #[simd_test(enable = "avx512dq")]
7904    unsafe fn test_mm512_broadcast_f64x2() {
7905        let a = _mm_set_pd(1., 2.);
7906        let r = _mm512_broadcast_f64x2(a);
7907        let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.);
7908        assert_eq_m512d(r, e);
7909    }
7910
7911    #[simd_test(enable = "avx512dq")]
7912    unsafe fn test_mm512_mask_broadcast_f64x2() {
7913        let a = _mm_set_pd(1., 2.);
7914        let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
7915        let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a);
7916        let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.);
7917        assert_eq_m512d(r, e);
7918    }
7919
7920    #[simd_test(enable = "avx512dq")]
7921    unsafe fn test_mm512_maskz_broadcast_f64x2() {
7922        let a = _mm_set_pd(1., 2.);
7923        let r = _mm512_maskz_broadcast_f64x2(0b01101001, a);
7924        let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.);
7925        assert_eq_m512d(r, e);
7926    }
7927
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_broadcast_i32x2(a);
        let e = _mm_set_epi32(3, 4, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(5, 6, 7, 8);
        let r = _mm_mask_broadcast_i32x2(b, 0b0110, a);
        let e = _mm_set_epi32(5, 4, 3, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_maskz_broadcast_i32x2(0b0110, a);
        let e = _mm_set_epi32(0, 4, 3, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm256_broadcast_i32x2(a);
        let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
        let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a);
        let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm256_maskz_broadcast_i32x2(0b01101001, a);
        let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm512_broadcast_i32x2(a);
        let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
        let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a);
        let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a);
        let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_broadcast_i32x8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_broadcast_i32x8(a);
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_broadcast_i32x8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi32(
            9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
        );
        let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a);
        let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_broadcast_i32x8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a);
        let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm256_broadcast_i64x2(a);
        let e = _mm256_set_epi64x(1, 2, 1, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm256_set_epi64x(3, 4, 5, 6);
        let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a);
        let e = _mm256_set_epi64x(3, 2, 1, 6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm256_maskz_broadcast_i64x2(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm512_broadcast_i64x2(a);
        let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10);
        let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a);
        let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm512_maskz_broadcast_i64x2(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2);
        assert_eq_m512i(r, e);
    }

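    // For the extract intrinsics the const generic selects which 256-bit
    // (x8) or 128-bit (x2) chunk to pull out, counting from the low end.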
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_extractf32x8_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_extractf32x8_ps::<1>(a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_extractf32x8_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a);
        let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_extractf32x8_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a);
        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_extractf64x2_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_extractf64x2_pd::<1>(a);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_extractf64x2_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm_set_pd(5., 6.);
        let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a);
        let e = _mm_set_pd(5., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_extractf64x2_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a);
        let e = _mm_set_pd(0., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_extractf64x2_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_extractf64x2_pd::<2>(a);
        let e = _mm_set_pd(3., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_extractf64x2_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_pd(9., 10.);
        let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a);
        let e = _mm_set_pd(9., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_extractf64x2_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a);
        let e = _mm_set_pd(0., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_extracti32x8_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_extracti32x8_epi32::<1>(a);
        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_extracti32x8_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a);
        let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_extracti32x8_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a);
        let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_extracti64x2_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_extracti64x2_epi64::<1>(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_extracti64x2_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a);
        let e = _mm_set_epi64x(5, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_extracti64x2_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_extracti64x2_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_extracti64x2_epi64::<2>(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_extracti64x2_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi64x(9, 10);
        let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a);
        let e = _mm_set_epi64x(9, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_extracti64x2_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }

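    // The insert intrinsics are the inverse of extract: the const generic
    // selects which chunk of `a` is overwritten by `b`, again counting from
    // the low end.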
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_insertf32x8() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm512_insertf32x8::<1>(a, b);
        let e = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_insertf32x8() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let src = _mm512_set_ps(
            25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40.,
        );
        let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_insertf32x8() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_insertf64x2() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm_set_pd(5., 6.);
        let r = _mm256_insertf64x2::<1>(a, b);
        let e = _mm256_set_pd(5., 6., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_insertf64x2() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm_set_pd(5., 6.);
        let src = _mm256_set_pd(7., 8., 9., 10.);
        let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b);
        let e = _mm256_set_pd(7., 6., 3., 10.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_insertf64x2() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm_set_pd(5., 6.);
        let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b);
        let e = _mm256_set_pd(0., 6., 3., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_insertf64x2() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_pd(9., 10.);
        let r = _mm512_insertf64x2::<2>(a, b);
        let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_insertf64x2() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_pd(9., 10.);
        let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.);
        let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b);
        let e = _mm512_set_pd(11., 2., 9., 14., 5., 16., 17., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_insertf64x2() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_pd(9., 10.);
        let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b);
        let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_inserti32x8() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_inserti32x8::<1>(a, b);
        let e = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_inserti32x8() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let src = _mm512_set_epi32(
            25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        );
        let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b);
        let e = _mm512_set_epi32(
            25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_inserti32x8() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b);
        let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_inserti64x2() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm256_inserti64x2::<1>(a, b);
        let e = _mm256_set_epi64x(5, 6, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_inserti64x2() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_epi64x(5, 6);
        let src = _mm256_set_epi64x(7, 8, 9, 10);
        let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b);
        let e = _mm256_set_epi64x(7, 6, 3, 10);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_inserti64x2() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b);
        let e = _mm256_set_epi64x(0, 6, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_inserti64x2() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi64x(9, 10);
        let r = _mm512_inserti64x2::<2>(a, b);
        let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_inserti64x2() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi64x(9, 10);
        let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18);
        let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b);
        let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_inserti64x2() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi64x(9, 10);
        let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b);
        let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

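    // `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` selects
    // round-to-nearest-even and suppresses floating-point exceptions (SAE).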
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtepi64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_cvtepi64_pd(a);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtepi64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_cvtepi64_pd(b, 0b01, a);
        let e = _mm_set_pd(3., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_maskz_cvtepi64_pd(0b01, a);
        let e = _mm_set_pd(0., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtepi64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_cvtepi64_pd(a);
        let e = _mm256_set_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_pd(5., 6., 7., 8.);
        let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a);
        let e = _mm256_set_pd(5., 2., 3., 8.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_cvtepi64_pd(0b0110, a);
        let e = _mm256_set_pd(0., 2., 3., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvtepi64_pd(a);
        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a);
        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvtepi64_pd(0b01101001, a);
        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m256(r, e);
    }

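    // Converting two i64 lanes produces a __m128 whose upper two f32 lanes
    // are zeroed.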
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtepi64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_cvtepi64_ps(a);
        let e = _mm_set_ps(0., 0., 1., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtepi64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_ps(3., 4., 5., 6.);
        let r = _mm_mask_cvtepi64_ps(b, 0b01, a);
        let e = _mm_set_ps(0., 0., 5., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_maskz_cvtepi64_ps(0b01, a);
        let e = _mm_set_ps(0., 0., 0., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtepi64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_cvtepi64_ps(a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a);
        let e = _mm_set_ps(5., 2., 3., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_cvtepi64_ps(0b0110, a);
        let e = _mm_set_ps(0., 2., 3., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvtepi64_ps(a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a);
        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvtepi64_ps(0b01101001, a);
        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m256(r, e);
    }

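    // The epu64 variants interpret the source lanes as unsigned 64-bit
    // integers.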
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtepu64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_cvtepu64_pd(a);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtepu64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_cvtepu64_pd(b, 0b01, a);
        let e = _mm_set_pd(3., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_maskz_cvtepu64_pd(0b01, a);
        let e = _mm_set_pd(0., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtepu64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_cvtepu64_pd(a);
        let e = _mm256_set_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_pd(5., 6., 7., 8.);
        let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a);
        let e = _mm256_set_pd(5., 2., 3., 8.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_cvtepu64_pd(0b0110, a);
        let e = _mm256_set_pd(0., 2., 3., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvtepu64_pd(a);
        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a);
        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvtepu64_pd(0b01101001, a);
        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtepu64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_cvtepu64_ps(a);
        let e = _mm_set_ps(0., 0., 1., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtepu64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_ps(3., 4., 5., 6.);
        let r = _mm_mask_cvtepu64_ps(b, 0b01, a);
        let e = _mm_set_ps(0., 0., 5., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_maskz_cvtepu64_ps(0b01, a);
        let e = _mm_set_ps(0., 0., 0., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtepu64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_cvtepu64_ps(a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a);
        let e = _mm_set_ps(5., 2., 3., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_cvtepu64_ps(0b0110, a);
        let e = _mm_set_ps(0., 2., 3., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvtepu64_ps(a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a);
        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvtepu64_ps(0b01101001, a);
        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m256(r, e);
    }

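    // From here the conversions run the other way: floating-point sources
    // rounded into 64-bit integer lanes.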
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtpd_epi64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_cvtpd_epi64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtpd_epi64() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mask_cvtpd_epi64(b, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtpd_epi64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_maskz_cvtpd_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtpd_epi64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_cvtpd_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtpd_epi64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtpd_epi64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_cvtpd_epi64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtpd_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtpd_epi64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtpd_epi64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

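    // The 128-bit ps-to-epi64 conversions read only the two lowest f32 lanes;
    // with `_mm_set_ps(1., 2., 3., 4.)` those lanes hold 4. and 3.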
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvtps_epi64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvtps_epi64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvtps_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvtps_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvtps_epi64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvtps_epi64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtps_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtps_epi64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_cvtpd_epu64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mask_cvtpd_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_maskz_cvtpd_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_cvtpd_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_cvtpd_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtpd_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtpd_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvtps_epu64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvtps_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvtps_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvtps_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvtps_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvtps_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtps_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtps_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

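    // The `cvtt` variants truncate toward zero, so no rounding mode applies;
    // `_MM_FROUND_NO_EXC` only suppresses exceptions (SAE).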
9256    #[simd_test(enable = "avx512dq")]
9257    unsafe fn test_mm512_cvtt_roundpd_epi64() {
9258        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9259        let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a);
9260        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9261        assert_eq_m512i(r, e);
9262    }
9263
9264    #[simd_test(enable = "avx512dq")]
9265    unsafe fn test_mm512_mask_cvtt_roundpd_epi64() {
9266        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9267        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9268        let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9269        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9270        assert_eq_m512i(r, e);
9271    }
9272
9273    #[simd_test(enable = "avx512dq")]
9274    unsafe fn test_mm512_maskz_cvtt_roundpd_epi64() {
9275        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9276        let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9277        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9278        assert_eq_m512i(r, e);
9279    }
9280
9281    #[simd_test(enable = "avx512dq,avx512vl")]
9282    unsafe fn test_mm_cvttpd_epi64() {
9283        let a = _mm_set_pd(1., 2.);
9284        let r = _mm_cvttpd_epi64(a);
9285        let e = _mm_set_epi64x(1, 2);
9286        assert_eq_m128i(r, e);
9287    }
9288
9289    #[simd_test(enable = "avx512dq,avx512vl")]
9290    unsafe fn test_mm_mask_cvttpd_epi64() {
9291        let a = _mm_set_pd(1., 2.);
9292        let b = _mm_set_epi64x(3, 4);
9293        let r = _mm_mask_cvttpd_epi64(b, 0b01, a);
9294        let e = _mm_set_epi64x(3, 2);
9295        assert_eq_m128i(r, e);
9296    }
9297
9298    #[simd_test(enable = "avx512dq,avx512vl")]
9299    unsafe fn test_mm_maskz_cvttpd_epi64() {
9300        let a = _mm_set_pd(1., 2.);
9301        let r = _mm_maskz_cvttpd_epi64(0b01, a);
9302        let e = _mm_set_epi64x(0, 2);
9303        assert_eq_m128i(r, e);
9304    }
9305
9306    #[simd_test(enable = "avx512dq,avx512vl")]
9307    unsafe fn test_mm256_cvttpd_epi64() {
9308        let a = _mm256_set_pd(1., 2., 3., 4.);
9309        let r = _mm256_cvttpd_epi64(a);
9310        let e = _mm256_set_epi64x(1, 2, 3, 4);
9311        assert_eq_m256i(r, e);
9312    }
9313
9314    #[simd_test(enable = "avx512dq,avx512vl")]
9315    unsafe fn test_mm256_mask_cvttpd_epi64() {
9316        let a = _mm256_set_pd(1., 2., 3., 4.);
9317        let b = _mm256_set_epi64x(5, 6, 7, 8);
9318        let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a);
9319        let e = _mm256_set_epi64x(5, 2, 3, 8);
9320        assert_eq_m256i(r, e);
9321    }
9322
9323    #[simd_test(enable = "avx512dq,avx512vl")]
9324    unsafe fn test_mm256_maskz_cvttpd_epi64() {
9325        let a = _mm256_set_pd(1., 2., 3., 4.);
9326        let r = _mm256_maskz_cvttpd_epi64(0b0110, a);
9327        let e = _mm256_set_epi64x(0, 2, 3, 0);
9328        assert_eq_m256i(r, e);
9329    }
9330
9331    #[simd_test(enable = "avx512dq")]
9332    unsafe fn test_mm512_cvttpd_epi64() {
9333        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9334        let r = _mm512_cvttpd_epi64(a);
9335        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9336        assert_eq_m512i(r, e);
9337    }
9338
9339    #[simd_test(enable = "avx512dq")]
9340    unsafe fn test_mm512_mask_cvttpd_epi64() {
9341        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9342        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9343        let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a);
9344        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9345        assert_eq_m512i(r, e);
9346    }
9347
9348    #[simd_test(enable = "avx512dq")]
9349    unsafe fn test_mm512_maskz_cvttpd_epi64() {
9350        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9351        let r = _mm512_maskz_cvttpd_epi64(0b01101001, a);
9352        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9353        assert_eq_m512i(r, e);
9354    }
9355
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtt_roundps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtt_roundps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtt_roundps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvttps_epi64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvttps_epi64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvttps_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvttps_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttps_epi64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvttps_epi64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttps_epi64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvttps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvttps_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvttps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvttps_epi64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvttps_epi64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtt_roundpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvttpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_cvttpd_epu64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvttpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mask_cvttpd_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvttpd_epu64() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm_maskz_cvttpd_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvttpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_cvttpd_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvttpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvttpd_epu64() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttpd_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvttpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvttpd_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvttpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvttpd_epu64() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvttpd_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtt_roundps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvttps_epu64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvttps_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvttps_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvttps_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttps_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvttps_epu64() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttps_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvttps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvttps_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvttps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvttps_epu64() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvttps_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

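    // `mullo` multiplies 64-bit lanes and keeps the low 64 bits of each
    // 128-bit product, so small operands like the ones below round-trip
    // exactly.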
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mullo_epi64() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mullo_epi64(a, b);
        let e = _mm_set_epi64x(3, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_mullo_epi64() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_epi64x(3, 4);
        let c = _mm_set_epi64x(5, 6);
        let r = _mm_mask_mullo_epi64(c, 0b01, a, b);
        let e = _mm_set_epi64x(5, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_mullo_epi64() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_maskz_mullo_epi64(0b01, a, b);
        let e = _mm_set_epi64x(0, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mullo_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mullo_epi64(a, b);
        let e = _mm256_set_epi64x(5, 12, 21, 32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_mullo_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let c = _mm256_set_epi64x(9, 10, 11, 12);
        let r = _mm256_mask_mullo_epi64(c, 0b0110, a, b);
        let e = _mm256_set_epi64x(9, 12, 21, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_mullo_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_maskz_mullo_epi64(0b0110, a, b);
        let e = _mm256_set_epi64x(0, 12, 21, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mullo_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mullo_epi64(a, b);
        let e = _mm512_set_epi64(9, 20, 33, 48, 65, 84, 105, 128);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_mullo_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_mullo_epi64(c, 0b01101001, a, b);
        let e = _mm512_set_epi64(17, 20, 33, 20, 65, 22, 23, 128);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_mullo_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_mullo_epi64(0b01101001, a, b);
        let e = _mm512_set_epi64(0, 20, 33, 0, 65, 0, 0, 128);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_cvtmask8_u32() {
        let a: __mmask8 = 0b01101001;
        let r = _cvtmask8_u32(a);
        let e: u32 = 0b01101001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_cvtu32_mask8() {
        let a: u32 = 0b01101001;
        let r = _cvtu32_mask8(a);
        let e: __mmask8 = 0b01101001;
        assert_eq!(r, e);
    }

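    // `_kadd_mask*` is plain integer addition of the two mask registers,
    // e.g. 27549 + 23434 == 50983 below.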
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kadd_mask16() {
        let a: __mmask16 = 27549;
        let b: __mmask16 = 23434;
        let r = _kadd_mask16(a, b);
        let e: __mmask16 = 50983;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kadd_mask8() {
        let a: __mmask8 = 98;
        let b: __mmask8 = 117;
        let r = _kadd_mask8(a, b);
        let e: __mmask8 = 215;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kand_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kand_mask8(a, b);
        let e: __mmask8 = 0b00100001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kandn_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kandn_mask8(a, b);
        let e: __mmask8 = 0b10010010;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_knot_mask8() {
        let a: __mmask8 = 0b01101001;
        let r = _knot_mask8(a);
        let e: __mmask8 = 0b10010110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kor_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kor_mask8(a, b);
        let e: __mmask8 = 0b11111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kxnor_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kxnor_mask8(a, b);
        let e: __mmask8 = 0b00100101;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kxor_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kxor_mask8(a, b);
        let e: __mmask8 = 0b11011010;
        assert_eq!(r, e);
    }

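    // `_kortest_mask8_u8` ORs the two masks: the return value is 1 only if
    // the OR is all zeros, and the out-parameter is set to 1 only if the OR
    // is all ones. Here 0b01101001 | 0b10110110 == 0b11111111.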
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kortest_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask8_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kortestc_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110110;
        let r = _kortestc_mask8_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kortestz_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110110;
        let r = _kortestz_mask8_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kshiftli_mask8() {
        let a: __mmask8 = 0b01101001;
        let r = _kshiftli_mask8::<3>(a);
        let e: __mmask8 = 0b01001000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_kshiftri_mask8() {
        let a: __mmask8 = 0b01101001;
        let r = _kshiftri_mask8::<3>(a);
        let e: __mmask8 = 0b00001101;
        assert_eq!(r, e);
    }

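    // `_ktest_mask*_u8` returns 1 if `a & b` is all zeros and stores 1 in
    // the out-parameter if `!a & b` is all zeros. The operands below are
    // bitwise complements, so `a & b == 0` while `!a & b != 0`.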
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_ktest_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10010110;
        let mut and_not: u8 = 0;
        let r = _ktest_mask8_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_ktestc_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10010110;
        let r = _ktestc_mask8_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_ktestz_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10010110;
        let r = _ktestz_mask8_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_ktest_mask16_u8() {
        let a: __mmask16 = 0b0110100100111100;
        let b: __mmask16 = 0b1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask16_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_ktestc_mask16_u8() {
        let a: __mmask16 = 0b0110100100111100;
        let b: __mmask16 = 0b1001011011000011;
        let r = _ktestc_mask16_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_ktestz_mask16_u8() {
        let a: __mmask16 = 0b0110100100111100;
        let b: __mmask16 = 0b1001011011000011;
        let r = _ktestz_mask16_u8(a, b);
        assert_eq!(r, 1);
    }

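    // `_load_mask8` / `_store_mask8` just move a mask value through memory.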
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_load_mask8() {
        let a: __mmask8 = 0b01101001;
        let r = _load_mask8(&a);
        let e: __mmask8 = 0b01101001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_store_mask8() {
        let a: __mmask8 = 0b01101001;
        let mut r = 0;
        _store_mask8(&mut r, a);
        let e: __mmask8 = 0b01101001;
        assert_eq!(r, e);
    }

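    // `movepi*_mask` collects the sign bit of each element, so every
    // negative element sets its corresponding mask bit.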
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_movepi32_mask() {
        let a = _mm_set_epi32(0, -2, -3, 4);
        let r = _mm_movepi32_mask(a);
        let e = 0b0110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_movepi32_mask() {
        let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8);
        let r = _mm256_movepi32_mask(a);
        let e = 0b01101001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_movepi32_mask() {
        let a = _mm512_set_epi32(
            0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16,
        );
        let r = _mm512_movepi32_mask(a);
        let e = 0b0110100100111100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_movepi64_mask() {
        let a = _mm_set_epi64x(0, -2);
        let r = _mm_movepi64_mask(a);
        let e = 0b01;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_movepi64_mask() {
        let a = _mm256_set_epi64x(0, -2, -3, 4);
        let r = _mm256_movepi64_mask(a);
        let e = 0b0110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_movepi64_mask() {
        let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8);
        let r = _mm512_movepi64_mask(a);
        let e = 0b01101001;
        assert_eq!(r, e);
    }

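    // `movm_epi*` goes the other way: each mask bit is broadcast across its
    // whole element, yielding 0 or -1 (all ones) per lane.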
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_movm_epi32() {
        let a = 0b0110;
        let r = _mm_movm_epi32(a);
        let e = _mm_set_epi32(0, -1, -1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_movm_epi32() {
        let a = 0b01101001;
        let r = _mm256_movm_epi32(a);
        let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_movm_epi32() {
        let a = 0b0110100100111100;
        let r = _mm512_movm_epi32(a);
        let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_movm_epi64() {
        let a = 0b01;
        let r = _mm_movm_epi64(a);
        let e = _mm_set_epi64x(0, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_movm_epi64() {
        let a = 0b0110;
        let r = _mm256_movm_epi64(a);
        let e = _mm256_set_epi64x(0, -1, -1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_movm_epi64() {
        let a = 0b01101001;
        let r = _mm512_movm_epi64(a);
        let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1);
        assert_eq_m512i(r, e);
    }

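    // For the `range` intrinsics, IMM8 = 0b0101 selects the maximum
    // (imm8[1:0] = 01) with the sign taken from the comparison result
    // (imm8[3:2] = 01); for the positive inputs below this is a plain
    // element-wise max.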
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_range_round_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_range_round_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b01101001, a, b);
        let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_range_round_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b);
        let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_range_pd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(2., 1.);
        let r = _mm_range_pd::<0b0101>(a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_range_pd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(2., 1.);
        let c = _mm_set_pd(3., 4.);
        let r = _mm_mask_range_pd::<0b0101>(c, 0b01, a, b);
        let e = _mm_set_pd(3., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_range_pd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(2., 1.);
        let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b);
        let e = _mm_set_pd(0., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_range_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_pd(2., 1., 4., 3.);
        let r = _mm256_range_pd::<0b0101>(a, b);
        let e = _mm256_set_pd(2., 2., 4., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_range_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_pd(2., 1., 4., 3.);
        let c = _mm256_set_pd(5., 6., 7., 8.);
        let r = _mm256_mask_range_pd::<0b0101>(c, 0b0110, a, b);
        let e = _mm256_set_pd(5., 2., 4., 8.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_range_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_pd(2., 1., 4., 3.);
        let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b);
        let e = _mm256_set_pd(0., 2., 4., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_range_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_range_pd::<0b0101>(a, b);
        let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_range_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_range_pd::<0b0101>(c, 0b01101001, a, b);
        let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_range_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b);
        let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_range_round_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm512_set_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_range_round_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let c = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r =
            _mm512_mask_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_range_round_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_maskz_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_range_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(2., 1., 4., 3.);
        let r = _mm_range_ps::<0b0101>(a, b);
        let e = _mm_set_ps(2., 2., 4., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_range_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(2., 1., 4., 3.);
        let c = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm_mask_range_ps::<0b0101>(c, 0b0110, a, b);
        let e = _mm_set_ps(5., 2., 4., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_range_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(2., 1., 4., 3.);
        let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b);
        let e = _mm_set_ps(0., 2., 4., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_range_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm256_range_ps::<0b0101>(a, b);
        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_range_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
        let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_mask_range_ps::<0b0101>(c, 0b01101001, a, b);
        let e = _mm256_set_ps(9., 2., 4., 12., 6., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_range_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b);
        let e = _mm256_set_ps(0., 2., 4., 0., 6., 0., 0., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_range_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_range_ps::<0b0101>(a, b);
        let e = _mm512_set_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_range_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let c = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_range_ps::<0b0101>(c, 0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_range_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_maskz_range_ps::<0b0101>(0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

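    // The scalar `sd`/`ss` forms operate on element 0 only; the upper
    // elements of the result are copied from `a`.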
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_range_round_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm_set_sd(2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_range_round_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let c = _mm_set_sd(3.);
        let r = _mm_mask_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
        let e = _mm_set_sd(3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_range_round_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let r = _mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
        let e = _mm_set_sd(0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_range_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let c = _mm_set_sd(3.);
        let r = _mm_mask_range_sd::<0b0101>(c, 0b0, a, b);
        let e = _mm_set_sd(3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_range_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b);
        let e = _mm_set_sd(0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_range_round_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm_set_ss(2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_range_round_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let c = _mm_set_ss(3.);
        let r = _mm_mask_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
        let e = _mm_set_ss(3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_range_round_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
        let e = _mm_set_ss(0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_range_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let c = _mm_set_ss(3.);
        let r = _mm_mask_range_ss::<0b0101>(c, 0b0, a, b);
        let e = _mm_set_ss(3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_range_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b);
        let e = _mm_set_ss(0.);
        assert_eq_m128(r, e);
    }

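    // For `reduce`, IMM8 bits [7:4] give the number of fraction bits to
    // keep, so `16 | _MM_FROUND_TO_ZERO` keeps one bit and truncates the
    // rest: each element becomes x - trunc(2 * x) / 2, e.g. 0.75 -> 0.25
    // and 0.50 -> 0.0.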
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_reduce_round_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_reduce_round_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
        let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            src, 0b01101001, a,
        );
        let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_reduce_round_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            0b01101001, a,
        );
        let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_reduce_pd() {
        let a = _mm_set_pd(0.25, 0.50);
        let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm_set_pd(0.25, 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_reduce_pd() {
        let a = _mm_set_pd(0.25, 0.50);
        let src = _mm_set_pd(3., 4.);
        let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a);
        let e = _mm_set_pd(3., 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_reduce_pd() {
        let a = _mm_set_pd(0.25, 0.50);
        let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_reduce_pd() {
        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
        let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm256_set_pd(0.25, 0., 0.25, 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_reduce_pd() {
        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
        let src = _mm256_set_pd(3., 4., 5., 6.);
        let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
        let e = _mm256_set_pd(3., 0., 0.25, 6.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_reduce_pd() {
        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
        let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
        let e = _mm256_set_pd(0., 0., 0.25, 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_reduce_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_reduce_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
        let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
        let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_reduce_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
        let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_reduce_round_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_ps(
            0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_reduce_round_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let src = _mm512_set_ps(
            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
        );
        let r = _mm512_mask_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            src,
            0b0110100100111100,
            a,
        );
        let e = _mm512_set_ps(
            5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_reduce_round_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_maskz_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            0b0110100100111100,
            a,
        );
        let e = _mm512_set_ps(
            0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_reduce_ps() {
        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
        let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm_set_ps(0.25, 0., 0.25, 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_reduce_ps() {
        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
        let src = _mm_set_ps(2., 3., 4., 5.);
        let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
        let e = _mm_set_ps(2., 0., 0.25, 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_reduce_ps() {
        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
        let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
        let e = _mm_set_ps(0., 0., 0.25, 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_reduce_ps() {
        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_reduce_ps() {
        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.);
        let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
        let e = _mm256_set_ps(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_reduce_ps() {
        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
        let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_reduce_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm512_set_ps(
            0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_reduce_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let src = _mm512_set_ps(
            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
        );
        let r = _mm512_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110100100111100, a);
        let e = _mm512_set_ps(
            5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_reduce_ps() {
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110100100111100, a);
        let e = _mm512_set_ps(
            0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_reduce_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_reduce_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let c = _mm_set_pd(3., 4.);
        let r = _mm_mask_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            c, 0b0, a, b,
        );
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_reduce_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let r =
            _mm_maskz_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_reduce_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_reduce_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let c = _mm_set_pd(3., 4.);
        let r = _mm_mask_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_reduce_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_sd(0.25);
        let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_reduce_round_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm_set_ps(1., 2., 3., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_reduce_round_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let c = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm_mask_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            c, 0b0, a, b,
        );
        let e = _mm_set_ps(1., 2., 3., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_reduce_round_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let r =
            _mm_maskz_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
        let e = _mm_set_ps(1., 2., 3., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_reduce_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
        let e = _mm_set_ps(1., 2., 3., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_reduce_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let c = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm_mask_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
        let e = _mm_set_ps(1., 2., 3., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_reduce_ss() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ss(0.25);
        let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
        let e = _mm_set_ps(1., 2., 3., 0.);
        assert_eq_m128(r, e);
    }

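    // For `fpclass`, IMM8 = 0x18 tests the positive-infinity (0x08) and
    // negative-infinity (0x10) categories, so only infinite elements set
    // their mask bits.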
10758    #[simd_test(enable = "avx512dq,avx512vl")]
10759    unsafe fn test_mm_fpclass_pd_mask() {
10760        let a = _mm_set_pd(1., f64::INFINITY);
10761        let r = _mm_fpclass_pd_mask::<0x18>(a);
10762        let e = 0b01;
10763        assert_eq!(r, e);
10764    }
10765
10766    #[simd_test(enable = "avx512dq,avx512vl")]
10767    unsafe fn test_mm_mask_fpclass_pd_mask() {
10768        let a = _mm_set_pd(1., f64::INFINITY);
10769        let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a);
10770        let e = 0b00;
10771        assert_eq!(r, e);
10772    }
10773
10774    #[simd_test(enable = "avx512dq,avx512vl")]
10775    unsafe fn test_mm256_fpclass_pd_mask() {
10776        let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
10777        let r = _mm256_fpclass_pd_mask::<0x18>(a);
10778        let e = 0b0110;
10779        assert_eq!(r, e);
10780    }
10781
10782    #[simd_test(enable = "avx512dq,avx512vl")]
10783    unsafe fn test_mm256_mask_fpclass_pd_mask() {
10784        let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
10785        let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a);
10786        let e = 0b0010;
10787        assert_eq!(r, e);
10788    }
10789
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_fpclass_pd_mask() {
        let a = _mm512_set_pd(
            1.,
            f64::INFINITY,
            f64::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f64::NAN,
            1.0e-308,
        );
        let r = _mm512_fpclass_pd_mask::<0x18>(a);
        let e = 0b01100000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_fpclass_pd_mask() {
        let a = _mm512_set_pd(
            1.,
            f64::INFINITY,
            f64::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f64::NAN,
            1.0e-308,
        );
        let r = _mm512_mask_fpclass_pd_mask::<0x18>(0b10101010, a);
        let e = 0b00100000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_fpclass_ps_mask() {
        let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
        let r = _mm_fpclass_ps_mask::<0x18>(a);
        let e = 0b0110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_fpclass_ps_mask() {
        let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
        let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a);
        let e = 0b0010;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_fpclass_ps_mask() {
        let a = _mm256_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
        );
        let r = _mm256_fpclass_ps_mask::<0x18>(a);
        let e = 0b01100000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_fpclass_ps_mask() {
        let a = _mm256_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
        );
        let r = _mm256_mask_fpclass_ps_mask::<0x18>(0b10101010, a);
        let e = 0b00100000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_fpclass_ps_mask() {
        let a = _mm512_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
            -1.,
            f32::NEG_INFINITY,
            f32::INFINITY,
            -0.0,
            0.0,
            2.0,
            f32::NAN,
            -1.0e-38,
        );
        let r = _mm512_fpclass_ps_mask::<0x18>(a);
        let e = 0b0110000001100000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_fpclass_ps_mask() {
        let a = _mm512_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
            -1.,
            f32::NEG_INFINITY,
            f32::INFINITY,
            -0.0,
            0.0,
            2.0,
            f32::NAN,
            -1.0e-38,
        );
        let r = _mm512_mask_fpclass_ps_mask::<0x18>(0b1010101010101010, a);
        let e = 0b0010000000100000;
        assert_eq!(r, e);
    }

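    // The scalar `_sd`/`_ss` variants classify only the lowest element and
    // return a single-bit mask.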
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_fpclass_sd_mask() {
        let a = _mm_set_pd(1., f64::INFINITY);
        let r = _mm_fpclass_sd_mask::<0x18>(a);
        let e = 0b1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_fpclass_sd_mask() {
        let a = _mm_set_sd(f64::INFINITY);
        let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a);
        let e = 0b0;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_fpclass_ss_mask() {
        let a = _mm_set_ss(f32::INFINITY);
        let r = _mm_fpclass_ss_mask::<0x18>(a);
        let e = 0b1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_fpclass_ss_mask() {
        let a = _mm_set_ss(f32::INFINITY);
        let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a);
        let e = 0b0;
        assert_eq!(r, e);
    }
}