core/stdarch/crates/core_arch/src/x86/avx512f.rs

use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i64x8();
        let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i64x4();
        let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i64x2();
        let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}
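
// Editor's sketch (not part of upstream stdarch): 64-bit absolute value has no
// SSE/AVX2 counterpart, so `vpabsq` is only reachable through the AVX-512F/VL
// intrinsics above. Assumes `_mm512_set1_epi64` is available here as a safe
// intrinsic.
#[cfg(test)]
#[allow(dead_code)]
mod abs_epi64_sketch {
    use crate::core_arch::x86::*;

    #[target_feature(enable = "avx512f")]
    fn demo() -> __m512i {
        // Each lane becomes 7. Edge case: |i64::MIN| does not fit in an i64, so
        // such a lane comes back as 0x8000_0000_0000_0000 (2^63 when read as an
        // unsigned value), matching the behaviour of vpabsq.
        _mm512_abs_epi64(_mm512_set1_epi64(-7))
    }
}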

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}
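
// Editor's sketch (not part of upstream stdarch): floating-point abs here is a
// sign-bit clear (hence the vpandd/vpandq in the `assert_instr` attributes
// above), so it maps -0.0 to +0.0 and only touches the sign of NaNs. Assumes
// `_mm512_set1_ps` is available here as a safe intrinsic.
#[cfg(test)]
#[allow(dead_code)]
mod abs_ps_sketch {
    use crate::core_arch::x86::*;

    #[target_feature(enable = "avx512f")]
    fn demo() -> (__m512, __m512) {
        let v = _mm512_set1_ps(-0.0);
        // Every lane becomes +0.0.
        let plain = _mm512_abs_ps(v);
        // Writemask: lanes 0..8 become +0.0, lanes 8..16 keep the value from `src`.
        let src = _mm512_set1_ps(1.5);
        let masked = _mm512_mask_abs_ps(src, 0x00FF, v);
        (plain, masked)
    }
}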

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
    }
}
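
// Editor's sketch (not part of upstream stdarch): `mask_mov` is effectively a
// per-lane blend, which is the same selection the other mask/maskz intrinsics
// in this file apply to their computed results. Assumes `_mm512_set1_ps` is
// available here as a safe intrinsic.
#[cfg(test)]
#[allow(dead_code)]
mod mask_mov_sketch {
    use crate::core_arch::x86::*;

    #[target_feature(enable = "avx512f")]
    fn blend(k: __mmask16, a: __m512, src: __m512) -> __m512 {
        // Lane i of the result is a[i] where bit i of k is set, src[i] otherwise.
        _mm512_mask_mov_ps(src, k, a)
    }

    #[target_feature(enable = "avx512f")]
    fn demo() -> __m512 {
        // Lanes alternate 1.0 (from `a`) and 2.0 (from `src`), starting with lane 0.
        blend(0b0101_0101_0101_0101, _mm512_set1_ps(1.0), _mm512_set1_ps(2.0))
    }
}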

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
    }
}
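
// Editor's sketch (not part of upstream stdarch): the integer adds above lower to
// `simd_add`, so they wrap on overflow just like the vpaddd/vpaddq instructions.
// Assumes `_mm512_set1_epi32` is available here as a safe intrinsic.
#[cfg(test)]
#[allow(dead_code)]
mod add_epi32_sketch {
    use crate::core_arch::x86::*;

    #[target_feature(enable = "avx512f")]
    fn demo() -> (__m512i, __m512i) {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        // Wraps: every lane becomes i32::MIN.
        let wrapped = _mm512_add_epi32(a, b);
        // Masked form: lanes 0..8 hold the wrapped sum, lanes 8..16 keep `a`.
        let masked = _mm512_mask_add_epi32(a, 0x00FF, a, b);
        (wrapped, masked)
    }
}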

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1187#[cfg_attr(test, assert_instr(vpsubq))]
1188pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1189    unsafe {
1190        let sub = _mm_sub_epi64(a, b).as_i64x2();
1191        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
1192    }
1193}
1194
1195/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1196///
1197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1198#[inline]
1199#[target_feature(enable = "avx512f")]
1200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1201#[cfg_attr(test, assert_instr(vsubps))]
1202pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1203    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
1204}
1205
1206/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1212#[cfg_attr(test, assert_instr(vsubps))]
1213pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1214    unsafe {
1215        let sub = _mm512_sub_ps(a, b).as_f32x16();
1216        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
1217    }
1218}
1219
1220/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1221///
1222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1223#[inline]
1224#[target_feature(enable = "avx512f")]
1225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1226#[cfg_attr(test, assert_instr(vsubps))]
1227pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1228    unsafe {
1229        let sub = _mm512_sub_ps(a, b).as_f32x16();
1230        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
1231    }
1232}
1233
1234/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1237#[inline]
1238#[target_feature(enable = "avx512f,avx512vl")]
1239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1240#[cfg_attr(test, assert_instr(vsubps))]
1241pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1242    unsafe {
1243        let sub = _mm256_sub_ps(a, b).as_f32x8();
1244        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
1245    }
1246}
1247
1248/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1249///
1250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1251#[inline]
1252#[target_feature(enable = "avx512f,avx512vl")]
1253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1254#[cfg_attr(test, assert_instr(vsubps))]
1255pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1256    unsafe {
1257        let sub = _mm256_sub_ps(a, b).as_f32x8();
1258        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
1259    }
1260}
1261
1262/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1263///
1264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1265#[inline]
1266#[target_feature(enable = "avx512f,avx512vl")]
1267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1268#[cfg_attr(test, assert_instr(vsubps))]
1269pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1270    unsafe {
1271        let sub = _mm_sub_ps(a, b).as_f32x4();
1272        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
1273    }
1274}
1275
1276/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1277///
1278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1279#[inline]
1280#[target_feature(enable = "avx512f,avx512vl")]
1281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1282#[cfg_attr(test, assert_instr(vsubps))]
1283pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1284    unsafe {
1285        let sub = _mm_sub_ps(a, b).as_f32x4();
1286        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
1287    }
1288}
1289
1290/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1291///
1292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1293#[inline]
1294#[target_feature(enable = "avx512f")]
1295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1296#[cfg_attr(test, assert_instr(vsubpd))]
1297pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1298    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
1299}
1300
1301/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1302///
1303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1304#[inline]
1305#[target_feature(enable = "avx512f")]
1306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1307#[cfg_attr(test, assert_instr(vsubpd))]
1308pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1309    unsafe {
1310        let sub = _mm512_sub_pd(a, b).as_f64x8();
1311        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
1312    }
1313}
1314
1315/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1316///
1317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1318#[inline]
1319#[target_feature(enable = "avx512f")]
1320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1321#[cfg_attr(test, assert_instr(vsubpd))]
1322pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1323    unsafe {
1324        let sub = _mm512_sub_pd(a, b).as_f64x8();
1325        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
1326    }
1327}
1328
1329/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1332#[inline]
1333#[target_feature(enable = "avx512f,avx512vl")]
1334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1335#[cfg_attr(test, assert_instr(vsubpd))]
1336pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1337    unsafe {
1338        let sub = _mm256_sub_pd(a, b).as_f64x4();
1339        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
1340    }
1341}
1342
1343/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1344///
1345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1346#[inline]
1347#[target_feature(enable = "avx512f,avx512vl")]
1348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1349#[cfg_attr(test, assert_instr(vsubpd))]
1350pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1351    unsafe {
1352        let sub = _mm256_sub_pd(a, b).as_f64x4();
1353        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
1354    }
1355}
1356
1357/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1358///
1359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1360#[inline]
1361#[target_feature(enable = "avx512f,avx512vl")]
1362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1363#[cfg_attr(test, assert_instr(vsubpd))]
1364pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1365    unsafe {
1366        let sub = _mm_sub_pd(a, b).as_f64x2();
1367        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
1368    }
1369}
1370
1371/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1374#[inline]
1375#[target_feature(enable = "avx512f,avx512vl")]
1376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1377#[cfg_attr(test, assert_instr(vsubpd))]
1378pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1379    unsafe {
1380        let sub = _mm_sub_pd(a, b).as_f64x2();
1381        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
1382    }
1383}
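
// --- Illustrative sketch (not part of the upstream source) ---
// A typical writemask pattern: pass the accumulator itself as `src`, so only the lanes
// selected by `k` are updated and the rest stay untouched. The helper name
// `conditional_sub_pd_demo` is hypothetical; it assumes an avx512f-capable target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn conditional_sub_pd_demo() {
    let acc = _mm512_set1_pd(10.0);
    let step = _mm512_set1_pd(2.5);
    // Only subtract in lanes where acc is above the threshold 5.0.
    let k = _mm512_cmplt_pd_mask(_mm512_set1_pd(5.0), acc);
    let updated = _mm512_mask_sub_pd(acc, k, acc, step);
    // Here every lane satisfies the condition, so all lanes become 7.5.
    assert_eq!(_mm512_cmpeq_pd_mask(updated, _mm512_set1_pd(7.5)), 0xff);
}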
1384
1385/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1386///
1387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
1388#[inline]
1389#[target_feature(enable = "avx512f")]
1390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1391#[cfg_attr(test, assert_instr(vpmuldq))]
1392pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1393    unsafe {
1394        let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8())); // keep the low 32 bits of each lane, sign-extended back to 64 bits
1395        let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8())); // likewise for b
1396        transmute(simd_mul(a, b))
1397    }
1398}
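
// --- Illustrative sketch (not part of the upstream source) ---
// Demonstrates that `_mm512_mul_epi32` multiplies only the *low, signed* 32-bit half of
// each 64-bit lane: the high half is ignored and negative low halves sign-extend. The
// helper name `mul_epi32_demo` is hypothetical; it assumes an avx512f-capable target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mul_epi32_demo() {
    // High halves (0x1234 and 0x5678) are junk that must be ignored;
    // the low halves are 6 and -5 when read as signed 32-bit values.
    let a = _mm512_set1_epi64((0x1234i64 << 32) | 6);
    let b = _mm512_set1_epi64((0x5678i64 << 32) | (-5i32 as u32 as i64));
    let prod = _mm512_mul_epi32(a, b);
    // 6 * -5 = -30 in every 64-bit lane.
    assert_eq!(_mm512_cmpeq_epi64_mask(prod, _mm512_set1_epi64(-30)), 0xff);
}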
1399
1400/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1401///
1402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1403#[inline]
1404#[target_feature(enable = "avx512f")]
1405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1406#[cfg_attr(test, assert_instr(vpmuldq))]
1407pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1408    unsafe {
1409        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1410        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1411    }
1412}
1413
1414/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1415///
1416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1417#[inline]
1418#[target_feature(enable = "avx512f")]
1419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1420#[cfg_attr(test, assert_instr(vpmuldq))]
1421pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1422    unsafe {
1423        let mul = _mm512_mul_epi32(a, b).as_i64x8();
1424        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
1425    }
1426}
1427
1428/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1429///
1430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1431#[inline]
1432#[target_feature(enable = "avx512f,avx512vl")]
1433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1434#[cfg_attr(test, assert_instr(vpmuldq))]
1435pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1436    unsafe {
1437        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1438        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
1439    }
1440}
1441
1442/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1443///
1444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1445#[inline]
1446#[target_feature(enable = "avx512f,avx512vl")]
1447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1448#[cfg_attr(test, assert_instr(vpmuldq))]
1449pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1450    unsafe {
1451        let mul = _mm256_mul_epi32(a, b).as_i64x4();
1452        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
1453    }
1454}
1455
1456/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1457///
1458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1459#[inline]
1460#[target_feature(enable = "avx512f,avx512vl")]
1461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1462#[cfg_attr(test, assert_instr(vpmuldq))]
1463pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1464    unsafe {
1465        let mul = _mm_mul_epi32(a, b).as_i64x2();
1466        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
1467    }
1468}
1469
1470/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1473#[inline]
1474#[target_feature(enable = "avx512f,avx512vl")]
1475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1476#[cfg_attr(test, assert_instr(vpmuldq))]
1477pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1478    unsafe {
1479        let mul = _mm_mul_epi32(a, b).as_i64x2();
1480        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
1481    }
1482}
1483
1484/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1485///
1486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
1487#[inline]
1488#[target_feature(enable = "avx512f")]
1489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1490#[cfg_attr(test, assert_instr(vpmulld))]
1491pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
1492    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
1493}
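
// --- Illustrative sketch (not part of the upstream source) ---
// Shows the "low 32 bits of the intermediate 64-bit product" behaviour of
// `_mm512_mullo_epi32`: the multiplication wraps modulo 2^32. The helper name
// `mullo_epi32_demo` is hypothetical; it assumes an avx512f-capable target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mullo_epi32_demo() {
    let a = _mm512_set1_epi32(0x0001_0000); // 2^16 in every lane
    let prod = _mm512_mullo_epi32(a, a); // 2^32 does not fit in 32 bits...
    // ...so every lane wraps to 0, exactly like i32::wrapping_mul.
    assert_eq!(_mm512_cmpeq_epi32_mask(prod, _mm512_setzero_si512()), 0xffff);
}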
1494
1495/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1496///
1497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1498#[inline]
1499#[target_feature(enable = "avx512f")]
1500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1501#[cfg_attr(test, assert_instr(vpmulld))]
1502pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1503    unsafe {
1504        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1505        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
1506    }
1507}
1508
1509/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1510///
1511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1512#[inline]
1513#[target_feature(enable = "avx512f")]
1514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1515#[cfg_attr(test, assert_instr(vpmulld))]
1516pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1517    unsafe {
1518        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1519        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
1520    }
1521}
1522
1523/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1524///
1525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1526#[inline]
1527#[target_feature(enable = "avx512f,avx512vl")]
1528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1529#[cfg_attr(test, assert_instr(vpmulld))]
1530pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1531    unsafe {
1532        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1533        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
1534    }
1535}
1536
1537/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1538///
1539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1540#[inline]
1541#[target_feature(enable = "avx512f,avx512vl")]
1542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1543#[cfg_attr(test, assert_instr(vpmulld))]
1544pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1545    unsafe {
1546        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1547        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
1548    }
1549}
1550
1551/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1552///
1553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1554#[inline]
1555#[target_feature(enable = "avx512f,avx512vl")]
1556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1557#[cfg_attr(test, assert_instr(vpmulld))]
1558pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1559    unsafe {
1560        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1561        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
1562    }
1563}
1564
1565/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1566///
1567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1568#[inline]
1569#[target_feature(enable = "avx512f,avx512vl")]
1570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1571#[cfg_attr(test, assert_instr(vpmulld))]
1572pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1573    unsafe {
1574        let mul = _mm_mullo_epi32(a, b).as_i32x4();
1575        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
1576    }
1577}
1578
1579/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1580///
1581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1582///
1583/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1584#[inline]
1585#[target_feature(enable = "avx512f")]
1586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1587pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
1588    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
1589}
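
// --- Illustrative sketch (not part of the upstream source) ---
// `_mm512_mullox_epi64` has no single AVX512F instruction behind it (a native vpmullq
// requires AVX512DQ), so it compiles to a short sequence, but the result is still the
// wrapping low 64 bits of each product, like i64::wrapping_mul. The helper name
// `mullox_epi64_demo` is hypothetical; it assumes an avx512f-capable target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mullox_epi64_demo() {
    let a = _mm512_set1_epi64(i64::MAX);
    let b = _mm512_set1_epi64(3);
    let prod = _mm512_mullox_epi64(a, b);
    let expected = _mm512_set1_epi64(i64::MAX.wrapping_mul(3));
    assert_eq!(_mm512_cmpeq_epi64_mask(prod, expected), 0xff);
}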
1590
1591/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1594///
1595/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1596#[inline]
1597#[target_feature(enable = "avx512f")]
1598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1599pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1600    unsafe {
1601        let mul = _mm512_mullox_epi64(a, b).as_i64x8();
1602        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1603    }
1604}
1605
1606/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1607///
1608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
1609#[inline]
1610#[target_feature(enable = "avx512f")]
1611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1612#[cfg_attr(test, assert_instr(vpmuludq))]
1613pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1614    unsafe {
1615        let a = a.as_u64x8();
1616        let b = b.as_u64x8();
1617        let mask = u64x8::splat(u32::MAX.into()); // 0x0000_0000_FFFF_FFFF in every 64-bit lane
1618        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) // clear the high halves, then multiply as full 64-bit lanes
1619    }
1620}
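
// --- Illustrative sketch (not part of the upstream source) ---
// Contrasts `_mm512_mul_epu32` with the signed `_mm512_mul_epi32`: the low 32-bit half
// of each lane is treated as *unsigned*, so 0xFFFF_FFFF multiplies as 4294967295 rather
// than as -1. The helper name `mul_epu32_demo` is hypothetical; it assumes an
// avx512f-capable target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mul_epu32_demo() {
    let a = _mm512_set1_epi64(0xFFFF_FFFF); // low half = u32::MAX
    let b = _mm512_set1_epi64(2);
    // Unsigned interpretation: u32::MAX * 2 = 0x1_FFFF_FFFE.
    let unsigned = _mm512_mul_epu32(a, b);
    assert_eq!(
        _mm512_cmpeq_epi64_mask(unsigned, _mm512_set1_epi64(0x1_FFFF_FFFE)),
        0xff
    );
    // Signed interpretation: the same low half reads as -1, giving -2.
    let signed = _mm512_mul_epi32(a, b);
    assert_eq!(_mm512_cmpeq_epi64_mask(signed, _mm512_set1_epi64(-2)), 0xff);
}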
1621
1622/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1623///
1624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1625#[inline]
1626#[target_feature(enable = "avx512f")]
1627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1628#[cfg_attr(test, assert_instr(vpmuludq))]
1629pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1630    unsafe {
1631        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1632        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
1633    }
1634}
1635
1636/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1637///
1638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
1639#[inline]
1640#[target_feature(enable = "avx512f")]
1641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1642#[cfg_attr(test, assert_instr(vpmuludq))]
1643pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1644    unsafe {
1645        let mul = _mm512_mul_epu32(a, b).as_u64x8();
1646        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
1647    }
1648}
1649
1650/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1653#[inline]
1654#[target_feature(enable = "avx512f,avx512vl")]
1655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1656#[cfg_attr(test, assert_instr(vpmuludq))]
1657pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1658    unsafe {
1659        let mul = _mm256_mul_epu32(a, b).as_u64x4();
1660        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
1661    }
1662}
1663
1664/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1665///
1666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1667#[inline]
1668#[target_feature(enable = "avx512f,avx512vl")]
1669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1670#[cfg_attr(test, assert_instr(vpmuludq))]
1671pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1672    unsafe {
1673        let mul = _mm256_mul_epu32(a, b).as_u64x4();
1674        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
1675    }
1676}
1677
1678/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1679///
1680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1681#[inline]
1682#[target_feature(enable = "avx512f,avx512vl")]
1683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1684#[cfg_attr(test, assert_instr(vpmuludq))]
1685pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1686    unsafe {
1687        let mul = _mm_mul_epu32(a, b).as_u64x2();
1688        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
1689    }
1690}
1691
1692/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1693///
1694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1695#[inline]
1696#[target_feature(enable = "avx512f,avx512vl")]
1697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1698#[cfg_attr(test, assert_instr(vpmuludq))]
1699pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1700    unsafe {
1701        let mul = _mm_mul_epu32(a, b).as_u64x2();
1702        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
1703    }
1704}
1705
1706/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1707///
1708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
1709#[inline]
1710#[target_feature(enable = "avx512f")]
1711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1712#[cfg_attr(test, assert_instr(vmulps))]
1713pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
1714    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
1715}
1716
1717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1718///
1719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1720#[inline]
1721#[target_feature(enable = "avx512f")]
1722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1723#[cfg_attr(test, assert_instr(vmulps))]
1724pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1725    unsafe {
1726        let mul = _mm512_mul_ps(a, b).as_f32x16();
1727        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
1728    }
1729}
1730
1731/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1732///
1733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1734#[inline]
1735#[target_feature(enable = "avx512f")]
1736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1737#[cfg_attr(test, assert_instr(vmulps))]
1738pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1739    unsafe {
1740        let mul = _mm512_mul_ps(a, b).as_f32x16();
1741        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
1742    }
1743}
1744
1745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1746///
1747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1748#[inline]
1749#[target_feature(enable = "avx512f,avx512vl")]
1750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1751#[cfg_attr(test, assert_instr(vmulps))]
1752pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1753    unsafe {
1754        let mul = _mm256_mul_ps(a, b).as_f32x8();
1755        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
1756    }
1757}
1758
1759/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1760///
1761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1762#[inline]
1763#[target_feature(enable = "avx512f,avx512vl")]
1764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1765#[cfg_attr(test, assert_instr(vmulps))]
1766pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1767    unsafe {
1768        let mul = _mm256_mul_ps(a, b).as_f32x8();
1769        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
1770    }
1771}
1772
1773/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1774///
1775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1776#[inline]
1777#[target_feature(enable = "avx512f,avx512vl")]
1778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1779#[cfg_attr(test, assert_instr(vmulps))]
1780pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1781    unsafe {
1782        let mul = _mm_mul_ps(a, b).as_f32x4();
1783        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
1784    }
1785}
1786
1787/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1790#[inline]
1791#[target_feature(enable = "avx512f,avx512vl")]
1792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1793#[cfg_attr(test, assert_instr(vmulps))]
1794pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1795    unsafe {
1796        let mul = _mm_mul_ps(a, b).as_f32x4();
1797        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
1798    }
1799}
1800
1801/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1802///
1803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1804#[inline]
1805#[target_feature(enable = "avx512f")]
1806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1807#[cfg_attr(test, assert_instr(vmulpd))]
1808pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
1809    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
1810}
1811
1812/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1813///
1814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
1815#[inline]
1816#[target_feature(enable = "avx512f")]
1817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1818#[cfg_attr(test, assert_instr(vmulpd))]
1819pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1820    unsafe {
1821        let mul = _mm512_mul_pd(a, b).as_f64x8();
1822        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
1823    }
1824}
1825
1826/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1827///
1828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1829#[inline]
1830#[target_feature(enable = "avx512f")]
1831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1832#[cfg_attr(test, assert_instr(vmulpd))]
1833pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1834    unsafe {
1835        let mul = _mm512_mul_pd(a, b).as_f64x8();
1836        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
1837    }
1838}
1839
1840/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1841///
1842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1843#[inline]
1844#[target_feature(enable = "avx512f,avx512vl")]
1845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1846#[cfg_attr(test, assert_instr(vmulpd))]
1847pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1848    unsafe {
1849        let mul = _mm256_mul_pd(a, b).as_f64x4();
1850        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
1851    }
1852}
1853
1854/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1855///
1856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
1857#[inline]
1858#[target_feature(enable = "avx512f,avx512vl")]
1859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1860#[cfg_attr(test, assert_instr(vmulpd))]
1861pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1862    unsafe {
1863        let mul = _mm256_mul_pd(a, b).as_f64x4();
1864        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
1865    }
1866}
1867
1868/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1869///
1870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
1871#[inline]
1872#[target_feature(enable = "avx512f,avx512vl")]
1873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1874#[cfg_attr(test, assert_instr(vmulpd))]
1875pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1876    unsafe {
1877        let mul = _mm_mul_pd(a, b).as_f64x2();
1878        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
1879    }
1880}
1881
1882/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1883///
1884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
1885#[inline]
1886#[target_feature(enable = "avx512f,avx512vl")]
1887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1888#[cfg_attr(test, assert_instr(vmulpd))]
1889pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1890    unsafe {
1891        let mul = _mm_mul_pd(a, b).as_f64x2();
1892        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
1893    }
1894}
1895
1896/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1897///
1898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
1899#[inline]
1900#[target_feature(enable = "avx512f")]
1901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1902#[cfg_attr(test, assert_instr(vdivps))]
1903pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
1904    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
1905}
1906
1907/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1908///
1909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
1910#[inline]
1911#[target_feature(enable = "avx512f")]
1912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1913#[cfg_attr(test, assert_instr(vdivps))]
1914pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1915    unsafe {
1916        let div = _mm512_div_ps(a, b).as_f32x16();
1917        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
1918    }
1919}
1920
1921/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1922///
1923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
1924#[inline]
1925#[target_feature(enable = "avx512f")]
1926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1927#[cfg_attr(test, assert_instr(vdivps))]
1928pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1929    unsafe {
1930        let div = _mm512_div_ps(a, b).as_f32x16();
1931        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
1932    }
1933}
1934
1935/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1936///
1937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
1938#[inline]
1939#[target_feature(enable = "avx512f,avx512vl")]
1940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1941#[cfg_attr(test, assert_instr(vdivps))]
1942pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1943    unsafe {
1944        let div = _mm256_div_ps(a, b).as_f32x8();
1945        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
1946    }
1947}
1948
1949/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1950///
1951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
1952#[inline]
1953#[target_feature(enable = "avx512f,avx512vl")]
1954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1955#[cfg_attr(test, assert_instr(vdivps))]
1956pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1957    unsafe {
1958        let div = _mm256_div_ps(a, b).as_f32x8();
1959        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
1960    }
1961}
1962
1963/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1964///
1965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
1966#[inline]
1967#[target_feature(enable = "avx512f,avx512vl")]
1968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1969#[cfg_attr(test, assert_instr(vdivps))]
1970pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1971    unsafe {
1972        let div = _mm_div_ps(a, b).as_f32x4();
1973        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
1974    }
1975}
1976
1977/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1978///
1979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
1980#[inline]
1981#[target_feature(enable = "avx512f,avx512vl")]
1982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1983#[cfg_attr(test, assert_instr(vdivps))]
1984pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1985    unsafe {
1986        let div = _mm_div_ps(a, b).as_f32x4();
1987        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
1988    }
1989}
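
// --- Illustrative sketch (not part of the upstream source) ---
// A common use of the zeromask form: build the mask from a comparison so that lanes with
// a zero divisor come out as 0.0 rather than the quotient. The helper name
// `maskz_div_ps_demo` is hypothetical; it assumes an avx512f-capable target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_div_ps_demo() {
    let a = _mm512_set1_ps(6.0);
    let b = _mm512_set_ps(
        3.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 0.0,
    );
    // Select only the lanes where the divisor is non-zero.
    let k = _mm512_cmpneq_ps_mask(b, _mm512_setzero_ps());
    // Selected lanes hold 6.0 / 3.0 = 2.0; lanes with a zero divisor end up as 0.0.
    let q = _mm512_maskz_div_ps(k, a, b);
    let expected = _mm512_maskz_mov_ps(k, _mm512_set1_ps(2.0));
    assert_eq!(_mm512_cmpeq_ps_mask(q, expected), 0xffff);
}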
1990
1991/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1992///
1993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
1994#[inline]
1995#[target_feature(enable = "avx512f")]
1996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1997#[cfg_attr(test, assert_instr(vdivpd))]
1998pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
1999    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
2000}
2001
2002/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2003///
2004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
2005#[inline]
2006#[target_feature(enable = "avx512f")]
2007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2008#[cfg_attr(test, assert_instr(vdivpd))]
2009pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2010    unsafe {
2011        let div = _mm512_div_pd(a, b).as_f64x8();
2012        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
2013    }
2014}
2015
2016/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2017///
2018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
2019#[inline]
2020#[target_feature(enable = "avx512f")]
2021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2022#[cfg_attr(test, assert_instr(vdivpd))]
2023pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2024    unsafe {
2025        let div = _mm512_div_pd(a, b).as_f64x8();
2026        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
2027    }
2028}
2029
2030/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2031///
2032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
2033#[inline]
2034#[target_feature(enable = "avx512f,avx512vl")]
2035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2036#[cfg_attr(test, assert_instr(vdivpd))]
2037pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2038    unsafe {
2039        let div = _mm256_div_pd(a, b).as_f64x4();
2040        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
2041    }
2042}
2043
2044/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2045///
2046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
2047#[inline]
2048#[target_feature(enable = "avx512f,avx512vl")]
2049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2050#[cfg_attr(test, assert_instr(vdivpd))]
2051pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2052    unsafe {
2053        let div = _mm256_div_pd(a, b).as_f64x4();
2054        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
2055    }
2056}
2057
2058/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2059///
2060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
2061#[inline]
2062#[target_feature(enable = "avx512f,avx512vl")]
2063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2064#[cfg_attr(test, assert_instr(vdivpd))]
2065pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2066    unsafe {
2067        let div = _mm_div_pd(a, b).as_f64x2();
2068        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
2069    }
2070}
2071
2072/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2073///
2074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
2075#[inline]
2076#[target_feature(enable = "avx512f,avx512vl")]
2077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2078#[cfg_attr(test, assert_instr(vdivpd))]
2079pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2080    unsafe {
2081        let div = _mm_div_pd(a, b).as_f64x2();
2082        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
2083    }
2084}
2085
2086/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
2087///
2088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
2089#[inline]
2090#[target_feature(enable = "avx512f")]
2091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2092#[cfg_attr(test, assert_instr(vpmaxsd))]
2093pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
2094    unsafe {
2095        let a = a.as_i32x16();
2096        let b = b.as_i32x16();
2097        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b)) // per lane: a if a > b (signed), else b
2098    }
2099}
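
// --- Illustrative sketch (not part of the upstream source) ---
// Emphasises that `_mm512_max_epi32` compares lanes as *signed* integers: -1 loses to 1,
// whereas the unsigned maximum (`_mm512_max_epu32`) would pick 0xFFFF_FFFF. The helper
// name `max_epi32_demo` is hypothetical; it assumes an avx512f-capable target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn max_epi32_demo() {
    let a = _mm512_set1_epi32(-1); // bit pattern 0xFFFF_FFFF
    let b = _mm512_set1_epi32(1);
    let signed_max = _mm512_max_epi32(a, b);
    assert_eq!(_mm512_cmpeq_epi32_mask(signed_max, b), 0xffff); // picks 1
    let unsigned_max = _mm512_max_epu32(a, b);
    assert_eq!(_mm512_cmpeq_epi32_mask(unsigned_max, a), 0xffff); // picks 0xFFFF_FFFF
}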
2100
2101/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2102///
2103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
2104#[inline]
2105#[target_feature(enable = "avx512f")]
2106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2107#[cfg_attr(test, assert_instr(vpmaxsd))]
2108pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2109    unsafe {
2110        let max = _mm512_max_epi32(a, b).as_i32x16();
2111        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
2112    }
2113}
2114
2115/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2116///
2117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
2118#[inline]
2119#[target_feature(enable = "avx512f")]
2120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2121#[cfg_attr(test, assert_instr(vpmaxsd))]
2122pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2123    unsafe {
2124        let max = _mm512_max_epi32(a, b).as_i32x16();
2125        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
2126    }
2127}
2128
2129/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2130///
2131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
2132#[inline]
2133#[target_feature(enable = "avx512f,avx512vl")]
2134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2135#[cfg_attr(test, assert_instr(vpmaxsd))]
2136pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2137    unsafe {
2138        let max = _mm256_max_epi32(a, b).as_i32x8();
2139        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
2140    }
2141}
2142
2143/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2144///
2145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
2146#[inline]
2147#[target_feature(enable = "avx512f,avx512vl")]
2148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2149#[cfg_attr(test, assert_instr(vpmaxsd))]
2150pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2151    unsafe {
2152        let max = _mm256_max_epi32(a, b).as_i32x8();
2153        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
2154    }
2155}
2156
2157/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2158///
2159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
2160#[inline]
2161#[target_feature(enable = "avx512f,avx512vl")]
2162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2163#[cfg_attr(test, assert_instr(vpmaxsd))]
2164pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2165    unsafe {
2166        let max = _mm_max_epi32(a, b).as_i32x4();
2167        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
2168    }
2169}
2170
2171/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2172///
2173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
2174#[inline]
2175#[target_feature(enable = "avx512f,avx512vl")]
2176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2177#[cfg_attr(test, assert_instr(vpmaxsd))]
2178pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2179    unsafe {
2180        let max = _mm_max_epi32(a, b).as_i32x4();
2181        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
2182    }
2183}
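
// Illustrative sketch (not part of the original source): lane-wise signed
// maximum with a zeroing mask, shown on hypothetical constant inputs. Assumes
// an AVX-512F-capable CPU; the names below are for illustration only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn max_epi32_sketch() -> __m512i {
    let a = _mm512_set1_epi32(-1);
    let b = _mm512_set1_epi32(7);
    // Lower eight lanes get max(-1, 7) = 7, upper eight lanes are zeroed.
    _mm512_maskz_max_epi32(0x00FF, a, b)
}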
2184
2185/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2186///
2187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
2188#[inline]
2189#[target_feature(enable = "avx512f")]
2190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2191#[cfg_attr(test, assert_instr(vpmaxsq))]
2192pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
2193    unsafe {
2194        let a = a.as_i64x8();
2195        let b = b.as_i64x8();
2196        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
2197    }
2198}
2199
2200/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
2203#[inline]
2204#[target_feature(enable = "avx512f")]
2205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2206#[cfg_attr(test, assert_instr(vpmaxsq))]
2207pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2208    unsafe {
2209        let max = _mm512_max_epi64(a, b).as_i64x8();
2210        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
2211    }
2212}
2213
2214/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2215///
2216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2217#[inline]
2218#[target_feature(enable = "avx512f")]
2219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2220#[cfg_attr(test, assert_instr(vpmaxsq))]
2221pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2222    unsafe {
2223        let max = _mm512_max_epi64(a, b).as_i64x8();
2224        transmute(simd_select_bitmask(k, max, i64x8::ZERO))
2225    }
2226}
2227
2228/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2229///
2230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2231#[inline]
2232#[target_feature(enable = "avx512f,avx512vl")]
2233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2234#[cfg_attr(test, assert_instr(vpmaxsq))]
2235pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2236    unsafe {
2237        let a = a.as_i64x4();
2238        let b = b.as_i64x4();
2239        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
2240    }
2241}
2242
2243/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2244///
2245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2246#[inline]
2247#[target_feature(enable = "avx512f,avx512vl")]
2248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2249#[cfg_attr(test, assert_instr(vpmaxsq))]
2250pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2251    unsafe {
2252        let max = _mm256_max_epi64(a, b).as_i64x4();
2253        transmute(simd_select_bitmask(k, max, src.as_i64x4()))
2254    }
2255}
2256
2257/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2258///
2259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2260#[inline]
2261#[target_feature(enable = "avx512f,avx512vl")]
2262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2263#[cfg_attr(test, assert_instr(vpmaxsq))]
2264pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2265    unsafe {
2266        let max = _mm256_max_epi64(a, b).as_i64x4();
2267        transmute(simd_select_bitmask(k, max, i64x4::ZERO))
2268    }
2269}
2270
2271/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2272///
2273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
2274#[inline]
2275#[target_feature(enable = "avx512f,avx512vl")]
2276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2277#[cfg_attr(test, assert_instr(vpmaxsq))]
2278pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2279    unsafe {
2280        let a = a.as_i64x2();
2281        let b = b.as_i64x2();
2282        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
2283    }
2284}
2285
2286/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2287///
2288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2289#[inline]
2290#[target_feature(enable = "avx512f,avx512vl")]
2291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2292#[cfg_attr(test, assert_instr(vpmaxsq))]
2293pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2294    unsafe {
2295        let max = _mm_max_epi64(a, b).as_i64x2();
2296        transmute(simd_select_bitmask(k, max, src.as_i64x2()))
2297    }
2298}
2299
2300/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2301///
2302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2303#[inline]
2304#[target_feature(enable = "avx512f,avx512vl")]
2305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2306#[cfg_attr(test, assert_instr(vpmaxsq))]
2307pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2308    unsafe {
2309        let max = _mm_max_epi64(a, b).as_i64x2();
2310        transmute(simd_select_bitmask(k, max, i64x2::ZERO))
2311    }
2312}
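
// Illustrative sketch (not part of the original source): unlike the 32-bit
// forms, packed 64-bit integer max has no SSE/AVX2 equivalent, so even the
// 128/256-bit variants above require AVX-512F plus AVX-512VL. A hypothetical
// 256-bit example, assuming those features are available:
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn max_epi64_sketch() -> __m256i {
    let a = _mm256_set1_epi64x(-5);
    let b = _mm256_set1_epi64x(3);
    _mm256_max_epi64(a, b) // every lane becomes 3
}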
2313
2314/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2315///
2316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
2317#[inline]
2318#[target_feature(enable = "avx512f")]
2319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2320#[cfg_attr(test, assert_instr(vmaxps))]
2321pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2322    unsafe {
2323        transmute(vmaxps(
2324            a.as_f32x16(),
2325            b.as_f32x16(),
2326            _MM_FROUND_CUR_DIRECTION,
2327        ))
2328    }
2329}
2330
2331/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2332///
2333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2334#[inline]
2335#[target_feature(enable = "avx512f")]
2336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2337#[cfg_attr(test, assert_instr(vmaxps))]
2338pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2339    unsafe {
2340        let max = _mm512_max_ps(a, b).as_f32x16();
2341        transmute(simd_select_bitmask(k, max, src.as_f32x16()))
2342    }
2343}
2344
2345/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2346///
2347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2348#[inline]
2349#[target_feature(enable = "avx512f")]
2350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2351#[cfg_attr(test, assert_instr(vmaxps))]
2352pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2353    unsafe {
2354        let max = _mm512_max_ps(a, b).as_f32x16();
2355        transmute(simd_select_bitmask(k, max, f32x16::ZERO))
2356    }
2357}
2358
2359/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2360///
2361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2362#[inline]
2363#[target_feature(enable = "avx512f,avx512vl")]
2364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2365#[cfg_attr(test, assert_instr(vmaxps))]
2366pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2367    unsafe {
2368        let max = _mm256_max_ps(a, b).as_f32x8();
2369        transmute(simd_select_bitmask(k, max, src.as_f32x8()))
2370    }
2371}
2372
2373/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2374///
2375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2376#[inline]
2377#[target_feature(enable = "avx512f,avx512vl")]
2378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2379#[cfg_attr(test, assert_instr(vmaxps))]
2380pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2381    unsafe {
2382        let max = _mm256_max_ps(a, b).as_f32x8();
2383        transmute(simd_select_bitmask(k, max, f32x8::ZERO))
2384    }
2385}
2386
2387/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2388///
2389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2390#[inline]
2391#[target_feature(enable = "avx512f,avx512vl")]
2392#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2393#[cfg_attr(test, assert_instr(vmaxps))]
2394pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2395    unsafe {
2396        let max = _mm_max_ps(a, b).as_f32x4();
2397        transmute(simd_select_bitmask(k, max, src.as_f32x4()))
2398    }
2399}
2400
2401/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2402///
2403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2404#[inline]
2405#[target_feature(enable = "avx512f,avx512vl")]
2406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2407#[cfg_attr(test, assert_instr(vmaxps))]
2408pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2409    unsafe {
2410        let max = _mm_max_ps(a, b).as_f32x4();
2411        transmute(simd_select_bitmask(k, max, f32x4::ZERO))
2412    }
2413}
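
// Illustrative sketch (not part of the original source): per the documented
// `vmaxps` behavior, the maximum is not fully symmetric. When exactly one
// input lane is NaN (or both inputs are signed zeros), the lane from the
// *second* operand is returned, so operand order matters. A hypothetical
// example, assuming AVX-512F:
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn max_ps_nan_sketch() -> __m512 {
    let a = _mm512_set1_ps(f32::NAN);
    let b = _mm512_set1_ps(1.0);
    _mm512_max_ps(a, b) // each lane is 1.0, taken from `b`
}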
2414
2415/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2416///
2417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2418#[inline]
2419#[target_feature(enable = "avx512f")]
2420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2421#[cfg_attr(test, assert_instr(vmaxpd))]
2422pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2423    unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2424}
2425
2426/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2427///
2428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2429#[inline]
2430#[target_feature(enable = "avx512f")]
2431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2432#[cfg_attr(test, assert_instr(vmaxpd))]
2433pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2434    unsafe {
2435        let max = _mm512_max_pd(a, b).as_f64x8();
2436        transmute(simd_select_bitmask(k, max, src.as_f64x8()))
2437    }
2438}
2439
2440/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2441///
2442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2443#[inline]
2444#[target_feature(enable = "avx512f")]
2445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2446#[cfg_attr(test, assert_instr(vmaxpd))]
2447pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2448    unsafe {
2449        let max = _mm512_max_pd(a, b).as_f64x8();
2450        transmute(simd_select_bitmask(k, max, f64x8::ZERO))
2451    }
2452}
2453
2454/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2457#[inline]
2458#[target_feature(enable = "avx512f,avx512vl")]
2459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2460#[cfg_attr(test, assert_instr(vmaxpd))]
2461pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2462    unsafe {
2463        let max = _mm256_max_pd(a, b).as_f64x4();
2464        transmute(simd_select_bitmask(k, max, src.as_f64x4()))
2465    }
2466}
2467
2468/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2469///
2470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2471#[inline]
2472#[target_feature(enable = "avx512f,avx512vl")]
2473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2474#[cfg_attr(test, assert_instr(vmaxpd))]
2475pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2476    unsafe {
2477        let max = _mm256_max_pd(a, b).as_f64x4();
2478        transmute(simd_select_bitmask(k, max, f64x4::ZERO))
2479    }
2480}
2481
2482/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2483///
2484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2485#[inline]
2486#[target_feature(enable = "avx512f,avx512vl")]
2487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2488#[cfg_attr(test, assert_instr(vmaxpd))]
2489pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2490    unsafe {
2491        let max = _mm_max_pd(a, b).as_f64x2();
2492        transmute(simd_select_bitmask(k, max, src.as_f64x2()))
2493    }
2494}
2495
2496/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2497///
2498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2499#[inline]
2500#[target_feature(enable = "avx512f,avx512vl")]
2501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2502#[cfg_attr(test, assert_instr(vmaxpd))]
2503pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2504    unsafe {
2505        let max = _mm_max_pd(a, b).as_f64x2();
2506        transmute(simd_select_bitmask(k, max, f64x2::ZERO))
2507    }
2508}
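
// Illustrative sketch (not part of the original source): merging a masked
// double-precision maximum into an existing vector. Hypothetical values,
// assuming AVX-512F support:
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_max_pd_sketch(src: __m512d) -> __m512d {
    let a = _mm512_set1_pd(2.0);
    let b = _mm512_set1_pd(4.0);
    // Even lanes (mask bits 0, 2, 4, 6) receive max(2.0, 4.0) = 4.0,
    // odd lanes keep whatever was in `src`.
    _mm512_mask_max_pd(src, 0b0101_0101, a, b)
}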
2509
2510/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2511///
2512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
2513#[inline]
2514#[target_feature(enable = "avx512f")]
2515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2516#[cfg_attr(test, assert_instr(vpmaxud))]
2517pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2518    unsafe {
2519        let a = a.as_u32x16();
2520        let b = b.as_u32x16();
2521        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2522    }
2523}
2524
2525/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2526///
2527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2528#[inline]
2529#[target_feature(enable = "avx512f")]
2530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2531#[cfg_attr(test, assert_instr(vpmaxud))]
2532pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2533    unsafe {
2534        let max = _mm512_max_epu32(a, b).as_u32x16();
2535        transmute(simd_select_bitmask(k, max, src.as_u32x16()))
2536    }
2537}
2538
2539/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2540///
2541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2542#[inline]
2543#[target_feature(enable = "avx512f")]
2544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2545#[cfg_attr(test, assert_instr(vpmaxud))]
2546pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2547    unsafe {
2548        let max = _mm512_max_epu32(a, b).as_u32x16();
2549        transmute(simd_select_bitmask(k, max, u32x16::ZERO))
2550    }
2551}
2552
2553/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2554///
2555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2556#[inline]
2557#[target_feature(enable = "avx512f,avx512vl")]
2558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2559#[cfg_attr(test, assert_instr(vpmaxud))]
2560pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2561    unsafe {
2562        let max = _mm256_max_epu32(a, b).as_u32x8();
2563        transmute(simd_select_bitmask(k, max, src.as_u32x8()))
2564    }
2565}
2566
2567/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2568///
2569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2570#[inline]
2571#[target_feature(enable = "avx512f,avx512vl")]
2572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2573#[cfg_attr(test, assert_instr(vpmaxud))]
2574pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2575    unsafe {
2576        let max = _mm256_max_epu32(a, b).as_u32x8();
2577        transmute(simd_select_bitmask(k, max, u32x8::ZERO))
2578    }
2579}
2580
2581/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2584#[inline]
2585#[target_feature(enable = "avx512f,avx512vl")]
2586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2587#[cfg_attr(test, assert_instr(vpmaxud))]
2588pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2589    unsafe {
2590        let max = _mm_max_epu32(a, b).as_u32x4();
2591        transmute(simd_select_bitmask(k, max, src.as_u32x4()))
2592    }
2593}
2594
2595/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2596///
2597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2598#[inline]
2599#[target_feature(enable = "avx512f,avx512vl")]
2600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2601#[cfg_attr(test, assert_instr(vpmaxud))]
2602pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2603    unsafe {
2604        let max = _mm_max_epu32(a, b).as_u32x4();
2605        transmute(simd_select_bitmask(k, max, u32x4::ZERO))
2606    }
2607}
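
// Illustrative sketch (not part of the original source): the epu32 forms
// compare lanes as *unsigned* values, so a bit pattern of all ones
// (-1 as i32, u32::MAX as u32) is the largest possible lane. Hypothetical
// example, assuming AVX-512F:
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn max_epu32_sketch() -> __m512i {
    let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF in every lane
    let b = _mm512_set1_epi32(100);
    _mm512_max_epu32(a, b) // every lane stays 0xFFFF_FFFF
}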
2608
2609/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2610///
2611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2612#[inline]
2613#[target_feature(enable = "avx512f")]
2614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2615#[cfg_attr(test, assert_instr(vpmaxuq))]
2616pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2617    unsafe {
2618        let a = a.as_u64x8();
2619        let b = b.as_u64x8();
2620        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
2621    }
2622}
2623
2624/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2625///
2626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2627#[inline]
2628#[target_feature(enable = "avx512f")]
2629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2630#[cfg_attr(test, assert_instr(vpmaxuq))]
2631pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2632    unsafe {
2633        let max = _mm512_max_epu64(a, b).as_u64x8();
2634        transmute(simd_select_bitmask(k, max, src.as_u64x8()))
2635    }
2636}
2637
2638/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2639///
2640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2641#[inline]
2642#[target_feature(enable = "avx512f")]
2643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2644#[cfg_attr(test, assert_instr(vpmaxuq))]
2645pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2646    unsafe {
2647        let max = _mm512_max_epu64(a, b).as_u64x8();
2648        transmute(simd_select_bitmask(k, max, u64x8::ZERO))
2649    }
2650}
2651
2652/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2653///
2654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2655#[inline]
2656#[target_feature(enable = "avx512f,avx512vl")]
2657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2658#[cfg_attr(test, assert_instr(vpmaxuq))]
2659pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2660    unsafe {
2661        let a = a.as_u64x4();
2662        let b = b.as_u64x4();
2663        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
2664    }
2665}
2666
2667/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2668///
2669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2670#[inline]
2671#[target_feature(enable = "avx512f,avx512vl")]
2672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2673#[cfg_attr(test, assert_instr(vpmaxuq))]
2674pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2675    unsafe {
2676        let max = _mm256_max_epu64(a, b).as_u64x4();
2677        transmute(simd_select_bitmask(k, max, src.as_u64x4()))
2678    }
2679}
2680
2681/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2682///
2683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2684#[inline]
2685#[target_feature(enable = "avx512f,avx512vl")]
2686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2687#[cfg_attr(test, assert_instr(vpmaxuq))]
2688pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2689    unsafe {
2690        let max = _mm256_max_epu64(a, b).as_u64x4();
2691        transmute(simd_select_bitmask(k, max, u64x4::ZERO))
2692    }
2693}
2694
2695/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2696///
2697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2698#[inline]
2699#[target_feature(enable = "avx512f,avx512vl")]
2700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2701#[cfg_attr(test, assert_instr(vpmaxuq))]
2702pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2703    unsafe {
2704        let a = a.as_u64x2();
2705        let b = b.as_u64x2();
2706        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
2707    }
2708}
2709
2710/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2711///
2712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2713#[inline]
2714#[target_feature(enable = "avx512f,avx512vl")]
2715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2716#[cfg_attr(test, assert_instr(vpmaxuq))]
2717pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2718    unsafe {
2719        let max = _mm_max_epu64(a, b).as_u64x2();
2720        transmute(simd_select_bitmask(k, max, src.as_u64x2()))
2721    }
2722}
2723
2724/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2725///
2726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2727#[inline]
2728#[target_feature(enable = "avx512f,avx512vl")]
2729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2730#[cfg_attr(test, assert_instr(vpmaxuq))]
2731pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2732    unsafe {
2733        let max = _mm_max_epu64(a, b).as_u64x2();
2734        transmute(simd_select_bitmask(k, max, u64x2::ZERO))
2735    }
2736}
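
// Illustrative sketch (not part of the original source): a hypothetical
// 128-bit unsigned 64-bit maximum, which needs both AVX-512F and AVX-512VL:
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn max_epu64_sketch() -> __m128i {
    let a = _mm_set1_epi64x(-1); // u64::MAX in both lanes when compared unsigned
    let b = _mm_set1_epi64x(42);
    _mm_max_epu64(a, b) // both lanes stay all-ones
}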
2737
2738/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2739///
2740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
2741#[inline]
2742#[target_feature(enable = "avx512f")]
2743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2744#[cfg_attr(test, assert_instr(vpminsd))]
2745pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2746    unsafe {
2747        let a = a.as_i32x16();
2748        let b = b.as_i32x16();
2749        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
2750    }
2751}
2752
2753/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2754///
2755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2756#[inline]
2757#[target_feature(enable = "avx512f")]
2758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2759#[cfg_attr(test, assert_instr(vpminsd))]
2760pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2761    unsafe {
2762        let min = _mm512_min_epi32(a, b).as_i32x16();
2763        transmute(simd_select_bitmask(k, min, src.as_i32x16()))
2764    }
2765}
2766
2767/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2768///
2769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2770#[inline]
2771#[target_feature(enable = "avx512f")]
2772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2773#[cfg_attr(test, assert_instr(vpminsd))]
2774pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2775    unsafe {
2776        let min = _mm512_min_epi32(a, b).as_i32x16();
2777        transmute(simd_select_bitmask(k, min, i32x16::ZERO))
2778    }
2779}
2780
2781/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2782///
2783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2784#[inline]
2785#[target_feature(enable = "avx512f,avx512vl")]
2786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2787#[cfg_attr(test, assert_instr(vpminsd))]
2788pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2789    unsafe {
2790        let min = _mm256_min_epi32(a, b).as_i32x8();
2791        transmute(simd_select_bitmask(k, min, src.as_i32x8()))
2792    }
2793}
2794
2795/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2796///
2797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2798#[inline]
2799#[target_feature(enable = "avx512f,avx512vl")]
2800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2801#[cfg_attr(test, assert_instr(vpminsd))]
2802pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2803    unsafe {
2804        let min = _mm256_min_epi32(a, b).as_i32x8();
2805        transmute(simd_select_bitmask(k, min, i32x8::ZERO))
2806    }
2807}
2808
2809/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2810///
2811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2812#[inline]
2813#[target_feature(enable = "avx512f,avx512vl")]
2814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2815#[cfg_attr(test, assert_instr(vpminsd))]
2816pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2817    unsafe {
2818        let min = _mm_min_epi32(a, b).as_i32x4();
2819        transmute(simd_select_bitmask(k, min, src.as_i32x4()))
2820    }
2821}
2822
2823/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2826#[inline]
2827#[target_feature(enable = "avx512f,avx512vl")]
2828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2829#[cfg_attr(test, assert_instr(vpminsd))]
2830pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2831    unsafe {
2832        let min = _mm_min_epi32(a, b).as_i32x4();
2833        transmute(simd_select_bitmask(k, min, i32x4::ZERO))
2834    }
2835}
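
// Illustrative sketch (not part of the original source): zero-masking the
// 256-bit signed minimum. Hypothetical values; assumes AVX-512F + AVX-512VL:
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn maskz_min_epi32_sketch() -> __m256i {
    let a = _mm256_set1_epi32(-3);
    let b = _mm256_set1_epi32(9);
    // Lanes 0..4 get min(-3, 9) = -3, lanes 4..8 are zeroed.
    _mm256_maskz_min_epi32(0b0000_1111, a, b)
}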
2836
2837/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2838///
2839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
2840#[inline]
2841#[target_feature(enable = "avx512f")]
2842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2843#[cfg_attr(test, assert_instr(vpminsq))]
2844pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
2845    unsafe {
2846        let a = a.as_i64x8();
2847        let b = b.as_i64x8();
2848        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
2849    }
2850}
2851
2852/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2853///
2854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
2855#[inline]
2856#[target_feature(enable = "avx512f")]
2857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2858#[cfg_attr(test, assert_instr(vpminsq))]
2859pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2860    unsafe {
2861        let min = _mm512_min_epi64(a, b).as_i64x8();
2862        transmute(simd_select_bitmask(k, min, src.as_i64x8()))
2863    }
2864}
2865
2866/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2867///
2868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
2869#[inline]
2870#[target_feature(enable = "avx512f")]
2871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2872#[cfg_attr(test, assert_instr(vpminsq))]
2873pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2874    unsafe {
2875        let min = _mm512_min_epi64(a, b).as_i64x8();
2876        transmute(simd_select_bitmask(k, min, i64x8::ZERO))
2877    }
2878}
2879
2880/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2881///
2882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
2883#[inline]
2884#[target_feature(enable = "avx512f,avx512vl")]
2885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2886#[cfg_attr(test, assert_instr(vpminsq))]
2887pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
2888    unsafe {
2889        let a = a.as_i64x4();
2890        let b = b.as_i64x4();
2891        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
2892    }
2893}
2894
2895/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
2898#[inline]
2899#[target_feature(enable = "avx512f,avx512vl")]
2900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2901#[cfg_attr(test, assert_instr(vpminsq))]
2902pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2903    unsafe {
2904        let min = _mm256_min_epi64(a, b).as_i64x4();
2905        transmute(simd_select_bitmask(k, min, src.as_i64x4()))
2906    }
2907}
2908
2909/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2910///
2911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
2912#[inline]
2913#[target_feature(enable = "avx512f,avx512vl")]
2914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2915#[cfg_attr(test, assert_instr(vpminsq))]
2916pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2917    unsafe {
2918        let min = _mm256_min_epi64(a, b).as_i64x4();
2919        transmute(simd_select_bitmask(k, min, i64x4::ZERO))
2920    }
2921}
2922
2923/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2924///
2925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
2926#[inline]
2927#[target_feature(enable = "avx512f,avx512vl")]
2928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2929#[cfg_attr(test, assert_instr(vpminsq))]
2930pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
2931    unsafe {
2932        let a = a.as_i64x2();
2933        let b = b.as_i64x2();
2934        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
2935    }
2936}
2937
2938/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2939///
2940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
2941#[inline]
2942#[target_feature(enable = "avx512f,avx512vl")]
2943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2944#[cfg_attr(test, assert_instr(vpminsq))]
2945pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2946    unsafe {
2947        let min = _mm_min_epi64(a, b).as_i64x2();
2948        transmute(simd_select_bitmask(k, min, src.as_i64x2()))
2949    }
2950}
2951
2952/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2953///
2954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
2955#[inline]
2956#[target_feature(enable = "avx512f,avx512vl")]
2957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2958#[cfg_attr(test, assert_instr(vpminsq))]
2959pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2960    unsafe {
2961        let min = _mm_min_epi64(a, b).as_i64x2();
2962        transmute(simd_select_bitmask(k, min, i64x2::ZERO))
2963    }
2964}
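
// Illustrative sketch (not part of the original source): merge-masked signed
// 64-bit minimum on 512-bit vectors. Hypothetical values; assumes AVX-512F:
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_min_epi64_sketch(src: __m512i) -> __m512i {
    let a = _mm512_set1_epi64(-10);
    let b = _mm512_set1_epi64(10);
    // Lanes selected by the mask get min(-10, 10) = -10; the rest keep `src`.
    _mm512_mask_min_epi64(src, 0b1111_0000, a, b)
}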
2965
2966/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
2967///
2968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
2969#[inline]
2970#[target_feature(enable = "avx512f")]
2971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2972#[cfg_attr(test, assert_instr(vminps))]
2973pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
2974    unsafe {
2975        transmute(vminps(
2976            a.as_f32x16(),
2977            b.as_f32x16(),
2978            _MM_FROUND_CUR_DIRECTION,
2979        ))
2980    }
2981}
2982
2983/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2984///
2985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
2986#[inline]
2987#[target_feature(enable = "avx512f")]
2988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2989#[cfg_attr(test, assert_instr(vminps))]
2990pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2991    unsafe {
2992        let min = _mm512_min_ps(a, b).as_f32x16();
2993        transmute(simd_select_bitmask(k, min, src.as_f32x16()))
2994    }
2995}
2996
2997/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2998///
2999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3000#[inline]
3001#[target_feature(enable = "avx512f")]
3002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3003#[cfg_attr(test, assert_instr(vminps))]
3004pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3005    unsafe {
3006        let min = _mm512_min_ps(a, b).as_f32x16();
3007        transmute(simd_select_bitmask(k, min, f32x16::ZERO))
3008    }
3009}
3010
3011/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3012///
3013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3014#[inline]
3015#[target_feature(enable = "avx512f,avx512vl")]
3016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3017#[cfg_attr(test, assert_instr(vminps))]
3018pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3019    unsafe {
3020        let min = _mm256_min_ps(a, b).as_f32x8();
3021        transmute(simd_select_bitmask(k, min, src.as_f32x8()))
3022    }
3023}
3024
3025/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3026///
3027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3028#[inline]
3029#[target_feature(enable = "avx512f,avx512vl")]
3030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3031#[cfg_attr(test, assert_instr(vminps))]
3032pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3033    unsafe {
3034        let min = _mm256_min_ps(a, b).as_f32x8();
3035        transmute(simd_select_bitmask(k, min, f32x8::ZERO))
3036    }
3037}
3038
3039/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3040///
3041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3042#[inline]
3043#[target_feature(enable = "avx512f,avx512vl")]
3044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3045#[cfg_attr(test, assert_instr(vminps))]
3046pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3047    unsafe {
3048        let min = _mm_min_ps(a, b).as_f32x4();
3049        transmute(simd_select_bitmask(k, min, src.as_f32x4()))
3050    }
3051}
3052
3053/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3054///
3055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3056#[inline]
3057#[target_feature(enable = "avx512f,avx512vl")]
3058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3059#[cfg_attr(test, assert_instr(vminps))]
3060pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3061    unsafe {
3062        let min = _mm_min_ps(a, b).as_f32x4();
3063        transmute(simd_select_bitmask(k, min, f32x4::ZERO))
3064    }
3065}
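
// Illustrative sketch (not part of the original source): like `vmaxps`,
// `vminps` returns the *second* operand's lane when exactly one input is NaN,
// so `_mm512_min_ps(x, nan)` and `_mm512_min_ps(nan, x)` differ. Hypothetical
// example, assuming AVX-512F:
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn min_ps_order_sketch(x: __m512) -> (__m512, __m512) {
    let nan = _mm512_set1_ps(f32::NAN);
    (_mm512_min_ps(x, nan), _mm512_min_ps(nan, x)) // (all NaN, all `x`)
}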
3066
3067/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3068///
3069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3070#[inline]
3071#[target_feature(enable = "avx512f")]
3072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3073#[cfg_attr(test, assert_instr(vminpd))]
3074pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3075    unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3076}
3077
3078/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3079///
3080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3081#[inline]
3082#[target_feature(enable = "avx512f")]
3083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3084#[cfg_attr(test, assert_instr(vminpd))]
3085pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3086    unsafe {
3087        let min = _mm512_min_pd(a, b).as_f64x8();
3088        transmute(simd_select_bitmask(k, min, src.as_f64x8()))
3089    }
3090}
3091
3092/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3093///
3094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3095#[inline]
3096#[target_feature(enable = "avx512f")]
3097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3098#[cfg_attr(test, assert_instr(vminpd))]
3099pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3100    unsafe {
3101        let min = _mm512_min_pd(a, b).as_f64x8();
3102        transmute(simd_select_bitmask(k, min, f64x8::ZERO))
3103    }
3104}
3105
3106/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3107///
3108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3109#[inline]
3110#[target_feature(enable = "avx512f,avx512vl")]
3111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3112#[cfg_attr(test, assert_instr(vminpd))]
3113pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3114    unsafe {
3115        let min = _mm256_min_pd(a, b).as_f64x4();
3116        transmute(simd_select_bitmask(k, min, src.as_f64x4()))
3117    }
3118}
3119
3120/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3121///
3122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3123#[inline]
3124#[target_feature(enable = "avx512f,avx512vl")]
3125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3126#[cfg_attr(test, assert_instr(vminpd))]
3127pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3128    unsafe {
3129        let min = _mm256_min_pd(a, b).as_f64x4();
3130        transmute(simd_select_bitmask(k, min, f64x4::ZERO))
3131    }
3132}
3133
3134/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3135///
3136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3137#[inline]
3138#[target_feature(enable = "avx512f,avx512vl")]
3139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3140#[cfg_attr(test, assert_instr(vminpd))]
3141pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3142    unsafe {
3143        let min = _mm_min_pd(a, b).as_f64x2();
3144        transmute(simd_select_bitmask(k, min, src.as_f64x2()))
3145    }
3146}
3147
3148/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3149///
3150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3151#[inline]
3152#[target_feature(enable = "avx512f,avx512vl")]
3153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3154#[cfg_attr(test, assert_instr(vminpd))]
3155pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3156    unsafe {
3157        let min = _mm_min_pd(a, b).as_f64x2();
3158        transmute(simd_select_bitmask(k, min, f64x2::ZERO))
3159    }
3160}
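
// Illustrative sketch (not part of the original source): a hypothetical
// 128-bit masked double-precision minimum; assumes AVX-512F + AVX-512VL:
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn mask_min_pd_sketch(src: __m128d) -> __m128d {
    let a = _mm_set_pd(8.0, -8.0); // lanes: [-8.0, 8.0]
    let b = _mm_set_pd(1.0, 1.0); //  lanes: [ 1.0, 1.0]
    // Only lane 0 takes min(-8.0, 1.0) = -8.0; lane 1 is copied from `src`.
    _mm_mask_min_pd(src, 0b01, a, b)
}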
3161
3162/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3163///
3164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3165#[inline]
3166#[target_feature(enable = "avx512f")]
3167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3168#[cfg_attr(test, assert_instr(vpminud))]
3169pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3170    unsafe {
3171        let a = a.as_u32x16();
3172        let b = b.as_u32x16();
3173        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
3174    }
3175}
3176
3177/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3178///
3179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3180#[inline]
3181#[target_feature(enable = "avx512f")]
3182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3183#[cfg_attr(test, assert_instr(vpminud))]
3184pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3185    unsafe {
3186        let min = _mm512_min_epu32(a, b).as_u32x16();
3187        transmute(simd_select_bitmask(k, min, src.as_u32x16()))
3188    }
3189}
3190
3191/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3192///
3193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3194#[inline]
3195#[target_feature(enable = "avx512f")]
3196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3197#[cfg_attr(test, assert_instr(vpminud))]
3198pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3199    unsafe {
3200        let min = _mm512_min_epu32(a, b).as_u32x16();
3201        transmute(simd_select_bitmask(k, min, u32x16::ZERO))
3202    }
3203}
3204
3205/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3206///
3207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3208#[inline]
3209#[target_feature(enable = "avx512f,avx512vl")]
3210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3211#[cfg_attr(test, assert_instr(vpminud))]
3212pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3213    unsafe {
3214        let min = _mm256_min_epu32(a, b).as_u32x8();
3215        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
3216    }
3217}
3218
3219/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3220///
3221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3222#[inline]
3223#[target_feature(enable = "avx512f,avx512vl")]
3224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3225#[cfg_attr(test, assert_instr(vpminud))]
3226pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3227    unsafe {
3228        let min = _mm256_min_epu32(a, b).as_u32x8();
3229        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
3230    }
3231}
3232
3233/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3234///
3235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3236#[inline]
3237#[target_feature(enable = "avx512f,avx512vl")]
3238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3239#[cfg_attr(test, assert_instr(vpminud))]
3240pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3241    unsafe {
3242        let min = _mm_min_epu32(a, b).as_u32x4();
3243        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
3244    }
3245}
3246
3247/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3248///
3249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3250#[inline]
3251#[target_feature(enable = "avx512f,avx512vl")]
3252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3253#[cfg_attr(test, assert_instr(vpminud))]
3254pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3255    unsafe {
3256        let min = _mm_min_epu32(a, b).as_u32x4();
3257        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
3258    }
3259}
3260
3261/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3262///
3263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
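///
/// Illustrative sketch, not taken from Intel's documentation: as with the
/// 32-bit variant, the comparison is unsigned. It assumes a nightly toolchain
/// with `#![feature(stdarch_x86_avx512)]` and runtime AVX-512F detection.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_epi64(-1); // u64::MAX in every lane when read as unsigned
///         let b = _mm512_set1_epi64(5);
///         let r = _mm512_min_epu64(a, b);
///         let lanes: [u64; 8] = core::mem::transmute(r);
///         assert!(lanes.iter().all(|&x| x == 5)); // the unsigned minimum is 5
///     }
/// }
/// ```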
3264#[inline]
3265#[target_feature(enable = "avx512f")]
3266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3267#[cfg_attr(test, assert_instr(vpminuq))]
3268pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3269    unsafe {
3270        let a = a.as_u64x8();
3271        let b = b.as_u64x8();
3272        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
3273    }
3274}
3275
3276/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3277///
3278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3279#[inline]
3280#[target_feature(enable = "avx512f")]
3281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3282#[cfg_attr(test, assert_instr(vpminuq))]
3283pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3284    unsafe {
3285        let min = _mm512_min_epu64(a, b).as_u64x8();
3286        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3287    }
3288}
3289
3290/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3291///
3292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3293#[inline]
3294#[target_feature(enable = "avx512f")]
3295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3296#[cfg_attr(test, assert_instr(vpminuq))]
3297pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3298    unsafe {
3299        let min = _mm512_min_epu64(a, b).as_u64x8();
3300        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3301    }
3302}
3303
3304/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3305///
3306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3307#[inline]
3308#[target_feature(enable = "avx512f,avx512vl")]
3309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3310#[cfg_attr(test, assert_instr(vpminuq))]
3311pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3312    unsafe {
3313        let a = a.as_u64x4();
3314        let b = b.as_u64x4();
3315        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
3316    }
3317}
3318
3319/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3320///
3321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3322#[inline]
3323#[target_feature(enable = "avx512f,avx512vl")]
3324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3325#[cfg_attr(test, assert_instr(vpminuq))]
3326pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3327    unsafe {
3328        let min = _mm256_min_epu64(a, b).as_u64x4();
3329        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3330    }
3331}
3332
3333/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3334///
3335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3336#[inline]
3337#[target_feature(enable = "avx512f,avx512vl")]
3338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3339#[cfg_attr(test, assert_instr(vpminuq))]
3340pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3341    unsafe {
3342        let min = _mm256_min_epu64(a, b).as_u64x4();
3343        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3344    }
3345}
3346
3347/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3348///
3349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3350#[inline]
3351#[target_feature(enable = "avx512f,avx512vl")]
3352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3353#[cfg_attr(test, assert_instr(vpminuq))]
3354pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3355    unsafe {
3356        let a = a.as_u64x2();
3357        let b = b.as_u64x2();
3358        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
3359    }
3360}
3361
3362/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3363///
3364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3365#[inline]
3366#[target_feature(enable = "avx512f,avx512vl")]
3367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3368#[cfg_attr(test, assert_instr(vpminuq))]
3369pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3370    unsafe {
3371        let min = _mm_min_epu64(a, b).as_u64x2();
3372        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3373    }
3374}
3375
3376/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3377///
3378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3379#[inline]
3380#[target_feature(enable = "avx512f,avx512vl")]
3381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3382#[cfg_attr(test, assert_instr(vpminuq))]
3383pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3384    unsafe {
3385        let min = _mm_min_epu64(a, b).as_u64x2();
3386        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3387    }
3388}
3389
3390/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
3391///
3392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
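///
/// Illustrative sketch, not taken from Intel's documentation; it assumes a
/// nightly toolchain with `#![feature(stdarch_x86_avx512)]` and runtime
/// AVX-512F detection.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(9.0);
///         let r = _mm512_sqrt_ps(a);
///         let lanes: [f32; 16] = core::mem::transmute(r);
///         assert!(lanes.iter().all(|&x| x == 3.0));
///     }
/// }
/// ```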
3393#[inline]
3394#[target_feature(enable = "avx512f")]
3395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3396#[cfg_attr(test, assert_instr(vsqrtps))]
3397pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
3398    unsafe { simd_fsqrt(a) }
3399}
3400
3401/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3402///
3403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3404#[inline]
3405#[target_feature(enable = "avx512f")]
3406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3407#[cfg_attr(test, assert_instr(vsqrtps))]
3408pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
3409    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3410}
3411
3412/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3413///
3414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3415#[inline]
3416#[target_feature(enable = "avx512f")]
3417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3418#[cfg_attr(test, assert_instr(vsqrtps))]
3419pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
3420    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3421}
3422
3423/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3426#[inline]
3427#[target_feature(enable = "avx512f,avx512vl")]
3428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3429#[cfg_attr(test, assert_instr(vsqrtps))]
3430pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3431    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3432}
3433
3434/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3435///
3436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3437#[inline]
3438#[target_feature(enable = "avx512f,avx512vl")]
3439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3440#[cfg_attr(test, assert_instr(vsqrtps))]
3441pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3442    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3443}
3444
3445/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3446///
3447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3448#[inline]
3449#[target_feature(enable = "avx512f,avx512vl")]
3450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3451#[cfg_attr(test, assert_instr(vsqrtps))]
3452pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3453    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3454}
3455
3456/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3457///
3458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3459#[inline]
3460#[target_feature(enable = "avx512f,avx512vl")]
3461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3462#[cfg_attr(test, assert_instr(vsqrtps))]
3463pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3464    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3465}
3466
3467/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3470#[inline]
3471#[target_feature(enable = "avx512f")]
3472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3473#[cfg_attr(test, assert_instr(vsqrtpd))]
3474pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3475    unsafe { simd_fsqrt(a) }
3476}
3477
3478/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3479///
3480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3481#[inline]
3482#[target_feature(enable = "avx512f")]
3483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3484#[cfg_attr(test, assert_instr(vsqrtpd))]
3485pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3486    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3487}
3488
3489/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3490///
3491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3492#[inline]
3493#[target_feature(enable = "avx512f")]
3494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3495#[cfg_attr(test, assert_instr(vsqrtpd))]
3496pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3497    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3498}
3499
3500/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3501///
3502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3503#[inline]
3504#[target_feature(enable = "avx512f,avx512vl")]
3505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3506#[cfg_attr(test, assert_instr(vsqrtpd))]
3507pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3508    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3509}
3510
3511/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3512///
3513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3514#[inline]
3515#[target_feature(enable = "avx512f,avx512vl")]
3516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3517#[cfg_attr(test, assert_instr(vsqrtpd))]
3518pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3519    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3520}
3521
3522/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3523///
3524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3525#[inline]
3526#[target_feature(enable = "avx512f,avx512vl")]
3527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3528#[cfg_attr(test, assert_instr(vsqrtpd))]
3529pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3530    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3531}
3532
3533/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3534///
3535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3536#[inline]
3537#[target_feature(enable = "avx512f,avx512vl")]
3538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3539#[cfg_attr(test, assert_instr(vsqrtpd))]
3540pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3541    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3542}
3543
3544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3545///
3546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
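///
/// Illustrative sketch, not taken from Intel's documentation: each lane
/// computes `a * b + c` as a fused operation, i.e. with a single rounding
/// step. It assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]`
/// and runtime AVX-512F detection.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(2.0);
///         let b = _mm512_set1_ps(3.0);
///         let c = _mm512_set1_ps(1.0);
///         // Each lane: 2.0 * 3.0 + 1.0 = 7.0, rounded once.
///         let r = _mm512_fmadd_ps(a, b, c);
///         let lanes: [f32; 16] = core::mem::transmute(r);
///         assert!(lanes.iter().all(|&x| x == 7.0));
///     }
/// }
/// ```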
3547#[inline]
3548#[target_feature(enable = "avx512f")]
3549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3550#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3551pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3552    unsafe { simd_fma(a, b, c) }
3553}
3554
3555/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3556///
3557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3558#[inline]
3559#[target_feature(enable = "avx512f")]
3560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3561#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3562pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3563    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3564}
3565
3566/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3567///
3568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3569#[inline]
3570#[target_feature(enable = "avx512f")]
3571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3572#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3573pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3574    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3575}
3576
3577/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3578///
3579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
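///
/// Illustrative sketch, not taken from Intel's documentation: unlike the
/// `_mm512_mask_fmadd_ps` form, inactive lanes here keep `c` rather than `a`.
/// It assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]` and
/// runtime AVX-512F detection.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(2.0);
///         let b = _mm512_set1_ps(3.0);
///         let c = _mm512_set1_ps(10.0);
///         // Lane 0 is computed (2.0 * 3.0 + 10.0); inactive lanes keep `c`.
///         let r = _mm512_mask3_fmadd_ps(a, b, c, 0b1);
///         let lanes: [f32; 16] = core::mem::transmute(r);
///         assert_eq!(lanes[0], 16.0);
///         assert_eq!(lanes[1], 10.0);
///     }
/// }
/// ```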
3580#[inline]
3581#[target_feature(enable = "avx512f")]
3582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3583#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3584pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3585    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3586}
3587
3588/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3589///
3590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3591#[inline]
3592#[target_feature(enable = "avx512f,avx512vl")]
3593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3594#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3595pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3596    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3597}
3598
3599/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3600///
3601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3602#[inline]
3603#[target_feature(enable = "avx512f,avx512vl")]
3604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3605#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3606pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3607    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3608}
3609
3610/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3611///
3612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3613#[inline]
3614#[target_feature(enable = "avx512f,avx512vl")]
3615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3616#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3617pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3618    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3619}
3620
3621/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3622///
3623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3624#[inline]
3625#[target_feature(enable = "avx512f,avx512vl")]
3626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3627#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3628pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3629    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3630}
3631
3632/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3633///
3634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3635#[inline]
3636#[target_feature(enable = "avx512f,avx512vl")]
3637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3638#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3639pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3640    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3641}
3642
3643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3644///
3645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3646#[inline]
3647#[target_feature(enable = "avx512f,avx512vl")]
3648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3649#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3650pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3651    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3652}
3653
3654/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3655///
3656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3657#[inline]
3658#[target_feature(enable = "avx512f")]
3659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3660#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3661pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3662    unsafe { simd_fma(a, b, c) }
3663}
3664
3665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3666///
3667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3668#[inline]
3669#[target_feature(enable = "avx512f")]
3670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3671#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3672pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3673    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3674}
3675
3676/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3677///
3678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3679#[inline]
3680#[target_feature(enable = "avx512f")]
3681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3682#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3683pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3684    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3685}
3686
3687/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3690#[inline]
3691#[target_feature(enable = "avx512f")]
3692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3693#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3694pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3695    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3696}
3697
3698/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3699///
3700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3701#[inline]
3702#[target_feature(enable = "avx512f,avx512vl")]
3703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3704#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3705pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3706    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3707}
3708
3709/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3710///
3711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3712#[inline]
3713#[target_feature(enable = "avx512f,avx512vl")]
3714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3715#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3716pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3717    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3718}
3719
3720/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3721///
3722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3723#[inline]
3724#[target_feature(enable = "avx512f,avx512vl")]
3725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3726#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3727pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3728    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3729}
3730
3731/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3732///
3733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3734#[inline]
3735#[target_feature(enable = "avx512f,avx512vl")]
3736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3737#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3738pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3739    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3740}
3741
3742/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3743///
3744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3745#[inline]
3746#[target_feature(enable = "avx512f,avx512vl")]
3747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3748#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3749pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3750    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3751}
3752
3753/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3760pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3761    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3762}
3763
3764/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3765///
3766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
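///
/// Illustrative sketch, not taken from Intel's documentation: each lane
/// computes `a * b - c` with a single rounding step. It assumes a nightly
/// toolchain with `#![feature(stdarch_x86_avx512)]` and runtime AVX-512F
/// detection.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(2.0);
///         let b = _mm512_set1_ps(3.0);
///         let c = _mm512_set1_ps(1.0);
///         // Each lane: 2.0 * 3.0 - 1.0 = 5.0, rounded once.
///         let r = _mm512_fmsub_ps(a, b, c);
///         let lanes: [f32; 16] = core::mem::transmute(r);
///         assert!(lanes.iter().all(|&x| x == 5.0));
///     }
/// }
/// ```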
3767#[inline]
3768#[target_feature(enable = "avx512f")]
3769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3770#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3771pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3772    unsafe { simd_fma(a, b, simd_neg(c)) }
3773}
3774
3775/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3776///
3777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3778#[inline]
3779#[target_feature(enable = "avx512f")]
3780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3781#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3782pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3783    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3784}
3785
3786/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3787///
3788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3789#[inline]
3790#[target_feature(enable = "avx512f")]
3791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3792#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3793pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3794    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3795}
3796
3797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3798///
3799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3800#[inline]
3801#[target_feature(enable = "avx512f")]
3802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3803#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3804pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3805    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3806}
3807
3808/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3809///
3810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3811#[inline]
3812#[target_feature(enable = "avx512f,avx512vl")]
3813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3814#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3815pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3816    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
3817}
3818
3819/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3820///
3821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3822#[inline]
3823#[target_feature(enable = "avx512f,avx512vl")]
3824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3825#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3826pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3827    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
3828}
3829
3830/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3831///
3832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3833#[inline]
3834#[target_feature(enable = "avx512f,avx512vl")]
3835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3836#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3837pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3838    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
3839}
3840
3841/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3842///
3843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3844#[inline]
3845#[target_feature(enable = "avx512f,avx512vl")]
3846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3847#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3848pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3849    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
3850}
3851
3852/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3853///
3854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3855#[inline]
3856#[target_feature(enable = "avx512f,avx512vl")]
3857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3858#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3859pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3860    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
3861}
3862
3863/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3864///
3865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3866#[inline]
3867#[target_feature(enable = "avx512f,avx512vl")]
3868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3869#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3870pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3871    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
3872}
3873
3874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3875///
3876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3877#[inline]
3878#[target_feature(enable = "avx512f")]
3879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3880#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3881pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3882    unsafe { simd_fma(a, b, simd_neg(c)) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3888#[inline]
3889#[target_feature(enable = "avx512f")]
3890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3891#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3892pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3893    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
3894}
3895
3896/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3897///
3898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3899#[inline]
3900#[target_feature(enable = "avx512f")]
3901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3902#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3903pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3904    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
3905}
3906
3907/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3908///
3909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3910#[inline]
3911#[target_feature(enable = "avx512f")]
3912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3913#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3914pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3915    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
3916}
3917
3918/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3919///
3920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3921#[inline]
3922#[target_feature(enable = "avx512f,avx512vl")]
3923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3924#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3925pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3926    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
3927}
3928
3929/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3930///
3931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3932#[inline]
3933#[target_feature(enable = "avx512f,avx512vl")]
3934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3935#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3936pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3937    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
3938}
3939
3940/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3941///
3942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3943#[inline]
3944#[target_feature(enable = "avx512f,avx512vl")]
3945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3946#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3947pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3948    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
3949}
3950
3951/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3954#[inline]
3955#[target_feature(enable = "avx512f,avx512vl")]
3956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3957#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3958pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3959    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
3960}
3961
3962/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3963///
3964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3965#[inline]
3966#[target_feature(enable = "avx512f,avx512vl")]
3967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3968#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3969pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3970    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
3971}
3972
3973/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3974///
3975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3976#[inline]
3977#[target_feature(enable = "avx512f,avx512vl")]
3978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3979#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3980pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3981    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
3982}
3983
3984/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3985///
3986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
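///
/// Illustrative sketch, not taken from Intel's documentation: even-indexed
/// lanes compute `a * b - c` and odd-indexed lanes compute `a * b + c`. It
/// assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]` and
/// runtime AVX-512F detection.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was just verified at runtime.
///     unsafe {
///         let a = _mm512_set1_ps(2.0);
///         let b = _mm512_set1_ps(3.0);
///         let c = _mm512_set1_ps(1.0);
///         // Even lanes: 2.0 * 3.0 - 1.0 = 5.0; odd lanes: 2.0 * 3.0 + 1.0 = 7.0.
///         let r = _mm512_fmaddsub_ps(a, b, c);
///         let lanes: [f32; 16] = core::mem::transmute(r);
///         assert_eq!(lanes[0], 5.0);
///         assert_eq!(lanes[1], 7.0);
///     }
/// }
/// ```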
3987#[inline]
3988#[target_feature(enable = "avx512f")]
3989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3990#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3991pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3992    unsafe {
3993        let add = simd_fma(a, b, c);
3994        let sub = simd_fma(a, b, simd_neg(c));
3995        simd_shuffle!(
3996            add,
3997            sub,
3998            [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
3999        )
4000    }
4001}
4002
4003/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4004///
4005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4006#[inline]
4007#[target_feature(enable = "avx512f")]
4008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4009#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4010pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4011    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4012}
4013
4014/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4015///
4016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4017#[inline]
4018#[target_feature(enable = "avx512f")]
4019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4020#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4021pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4022    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4023}
4024
4025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4026///
4027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4028#[inline]
4029#[target_feature(enable = "avx512f")]
4030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4031#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4032pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4033    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4034}
4035
4036/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4037///
4038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4039#[inline]
4040#[target_feature(enable = "avx512f,avx512vl")]
4041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4042#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4043pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4044    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4045}
4046
4047/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4050#[inline]
4051#[target_feature(enable = "avx512f,avx512vl")]
4052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4053#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4054pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4055    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4056}
4057
4058/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4059///
4060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4061#[inline]
4062#[target_feature(enable = "avx512f,avx512vl")]
4063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4064#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4065pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4066    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
4067}
4068
4069/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4070///
4071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4072#[inline]
4073#[target_feature(enable = "avx512f,avx512vl")]
4074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4075#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4076pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4077    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
4078}
4079
4080/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4081///
4082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
4083#[inline]
4084#[target_feature(enable = "avx512f,avx512vl")]
4085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4086#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4087pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4088    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
4089}
4090
4091/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4092///
4093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4094#[inline]
4095#[target_feature(enable = "avx512f,avx512vl")]
4096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4097#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4098pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4099    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
4100}
4101
4102/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4103///
4104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
4105#[inline]
4106#[target_feature(enable = "avx512f")]
4107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4108#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4109pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4110    unsafe {
4111        let add = simd_fma(a, b, c);
4112        let sub = simd_fma(a, b, simd_neg(c));
4113        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4114    }
4115}
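
// A scalar sketch of the interleave built by the shuffle above (illustration
// only; `fmaddsub_pd_model` is hypothetical). Even lanes take `a*b - c` (the
// `sub` half, indices 8, 10, 12, 14) and odd lanes take `a*b + c` (the `add`
// half, indices 1, 3, 5, 7). Note that the hardware instruction performs each
// multiply-add/subtract with a single rounding, unlike this two-step model.
#[cfg(test)]
#[allow(dead_code)]
fn fmaddsub_pd_model(a: [f64; 8], b: [f64; 8], c: [f64; 8]) -> [f64; 8] {
    let mut r = [0.0; 8];
    for i in 0..8 {
        r[i] = if i % 2 == 0 {
            a[i] * b[i] - c[i] // even lane: subtract
        } else {
            a[i] * b[i] + c[i] // odd lane: add
        };
    }
    r
}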
4116
4117/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4118///
4119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
4120#[inline]
4121#[target_feature(enable = "avx512f")]
4122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4123#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4124pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4125    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
4126}
4127
4128/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4129///
4130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
4131#[inline]
4132#[target_feature(enable = "avx512f")]
4133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4134#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4135pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4136    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
4137}
4138
4139/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4140///
4141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
4142#[inline]
4143#[target_feature(enable = "avx512f")]
4144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4145#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4146pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4147    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
4148}
4149
4150/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4151///
4152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4153#[inline]
4154#[target_feature(enable = "avx512f,avx512vl")]
4155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4156#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4157pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4158    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4167#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4168pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4169    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
4170}
4171
4172/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4173///
4174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4175#[inline]
4176#[target_feature(enable = "avx512f,avx512vl")]
4177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4178#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4179pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4180    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
4181}
4182
4183/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4184///
4185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4186#[inline]
4187#[target_feature(enable = "avx512f,avx512vl")]
4188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4189#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4190pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4191    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
4192}
4193
4194/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4195///
4196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4197#[inline]
4198#[target_feature(enable = "avx512f,avx512vl")]
4199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4200#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4201pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4202    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
4203}
4204
4205/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4208#[inline]
4209#[target_feature(enable = "avx512f,avx512vl")]
4210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4212pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4213    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
4214}
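
// A small usage sketch for the masked variants above (test-only; the helper
// name and the chosen mask are hypothetical). This applies fmaddsub to the
// lower eight f32 lanes and leaves the upper eight lanes equal to `a`. In std
// code a caller would typically guard this behind
// `is_x86_feature_detected!("avx512f")`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn fmaddsub_lower_half(a: __m512, b: __m512, c: __m512) -> __m512 {
    _mm512_mask_fmaddsub_ps(a, 0x00ff, b, c)
}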
4215
4216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4217///
4218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
4219#[inline]
4220#[target_feature(enable = "avx512f")]
4221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4222#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4223pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4224    unsafe {
4225        let add = simd_fma(a, b, c);
4226        let sub = simd_fma(a, b, simd_neg(c));
4227        simd_shuffle!(
4228            add,
4229            sub,
4230            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4231        )
4232    }
4233}
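
// The mirror image of fmaddsub: a scalar sketch (illustration only;
// `fmsubadd_ps_model` is hypothetical) of the interleave above. Even lanes
// take `a*b + c` (the `add` half, indices 0, 2, ..., 14) and odd lanes take
// `a*b - c` (the `sub` half, indices 17, 19, ..., 31); the hardware fuses each
// multiply with its add/subtract into a single rounding.
#[cfg(test)]
#[allow(dead_code)]
fn fmsubadd_ps_model(a: [f32; 16], b: [f32; 16], c: [f32; 16]) -> [f32; 16] {
    let mut r = [0.0; 16];
    for i in 0..16 {
        r[i] = if i % 2 == 0 {
            a[i] * b[i] + c[i] // even lane: add
        } else {
            a[i] * b[i] - c[i] // odd lane: subtract
        };
    }
    r
}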
4234
4235/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4236///
4237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4238#[inline]
4239#[target_feature(enable = "avx512f")]
4240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4241#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4242pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4243    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
4244}
4245
4246/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4247///
4248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
4249#[inline]
4250#[target_feature(enable = "avx512f")]
4251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4252#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4253pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4254    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
4255}
4256
4257/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4258///
4259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
4260#[inline]
4261#[target_feature(enable = "avx512f")]
4262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4263#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4264pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4265    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
4266}
4267
4268/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4269///
4270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4271#[inline]
4272#[target_feature(enable = "avx512f,avx512vl")]
4273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4274#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4275pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4276    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
4277}
4278
4279/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4280///
4281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4282#[inline]
4283#[target_feature(enable = "avx512f,avx512vl")]
4284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4285#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4286pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4287    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
4288}
4289
4290/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4291///
4292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4293#[inline]
4294#[target_feature(enable = "avx512f,avx512vl")]
4295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4296#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4297pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4298    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4307#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4308pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4309    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
4310}
4311
4312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4313///
4314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4315#[inline]
4316#[target_feature(enable = "avx512f,avx512vl")]
4317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4318#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4319pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4320    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
4321}
4322
4323/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4324///
4325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4326#[inline]
4327#[target_feature(enable = "avx512f,avx512vl")]
4328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4329#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4330pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4331    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
4332}
4333
4334/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4335///
4336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4337#[inline]
4338#[target_feature(enable = "avx512f")]
4339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4340#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4341pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4342    unsafe {
4343        let add = simd_fma(a, b, c);
4344        let sub = simd_fma(a, b, simd_neg(c));
4345        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4346    }
4347}
4348
4349/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4350///
4351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4352#[inline]
4353#[target_feature(enable = "avx512f")]
4354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4355#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4356pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4357    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
4358}
4359
4360/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4361///
4362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4363#[inline]
4364#[target_feature(enable = "avx512f")]
4365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4366#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4367pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4368    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
4369}
4370
4371/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4372///
4373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4374#[inline]
4375#[target_feature(enable = "avx512f")]
4376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4377#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4378pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4379    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
4380}
4381
4382/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4383///
4384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4385#[inline]
4386#[target_feature(enable = "avx512f,avx512vl")]
4387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4388#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4389pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4390    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
4391}
4392
4393/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4394///
4395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4396#[inline]
4397#[target_feature(enable = "avx512f,avx512vl")]
4398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4399#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4400pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4401    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
4402}
4403
4404/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4405///
4406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4407#[inline]
4408#[target_feature(enable = "avx512f,avx512vl")]
4409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4410#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4411pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4412    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
4413}
4414
4415/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4416///
4417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4418#[inline]
4419#[target_feature(enable = "avx512f,avx512vl")]
4420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4421#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4422pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4423    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
4424}
4425
4426/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4427///
4428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4429#[inline]
4430#[target_feature(enable = "avx512f,avx512vl")]
4431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4432#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4433pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4434    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
4435}
4436
4437/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4440#[inline]
4441#[target_feature(enable = "avx512f,avx512vl")]
4442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4444pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4445    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
4446}
4447
4448/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4449///
4450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
4451#[inline]
4452#[target_feature(enable = "avx512f")]
4453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4454#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4455pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4456    unsafe { simd_fma(simd_neg(a), b, c) }
4457}
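
// Per lane, fnmadd computes `-(a * b) + c`; negating `a` before the fused
// multiply-add, as done above, yields exactly the same value. A scalar sketch
// (illustration only; `fnmadd_lane_model` is hypothetical, and the hardware
// performs the whole operation with a single rounding):
#[cfg(test)]
#[allow(dead_code)]
fn fnmadd_lane_model(a: f32, b: f32, c: f32) -> f32 {
    -(a * b) + c
}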
4458
4459/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4462#[inline]
4463#[target_feature(enable = "avx512f")]
4464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4465#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4466pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4467    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
4468}
4469
4470/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4471///
4472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4473#[inline]
4474#[target_feature(enable = "avx512f")]
4475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4476#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4477pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4478    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4487#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4488pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4489    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
4490}
4491
4492/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4495#[inline]
4496#[target_feature(enable = "avx512f,avx512vl")]
4497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4498#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4499pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4500    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
4501}
4502
4503/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4504///
4505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4506#[inline]
4507#[target_feature(enable = "avx512f,avx512vl")]
4508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4509#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4510pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4511    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
4512}
4513
4514/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4515///
4516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4517#[inline]
4518#[target_feature(enable = "avx512f,avx512vl")]
4519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4520#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4521pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4522    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
4523}
4524
4525/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4526///
4527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4528#[inline]
4529#[target_feature(enable = "avx512f,avx512vl")]
4530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4531#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4532pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4533    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
4534}
4535
4536/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4537///
4538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4539#[inline]
4540#[target_feature(enable = "avx512f,avx512vl")]
4541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4542#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4543pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4544    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
4545}
4546
4547/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4548///
4549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4550#[inline]
4551#[target_feature(enable = "avx512f,avx512vl")]
4552#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4553#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4554pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4555    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
4556}
4557
4558/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4559///
4560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4561#[inline]
4562#[target_feature(enable = "avx512f")]
4563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4564#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4565pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4566    unsafe { simd_fma(simd_neg(a), b, c) }
4567}
4568
4569/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4570///
4571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4572#[inline]
4573#[target_feature(enable = "avx512f")]
4574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4575#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4576pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4577    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
4578}
4579
4580/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4581///
4582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4583#[inline]
4584#[target_feature(enable = "avx512f")]
4585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4586#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4587pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4588    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
4589}
4590
4591/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4592///
4593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4594#[inline]
4595#[target_feature(enable = "avx512f")]
4596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4597#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4598pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4599    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
4600}
4601
4602/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4603///
4604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4605#[inline]
4606#[target_feature(enable = "avx512f,avx512vl")]
4607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4608#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4609pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4610    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
4611}
4612
4613/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4614///
4615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4616#[inline]
4617#[target_feature(enable = "avx512f,avx512vl")]
4618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4619#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4620pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4621    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
4622}
4623
4624/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4625///
4626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4627#[inline]
4628#[target_feature(enable = "avx512f,avx512vl")]
4629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4630#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4631pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4632    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
4633}
4634
4635/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4636///
4637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4638#[inline]
4639#[target_feature(enable = "avx512f,avx512vl")]
4640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4641#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4642pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4643    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
4644}
4645
4646/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4647///
4648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4649#[inline]
4650#[target_feature(enable = "avx512f,avx512vl")]
4651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4652#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4653pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4654    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
4655}
4656
4657/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4658///
4659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4660#[inline]
4661#[target_feature(enable = "avx512f,avx512vl")]
4662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4663#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4664pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4665    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
4666}
4667
4668/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4669///
4670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
4671#[inline]
4672#[target_feature(enable = "avx512f")]
4673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4674#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4675pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4676    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4677}
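
// Per lane, fnmsub computes `-(a * b) - c`, i.e. both the product and `c` are
// negated relative to a plain fma. A scalar sketch (illustration only;
// `fnmsub_lane_model` is hypothetical; the hardware uses a single rounding):
#[cfg(test)]
#[allow(dead_code)]
fn fnmsub_lane_model(a: f32, b: f32, c: f32) -> f32 {
    -(a * b) - c
}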
4678
4679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4682#[inline]
4683#[target_feature(enable = "avx512f")]
4684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4685#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4686pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4687    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
4688}
4689
4690/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4691///
4692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4693#[inline]
4694#[target_feature(enable = "avx512f")]
4695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4696#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4697pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4698    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4707#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4708pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4709    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
4710}
4711
4712/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4713///
4714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4715#[inline]
4716#[target_feature(enable = "avx512f,avx512vl")]
4717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4718#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4719pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4720    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
4721}
4722
4723/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4724///
4725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4726#[inline]
4727#[target_feature(enable = "avx512f,avx512vl")]
4728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4729#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4730pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4731    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
4732}
4733
4734/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4735///
4736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4737#[inline]
4738#[target_feature(enable = "avx512f,avx512vl")]
4739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4740#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4741pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4742    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
4743}
4744
4745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4746///
4747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4748#[inline]
4749#[target_feature(enable = "avx512f,avx512vl")]
4750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4751#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4752pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4753    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
4754}
4755
4756/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4759#[inline]
4760#[target_feature(enable = "avx512f,avx512vl")]
4761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4762#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4763pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4764    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
4765}
4766
4767/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4768///
4769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4770#[inline]
4771#[target_feature(enable = "avx512f,avx512vl")]
4772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4773#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4774pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4775    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
4776}
4777
4778/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
4781#[inline]
4782#[target_feature(enable = "avx512f")]
4783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4784#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4785pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4786    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4787}
4788
4789/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4790///
4791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4792#[inline]
4793#[target_feature(enable = "avx512f")]
4794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4795#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4796pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4797    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
4798}
4799
4800/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4801///
4802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4803#[inline]
4804#[target_feature(enable = "avx512f")]
4805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4806#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4807pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4808    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
4809}
4810
4811/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4812///
4813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4814#[inline]
4815#[target_feature(enable = "avx512f")]
4816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4817#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4818pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4819    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
4820}
4821
4822/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4823///
4824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
4825#[inline]
4826#[target_feature(enable = "avx512f,avx512vl")]
4827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4828#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4829pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4830    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
4836#[inline]
4837#[target_feature(enable = "avx512f,avx512vl")]
4838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4839#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4840pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4841    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
4842}
4843
4844/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4845///
4846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
4847#[inline]
4848#[target_feature(enable = "avx512f,avx512vl")]
4849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4850#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4851pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4852    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
4853}
4854
4855/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4856///
4857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
4858#[inline]
4859#[target_feature(enable = "avx512f,avx512vl")]
4860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4861#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4862pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4863    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
4864}
4865
4866/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
4869#[inline]
4870#[target_feature(enable = "avx512f,avx512vl")]
4871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4872#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4873pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4874    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
4875}
4876
4877/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4878///
4879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
4880#[inline]
4881#[target_feature(enable = "avx512f,avx512vl")]
4882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4883#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4884pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4885    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
4886}
4887
4888/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4889///
4890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
4891#[inline]
4892#[target_feature(enable = "avx512f")]
4893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4894#[cfg_attr(test, assert_instr(vrcp14ps))]
4895pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
4896    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
4897}
4898
4899/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4900///
4901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
4902#[inline]
4903#[target_feature(enable = "avx512f")]
4904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4905#[cfg_attr(test, assert_instr(vrcp14ps))]
4906pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
4907    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
4908}
4909
4910/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4911///
4912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
4913#[inline]
4914#[target_feature(enable = "avx512f")]
4915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4916#[cfg_attr(test, assert_instr(vrcp14ps))]
4917pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
4918    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
4919}
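
// Illustrative sketch, not part of the upstream file: `rcp14_ps_sketch` is a hypothetical
// helper. Every computed lane approximates 1.0 / 4.0 = 0.25 with a relative error below
// 2^-14; in the zero-masked variant only the lanes selected by `k` are computed and the
// rest are 0.0.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rcp14_ps_sketch(k: __mmask16) -> (__m512, __m512) {
    let a = _mm512_set1_ps(4.0);
    (_mm512_rcp14_ps(a), _mm512_maskz_rcp14_ps(k, a))
}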
4920
4921/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4922///
4923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
4924#[inline]
4925#[target_feature(enable = "avx512f,avx512vl")]
4926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4927#[cfg_attr(test, assert_instr(vrcp14ps))]
4928pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
4929    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
4930}
4931
4932/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4933///
4934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
4935#[inline]
4936#[target_feature(enable = "avx512f,avx512vl")]
4937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4938#[cfg_attr(test, assert_instr(vrcp14ps))]
4939pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
4940    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
4941}
4942
4943/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4944///
4945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
4946#[inline]
4947#[target_feature(enable = "avx512f,avx512vl")]
4948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4949#[cfg_attr(test, assert_instr(vrcp14ps))]
4950pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
4951    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
4952}
4953
4954/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4955///
4956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
4957#[inline]
4958#[target_feature(enable = "avx512f,avx512vl")]
4959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4960#[cfg_attr(test, assert_instr(vrcp14ps))]
4961pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
4962    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
4963}
4964
4965/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
4968#[inline]
4969#[target_feature(enable = "avx512f,avx512vl")]
4970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4971#[cfg_attr(test, assert_instr(vrcp14ps))]
4972pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
4973    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
4974}
4975
4976/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4977///
4978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
4979#[inline]
4980#[target_feature(enable = "avx512f,avx512vl")]
4981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4982#[cfg_attr(test, assert_instr(vrcp14ps))]
4983pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
4984    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
4985}
4986
4987/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4988///
4989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
4990#[inline]
4991#[target_feature(enable = "avx512f")]
4992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4993#[cfg_attr(test, assert_instr(vrcp14pd))]
4994pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
4995    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
4996}
4997
4998/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4999///
5000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5001#[inline]
5002#[target_feature(enable = "avx512f")]
5003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5004#[cfg_attr(test, assert_instr(vrcp14pd))]
5005pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5006    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5007}
5008
5009/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5010///
5011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5012#[inline]
5013#[target_feature(enable = "avx512f")]
5014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5015#[cfg_attr(test, assert_instr(vrcp14pd))]
5016pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
5017    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5018}
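
// Illustrative sketch, not part of the upstream file: `rcp14_pd_nr_sketch` is a
// hypothetical helper. One Newton-Raphson step, x1 = x0 * (2 - a * x0), roughly squares
// the relative error of the hardware estimate (from about 2^-14 to about 2^-28); a second
// step would be needed to approach full f64 precision.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rcp14_pd_nr_sketch(a: __m512d) -> __m512d {
    let x0 = _mm512_rcp14_pd(a);
    // t = -(a * x0) + 2.0
    let t = _mm512_fnmadd_pd(a, x0, _mm512_set1_pd(2.0));
    _mm512_mul_pd(x0, t)
}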
5019
5020/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5023#[inline]
5024#[target_feature(enable = "avx512f,avx512vl")]
5025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5026#[cfg_attr(test, assert_instr(vrcp14pd))]
5027pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
5028    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5029}
5030
5031/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5032///
5033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5034#[inline]
5035#[target_feature(enable = "avx512f,avx512vl")]
5036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5037#[cfg_attr(test, assert_instr(vrcp14pd))]
5038pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5039    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5040}
5041
5042/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5043///
5044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5045#[inline]
5046#[target_feature(enable = "avx512f,avx512vl")]
5047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5048#[cfg_attr(test, assert_instr(vrcp14pd))]
5049pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
5050    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5051}
5052
5053/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5054///
5055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5056#[inline]
5057#[target_feature(enable = "avx512f,avx512vl")]
5058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5059#[cfg_attr(test, assert_instr(vrcp14pd))]
5060pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
5061    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5062}
5063
5064/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5065///
5066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5067#[inline]
5068#[target_feature(enable = "avx512f,avx512vl")]
5069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5070#[cfg_attr(test, assert_instr(vrcp14pd))]
5071pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5072    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5073}
5074
5075/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5076///
5077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5078#[inline]
5079#[target_feature(enable = "avx512f,avx512vl")]
5080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5081#[cfg_attr(test, assert_instr(vrcp14pd))]
5082pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
5083    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5084}
5085
5086/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5087///
5088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
5089#[inline]
5090#[target_feature(enable = "avx512f")]
5091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5092#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5093pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
5094    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
5095}
5096
5097/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5100#[inline]
5101#[target_feature(enable = "avx512f")]
5102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5103#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5104pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5105    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
5106}
5107
5108/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5109///
5110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
5111#[inline]
5112#[target_feature(enable = "avx512f")]
5113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5114#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5115pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
5116    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
5117}
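
// Illustrative sketch, not part of the upstream file: `rsqrt14_ps_normalize_sketch` is a
// hypothetical helper. Multiplying by the reciprocal square root estimate scales `x` by
// roughly 1 / sqrt(len_sq) without a division or square root, at the cost of the
// estimate's 2^-14 relative error.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rsqrt14_ps_normalize_sketch(x: __m512, len_sq: __m512) -> __m512 {
    _mm512_mul_ps(x, _mm512_rsqrt14_ps(len_sq))
}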
5118
5119/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5120///
5121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5122#[inline]
5123#[target_feature(enable = "avx512f,avx512vl")]
5124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5125#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5126pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
5127    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5128}
5129
5130/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5133#[inline]
5134#[target_feature(enable = "avx512f,avx512vl")]
5135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5136#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5137pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5138    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
5139}
5140
5141/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5142///
5143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5144#[inline]
5145#[target_feature(enable = "avx512f,avx512vl")]
5146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5147#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5148pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
5149    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
5150}
5151
5152/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5153///
5154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5155#[inline]
5156#[target_feature(enable = "avx512f,avx512vl")]
5157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5158#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5159pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
5160    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5161}
5162
5163/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5166#[inline]
5167#[target_feature(enable = "avx512f,avx512vl")]
5168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5169#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5170pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5171    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
5172}
5173
5174/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5177#[inline]
5178#[target_feature(enable = "avx512f,avx512vl")]
5179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5180#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5181pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
5182    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
5183}
5184
5185/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5188#[inline]
5189#[target_feature(enable = "avx512f")]
5190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5191#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5192pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
5193    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
5194}
5195
5196/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5199#[inline]
5200#[target_feature(enable = "avx512f")]
5201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5202#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5203pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5204    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5205}
5206
5207/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5210#[inline]
5211#[target_feature(enable = "avx512f")]
5212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5213#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5214pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
5215    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5216}
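
// Illustrative sketch, not part of the upstream file: `rsqrt14_pd_nr_sketch` is a
// hypothetical helper. One Newton-Raphson step for 1 / sqrt(a),
// y1 = y0 * (1.5 - 0.5 * a * y0 * y0), roughly squares the 2^-14 relative error of the
// hardware estimate.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rsqrt14_pd_nr_sketch(a: __m512d) -> __m512d {
    let y0 = _mm512_rsqrt14_pd(a);
    let half_a_y0 = _mm512_mul_pd(_mm512_mul_pd(_mm512_set1_pd(0.5), a), y0);
    // t = -(0.5 * a * y0 * y0) + 1.5
    let t = _mm512_fnmadd_pd(half_a_y0, y0, _mm512_set1_pd(1.5));
    _mm512_mul_pd(y0, t)
}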
5217
5218/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5221#[inline]
5222#[target_feature(enable = "avx512f,avx512vl")]
5223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5224#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5225pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
5226    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5227}
5228
5229/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5232#[inline]
5233#[target_feature(enable = "avx512f,avx512vl")]
5234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5235#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5236pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5237    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5238}
5239
5240/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5243#[inline]
5244#[target_feature(enable = "avx512f,avx512vl")]
5245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5246#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5247pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
5248    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5249}
5250
5251/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5254#[inline]
5255#[target_feature(enable = "avx512f,avx512vl")]
5256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5257#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5258pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
5259    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5260}
5261
5262/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5265#[inline]
5266#[target_feature(enable = "avx512f,avx512vl")]
5267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5268#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5269pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5270    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5271}
5272
5273/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5274///
5275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5276#[inline]
5277#[target_feature(enable = "avx512f,avx512vl")]
5278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5279#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5280pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
5281    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5282}
5283
5284/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5285///
5286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
5287#[inline]
5288#[target_feature(enable = "avx512f")]
5289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5290#[cfg_attr(test, assert_instr(vgetexpps))]
5291pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5292    unsafe {
5293        transmute(vgetexpps(
5294            a.as_f32x16(),
5295            f32x16::ZERO,
5296            0b11111111_11111111,
5297            _MM_FROUND_CUR_DIRECTION,
5298        ))
5299    }
5300}
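
// Illustrative sketch, not part of the upstream file: `getexp_ps_sketch` is a
// hypothetical helper. Because the result is floor(log2(|x|)) per lane, the inputs
// 8.0, 0.5, 1.0 and 24.0 map to 3.0, -1.0, 0.0 and 4.0 respectively.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn getexp_ps_sketch() -> __m512 {
    let a = _mm512_setr_ps(
        8.0, 0.5, 1.0, 24.0, 8.0, 0.5, 1.0, 24.0, 8.0, 0.5, 1.0, 24.0, 8.0, 0.5, 1.0, 24.0,
    );
    _mm512_getexp_ps(a)
}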
5301
5302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5303///
5304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5305#[inline]
5306#[target_feature(enable = "avx512f")]
5307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5308#[cfg_attr(test, assert_instr(vgetexpps))]
5309pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5310    unsafe {
5311        transmute(vgetexpps(
5312            a.as_f32x16(),
5313            src.as_f32x16(),
5314            k,
5315            _MM_FROUND_CUR_DIRECTION,
5316        ))
5317    }
5318}
5319
5320/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5321///
5322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5323#[inline]
5324#[target_feature(enable = "avx512f")]
5325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5326#[cfg_attr(test, assert_instr(vgetexpps))]
5327pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5328    unsafe {
5329        transmute(vgetexpps(
5330            a.as_f32x16(),
5331            f32x16::ZERO,
5332            k,
5333            _MM_FROUND_CUR_DIRECTION,
5334        ))
5335    }
5336}
5337
5338/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5339///
5340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5341#[inline]
5342#[target_feature(enable = "avx512f,avx512vl")]
5343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5344#[cfg_attr(test, assert_instr(vgetexpps))]
5345pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
5346    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5347}
5348
5349/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5350///
5351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5352#[inline]
5353#[target_feature(enable = "avx512f,avx512vl")]
5354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5355#[cfg_attr(test, assert_instr(vgetexpps))]
5356pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5357    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
5358}
5359
5360/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5361///
5362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5363#[inline]
5364#[target_feature(enable = "avx512f,avx512vl")]
5365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5366#[cfg_attr(test, assert_instr(vgetexpps))]
5367pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
5368    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
5369}
5370
5371/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5372///
5373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5374#[inline]
5375#[target_feature(enable = "avx512f,avx512vl")]
5376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5377#[cfg_attr(test, assert_instr(vgetexpps))]
5378pub fn _mm_getexp_ps(a: __m128) -> __m128 {
5379    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5380}
5381
5382/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5383///
5384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5385#[inline]
5386#[target_feature(enable = "avx512f,avx512vl")]
5387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5388#[cfg_attr(test, assert_instr(vgetexpps))]
5389pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5390    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
5391}
5392
5393/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5394///
5395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5396#[inline]
5397#[target_feature(enable = "avx512f,avx512vl")]
5398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5399#[cfg_attr(test, assert_instr(vgetexpps))]
5400pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
5401    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
5402}
5403
5404/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5405///
5406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5407#[inline]
5408#[target_feature(enable = "avx512f")]
5409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5410#[cfg_attr(test, assert_instr(vgetexppd))]
5411pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5412    unsafe {
5413        transmute(vgetexppd(
5414            a.as_f64x8(),
5415            f64x8::ZERO,
5416            0b11111111,
5417            _MM_FROUND_CUR_DIRECTION,
5418        ))
5419    }
5420}
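
// Illustrative sketch, not part of the upstream file: `getexp_pd_mask_sketch` is a
// hypothetical helper. Lanes selected by `k` receive floor(log2(|a|)); the remaining
// lanes keep the corresponding value from `src`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn getexp_pd_mask_sketch(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    _mm512_mask_getexp_pd(src, k, a)
}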
5421
5422/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5425#[inline]
5426#[target_feature(enable = "avx512f")]
5427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5428#[cfg_attr(test, assert_instr(vgetexppd))]
5429pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5430    unsafe {
5431        transmute(vgetexppd(
5432            a.as_f64x8(),
5433            src.as_f64x8(),
5434            k,
5435            _MM_FROUND_CUR_DIRECTION,
5436        ))
5437    }
5438}
5439
5440/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5443#[inline]
5444#[target_feature(enable = "avx512f")]
5445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5446#[cfg_attr(test, assert_instr(vgetexppd))]
5447pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5448    unsafe {
5449        transmute(vgetexppd(
5450            a.as_f64x8(),
5451            f64x8::ZERO,
5452            k,
5453            _MM_FROUND_CUR_DIRECTION,
5454        ))
5455    }
5456}
5457
5458/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5459///
5460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5461#[inline]
5462#[target_feature(enable = "avx512f,avx512vl")]
5463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5464#[cfg_attr(test, assert_instr(vgetexppd))]
5465pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
5466    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5467}
5468
5469/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5470///
5471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5472#[inline]
5473#[target_feature(enable = "avx512f,avx512vl")]
5474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5475#[cfg_attr(test, assert_instr(vgetexppd))]
5476pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5477    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
5478}
5479
5480/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5481///
5482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5483#[inline]
5484#[target_feature(enable = "avx512f,avx512vl")]
5485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5486#[cfg_attr(test, assert_instr(vgetexppd))]
5487pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
5488    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
5489}
5490
5491/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5492///
5493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5494#[inline]
5495#[target_feature(enable = "avx512f,avx512vl")]
5496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5497#[cfg_attr(test, assert_instr(vgetexppd))]
5498pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
5499    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5500}
5501
5502/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5503///
5504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5505#[inline]
5506#[target_feature(enable = "avx512f,avx512vl")]
5507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5508#[cfg_attr(test, assert_instr(vgetexppd))]
5509pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5510    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
5511}
5512
5513/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5514///
5515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5516#[inline]
5517#[target_feature(enable = "avx512f,avx512vl")]
5518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5519#[cfg_attr(test, assert_instr(vgetexppd))]
5520pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5521    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
5522}
5523
5524/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5525/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5526/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5527/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5528/// * [`_MM_FROUND_TO_POS_INF`] : round up
5529/// * [`_MM_FROUND_TO_ZERO`] : truncate
5530/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5531///
5532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5533#[inline]
5534#[target_feature(enable = "avx512f")]
5535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5536#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5537#[rustc_legacy_const_generics(1)]
5538pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5539    unsafe {
5540        static_assert_uimm_bits!(IMM8, 8);
5541        let a = a.as_f32x16();
5542        let r = vrndscaleps(
5543            a,
5544            IMM8,
5545            f32x16::ZERO,
5546            0b11111111_11111111,
5547            _MM_FROUND_CUR_DIRECTION,
5548        );
5549        transmute(r)
5550    }
5551}
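
// Illustrative sketch, not part of the upstream file: `roundscale_ps_imm8_sketch` is a
// hypothetical helper. IMM8[7:4] holds the number of fraction bits to keep and IMM8[2:0]
// the rounding mode, so IMM8 = 0 rounds to the nearest integer (2.7 becomes 3.0) while
// (1 << 4) | _MM_FROUND_TO_ZERO keeps one fraction bit and truncates (2.7 becomes 2.5).
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn roundscale_ps_imm8_sketch(a: __m512) -> (__m512, __m512) {
    (
        _mm512_roundscale_ps::<0>(a),
        _mm512_roundscale_ps::<{ (1 << 4) | _MM_FROUND_TO_ZERO }>(a),
    )
}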
5552
5553/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5554/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5555/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5556/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5557/// * [`_MM_FROUND_TO_POS_INF`] : round up
5558/// * [`_MM_FROUND_TO_ZERO`] : truncate
5559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5560///
5561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5562#[inline]
5563#[target_feature(enable = "avx512f")]
5564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5565#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5566#[rustc_legacy_const_generics(3)]
5567pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5568    unsafe {
5569        static_assert_uimm_bits!(IMM8, 8);
5570        let a = a.as_f32x16();
5571        let src = src.as_f32x16();
5572        let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5573        transmute(r)
5574    }
5575}
5576
5577/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5578/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5579/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5580/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5581/// * [`_MM_FROUND_TO_POS_INF`] : round up
5582/// * [`_MM_FROUND_TO_ZERO`] : truncate
5583/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5589#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5590#[rustc_legacy_const_generics(2)]
5591pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5592    unsafe {
5593        static_assert_uimm_bits!(IMM8, 8);
5594        let a = a.as_f32x16();
5595        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5596        transmute(r)
5597    }
5598}
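
// Illustrative sketch, not part of the upstream file: `roundscale_ps_maskz_sketch` is a
// hypothetical helper. Only lanes whose bit is set in `k` are rounded to the nearest
// integer; the remaining lanes of the result are zeroed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn roundscale_ps_maskz_sketch(k: __mmask16, a: __m512) -> __m512 {
    _mm512_maskz_roundscale_ps::<{ _MM_FROUND_TO_NEAREST_INT }>(k, a)
}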
5599
5600/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5601/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5602/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5603/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5604/// * [`_MM_FROUND_TO_POS_INF`] : round up
5605/// * [`_MM_FROUND_TO_ZERO`] : truncate
5606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5607///
5608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5609#[inline]
5610#[target_feature(enable = "avx512f,avx512vl")]
5611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5612#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5613#[rustc_legacy_const_generics(1)]
5614pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5615    unsafe {
5616        static_assert_uimm_bits!(IMM8, 8);
5617        let a = a.as_f32x8();
5618        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
5619        transmute(r)
5620    }
5621}
5622
5623/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5624/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5625/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5626/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5627/// * [`_MM_FROUND_TO_POS_INF`] : round up
5628/// * [`_MM_FROUND_TO_ZERO`] : truncate
5629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5630///
5631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5632#[inline]
5633#[target_feature(enable = "avx512f,avx512vl")]
5634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5635#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5636#[rustc_legacy_const_generics(3)]
5637pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638    unsafe {
5639        static_assert_uimm_bits!(IMM8, 8);
5640        let a = a.as_f32x8();
5641        let src = src.as_f32x8();
5642        let r = vrndscaleps256(a, IMM8, src, k);
5643        transmute(r)
5644    }
5645}
5646
5647/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5648/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5651/// * [`_MM_FROUND_TO_POS_INF`] : round up
5652/// * [`_MM_FROUND_TO_ZERO`] : truncate
5653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5654///
5655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5656#[inline]
5657#[target_feature(enable = "avx512f,avx512vl")]
5658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5659#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5660#[rustc_legacy_const_generics(2)]
5661pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5662    unsafe {
5663        static_assert_uimm_bits!(IMM8, 8);
5664        let a = a.as_f32x8();
5665        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
5666        transmute(r)
5667    }
5668}
5669
5670/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5671/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5674/// * [`_MM_FROUND_TO_POS_INF`] : round up
5675/// * [`_MM_FROUND_TO_ZERO`] : truncate
5676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5677///
5678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5679#[inline]
5680#[target_feature(enable = "avx512f,avx512vl")]
5681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5682#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5683#[rustc_legacy_const_generics(1)]
5684pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5685    unsafe {
5686        static_assert_uimm_bits!(IMM8, 8);
5687        let a = a.as_f32x4();
5688        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
5689        transmute(r)
5690    }
5691}
5692
5693/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5694/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5695/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5696/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5697/// * [`_MM_FROUND_TO_POS_INF`] : round up
5698/// * [`_MM_FROUND_TO_ZERO`] : truncate
5699/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5700///
5701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5702#[inline]
5703#[target_feature(enable = "avx512f,avx512vl")]
5704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5705#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5706#[rustc_legacy_const_generics(3)]
5707pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5708    unsafe {
5709        static_assert_uimm_bits!(IMM8, 8);
5710        let a = a.as_f32x4();
5711        let src = src.as_f32x4();
5712        let r = vrndscaleps128(a, IMM8, src, k);
5713        transmute(r)
5714    }
5715}
5716
5717/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5718/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5719/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5720/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5721/// * [`_MM_FROUND_TO_POS_INF`] : round up
5722/// * [`_MM_FROUND_TO_ZERO`] : truncate
5723/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5724///
5725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5726#[inline]
5727#[target_feature(enable = "avx512f,avx512vl")]
5728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5729#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5730#[rustc_legacy_const_generics(2)]
5731pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5732    unsafe {
5733        static_assert_uimm_bits!(IMM8, 8);
5734        let a = a.as_f32x4();
5735        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
5736        transmute(r)
5737    }
5738}
5739
5740/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5741/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5742/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5743/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5744/// * [`_MM_FROUND_TO_POS_INF`] : round up
5745/// * [`_MM_FROUND_TO_ZERO`] : truncate
5746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5747///
5748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
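///
/// # Examples
///
/// An illustrative sketch (the helper below is hypothetical; it assumes the
/// `avx512f` target feature is available at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // With imm8[7:4] = 0 fraction bits, roundscale is a plain round to an
/// // integral value; _MM_FROUND_TO_ZERO makes it a truncation (2.9 -> 2.0).
/// #[target_feature(enable = "avx512f")]
/// unsafe fn truncate(a: __m512d) -> __m512d {
///     _mm512_roundscale_pd::<{ _MM_FROUND_TO_ZERO }>(a)
/// }
/// ```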
5749#[inline]
5750#[target_feature(enable = "avx512f")]
5751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5752#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5753#[rustc_legacy_const_generics(1)]
5754pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5755    unsafe {
5756        static_assert_uimm_bits!(IMM8, 8);
5757        let a = a.as_f64x8();
5758        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
5759        transmute(r)
5760    }
5761}
5762
5763/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5764/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5765/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5766/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5767/// * [`_MM_FROUND_TO_POS_INF`] : round up
5768/// * [`_MM_FROUND_TO_ZERO`] : truncate
5769/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5770///
5771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
5772#[inline]
5773#[target_feature(enable = "avx512f")]
5774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5775#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5776#[rustc_legacy_const_generics(3)]
5777pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5778    src: __m512d,
5779    k: __mmask8,
5780    a: __m512d,
5781) -> __m512d {
5782    unsafe {
5783        static_assert_uimm_bits!(IMM8, 8);
5784        let a = a.as_f64x8();
5785        let src = src.as_f64x8();
5786        let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5787        transmute(r)
5788    }
5789}
5790
5791/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5792/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5793/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5794/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5795/// * [`_MM_FROUND_TO_POS_INF`] : round up
5796/// * [`_MM_FROUND_TO_ZERO`] : truncate
5797/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5798///
5799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5800#[inline]
5801#[target_feature(enable = "avx512f")]
5802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5803#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5804#[rustc_legacy_const_generics(2)]
5805pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5806    unsafe {
5807        static_assert_uimm_bits!(IMM8, 8);
5808        let a = a.as_f64x8();
5809        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5810        transmute(r)
5811    }
5812}
5813
5814/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5815/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5816/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5817/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5818/// * [`_MM_FROUND_TO_POS_INF`] : round up
5819/// * [`_MM_FROUND_TO_ZERO`] : truncate
5820/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5821///
5822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5823#[inline]
5824#[target_feature(enable = "avx512f,avx512vl")]
5825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5826#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5827#[rustc_legacy_const_generics(1)]
5828pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5829    unsafe {
5830        static_assert_uimm_bits!(IMM8, 8);
5831        let a = a.as_f64x4();
5832        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
5833        transmute(r)
5834    }
5835}
5836
5837/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5838/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5839/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5840/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5841/// * [`_MM_FROUND_TO_POS_INF`] : round up
5842/// * [`_MM_FROUND_TO_ZERO`] : truncate
5843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5844///
5845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5846#[inline]
5847#[target_feature(enable = "avx512f,avx512vl")]
5848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5849#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5850#[rustc_legacy_const_generics(3)]
5851pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5852    src: __m256d,
5853    k: __mmask8,
5854    a: __m256d,
5855) -> __m256d {
5856    unsafe {
5857        static_assert_uimm_bits!(IMM8, 8);
5858        let a = a.as_f64x4();
5859        let src = src.as_f64x4();
5860        let r = vrndscalepd256(a, IMM8, src, k);
5861        transmute(r)
5862    }
5863}
5864
5865/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5866/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5867/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5868/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5869/// * [`_MM_FROUND_TO_POS_INF`] : round up
5870/// * [`_MM_FROUND_TO_ZERO`] : truncate
5871/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5872///
5873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5874#[inline]
5875#[target_feature(enable = "avx512f,avx512vl")]
5876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5877#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5878#[rustc_legacy_const_generics(2)]
5879pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5880    unsafe {
5881        static_assert_uimm_bits!(IMM8, 8);
5882        let a = a.as_f64x4();
5883        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
5884        transmute(r)
5885    }
5886}
5887
5888/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5889/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5890/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5891/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5892/// * [`_MM_FROUND_TO_POS_INF`] : round up
5893/// * [`_MM_FROUND_TO_ZERO`] : truncate
5894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5895///
5896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5897#[inline]
5898#[target_feature(enable = "avx512f,avx512vl")]
5899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5900#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5901#[rustc_legacy_const_generics(1)]
5902pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5903    unsafe {
5904        static_assert_uimm_bits!(IMM8, 8);
5905        let a = a.as_f64x2();
5906        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
5907        transmute(r)
5908    }
5909}
5910
5911/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5912/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5913/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5914/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5915/// * [`_MM_FROUND_TO_POS_INF`] : round up
5916/// * [`_MM_FROUND_TO_ZERO`] : truncate
5917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5918///
5919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5920#[inline]
5921#[target_feature(enable = "avx512f,avx512vl")]
5922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5923#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5924#[rustc_legacy_const_generics(3)]
5925pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5926    unsafe {
5927        static_assert_uimm_bits!(IMM8, 8);
5928        let a = a.as_f64x2();
5929        let src = src.as_f64x2();
5930        let r = vrndscalepd128(a, IMM8, src, k);
5931        transmute(r)
5932    }
5933}
5934
5935/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5936/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5944#[inline]
5945#[target_feature(enable = "avx512f,avx512vl")]
5946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5947#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5948#[rustc_legacy_const_generics(2)]
5949pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5950    unsafe {
5951        static_assert_uimm_bits!(IMM8, 8);
5952        let a = a.as_f64x2();
5953        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
5954        transmute(r)
5955    }
5956}
5957
5958/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5959///
5960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
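///
/// # Examples
///
/// An illustrative sketch (the helper below is hypothetical; it assumes the
/// `avx512f` target feature is available at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // scalef computes a * 2^floor(b) per element, e.g. 3.0 * 2^2 = 12.0.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn times_four(a: __m512) -> __m512 {
///     _mm512_scalef_ps(a, _mm512_set1_ps(2.0))
/// }
/// ```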
5961#[inline]
5962#[target_feature(enable = "avx512f")]
5963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5964#[cfg_attr(test, assert_instr(vscalefps))]
5965pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
5966    unsafe {
5967        transmute(vscalefps(
5968            a.as_f32x16(),
5969            b.as_f32x16(),
5970            f32x16::ZERO,
5971            0b11111111_11111111,
5972            _MM_FROUND_CUR_DIRECTION,
5973        ))
5974    }
5975}
5976
5977/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5978///
5979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
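///
/// # Examples
///
/// An illustrative sketch of the writemask behaviour (the helper below is
/// hypothetical; it assumes the `avx512f` target feature is available at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Lanes whose mask bit is set receive a * 2^floor(b); the remaining lanes
/// // are copied unchanged from `src`.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn scale_selected(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
///     _mm512_mask_scalef_ps(src, k, a, b)
/// }
/// ```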
5980#[inline]
5981#[target_feature(enable = "avx512f")]
5982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5983#[cfg_attr(test, assert_instr(vscalefps))]
5984pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
5985    unsafe {
5986        transmute(vscalefps(
5987            a.as_f32x16(),
5988            b.as_f32x16(),
5989            src.as_f32x16(),
5990            k,
5991            _MM_FROUND_CUR_DIRECTION,
5992        ))
5993    }
5994}
5995
5996/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5997///
5998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
5999#[inline]
6000#[target_feature(enable = "avx512f")]
6001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6002#[cfg_attr(test, assert_instr(vscalefps))]
6003pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6004    unsafe {
6005        transmute(vscalefps(
6006            a.as_f32x16(),
6007            b.as_f32x16(),
6008            f32x16::ZERO,
6009            k,
6010            _MM_FROUND_CUR_DIRECTION,
6011        ))
6012    }
6013}
6014
6015/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6016///
6017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6018#[inline]
6019#[target_feature(enable = "avx512f,avx512vl")]
6020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6021#[cfg_attr(test, assert_instr(vscalefps))]
6022pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6023    unsafe {
6024        transmute(vscalefps256(
6025            a.as_f32x8(),
6026            b.as_f32x8(),
6027            f32x8::ZERO,
6028            0b11111111,
6029        ))
6030    }
6031}
6032
6033/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6034///
6035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6036#[inline]
6037#[target_feature(enable = "avx512f,avx512vl")]
6038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6039#[cfg_attr(test, assert_instr(vscalefps))]
6040pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6041    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6042}
6043
6044/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6045///
6046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6047#[inline]
6048#[target_feature(enable = "avx512f,avx512vl")]
6049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6050#[cfg_attr(test, assert_instr(vscalefps))]
6051pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6052    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6053}
6054
6055/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6056///
6057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6058#[inline]
6059#[target_feature(enable = "avx512f,avx512vl")]
6060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6061#[cfg_attr(test, assert_instr(vscalefps))]
6062pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6063    unsafe {
6064        transmute(vscalefps128(
6065            a.as_f32x4(),
6066            b.as_f32x4(),
6067            f32x4::ZERO,
6068            0b00001111,
6069        ))
6070    }
6071}
6072
6073/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6074///
6075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6076#[inline]
6077#[target_feature(enable = "avx512f,avx512vl")]
6078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6079#[cfg_attr(test, assert_instr(vscalefps))]
6080pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6081    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6082}
6083
6084/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6085///
6086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6087#[inline]
6088#[target_feature(enable = "avx512f,avx512vl")]
6089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6090#[cfg_attr(test, assert_instr(vscalefps))]
6091pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6092    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6093}
6094
6095/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6096///
6097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
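///
/// # Examples
///
/// An illustrative sketch (the helper below is hypothetical; it assumes the
/// `avx512f` target feature is available at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Halve every element: a * 2^floor(-1.0) = a * 0.5.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn halve(a: __m512d) -> __m512d {
///     _mm512_scalef_pd(a, _mm512_set1_pd(-1.0))
/// }
/// ```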
6098#[inline]
6099#[target_feature(enable = "avx512f")]
6100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6101#[cfg_attr(test, assert_instr(vscalefpd))]
6102pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6103    unsafe {
6104        transmute(vscalefpd(
6105            a.as_f64x8(),
6106            b.as_f64x8(),
6107            f64x8::ZERO,
6108            0b11111111,
6109            _MM_FROUND_CUR_DIRECTION,
6110        ))
6111    }
6112}
6113
6114/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6115///
6116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6117#[inline]
6118#[target_feature(enable = "avx512f")]
6119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6120#[cfg_attr(test, assert_instr(vscalefpd))]
6121pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6122    unsafe {
6123        transmute(vscalefpd(
6124            a.as_f64x8(),
6125            b.as_f64x8(),
6126            src.as_f64x8(),
6127            k,
6128            _MM_FROUND_CUR_DIRECTION,
6129        ))
6130    }
6131}
6132
6133/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6134///
6135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6136#[inline]
6137#[target_feature(enable = "avx512f")]
6138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6139#[cfg_attr(test, assert_instr(vscalefpd))]
6140pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6141    unsafe {
6142        transmute(vscalefpd(
6143            a.as_f64x8(),
6144            b.as_f64x8(),
6145            f64x8::ZERO,
6146            k,
6147            _MM_FROUND_CUR_DIRECTION,
6148        ))
6149    }
6150}
6151
6152/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6158#[cfg_attr(test, assert_instr(vscalefpd))]
6159pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6160    unsafe {
6161        transmute(vscalefpd256(
6162            a.as_f64x4(),
6163            b.as_f64x4(),
6164            f64x4::ZERO,
6165            0b00001111,
6166        ))
6167    }
6168}
6169
6170/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6171///
6172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6173#[inline]
6174#[target_feature(enable = "avx512f,avx512vl")]
6175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6176#[cfg_attr(test, assert_instr(vscalefpd))]
6177pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6178    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6179}
6180
6181/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6182///
6183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6184#[inline]
6185#[target_feature(enable = "avx512f,avx512vl")]
6186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6187#[cfg_attr(test, assert_instr(vscalefpd))]
6188pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6189    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6190}
6191
6192/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6193///
6194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6195#[inline]
6196#[target_feature(enable = "avx512f,avx512vl")]
6197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6198#[cfg_attr(test, assert_instr(vscalefpd))]
6199pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6200    unsafe {
6201        transmute(vscalefpd128(
6202            a.as_f64x2(),
6203            b.as_f64x2(),
6204            f64x2::ZERO,
6205            0b00000011,
6206        ))
6207    }
6208}
6209
6210/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6211///
6212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6213#[inline]
6214#[target_feature(enable = "avx512f,avx512vl")]
6215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6216#[cfg_attr(test, assert_instr(vscalefpd))]
6217pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6218    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6219}
6220
6221/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6224#[inline]
6225#[target_feature(enable = "avx512f,avx512vl")]
6226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6227#[cfg_attr(test, assert_instr(vscalefpd))]
6228pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6229    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6230}
6231
6232/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6233///
6234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
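///
/// # Examples
///
/// An illustrative sketch of the call shape only (the helper and the table value
/// below are hypothetical; it assumes the `avx512f` target feature is available
/// at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Each 4-bit field of the table in `c` selects the fixup response for one
/// // input class; an all-zero table selects response 0 for every class, which
/// // keeps the corresponding element of `a`. IMM8 = 0 requests no extra
/// // exception-flag reporting. See Intel's documentation for the encoding.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn fixup_noop(a: __m512, b: __m512) -> __m512 {
///     _mm512_fixupimm_ps::<0>(a, b, _mm512_setzero_si512())
/// }
/// ```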
6235#[inline]
6236#[target_feature(enable = "avx512f")]
6237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6238#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6239#[rustc_legacy_const_generics(3)]
6240pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6241    unsafe {
6242        static_assert_uimm_bits!(IMM8, 8);
6243        let a = a.as_f32x16();
6244        let b = b.as_f32x16();
6245        let c = c.as_i32x16();
6246        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6247        transmute(r)
6248    }
6249}
6250
6251/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6252///
6253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6254#[inline]
6255#[target_feature(enable = "avx512f")]
6256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6257#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6258#[rustc_legacy_const_generics(4)]
6259pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6260    a: __m512,
6261    k: __mmask16,
6262    b: __m512,
6263    c: __m512i,
6264) -> __m512 {
6265    unsafe {
6266        static_assert_uimm_bits!(IMM8, 8);
6267        let a = a.as_f32x16();
6268        let b = b.as_f32x16();
6269        let c = c.as_i32x16();
6270        let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6271        transmute(r)
6272    }
6273}
6274
6275/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6276///
6277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6278#[inline]
6279#[target_feature(enable = "avx512f")]
6280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6281#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6282#[rustc_legacy_const_generics(4)]
6283pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6284    k: __mmask16,
6285    a: __m512,
6286    b: __m512,
6287    c: __m512i,
6288) -> __m512 {
6289    unsafe {
6290        static_assert_uimm_bits!(IMM8, 8);
6291        let a = a.as_f32x16();
6292        let b = b.as_f32x16();
6293        let c = c.as_i32x16();
6294        let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6295        transmute(r)
6296    }
6297}
6298
6299/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6300///
6301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6302#[inline]
6303#[target_feature(enable = "avx512f,avx512vl")]
6304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6305#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6306#[rustc_legacy_const_generics(3)]
6307pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6308    unsafe {
6309        static_assert_uimm_bits!(IMM8, 8);
6310        let a = a.as_f32x8();
6311        let b = b.as_f32x8();
6312        let c = c.as_i32x8();
6313        let r = vfixupimmps256(a, b, c, IMM8, 0b11111111);
6314        transmute(r)
6315    }
6316}
6317
6318/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6319///
6320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6321#[inline]
6322#[target_feature(enable = "avx512f,avx512vl")]
6323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6324#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6325#[rustc_legacy_const_generics(4)]
6326pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6327    a: __m256,
6328    k: __mmask8,
6329    b: __m256,
6330    c: __m256i,
6331) -> __m256 {
6332    unsafe {
6333        static_assert_uimm_bits!(IMM8, 8);
6334        let a = a.as_f32x8();
6335        let b = b.as_f32x8();
6336        let c = c.as_i32x8();
6337        let r = vfixupimmps256(a, b, c, IMM8, k);
6338        transmute(r)
6339    }
6340}
6341
6342/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6343///
6344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6345#[inline]
6346#[target_feature(enable = "avx512f,avx512vl")]
6347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6348#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6349#[rustc_legacy_const_generics(4)]
6350pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6351    k: __mmask8,
6352    a: __m256,
6353    b: __m256,
6354    c: __m256i,
6355) -> __m256 {
6356    unsafe {
6357        static_assert_uimm_bits!(IMM8, 8);
6358        let a = a.as_f32x8();
6359        let b = b.as_f32x8();
6360        let c = c.as_i32x8();
6361        let r = vfixupimmpsz256(a, b, c, IMM8, k);
6362        transmute(r)
6363    }
6364}
6365
6366/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6367///
6368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6369#[inline]
6370#[target_feature(enable = "avx512f,avx512vl")]
6371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6372#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6373#[rustc_legacy_const_generics(3)]
6374pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6375    unsafe {
6376        static_assert_uimm_bits!(IMM8, 8);
6377        let a = a.as_f32x4();
6378        let b = b.as_f32x4();
6379        let c = c.as_i32x4();
6380        let r = vfixupimmps128(a, b, c, IMM8, 0b00001111);
6381        transmute(r)
6382    }
6383}
6384
6385/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6386///
6387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6388#[inline]
6389#[target_feature(enable = "avx512f,avx512vl")]
6390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6391#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6392#[rustc_legacy_const_generics(4)]
6393pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6394    a: __m128,
6395    k: __mmask8,
6396    b: __m128,
6397    c: __m128i,
6398) -> __m128 {
6399    unsafe {
6400        static_assert_uimm_bits!(IMM8, 8);
6401        let a = a.as_f32x4();
6402        let b = b.as_f32x4();
6403        let c = c.as_i32x4();
6404        let r = vfixupimmps128(a, b, c, IMM8, k);
6405        transmute(r)
6406    }
6407}
6408
6409/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6410///
6411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6412#[inline]
6413#[target_feature(enable = "avx512f,avx512vl")]
6414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6415#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6416#[rustc_legacy_const_generics(4)]
6417pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6418    k: __mmask8,
6419    a: __m128,
6420    b: __m128,
6421    c: __m128i,
6422) -> __m128 {
6423    unsafe {
6424        static_assert_uimm_bits!(IMM8, 8);
6425        let a = a.as_f32x4();
6426        let b = b.as_f32x4();
6427        let c = c.as_i32x4();
6428        let r = vfixupimmpsz128(a, b, c, IMM8, k);
6429        transmute(r)
6430    }
6431}
6432
6433/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
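///
/// # Examples
///
/// An illustrative sketch of the call shape only (the helper below is hypothetical;
/// the table encoding is described in Intel's documentation, and the `avx512f`
/// target feature must be available at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // `c` holds one 4-bit fixup-response field per input class for each element;
/// // IMM8 = 0 requests no extra exception-flag reporting.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn fixup(a: __m512d, b: __m512d, table: __m512i) -> __m512d {
///     _mm512_fixupimm_pd::<0>(a, b, table)
/// }
/// ```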
6436#[inline]
6437#[target_feature(enable = "avx512f")]
6438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6439#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6440#[rustc_legacy_const_generics(3)]
6441pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6442    unsafe {
6443        static_assert_uimm_bits!(IMM8, 8);
6444        let a = a.as_f64x8();
6445        let b = b.as_f64x8();
6446        let c = c.as_i64x8();
6447        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6448        transmute(r)
6449    }
6450}
6451
6452/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6453///
6454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6455#[inline]
6456#[target_feature(enable = "avx512f")]
6457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6458#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6459#[rustc_legacy_const_generics(4)]
6460pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6461    a: __m512d,
6462    k: __mmask8,
6463    b: __m512d,
6464    c: __m512i,
6465) -> __m512d {
6466    unsafe {
6467        static_assert_uimm_bits!(IMM8, 8);
6468        let a = a.as_f64x8();
6469        let b = b.as_f64x8();
6470        let c = c.as_i64x8();
6471        let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6472        transmute(r)
6473    }
6474}
6475
6476/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6477///
6478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6479#[inline]
6480#[target_feature(enable = "avx512f")]
6481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6482#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6483#[rustc_legacy_const_generics(4)]
6484pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6485    k: __mmask8,
6486    a: __m512d,
6487    b: __m512d,
6488    c: __m512i,
6489) -> __m512d {
6490    unsafe {
6491        static_assert_uimm_bits!(IMM8, 8);
6492        let a = a.as_f64x8();
6493        let b = b.as_f64x8();
6494        let c = c.as_i64x8();
6495        let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6496        transmute(r)
6497    }
6498}
6499
6500/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6501///
6502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6503#[inline]
6504#[target_feature(enable = "avx512f,avx512vl")]
6505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6506#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6507#[rustc_legacy_const_generics(3)]
6508pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6509    unsafe {
6510        static_assert_uimm_bits!(IMM8, 8);
6511        let a = a.as_f64x4();
6512        let b = b.as_f64x4();
6513        let c = c.as_i64x4();
6514        let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
6515        transmute(r)
6516    }
6517}
6518
6519/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6522#[inline]
6523#[target_feature(enable = "avx512f,avx512vl")]
6524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6525#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6526#[rustc_legacy_const_generics(4)]
6527pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6528    a: __m256d,
6529    k: __mmask8,
6530    b: __m256d,
6531    c: __m256i,
6532) -> __m256d {
6533    unsafe {
6534        static_assert_uimm_bits!(IMM8, 8);
6535        let a = a.as_f64x4();
6536        let b = b.as_f64x4();
6537        let c = c.as_i64x4();
6538        let r = vfixupimmpd256(a, b, c, IMM8, k);
6539        transmute(r)
6540    }
6541}
6542
6543/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6544///
6545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6546#[inline]
6547#[target_feature(enable = "avx512f,avx512vl")]
6548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6549#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6550#[rustc_legacy_const_generics(4)]
6551pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6552    k: __mmask8,
6553    a: __m256d,
6554    b: __m256d,
6555    c: __m256i,
6556) -> __m256d {
6557    unsafe {
6558        static_assert_uimm_bits!(IMM8, 8);
6559        let a = a.as_f64x4();
6560        let b = b.as_f64x4();
6561        let c = c.as_i64x4();
6562        let r = vfixupimmpdz256(a, b, c, IMM8, k);
6563        transmute(r)
6564    }
6565}
6566
6567/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6568///
6569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6570#[inline]
6571#[target_feature(enable = "avx512f,avx512vl")]
6572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6573#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6574#[rustc_legacy_const_generics(3)]
6575pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6576    unsafe {
6577        static_assert_uimm_bits!(IMM8, 8);
6578        let a = a.as_f64x2();
6579        let b = b.as_f64x2();
6580        let c = c.as_i64x2();
6581        let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6582        transmute(r)
6583    }
6584}
6585
6586/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6589#[inline]
6590#[target_feature(enable = "avx512f,avx512vl")]
6591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6592#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6593#[rustc_legacy_const_generics(4)]
6594pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6595    a: __m128d,
6596    k: __mmask8,
6597    b: __m128d,
6598    c: __m128i,
6599) -> __m128d {
6600    unsafe {
6601        static_assert_uimm_bits!(IMM8, 8);
6602        let a = a.as_f64x2();
6603        let b = b.as_f64x2();
6604        let c = c.as_i64x2();
6605        let r = vfixupimmpd128(a, b, c, IMM8, k);
6606        transmute(r)
6607    }
6608}
6609
6610/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6611///
6612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6613#[inline]
6614#[target_feature(enable = "avx512f,avx512vl")]
6615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6616#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6617#[rustc_legacy_const_generics(4)]
6618pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6619    k: __mmask8,
6620    a: __m128d,
6621    b: __m128d,
6622    c: __m128i,
6623) -> __m128d {
6624    unsafe {
6625        static_assert_uimm_bits!(IMM8, 8);
6626        let a = a.as_f64x2();
6627        let b = b.as_f64x2();
6628        let c = c.as_i64x2();
6629        let r = vfixupimmpdz128(a, b, c, IMM8, k);
6630        transmute(r)
6631    }
6632}
6633
6634/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6635///
6636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
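///
/// # Examples
///
/// An illustrative sketch (the helper below is hypothetical; it assumes the
/// `avx512f` target feature is available at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // imm8 is the 8-entry truth table indexed by (bit of a, bit of b, bit of c);
/// // 0x96 encodes three-way XOR: dst = a ^ b ^ c.
/// #[target_feature(enable = "avx512f")]
/// unsafe fn xor3(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
///     _mm512_ternarylogic_epi32::<0x96>(a, b, c)
/// }
/// ```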
6637#[inline]
6638#[target_feature(enable = "avx512f")]
6639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6640#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6641#[rustc_legacy_const_generics(3)]
6642pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6643    unsafe {
6644        static_assert_uimm_bits!(IMM8, 8);
6645        let a = a.as_i32x16();
6646        let b = b.as_i32x16();
6647        let c = c.as_i32x16();
6648        let r = vpternlogd(a, b, c, IMM8);
6649        transmute(r)
6650    }
6651}
6652
6653/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6656#[inline]
6657#[target_feature(enable = "avx512f")]
6658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6659#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6660#[rustc_legacy_const_generics(4)]
6661pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6662    src: __m512i,
6663    k: __mmask16,
6664    a: __m512i,
6665    b: __m512i,
6666) -> __m512i {
6667    unsafe {
6668        static_assert_uimm_bits!(IMM8, 8);
6669        let src = src.as_i32x16();
6670        let a = a.as_i32x16();
6671        let b = b.as_i32x16();
6672        let r = vpternlogd(src, a, b, IMM8);
6673        transmute(simd_select_bitmask(k, r, src))
6674    }
6675}
6676
6677/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6680#[inline]
6681#[target_feature(enable = "avx512f")]
6682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6683#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6684#[rustc_legacy_const_generics(4)]
6685pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6686    k: __mmask16,
6687    a: __m512i,
6688    b: __m512i,
6689    c: __m512i,
6690) -> __m512i {
6691    unsafe {
6692        static_assert_uimm_bits!(IMM8, 8);
6693        let a = a.as_i32x16();
6694        let b = b.as_i32x16();
6695        let c = c.as_i32x16();
6696        let r = vpternlogd(a, b, c, IMM8);
6697        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6698    }
6699}
6700
6701/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6702///
6703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6704#[inline]
6705#[target_feature(enable = "avx512f,avx512vl")]
6706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6707#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6708#[rustc_legacy_const_generics(3)]
6709pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6710    unsafe {
6711        static_assert_uimm_bits!(IMM8, 8);
6712        let a = a.as_i32x8();
6713        let b = b.as_i32x8();
6714        let c = c.as_i32x8();
6715        let r = vpternlogd256(a, b, c, IMM8);
6716        transmute(r)
6717    }
6718}
6719
6720/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6723#[inline]
6724#[target_feature(enable = "avx512f,avx512vl")]
6725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6726#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6727#[rustc_legacy_const_generics(4)]
6728pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6729    src: __m256i,
6730    k: __mmask8,
6731    a: __m256i,
6732    b: __m256i,
6733) -> __m256i {
6734    unsafe {
6735        static_assert_uimm_bits!(IMM8, 8);
6736        let src = src.as_i32x8();
6737        let a = a.as_i32x8();
6738        let b = b.as_i32x8();
6739        let r = vpternlogd256(src, a, b, IMM8);
6740        transmute(simd_select_bitmask(k, r, src))
6741    }
6742}
6743
6744/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6747#[inline]
6748#[target_feature(enable = "avx512f,avx512vl")]
6749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6750#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6751#[rustc_legacy_const_generics(4)]
6752pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6753    k: __mmask8,
6754    a: __m256i,
6755    b: __m256i,
6756    c: __m256i,
6757) -> __m256i {
6758    unsafe {
6759        static_assert_uimm_bits!(IMM8, 8);
6760        let a = a.as_i32x8();
6761        let b = b.as_i32x8();
6762        let c = c.as_i32x8();
6763        let r = vpternlogd256(a, b, c, IMM8);
6764        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6765    }
6766}
6767
6768/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6769///
6770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6771#[inline]
6772#[target_feature(enable = "avx512f,avx512vl")]
6773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6774#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6775#[rustc_legacy_const_generics(3)]
6776pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6777    unsafe {
6778        static_assert_uimm_bits!(IMM8, 8);
6779        let a = a.as_i32x4();
6780        let b = b.as_i32x4();
6781        let c = c.as_i32x4();
6782        let r = vpternlogd128(a, b, c, IMM8);
6783        transmute(r)
6784    }
6785}
6786
6787/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6788///
6789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6790#[inline]
6791#[target_feature(enable = "avx512f,avx512vl")]
6792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6793#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6794#[rustc_legacy_const_generics(4)]
6795pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6796    src: __m128i,
6797    k: __mmask8,
6798    a: __m128i,
6799    b: __m128i,
6800) -> __m128i {
6801    unsafe {
6802        static_assert_uimm_bits!(IMM8, 8);
6803        let src = src.as_i32x4();
6804        let a = a.as_i32x4();
6805        let b = b.as_i32x4();
6806        let r = vpternlogd128(src, a, b, IMM8);
6807        transmute(simd_select_bitmask(k, r, src))
6808    }
6809}
6810
6811/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6812///
6813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6814#[inline]
6815#[target_feature(enable = "avx512f,avx512vl")]
6816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6817#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6818#[rustc_legacy_const_generics(4)]
6819pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6820    k: __mmask8,
6821    a: __m128i,
6822    b: __m128i,
6823    c: __m128i,
6824) -> __m128i {
6825    unsafe {
6826        static_assert_uimm_bits!(IMM8, 8);
6827        let a = a.as_i32x4();
6828        let b = b.as_i32x4();
6829        let c = c.as_i32x4();
6830        let r = vpternlogd128(a, b, c, IMM8);
6831        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6832    }
6833}
6834
6835/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6836///
6837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
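///
/// The 8-bit immediate acts as a truth table indexed by the bit triple
/// `(a_bit << 2) | (b_bit << 1) | c_bit`: for example `0x96` encodes three-way XOR and
/// `0xE8` encodes the majority function. A minimal sketch with a hypothetical wrapper
/// (illustrative only):
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn xor3_and_majority(a: __m512i, b: __m512i, c: __m512i) -> (__m512i, __m512i) {
///     // 0x96: each result bit is a ^ b ^ c
///     let xor3 = _mm512_ternarylogic_epi64::<0x96>(a, b, c);
///     // 0xE8: each result bit is set when at least two of a, b, c are set
///     let maj = _mm512_ternarylogic_epi64::<0xE8>(a, b, c);
///     (xor3, maj)
/// }
/// ```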
6838#[inline]
6839#[target_feature(enable = "avx512f")]
6840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6841#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6842#[rustc_legacy_const_generics(3)]
6843pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6844    unsafe {
6845        static_assert_uimm_bits!(IMM8, 8);
6846        let a = a.as_i64x8();
6847        let b = b.as_i64x8();
6848        let c = c.as_i64x8();
6849        let r = vpternlogq(a, b, c, IMM8);
6850        transmute(r)
6851    }
6852}
6853
6854/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6855///
6856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
6857#[inline]
6858#[target_feature(enable = "avx512f")]
6859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6860#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6861#[rustc_legacy_const_generics(4)]
6862pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
6863    src: __m512i,
6864    k: __mmask8,
6865    a: __m512i,
6866    b: __m512i,
6867) -> __m512i {
6868    unsafe {
6869        static_assert_uimm_bits!(IMM8, 8);
6870        let src = src.as_i64x8();
6871        let a = a.as_i64x8();
6872        let b = b.as_i64x8();
6873        let r = vpternlogq(src, a, b, IMM8);
6874        transmute(simd_select_bitmask(k, r, src))
6875    }
6876}
6877
6878/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6879///
6880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
6881#[inline]
6882#[target_feature(enable = "avx512f")]
6883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6884#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6885#[rustc_legacy_const_generics(4)]
6886pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
6887    k: __mmask8,
6888    a: __m512i,
6889    b: __m512i,
6890    c: __m512i,
6891) -> __m512i {
6892    unsafe {
6893        static_assert_uimm_bits!(IMM8, 8);
6894        let a = a.as_i64x8();
6895        let b = b.as_i64x8();
6896        let c = c.as_i64x8();
6897        let r = vpternlogq(a, b, c, IMM8);
6898        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
6899    }
6900}
6901
6902/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst.
6903///
6904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
6905#[inline]
6906#[target_feature(enable = "avx512f,avx512vl")]
6907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6908#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6909#[rustc_legacy_const_generics(3)]
6910pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6911    unsafe {
6912        static_assert_uimm_bits!(IMM8, 8);
6913        let a = a.as_i64x4();
6914        let b = b.as_i64x4();
6915        let c = c.as_i64x4();
6916        let r = vpternlogq256(a, b, c, IMM8);
6917        transmute(r)
6918    }
6919}
6920
6921/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6922///
6923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
6924#[inline]
6925#[target_feature(enable = "avx512f,avx512vl")]
6926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6927#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6928#[rustc_legacy_const_generics(4)]
6929pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
6930    src: __m256i,
6931    k: __mmask8,
6932    a: __m256i,
6933    b: __m256i,
6934) -> __m256i {
6935    unsafe {
6936        static_assert_uimm_bits!(IMM8, 8);
6937        let src = src.as_i64x4();
6938        let a = a.as_i64x4();
6939        let b = b.as_i64x4();
6940        let r = vpternlogq256(src, a, b, IMM8);
6941        transmute(simd_select_bitmask(k, r, src))
6942    }
6943}
6944
6945/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6946///
6947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
6948#[inline]
6949#[target_feature(enable = "avx512f,avx512vl")]
6950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6951#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6952#[rustc_legacy_const_generics(4)]
6953pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
6954    k: __mmask8,
6955    a: __m256i,
6956    b: __m256i,
6957    c: __m256i,
6958) -> __m256i {
6959    unsafe {
6960        static_assert_uimm_bits!(IMM8, 8);
6961        let a = a.as_i64x4();
6962        let b = b.as_i64x4();
6963        let c = c.as_i64x4();
6964        let r = vpternlogq256(a, b, c, IMM8);
6965        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
6966    }
6967}
6968
6969/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst.
6970///
6971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
6972#[inline]
6973#[target_feature(enable = "avx512f,avx512vl")]
6974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6975#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6976#[rustc_legacy_const_generics(3)]
6977pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6978    unsafe {
6979        static_assert_uimm_bits!(IMM8, 8);
6980        let a = a.as_i64x2();
6981        let b = b.as_i64x2();
6982        let c = c.as_i64x2();
6983        let r = vpternlogq128(a, b, c, IMM8);
6984        transmute(r)
6985    }
6986}
6987
6988/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6989///
6990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
6991#[inline]
6992#[target_feature(enable = "avx512f,avx512vl")]
6993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6994#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6995#[rustc_legacy_const_generics(4)]
6996pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
6997    src: __m128i,
6998    k: __mmask8,
6999    a: __m128i,
7000    b: __m128i,
7001) -> __m128i {
7002    unsafe {
7003        static_assert_uimm_bits!(IMM8, 8);
7004        let src = src.as_i64x2();
7005        let a = a.as_i64x2();
7006        let b = b.as_i64x2();
7007        let r = vpternlogq128(src, a, b, IMM8);
7008        transmute(simd_select_bitmask(k, r, src))
7009    }
7010}
7011
7012/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7013///
7014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7015#[inline]
7016#[target_feature(enable = "avx512f,avx512vl")]
7017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7018#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7019#[rustc_legacy_const_generics(4)]
7020pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7021    k: __mmask8,
7022    a: __m128i,
7023    b: __m128i,
7024    c: __m128i,
7025) -> __m128i {
7026    unsafe {
7027        static_assert_uimm_bits!(IMM8, 8);
7028        let a = a.as_i64x2();
7029        let b = b.as_i64x2();
7030        let c = c.as_i64x2();
7031        let r = vpternlogq128(a, b, c, IMM8);
7032        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
7033    }
7034}
7035
7036/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7037/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7038///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7039///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7040///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7041///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7042/// The sign is determined by sc which can take the following values:\
7043///    _MM_MANT_SIGN_src     // sign = sign(src)\
7044///    _MM_MANT_SIGN_zero    // sign = 0\
7045///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7046///
7047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
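///
/// A minimal sketch with a hypothetical wrapper, using the constants listed above
/// (illustrative only): normalizing into the interval [1, 2) while keeping the source
/// sign maps 1.5 to 1.5 and -6.0 (= -1.5 * 2^2) to -1.5.
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn mantissas_1_2(a: __m512) -> __m512 {
///     // interval [1, 2), sign taken from the source element
///     _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a)
/// }
/// ```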
7048#[inline]
7049#[target_feature(enable = "avx512f")]
7050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7051#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7052#[rustc_legacy_const_generics(1, 2)]
7053pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7054    a: __m512,
7055) -> __m512 {
7056    unsafe {
7057        static_assert_uimm_bits!(NORM, 4);
7058        static_assert_uimm_bits!(SIGN, 2);
7059        let a = a.as_f32x16();
7060        let zero = f32x16::ZERO;
7061        let r = vgetmantps(
7062            a,
7063            SIGN << 2 | NORM,
7064            zero,
7065            0b11111111_11111111,
7066            _MM_FROUND_CUR_DIRECTION,
7067        );
7068        transmute(r)
7069    }
7070}
7071
7072/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7073/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7074///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7075///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7076///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7077///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7078/// The sign is determined by sc which can take the following values:\
7079///    _MM_MANT_SIGN_src     // sign = sign(src)\
7080///    _MM_MANT_SIGN_zero    // sign = 0\
7081///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7082///
7083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7084#[inline]
7085#[target_feature(enable = "avx512f")]
7086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7087#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7088#[rustc_legacy_const_generics(3, 4)]
7089pub fn _mm512_mask_getmant_ps<
7090    const NORM: _MM_MANTISSA_NORM_ENUM,
7091    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7092>(
7093    src: __m512,
7094    k: __mmask16,
7095    a: __m512,
7096) -> __m512 {
7097    unsafe {
7098        static_assert_uimm_bits!(NORM, 4);
7099        static_assert_uimm_bits!(SIGN, 2);
7100        let a = a.as_f32x16();
7101        let src = src.as_f32x16();
7102        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7103        transmute(r)
7104    }
7105}
7106
7107/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7108/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7109///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7110///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7111///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7112///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7113/// The sign is determined by sc which can take the following values:\
7114///    _MM_MANT_SIGN_src     // sign = sign(src)\
7115///    _MM_MANT_SIGN_zero    // sign = 0\
7116///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7122#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7123#[rustc_legacy_const_generics(2, 3)]
7124pub fn _mm512_maskz_getmant_ps<
7125    const NORM: _MM_MANTISSA_NORM_ENUM,
7126    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7127>(
7128    k: __mmask16,
7129    a: __m512,
7130) -> __m512 {
7131    unsafe {
7132        static_assert_uimm_bits!(NORM, 4);
7133        static_assert_uimm_bits!(SIGN, 2);
7134        let a = a.as_f32x16();
7135        let r = vgetmantps(
7136            a,
7137            SIGN << 2 | NORM,
7138            f32x16::ZERO,
7139            k,
7140            _MM_FROUND_CUR_DIRECTION,
7141        );
7142        transmute(r)
7143    }
7144}
7145
7146/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7147/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7148///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7149///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7150///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7151///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7152/// The sign is determined by sc which can take the following values:\
7153///    _MM_MANT_SIGN_src     // sign = sign(src)\
7154///    _MM_MANT_SIGN_zero    // sign = 0\
7155///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7156///
7157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7158#[inline]
7159#[target_feature(enable = "avx512f,avx512vl")]
7160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7161#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7162#[rustc_legacy_const_generics(1, 2)]
7163pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7164    a: __m256,
7165) -> __m256 {
7166    unsafe {
7167        static_assert_uimm_bits!(NORM, 4);
7168        static_assert_uimm_bits!(SIGN, 2);
7169        let a = a.as_f32x8();
7170        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
7171        transmute(r)
7172    }
7173}
7174
7175/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7177///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7178///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7179///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7180///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7181/// The sign is determined by sc which can take the following values:\
7182///    _MM_MANT_SIGN_src     // sign = sign(src)\
7183///    _MM_MANT_SIGN_zero    // sign = 0\
7184///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7185///
7186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7187#[inline]
7188#[target_feature(enable = "avx512f,avx512vl")]
7189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7190#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7191#[rustc_legacy_const_generics(3, 4)]
7192pub fn _mm256_mask_getmant_ps<
7193    const NORM: _MM_MANTISSA_NORM_ENUM,
7194    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7195>(
7196    src: __m256,
7197    k: __mmask8,
7198    a: __m256,
7199) -> __m256 {
7200    unsafe {
7201        static_assert_uimm_bits!(NORM, 4);
7202        static_assert_uimm_bits!(SIGN, 2);
7203        let a = a.as_f32x8();
7204        let src = src.as_f32x8();
7205        let r = vgetmantps256(a, SIGN << 2 | NORM, src, k);
7206        transmute(r)
7207    }
7208}
7209
7210/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7211/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7212///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7213///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7214///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7215///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7216/// The sign is determined by sc which can take the following values:\
7217///    _MM_MANT_SIGN_src     // sign = sign(src)\
7218///    _MM_MANT_SIGN_zero    // sign = 0\
7219///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7220///
7221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7222#[inline]
7223#[target_feature(enable = "avx512f,avx512vl")]
7224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7225#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7226#[rustc_legacy_const_generics(2, 3)]
7227pub fn _mm256_maskz_getmant_ps<
7228    const NORM: _MM_MANTISSA_NORM_ENUM,
7229    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7230>(
7231    k: __mmask8,
7232    a: __m256,
7233) -> __m256 {
7234    unsafe {
7235        static_assert_uimm_bits!(NORM, 4);
7236        static_assert_uimm_bits!(SIGN, 2);
7237        let a = a.as_f32x8();
7238        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
7239        transmute(r)
7240    }
7241}
7242
7243/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7244/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7245///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7246///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7247///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7248///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7249/// The sign is determined by sc which can take the following values:\
7250///    _MM_MANT_SIGN_src     // sign = sign(src)\
7251///    _MM_MANT_SIGN_zero    // sign = 0\
7252///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7253///
7254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7255#[inline]
7256#[target_feature(enable = "avx512f,avx512vl")]
7257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7258#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7259#[rustc_legacy_const_generics(1, 2)]
7260pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7261    a: __m128,
7262) -> __m128 {
7263    unsafe {
7264        static_assert_uimm_bits!(NORM, 4);
7265        static_assert_uimm_bits!(SIGN, 2);
7266        let a = a.as_f32x4();
7267        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
7268        transmute(r)
7269    }
7270}
7271
7272/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7273/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7274///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7275///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7276///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7277///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7278/// The sign is determined by sc which can take the following values:\
7279///    _MM_MANT_SIGN_src     // sign = sign(src)\
7280///    _MM_MANT_SIGN_zero    // sign = 0\
7281///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7282///
7283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7284#[inline]
7285#[target_feature(enable = "avx512f,avx512vl")]
7286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7287#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7288#[rustc_legacy_const_generics(3, 4)]
7289pub fn _mm_mask_getmant_ps<
7290    const NORM: _MM_MANTISSA_NORM_ENUM,
7291    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7292>(
7293    src: __m128,
7294    k: __mmask8,
7295    a: __m128,
7296) -> __m128 {
7297    unsafe {
7298        static_assert_uimm_bits!(NORM, 4);
7299        static_assert_uimm_bits!(SIGN, 2);
7300        let a = a.as_f32x4();
7301        let src = src.as_f32x4();
7302        let r = vgetmantps128(a, SIGN << 2 | NORM, src, k);
7303        transmute(r)
7304    }
7305}
7306
7307/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7308/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7309///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7310///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7311///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7312///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7313/// The sign is determined by sc which can take the following values:\
7314///    _MM_MANT_SIGN_src     // sign = sign(src)\
7315///    _MM_MANT_SIGN_zero    // sign = 0\
7316///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7317///
7318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7319#[inline]
7320#[target_feature(enable = "avx512f,avx512vl")]
7321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7322#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7323#[rustc_legacy_const_generics(2, 3)]
7324pub fn _mm_maskz_getmant_ps<
7325    const NORM: _MM_MANTISSA_NORM_ENUM,
7326    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7327>(
7328    k: __mmask8,
7329    a: __m128,
7330) -> __m128 {
7331    unsafe {
7332        static_assert_uimm_bits!(NORM, 4);
7333        static_assert_uimm_bits!(SIGN, 2);
7334        let a = a.as_f32x4();
7335        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
7336        transmute(r)
7337    }
7338}
7339
7340/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7341/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7342///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7343///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7344///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7345///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7346/// The sign is determined by sc which can take the following values:\
7347///    _MM_MANT_SIGN_src     // sign = sign(src)\
7348///    _MM_MANT_SIGN_zero    // sign = 0\
7349///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7350///
7351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
7352#[inline]
7353#[target_feature(enable = "avx512f")]
7354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7355#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7356#[rustc_legacy_const_generics(1, 2)]
7357pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7358    a: __m512d,
7359) -> __m512d {
7360    unsafe {
7361        static_assert_uimm_bits!(NORM, 4);
7362        static_assert_uimm_bits!(SIGN, 2);
7363        let a = a.as_f64x8();
7364        let zero = f64x8::ZERO;
7365        let r = vgetmantpd(
7366            a,
7367            SIGN << 2 | NORM,
7368            zero,
7369            0b11111111,
7370            _MM_FROUND_CUR_DIRECTION,
7371        );
7372        transmute(r)
7373    }
7374}
7375
7376/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7377/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7378///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7379///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7380///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7381///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7382/// The sign is determined by sc which can take the following values:\
7383///    _MM_MANT_SIGN_src     // sign = sign(src)\
7384///    _MM_MANT_SIGN_zero    // sign = 0\
7385///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7386///
7387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7388#[inline]
7389#[target_feature(enable = "avx512f")]
7390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7391#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7392#[rustc_legacy_const_generics(3, 4)]
7393pub fn _mm512_mask_getmant_pd<
7394    const NORM: _MM_MANTISSA_NORM_ENUM,
7395    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7396>(
7397    src: __m512d,
7398    k: __mmask8,
7399    a: __m512d,
7400) -> __m512d {
7401    unsafe {
7402        static_assert_uimm_bits!(NORM, 4);
7403        static_assert_uimm_bits!(SIGN, 2);
7404        let a = a.as_f64x8();
7405        let src = src.as_f64x8();
7406        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7407        transmute(r)
7408    }
7409}
7410
7411/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7412/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7413///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7414///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7415///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7416///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7417/// The sign is determined by sc which can take the following values:\
7418///    _MM_MANT_SIGN_src     // sign = sign(src)\
7419///    _MM_MANT_SIGN_zero    // sign = 0\
7420///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7421///
7422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
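///
/// A minimal sketch of the zeromask behavior with a hypothetical wrapper, using the
/// constants listed above (illustrative only): lanes whose mask bit is clear become
/// `0.0`, the remaining lanes hold the normalized mantissas.
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn low_half_mantissas(a: __m512d) -> __m512d {
///     // lanes 0..4 get mantissas in [0.5, 1) with the source sign; lanes 4..8 become 0.0
///     _mm512_maskz_getmant_pd::<_MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src>(0b00001111, a)
/// }
/// ```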
7423#[inline]
7424#[target_feature(enable = "avx512f")]
7425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7426#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7427#[rustc_legacy_const_generics(2, 3)]
7428pub fn _mm512_maskz_getmant_pd<
7429    const NORM: _MM_MANTISSA_NORM_ENUM,
7430    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7431>(
7432    k: __mmask8,
7433    a: __m512d,
7434) -> __m512d {
7435    unsafe {
7436        static_assert_uimm_bits!(NORM, 4);
7437        static_assert_uimm_bits!(SIGN, 2);
7438        let a = a.as_f64x8();
7439        let r = vgetmantpd(
7440            a,
7441            SIGN << 2 | NORM,
7442            f64x8::ZERO,
7443            k,
7444            _MM_FROUND_CUR_DIRECTION,
7445        );
7446        transmute(r)
7447    }
7448}
7449
7450/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7451/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7452///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7453///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7454///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7455///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7456/// The sign is determined by sc which can take the following values:\
7457///    _MM_MANT_SIGN_src     // sign = sign(src)\
7458///    _MM_MANT_SIGN_zero    // sign = 0\
7459///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7460///
7461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7462#[inline]
7463#[target_feature(enable = "avx512f,avx512vl")]
7464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7465#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7466#[rustc_legacy_const_generics(1, 2)]
7467pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7468    a: __m256d,
7469) -> __m256d {
7470    unsafe {
7471        static_assert_uimm_bits!(NORM, 4);
7472        static_assert_uimm_bits!(SIGN, 2);
7473        let a = a.as_f64x4();
7474        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
7475        transmute(r)
7476    }
7477}
7478
7479/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7480/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7481///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7482///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7483///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7484///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7485/// The sign is determined by sc which can take the following values:\
7486///    _MM_MANT_SIGN_src     // sign = sign(src)\
7487///    _MM_MANT_SIGN_zero    // sign = 0\
7488///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7489///
7490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7491#[inline]
7492#[target_feature(enable = "avx512f,avx512vl")]
7493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7494#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7495#[rustc_legacy_const_generics(3, 4)]
7496pub fn _mm256_mask_getmant_pd<
7497    const NORM: _MM_MANTISSA_NORM_ENUM,
7498    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7499>(
7500    src: __m256d,
7501    k: __mmask8,
7502    a: __m256d,
7503) -> __m256d {
7504    unsafe {
7505        static_assert_uimm_bits!(NORM, 4);
7506        static_assert_uimm_bits!(SIGN, 2);
7507        let a = a.as_f64x4();
7508        let src = src.as_f64x4();
7509        let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
7510        transmute(r)
7511    }
7512}
7513
7514/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7515/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7516///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7517///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7518///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7519///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7520/// The sign is determined by sc which can take the following values:\
7521///    _MM_MANT_SIGN_src     // sign = sign(src)\
7522///    _MM_MANT_SIGN_zero    // sign = 0\
7523///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7524///
7525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7526#[inline]
7527#[target_feature(enable = "avx512f,avx512vl")]
7528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7529#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7530#[rustc_legacy_const_generics(2, 3)]
7531pub fn _mm256_maskz_getmant_pd<
7532    const NORM: _MM_MANTISSA_NORM_ENUM,
7533    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7534>(
7535    k: __mmask8,
7536    a: __m256d,
7537) -> __m256d {
7538    unsafe {
7539        static_assert_uimm_bits!(NORM, 4);
7540        static_assert_uimm_bits!(SIGN, 2);
7541        let a = a.as_f64x4();
7542        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
7543        transmute(r)
7544    }
7545}
7546
7547/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7548/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7549///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7550///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7551///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7552///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7553/// The sign is determined by sc which can take the following values:\
7554///    _MM_MANT_SIGN_src     // sign = sign(src)\
7555///    _MM_MANT_SIGN_zero    // sign = 0\
7556///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7557///
7558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7559#[inline]
7560#[target_feature(enable = "avx512f,avx512vl")]
7561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7562#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7563#[rustc_legacy_const_generics(1, 2)]
7564pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7565    a: __m128d,
7566) -> __m128d {
7567    unsafe {
7568        static_assert_uimm_bits!(NORM, 4);
7569        static_assert_uimm_bits!(SIGN, 2);
7570        let a = a.as_f64x2();
7571        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
7572        transmute(r)
7573    }
7574}
7575
7576/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7577/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7578///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7579///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7580///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7581///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7582/// The sign is determined by sc which can take the following values:\
7583///    _MM_MANT_SIGN_src     // sign = sign(src)\
7584///    _MM_MANT_SIGN_zero    // sign = 0\
7585///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7586///
7587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7588#[inline]
7589#[target_feature(enable = "avx512f,avx512vl")]
7590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7591#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7592#[rustc_legacy_const_generics(3, 4)]
7593pub fn _mm_mask_getmant_pd<
7594    const NORM: _MM_MANTISSA_NORM_ENUM,
7595    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7596>(
7597    src: __m128d,
7598    k: __mmask8,
7599    a: __m128d,
7600) -> __m128d {
7601    unsafe {
7602        static_assert_uimm_bits!(NORM, 4);
7603        static_assert_uimm_bits!(SIGN, 2);
7604        let a = a.as_f64x2();
7605        let src = src.as_f64x2();
7606        let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
7607        transmute(r)
7608    }
7609}
7610
7611/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7612/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7613///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7614///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7615///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7616///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7617/// The sign is determined by sc which can take the following values:\
7618///    _MM_MANT_SIGN_src     // sign = sign(src)\
7619///    _MM_MANT_SIGN_zero    // sign = 0\
7620///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7621///
7622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7623#[inline]
7624#[target_feature(enable = "avx512f,avx512vl")]
7625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7626#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7627#[rustc_legacy_const_generics(2, 3)]
7628pub fn _mm_maskz_getmant_pd<
7629    const NORM: _MM_MANTISSA_NORM_ENUM,
7630    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7631>(
7632    k: __mmask8,
7633    a: __m128d,
7634) -> __m128d {
7635    unsafe {
7636        static_assert_uimm_bits!(NORM, 4);
7637        static_assert_uimm_bits!(SIGN, 2);
7638        let a = a.as_f64x2();
7639        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
7640        transmute(r)
7641    }
7642}
7643
7644/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7645///
7646/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7647/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7648/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7649/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7650/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7651/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7652///
7653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
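///
/// A minimal sketch of selecting an explicit rounding mode, with a hypothetical wrapper
/// (illustrative only): with round-toward-negative-infinity, `1.0 + 2^-25` stays `1.0`
/// in `f32`, whereas rounding up would yield the next representable value above `1.0`.
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn add_round_down(a: __m512, b: __m512) -> __m512 {
///     // round toward -inf and suppress exceptions
///     _mm512_add_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```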
7654#[inline]
7655#[target_feature(enable = "avx512f")]
7656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7657#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7658#[rustc_legacy_const_generics(2)]
7659pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7660    unsafe {
7661        static_assert_rounding!(ROUNDING);
7662        let a = a.as_f32x16();
7663        let b = b.as_f32x16();
7664        let r = vaddps(a, b, ROUNDING);
7665        transmute(r)
7666    }
7667}
7668
7669/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7670///
7671/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7672/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7673/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7674/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7675/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7677///
7678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
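///
/// A minimal sketch combining the writemask with an explicit rounding mode, using a
/// hypothetical wrapper (illustrative only): lanes whose mask bit is clear are copied
/// from `src`, the rest receive the rounded sums.
///
/// ```ignore
/// #[target_feature(enable = "avx512f")]
/// fn masked_truncating_add(src: __m512, a: __m512, b: __m512) -> __m512 {
///     // only the low eight lanes receive the truncated sum; the high eight keep `src`
///     _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0x00ff, a, b)
/// }
/// ```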
7679#[inline]
7680#[target_feature(enable = "avx512f")]
7681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7682#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7683#[rustc_legacy_const_generics(4)]
7684pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7685    src: __m512,
7686    k: __mmask16,
7687    a: __m512,
7688    b: __m512,
7689) -> __m512 {
7690    unsafe {
7691        static_assert_rounding!(ROUNDING);
7692        let a = a.as_f32x16();
7693        let b = b.as_f32x16();
7694        let r = vaddps(a, b, ROUNDING);
7695        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7696    }
7697}
7698
7699/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7700///
7701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7707///
7708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7709#[inline]
7710#[target_feature(enable = "avx512f")]
7711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7712#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7713#[rustc_legacy_const_generics(3)]
7714pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7715    k: __mmask16,
7716    a: __m512,
7717    b: __m512,
7718) -> __m512 {
7719    unsafe {
7720        static_assert_rounding!(ROUNDING);
7721        let a = a.as_f32x16();
7722        let b = b.as_f32x16();
7723        let r = vaddps(a, b, ROUNDING);
7724        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7725    }
7726}
7727
7728/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7729///
7730/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7731/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7732/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7733/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7734/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7735/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7736///
7737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
7738#[inline]
7739#[target_feature(enable = "avx512f")]
7740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7741#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7742#[rustc_legacy_const_generics(2)]
7743pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7744    unsafe {
7745        static_assert_rounding!(ROUNDING);
7746        let a = a.as_f64x8();
7747        let b = b.as_f64x8();
7748        let r = vaddpd(a, b, ROUNDING);
7749        transmute(r)
7750    }
7751}
7752
7753/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7754///
7755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7761///
7762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
7763#[inline]
7764#[target_feature(enable = "avx512f")]
7765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7766#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7767#[rustc_legacy_const_generics(4)]
7768pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
7769    src: __m512d,
7770    k: __mmask8,
7771    a: __m512d,
7772    b: __m512d,
7773) -> __m512d {
7774    unsafe {
7775        static_assert_rounding!(ROUNDING);
7776        let a = a.as_f64x8();
7777        let b = b.as_f64x8();
7778        let r = vaddpd(a, b, ROUNDING);
7779        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7780    }
7781}
7782
7783/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7784///
7785/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7786/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7787/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7788/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7789/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7791///
7792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
7793#[inline]
7794#[target_feature(enable = "avx512f")]
7795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7796#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7797#[rustc_legacy_const_generics(3)]
7798pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
7799    k: __mmask8,
7800    a: __m512d,
7801    b: __m512d,
7802) -> __m512d {
7803    unsafe {
7804        static_assert_rounding!(ROUNDING);
7805        let a = a.as_f64x8();
7806        let b = b.as_f64x8();
7807        let r = vaddpd(a, b, ROUNDING);
7808        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7809    }
7810}
7811
7812/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7813///
7814/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7815/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7816/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7817/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7818/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7819/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7820///
7821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
7822#[inline]
7823#[target_feature(enable = "avx512f")]
7824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7825#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7826#[rustc_legacy_const_generics(2)]
7827pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7828    unsafe {
7829        static_assert_rounding!(ROUNDING);
7830        let a = a.as_f32x16();
7831        let b = b.as_f32x16();
7832        let r = vsubps(a, b, ROUNDING);
7833        transmute(r)
7834    }
7835}
7836
7837/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7838///
7839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7840/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7841/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7842/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7843/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7844/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7847#[inline]
7848#[target_feature(enable = "avx512f")]
7849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7850#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7851#[rustc_legacy_const_generics(4)]
7852pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7853    src: __m512,
7854    k: __mmask16,
7855    a: __m512,
7856    b: __m512,
7857) -> __m512 {
7858    unsafe {
7859        static_assert_rounding!(ROUNDING);
7860        let a = a.as_f32x16();
7861        let b = b.as_f32x16();
7862        let r = vsubps(a, b, ROUNDING);
7863        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7864    }
7865}
7866
7867/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7868///
7869/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7870/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7871/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7872/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7873/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7874/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7875///
7876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
7877#[inline]
7878#[target_feature(enable = "avx512f")]
7879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7880#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7881#[rustc_legacy_const_generics(3)]
7882pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
7883    k: __mmask16,
7884    a: __m512,
7885    b: __m512,
7886) -> __m512 {
7887    unsafe {
7888        static_assert_rounding!(ROUNDING);
7889        let a = a.as_f32x16();
7890        let b = b.as_f32x16();
7891        let r = vsubps(a, b, ROUNDING);
7892        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7893    }
7894}
7895
7896/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7897///
7898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7904///
7905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
7906#[inline]
7907#[target_feature(enable = "avx512f")]
7908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7909#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7910#[rustc_legacy_const_generics(2)]
7911pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7912    unsafe {
7913        static_assert_rounding!(ROUNDING);
7914        let a = a.as_f64x8();
7915        let b = b.as_f64x8();
7916        let r = vsubpd(a, b, ROUNDING);
7917        transmute(r)
7918    }
7919}
7920
7921/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7922///
7923/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7924/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7925/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7926/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7927/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7928/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7929///
7930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
7931#[inline]
7932#[target_feature(enable = "avx512f")]
7933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7934#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7935#[rustc_legacy_const_generics(4)]
7936pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
7937    src: __m512d,
7938    k: __mmask8,
7939    a: __m512d,
7940    b: __m512d,
7941) -> __m512d {
7942    unsafe {
7943        static_assert_rounding!(ROUNDING);
7944        let a = a.as_f64x8();
7945        let b = b.as_f64x8();
7946        let r = vsubpd(a, b, ROUNDING);
7947        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7948    }
7949}
7950
7951/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7952///
7953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7959///
7960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
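///
/// A minimal usage sketch with hypothetical values (assumes `avx512f` is enabled
/// for the calling code):
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm512_set1_pd(0.75);
/// let k: __mmask8 = 0b0000_1111;
/// // Selected lanes hold 0.25 (truncation mode, exceptions suppressed); the rest are zeroed.
/// let r = _mm512_maskz_sub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(k, a, b);
/// ```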
7961#[inline]
7962#[target_feature(enable = "avx512f")]
7963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7964#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7965#[rustc_legacy_const_generics(3)]
7966pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
7967    k: __mmask8,
7968    a: __m512d,
7969    b: __m512d,
7970) -> __m512d {
7971    unsafe {
7972        static_assert_rounding!(ROUNDING);
7973        let a = a.as_f64x8();
7974        let b = b.as_f64x8();
7975        let r = vsubpd(a, b, ROUNDING);
7976        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7977    }
7978}
7979
7980/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
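///
/// A minimal usage sketch with hypothetical values (assumes `avx512f` is enabled
/// for the calling code); the product 0.1 * 3.0 is inexact in `f32`, so the two
/// rounding modes below can yield adjacent representable values:
///
/// ```ignore
/// let a = _mm512_set1_ps(0.1);
/// let b = _mm512_set1_ps(3.0);
/// let up = _mm512_mul_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
/// let down = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
/// ```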
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7993#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(2)]
7995pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7996    unsafe {
7997        static_assert_rounding!(ROUNDING);
7998        let a = a.as_f32x16();
7999        let b = b.as_f32x16();
8000        let r = vmulps(a, b, ROUNDING);
8001        transmute(r)
8002    }
8003}
8004
8005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8006///
8007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8013///
8014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
8015#[inline]
8016#[target_feature(enable = "avx512f")]
8017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8018#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8019#[rustc_legacy_const_generics(4)]
8020pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8021    src: __m512,
8022    k: __mmask16,
8023    a: __m512,
8024    b: __m512,
8025) -> __m512 {
8026    unsafe {
8027        static_assert_rounding!(ROUNDING);
8028        let a = a.as_f32x16();
8029        let b = b.as_f32x16();
8030        let r = vmulps(a, b, ROUNDING);
8031        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8032    }
8033}
8034
8035/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8036///
8037/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8038/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8039/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8040/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8041/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8042/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8043///
8044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8045#[inline]
8046#[target_feature(enable = "avx512f")]
8047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8048#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8049#[rustc_legacy_const_generics(3)]
8050pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8051    k: __mmask16,
8052    a: __m512,
8053    b: __m512,
8054) -> __m512 {
8055    unsafe {
8056        static_assert_rounding!(ROUNDING);
8057        let a = a.as_f32x16();
8058        let b = b.as_f32x16();
8059        let r = vmulps(a, b, ROUNDING);
8060        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8061    }
8062}
8063
8064/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8077#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(2)]
8079pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8080    unsafe {
8081        static_assert_rounding!(ROUNDING);
8082        let a = a.as_f64x8();
8083        let b = b.as_f64x8();
8084        let r = vmulpd(a, b, ROUNDING);
8085        transmute(r)
8086    }
8087}
8088
8089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8090///
8091/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8092/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8093/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8094/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8095/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8096/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8097///
8098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8099#[inline]
8100#[target_feature(enable = "avx512f")]
8101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8102#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8103#[rustc_legacy_const_generics(4)]
8104pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8105    src: __m512d,
8106    k: __mmask8,
8107    a: __m512d,
8108    b: __m512d,
8109) -> __m512d {
8110    unsafe {
8111        static_assert_rounding!(ROUNDING);
8112        let a = a.as_f64x8();
8113        let b = b.as_f64x8();
8114        let r = vmulpd(a, b, ROUNDING);
8115        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8116    }
8117}
8118
8119/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8120///
8121/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8122/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8123/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8124/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8125/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8126/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8127///
8128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
8129#[inline]
8130#[target_feature(enable = "avx512f")]
8131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8132#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8133#[rustc_legacy_const_generics(3)]
8134pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8135    k: __mmask8,
8136    a: __m512d,
8137    b: __m512d,
8138) -> __m512d {
8139    unsafe {
8140        static_assert_rounding!(ROUNDING);
8141        let a = a.as_f64x8();
8142        let b = b.as_f64x8();
8143        let r = vmulpd(a, b, ROUNDING);
8144        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8145    }
8146}
8147
8148/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
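///
/// A minimal usage sketch with hypothetical values (assumes `avx512f` is enabled
/// for the calling code):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(3.0);
/// // 1/3 is inexact, so the embedded rounding mode decides which neighbouring value is returned.
/// let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```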
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8161#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(2)]
8163pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8164    unsafe {
8165        static_assert_rounding!(ROUNDING);
8166        let a = a.as_f32x16();
8167        let b = b.as_f32x16();
8168        let r = vdivps(a, b, ROUNDING);
8169        transmute(r)
8170    }
8171}
8172
8173/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8174///
8175/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8176/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8177/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8178/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8179/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8180/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8181///
8182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8183#[inline]
8184#[target_feature(enable = "avx512f")]
8185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8186#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8187#[rustc_legacy_const_generics(4)]
8188pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8189    src: __m512,
8190    k: __mmask16,
8191    a: __m512,
8192    b: __m512,
8193) -> __m512 {
8194    unsafe {
8195        static_assert_rounding!(ROUNDING);
8196        let a = a.as_f32x16();
8197        let b = b.as_f32x16();
8198        let r = vdivps(a, b, ROUNDING);
8199        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8200    }
8201}
8202
8203/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8204///
8205/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8206/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8207/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8208/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8209/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8211///
8212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8213#[inline]
8214#[target_feature(enable = "avx512f")]
8215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8216#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8217#[rustc_legacy_const_generics(3)]
8218pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8219    k: __mmask16,
8220    a: __m512,
8221    b: __m512,
8222) -> __m512 {
8223    unsafe {
8224        static_assert_rounding!(ROUNDING);
8225        let a = a.as_f32x16();
8226        let b = b.as_f32x16();
8227        let r = vdivps(a, b, ROUNDING);
8228        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8229    }
8230}
8231
8232/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
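///
/// A minimal usage sketch with hypothetical values (assumes `avx512f` is enabled
/// for the calling code); `_MM_FROUND_CUR_DIRECTION` defers to whatever rounding
/// mode is currently set in `MXCSR.RC` instead of embedding one in the instruction:
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm512_set1_pd(7.0);
/// let r = _mm512_div_round_pd::<{ _MM_FROUND_CUR_DIRECTION }>(a, b);
/// ```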
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8245#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(2)]
8247pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8248    unsafe {
8249        static_assert_rounding!(ROUNDING);
8250        let a = a.as_f64x8();
8251        let b = b.as_f64x8();
8252        let r = vdivpd(a, b, ROUNDING);
8253        transmute(r)
8254    }
8255}
8256
8257/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8258///
8259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8265///
8266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8267#[inline]
8268#[target_feature(enable = "avx512f")]
8269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8270#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8271#[rustc_legacy_const_generics(4)]
8272pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8273    src: __m512d,
8274    k: __mmask8,
8275    a: __m512d,
8276    b: __m512d,
8277) -> __m512d {
8278    unsafe {
8279        static_assert_rounding!(ROUNDING);
8280        let a = a.as_f64x8();
8281        let b = b.as_f64x8();
8282        let r = vdivpd(a, b, ROUNDING);
8283        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8284    }
8285}
8286
8287/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8288///
8289/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8290/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8291/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8292/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8293/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8294/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8295///
8296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8297#[inline]
8298#[target_feature(enable = "avx512f")]
8299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8300#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8301#[rustc_legacy_const_generics(3)]
8302pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8303    k: __mmask8,
8304    a: __m512d,
8305    b: __m512d,
8306) -> __m512d {
8307    unsafe {
8308        static_assert_rounding!(ROUNDING);
8309        let a = a.as_f64x8();
8310        let b = b.as_f64x8();
8311        let r = vdivpd(a, b, ROUNDING);
8312        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8313    }
8314}
8315
8316/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
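///
/// A minimal usage sketch with hypothetical values (assumes `avx512f` is enabled
/// for the calling code):
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// // sqrt(2) is inexact; round it toward negative infinity with exceptions suppressed.
/// let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
/// ```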
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8329#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(1)]
8331pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8332    unsafe {
8333        static_assert_rounding!(ROUNDING);
8334        let a = a.as_f32x16();
8335        let r = vsqrtps(a, ROUNDING);
8336        transmute(r)
8337    }
8338}
8339
8340/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8341///
8342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8348///
8349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8350#[inline]
8351#[target_feature(enable = "avx512f")]
8352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8353#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8354#[rustc_legacy_const_generics(3)]
8355pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8356    src: __m512,
8357    k: __mmask16,
8358    a: __m512,
8359) -> __m512 {
8360    unsafe {
8361        static_assert_rounding!(ROUNDING);
8362        let a = a.as_f32x16();
8363        let r = vsqrtps(a, ROUNDING);
8364        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8365    }
8366}
8367
8368/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8369///
8370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8376///
8377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8378#[inline]
8379#[target_feature(enable = "avx512f")]
8380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8381#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8382#[rustc_legacy_const_generics(2)]
8383pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8384    unsafe {
8385        static_assert_rounding!(ROUNDING);
8386        let a = a.as_f32x16();
8387        let r = vsqrtps(a, ROUNDING);
8388        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8389    }
8390}
8391
8392/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8393///
8394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8400///
8401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
8402#[inline]
8403#[target_feature(enable = "avx512f")]
8404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8405#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8406#[rustc_legacy_const_generics(1)]
8407pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8408    unsafe {
8409        static_assert_rounding!(ROUNDING);
8410        let a = a.as_f64x8();
8411        let r = vsqrtpd(a, ROUNDING);
8412        transmute(r)
8413    }
8414}
8415
8416/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8417///
8418/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8419/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8420/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8421/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8422/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8424///
8425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8426#[inline]
8427#[target_feature(enable = "avx512f")]
8428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8429#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8430#[rustc_legacy_const_generics(3)]
8431pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8432    src: __m512d,
8433    k: __mmask8,
8434    a: __m512d,
8435) -> __m512d {
8436    unsafe {
8437        static_assert_rounding!(ROUNDING);
8438        let a = a.as_f64x8();
8439        let r = vsqrtpd(a, ROUNDING);
8440        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8441    }
8442}
8443
8444/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8445///
8446/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8447/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8448/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8449/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8450/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8451/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8452///
8453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8454#[inline]
8455#[target_feature(enable = "avx512f")]
8456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8457#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8458#[rustc_legacy_const_generics(2)]
8459pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8460    unsafe {
8461        static_assert_rounding!(ROUNDING);
8462        let a = a.as_f64x8();
8463        let r = vsqrtpd(a, ROUNDING);
8464        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8465    }
8466}
8467
8468/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8469///
8470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8476///
8477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
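///
/// A minimal usage sketch with hypothetical values (assumes `avx512f` is enabled
/// for the calling code):
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Each lane computes a * b + c with a single rounding at the end: 7.0 here.
/// let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```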
8478#[inline]
8479#[target_feature(enable = "avx512f")]
8480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8481#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8482#[rustc_legacy_const_generics(3)]
8483pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8484    unsafe {
8485        static_assert_rounding!(ROUNDING);
8486        vfmadd132psround(a, b, c, ROUNDING)
8487    }
8488}
8489
8490/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8491///
8492/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8498///
8499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8500#[inline]
8501#[target_feature(enable = "avx512f")]
8502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8503#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8504#[rustc_legacy_const_generics(4)]
8505pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8506    a: __m512,
8507    k: __mmask16,
8508    b: __m512,
8509    c: __m512,
8510) -> __m512 {
8511    unsafe {
8512        static_assert_rounding!(ROUNDING);
8513        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
8514    }
8515}
8516
8517/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8518///
8519/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8520/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8521/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8522/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8523/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8524/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8525///
8526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8527#[inline]
8528#[target_feature(enable = "avx512f")]
8529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8530#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8531#[rustc_legacy_const_generics(4)]
8532pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8533    k: __mmask16,
8534    a: __m512,
8535    b: __m512,
8536    c: __m512,
8537) -> __m512 {
8538    unsafe {
8539        static_assert_rounding!(ROUNDING);
8540        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
8541    }
8542}
8543
8544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8545///
8546/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8547/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8548/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8549/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8550/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8551/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8552///
8553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
8554#[inline]
8555#[target_feature(enable = "avx512f")]
8556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8557#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8558#[rustc_legacy_const_generics(4)]
8559pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8560    a: __m512,
8561    b: __m512,
8562    c: __m512,
8563    k: __mmask16,
8564) -> __m512 {
8565    unsafe {
8566        static_assert_rounding!(ROUNDING);
8567        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
8568    }
8569}
8570
8571/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8572///
8573/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8574/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8575/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8576/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8577/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8578/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8579///
8580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
8581#[inline]
8582#[target_feature(enable = "avx512f")]
8583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8584#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8585#[rustc_legacy_const_generics(3)]
8586pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8587    unsafe {
8588        static_assert_rounding!(ROUNDING);
8589        vfmadd132pdround(a, b, c, ROUNDING)
8590    }
8591}
8592
8593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8594///
8595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8601///
8602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8603#[inline]
8604#[target_feature(enable = "avx512f")]
8605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8606#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8607#[rustc_legacy_const_generics(4)]
8608pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8609    a: __m512d,
8610    k: __mmask8,
8611    b: __m512d,
8612    c: __m512d,
8613) -> __m512d {
8614    unsafe {
8615        static_assert_rounding!(ROUNDING);
8616        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
8617    }
8618}
8619
8620/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8621///
8622/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8623/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8624/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8625/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8626/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8627/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8628///
8629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8630#[inline]
8631#[target_feature(enable = "avx512f")]
8632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8633#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8634#[rustc_legacy_const_generics(4)]
8635pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8636    k: __mmask8,
8637    a: __m512d,
8638    b: __m512d,
8639    c: __m512d,
8640) -> __m512d {
8641    unsafe {
8642        static_assert_rounding!(ROUNDING);
8643        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8644    }
8645}
8646
8647/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8648///
8649/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8655///
8656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
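///
/// A minimal usage sketch with hypothetical values (assumes `avx512f` is enabled
/// for the calling code); note that in the `mask3` form the unselected lanes come
/// from `c`:
///
/// ```ignore
/// let a = _mm512_set1_pd(2.0);
/// let b = _mm512_set1_pd(3.0);
/// let c = _mm512_set1_pd(10.0);
/// let k: __mmask8 = 0b0101_0101;
/// // Selected lanes hold 2.0 * 3.0 + 10.0 = 16.0; unselected lanes keep 10.0 from `c`.
/// let r = _mm512_mask3_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, k);
/// ```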
8657#[inline]
8658#[target_feature(enable = "avx512f")]
8659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8660#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8661#[rustc_legacy_const_generics(4)]
8662pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8663    a: __m512d,
8664    b: __m512d,
8665    c: __m512d,
8666    k: __mmask8,
8667) -> __m512d {
8668    unsafe {
8669        static_assert_rounding!(ROUNDING);
8670        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8671    }
8672}
8673
8674/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8675///
8676/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8677/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8678/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8679/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8680/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8681/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8682///
8683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
8684#[inline]
8685#[target_feature(enable = "avx512f")]
8686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8687#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8688#[rustc_legacy_const_generics(3)]
8689pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8690    unsafe {
8691        static_assert_rounding!(ROUNDING);
8692        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
8693    }
8694}
8695
8696/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8697///
8698/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8699/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8700/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8701/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8702/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8703/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8704///
8705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8706#[inline]
8707#[target_feature(enable = "avx512f")]
8708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8709#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8710#[rustc_legacy_const_generics(4)]
8711pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8712    a: __m512,
8713    k: __mmask16,
8714    b: __m512,
8715    c: __m512,
8716) -> __m512 {
8717    unsafe {
8718        static_assert_rounding!(ROUNDING);
8719        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8720        simd_select_bitmask(k, r, a)
8721    }
8722}
8723
8724/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8725///
8726/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8727/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8728/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8729/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8730/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8731/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8732///
8733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
8734#[inline]
8735#[target_feature(enable = "avx512f")]
8736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8737#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8738#[rustc_legacy_const_generics(4)]
8739pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
8740    k: __mmask16,
8741    a: __m512,
8742    b: __m512,
8743    c: __m512,
8744) -> __m512 {
8745    unsafe {
8746        static_assert_rounding!(ROUNDING);
8747        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8748        simd_select_bitmask(k, r, _mm512_setzero_ps())
8749    }
8750}
8751
8752/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8753///
8754/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8755/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8756/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8757/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8758/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8759/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8760///
8761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
8762#[inline]
8763#[target_feature(enable = "avx512f")]
8764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8765#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8766#[rustc_legacy_const_generics(4)]
8767pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
8768    a: __m512,
8769    b: __m512,
8770    c: __m512,
8771    k: __mmask16,
8772) -> __m512 {
8773    unsafe {
8774        static_assert_rounding!(ROUNDING);
8775        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8776        simd_select_bitmask(k, r, c)
8777    }
8778}
8779
8780/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8781///
8782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8788///
8789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
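///
/// A minimal usage sketch with hypothetical values (assumes `avx512f` is enabled
/// for the calling code):
///
/// ```ignore
/// let a = _mm512_set1_pd(2.0);
/// let b = _mm512_set1_pd(3.0);
/// let c = _mm512_set1_pd(1.5);
/// // Each lane computes a * b - c with a single rounding at the end: 4.5 here.
/// let r = _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```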
8790#[inline]
8791#[target_feature(enable = "avx512f")]
8792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8793#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8794#[rustc_legacy_const_generics(3)]
8795pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8796    unsafe {
8797        static_assert_rounding!(ROUNDING);
8798        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
8799    }
8800}
8801
8802/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8803///
8804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8810///
8811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
8812#[inline]
8813#[target_feature(enable = "avx512f")]
8814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8815#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8816#[rustc_legacy_const_generics(4)]
8817pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
8818    a: __m512d,
8819    k: __mmask8,
8820    b: __m512d,
8821    c: __m512d,
8822) -> __m512d {
8823    unsafe {
8824        static_assert_rounding!(ROUNDING);
8825        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8826        simd_select_bitmask(k, r, a)
8827    }
8828}
8829
8830/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8831///
8832/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8833/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8834/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8835/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8836/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
8840#[inline]
8841#[target_feature(enable = "avx512f")]
8842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8843#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8844#[rustc_legacy_const_generics(4)]
8845pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
8846    k: __mmask8,
8847    a: __m512d,
8848    b: __m512d,
8849    c: __m512d,
8850) -> __m512d {
8851    unsafe {
8852        static_assert_rounding!(ROUNDING);
8853        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8854        simd_select_bitmask(k, r, _mm512_setzero_pd())
8855    }
8856}
8857
8858/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8859///
8860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8866///
8867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
8868#[inline]
8869#[target_feature(enable = "avx512f")]
8870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8871#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8872#[rustc_legacy_const_generics(4)]
8873pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
8874    a: __m512d,
8875    b: __m512d,
8876    c: __m512d,
8877    k: __mmask8,
8878) -> __m512d {
8879    unsafe {
8880        static_assert_rounding!(ROUNDING);
8881        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8882        simd_select_bitmask(k, r, c)
8883    }
8884}
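
// A minimal illustrative sketch (hypothetical helper, not part of the crate's test
// suite): in the `mask3` form, lanes whose mask bit is clear keep the corresponding
// element of `c` rather than the fused result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask3_fmsub_round_pd_sketch() {
    let a = _mm512_set1_pd(2.0);
    let b = _mm512_set1_pd(3.0);
    let c = _mm512_set1_pd(1.0);
    let r = _mm512_mask3_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        a, b, c, 0b1111_0000,
    );
    let mut out = [0.0f64; 8];
    unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
    // High four lanes: 2.0 * 3.0 - 1.0 = 5.0; low four lanes keep c = 1.0.
    assert!(out.iter().enumerate().all(|(i, &v)| v == if i >= 4 { 5.0 } else { 1.0 }));
}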
8885
8886/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8887///
8888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8894///
8895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
8896#[inline]
8897#[target_feature(enable = "avx512f")]
8898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8899#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8900#[rustc_legacy_const_generics(3)]
8901pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8902    unsafe {
8903        static_assert_rounding!(ROUNDING);
8904        vfmaddsubpsround(a, b, c, ROUNDING)
8905    }
8906}
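
// A minimal illustrative sketch (hypothetical helper, not part of the crate's test
// suite): shows the add/sub alternation of `_mm512_fmaddsub_round_ps`. Even-indexed
// lanes compute `a * b - c`, odd-indexed lanes compute `a * b + c`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmaddsub_round_ps_sketch() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    // Even lanes: 2.0 * 3.0 - 1.0 = 5.0; odd lanes: 2.0 * 3.0 + 1.0 = 7.0.
    assert!(out.iter().enumerate().all(|(i, &v)| v == if i % 2 == 0 { 5.0 } else { 7.0 }));
}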
8907
8908/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8909///
8910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8916///
8917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
8918#[inline]
8919#[target_feature(enable = "avx512f")]
8920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8921#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8922#[rustc_legacy_const_generics(4)]
8923pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8924    a: __m512,
8925    k: __mmask16,
8926    b: __m512,
8927    c: __m512,
8928) -> __m512 {
8929    unsafe {
8930        static_assert_rounding!(ROUNDING);
8931        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
8932    }
8933}
8934
8935/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8936///
8937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8945#[inline]
8946#[target_feature(enable = "avx512f")]
8947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8948#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8949#[rustc_legacy_const_generics(4)]
8950pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8951    k: __mmask16,
8952    a: __m512,
8953    b: __m512,
8954    c: __m512,
8955) -> __m512 {
8956    unsafe {
8957        static_assert_rounding!(ROUNDING);
8958        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
8959    }
8960}
8961
8962/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8963///
8964/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8965/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8966/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8967/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8968/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8969/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8970///
8971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8972#[inline]
8973#[target_feature(enable = "avx512f")]
8974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8975#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8976#[rustc_legacy_const_generics(4)]
8977pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8978    a: __m512,
8979    b: __m512,
8980    c: __m512,
8981    k: __mmask16,
8982) -> __m512 {
8983    unsafe {
8984        static_assert_rounding!(ROUNDING);
8985        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8986    }
8987}
8988
8989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8990///
8991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8997///
8998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
8999#[inline]
9000#[target_feature(enable = "avx512f")]
9001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9002#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9003#[rustc_legacy_const_generics(3)]
9004pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9005    a: __m512d,
9006    b: __m512d,
9007    c: __m512d,
9008) -> __m512d {
9009    unsafe {
9010        static_assert_rounding!(ROUNDING);
9011        vfmaddsubpdround(a, b, c, ROUNDING)
9012    }
9013}
9014
9015/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9016///
9017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9023///
9024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9025#[inline]
9026#[target_feature(enable = "avx512f")]
9027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9028#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9029#[rustc_legacy_const_generics(4)]
9030pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9031    a: __m512d,
9032    k: __mmask8,
9033    b: __m512d,
9034    c: __m512d,
9035) -> __m512d {
9036    unsafe {
9037        static_assert_rounding!(ROUNDING);
9038        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9039    }
9040}
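
// A minimal illustrative sketch (hypothetical helper, not part of the crate's test
// suite): in the writemask form, lanes whose mask bit is clear keep the corresponding
// element of `a` rather than the alternating fused result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_fmaddsub_round_pd_sketch() {
    let a = _mm512_set1_pd(2.0);
    let b = _mm512_set1_pd(3.0);
    let c = _mm512_set1_pd(1.0);
    let r = _mm512_mask_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        a, 0b0000_0011, b, c,
    );
    let mut out = [0.0f64; 8];
    unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
    // Lane 0 (even, selected): 2.0 * 3.0 - 1.0 = 5.0; lane 1 (odd, selected):
    // 2.0 * 3.0 + 1.0 = 7.0; the remaining lanes keep a = 2.0.
    assert!(out[0] == 5.0 && out[1] == 7.0 && out[2..].iter().all(|&v| v == 2.0));
}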
9041
9042/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9043///
9044/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9045/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9046/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9047/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9048/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9049/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9050///
9051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9052#[inline]
9053#[target_feature(enable = "avx512f")]
9054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9055#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9056#[rustc_legacy_const_generics(4)]
9057pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9058    k: __mmask8,
9059    a: __m512d,
9060    b: __m512d,
9061    c: __m512d,
9062) -> __m512d {
9063    unsafe {
9064        static_assert_rounding!(ROUNDING);
9065        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9066    }
9067}
9068
9069/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9070///
9071/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9072/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9073/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9074/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9075/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9076/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9077///
9078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9079#[inline]
9080#[target_feature(enable = "avx512f")]
9081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9082#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9083#[rustc_legacy_const_generics(4)]
9084pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9085    a: __m512d,
9086    b: __m512d,
9087    c: __m512d,
9088    k: __mmask8,
9089) -> __m512d {
9090    unsafe {
9091        static_assert_rounding!(ROUNDING);
9092        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
9093    }
9094}
9095
9096/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9097///
9098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9104///
9105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
9106#[inline]
9107#[target_feature(enable = "avx512f")]
9108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9109#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9110#[rustc_legacy_const_generics(3)]
9111pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9112    unsafe {
9113        static_assert_rounding!(ROUNDING);
9114        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9115    }
9116}
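
// A minimal illustrative sketch (hypothetical helper, not part of the crate's test
// suite): `_mm512_fmsubadd_round_ps` alternates the other way round compared to
// fmaddsub: even-indexed lanes compute `a * b + c`, odd-indexed lanes `a * b - c`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmsubadd_round_ps_sketch() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    // Even lanes: 2.0 * 3.0 + 1.0 = 7.0; odd lanes: 2.0 * 3.0 - 1.0 = 5.0.
    assert!(out.iter().enumerate().all(|(i, &v)| v == if i % 2 == 0 { 7.0 } else { 5.0 }));
}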
9117
9118/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9119///
9120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9126///
9127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9128#[inline]
9129#[target_feature(enable = "avx512f")]
9130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9131#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9132#[rustc_legacy_const_generics(4)]
9133pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9134    a: __m512,
9135    k: __mmask16,
9136    b: __m512,
9137    c: __m512,
9138) -> __m512 {
9139    unsafe {
9140        static_assert_rounding!(ROUNDING);
9141        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9142        simd_select_bitmask(k, r, a)
9143    }
9144}
9145
9146/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9147///
9148/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9149/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9150/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9151/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9152/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9153/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9154///
9155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9156#[inline]
9157#[target_feature(enable = "avx512f")]
9158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9159#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9160#[rustc_legacy_const_generics(4)]
9161pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9162    k: __mmask16,
9163    a: __m512,
9164    b: __m512,
9165    c: __m512,
9166) -> __m512 {
9167    unsafe {
9168        static_assert_rounding!(ROUNDING);
9169        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9170        simd_select_bitmask(k, r, _mm512_setzero_ps())
9171    }
9172}
9173
9174/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9175///
9176/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9177/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9178/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9179/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9180/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9182///
9183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9184#[inline]
9185#[target_feature(enable = "avx512f")]
9186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9187#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9188#[rustc_legacy_const_generics(4)]
9189pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9190    a: __m512,
9191    b: __m512,
9192    c: __m512,
9193    k: __mmask16,
9194) -> __m512 {
9195    unsafe {
9196        static_assert_rounding!(ROUNDING);
9197        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
9198        simd_select_bitmask(k, r, c)
9199    }
9200}
9201
9202/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9203///
9204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9210///
9211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9212#[inline]
9213#[target_feature(enable = "avx512f")]
9214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9215#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9216#[rustc_legacy_const_generics(3)]
9217pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9218    a: __m512d,
9219    b: __m512d,
9220    c: __m512d,
9221) -> __m512d {
9222    unsafe {
9223        static_assert_rounding!(ROUNDING);
9224        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9225    }
9226}
9227
9228/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9229///
9230/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9231/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9232/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9233/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9234/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9236///
9237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9238#[inline]
9239#[target_feature(enable = "avx512f")]
9240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9241#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9242#[rustc_legacy_const_generics(4)]
9243pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9244    a: __m512d,
9245    k: __mmask8,
9246    b: __m512d,
9247    c: __m512d,
9248) -> __m512d {
9249    unsafe {
9250        static_assert_rounding!(ROUNDING);
9251        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9252        simd_select_bitmask(k, r, a)
9253    }
9254}
9255
9256/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9257///
9258/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9259/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9260/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9261/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9262/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9263/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9264///
9265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9266#[inline]
9267#[target_feature(enable = "avx512f")]
9268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9269#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9270#[rustc_legacy_const_generics(4)]
9271pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9272    k: __mmask8,
9273    a: __m512d,
9274    b: __m512d,
9275    c: __m512d,
9276) -> __m512d {
9277    unsafe {
9278        static_assert_rounding!(ROUNDING);
9279        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9280        simd_select_bitmask(k, r, _mm512_setzero_pd())
9281    }
9282}
9283
9284/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9285///
9286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9292///
9293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9294#[inline]
9295#[target_feature(enable = "avx512f")]
9296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9297#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9298#[rustc_legacy_const_generics(4)]
9299pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9300    a: __m512d,
9301    b: __m512d,
9302    c: __m512d,
9303    k: __mmask8,
9304) -> __m512d {
9305    unsafe {
9306        static_assert_rounding!(ROUNDING);
9307        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
9308        simd_select_bitmask(k, r, c)
9309    }
9310}
9311
9312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9313///
9314/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9315/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9316/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9317/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9318/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9320///
9321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
9322#[inline]
9323#[target_feature(enable = "avx512f")]
9324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9325#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9326#[rustc_legacy_const_generics(3)]
9327pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9328    unsafe {
9329        static_assert_rounding!(ROUNDING);
9330        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9331    }
9332}
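
// A minimal illustrative sketch (hypothetical helper, not part of the crate's test
// suite): `_mm512_fnmadd_round_ps` computes `-(a * b) + c` in every lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fnmadd_round_ps_sketch() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    // Every lane holds -(2.0 * 3.0) + 1.0 = -5.0.
    assert!(out.iter().all(|&v| v == -5.0));
}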
9333
9334/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9335///
9336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9342///
9343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9344#[inline]
9345#[target_feature(enable = "avx512f")]
9346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9348#[rustc_legacy_const_generics(4)]
9349pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9350    a: __m512,
9351    k: __mmask16,
9352    b: __m512,
9353    c: __m512,
9354) -> __m512 {
9355    unsafe {
9356        static_assert_rounding!(ROUNDING);
9357        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9358        simd_select_bitmask(k, r, a)
9359    }
9360}
9361
9362/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9363///
9364/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9365/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9366/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9367/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9368/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9369/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9370///
9371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9372#[inline]
9373#[target_feature(enable = "avx512f")]
9374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9375#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9376#[rustc_legacy_const_generics(4)]
9377pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9378    k: __mmask16,
9379    a: __m512,
9380    b: __m512,
9381    c: __m512,
9382) -> __m512 {
9383    unsafe {
9384        static_assert_rounding!(ROUNDING);
9385        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9386        simd_select_bitmask(k, r, _mm512_setzero_ps())
9387    }
9388}
9389
9390/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9391///
9392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9393/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9394/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9395/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9396/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9398///
9399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9400#[inline]
9401#[target_feature(enable = "avx512f")]
9402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9403#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9404#[rustc_legacy_const_generics(4)]
9405pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9406    a: __m512,
9407    b: __m512,
9408    c: __m512,
9409    k: __mmask16,
9410) -> __m512 {
9411    unsafe {
9412        static_assert_rounding!(ROUNDING);
9413        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9414        simd_select_bitmask(k, r, c)
9415    }
9416}
9417
9418/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9419///
9420/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9421/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9422/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9423/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9424/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9426///
9427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9428#[inline]
9429#[target_feature(enable = "avx512f")]
9430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9431#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9432#[rustc_legacy_const_generics(3)]
9433pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9434    unsafe {
9435        static_assert_rounding!(ROUNDING);
9436        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9437    }
9438}
9439
9440/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9441///
9442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9448///
9449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9450#[inline]
9451#[target_feature(enable = "avx512f")]
9452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9453#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9454#[rustc_legacy_const_generics(4)]
9455pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9456    a: __m512d,
9457    k: __mmask8,
9458    b: __m512d,
9459    c: __m512d,
9460) -> __m512d {
9461    unsafe {
9462        static_assert_rounding!(ROUNDING);
9463        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9464        simd_select_bitmask(k, r, a)
9465    }
9466}
9467
9468/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9469///
9470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9478#[inline]
9479#[target_feature(enable = "avx512f")]
9480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9481#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9482#[rustc_legacy_const_generics(4)]
9483pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9484    k: __mmask8,
9485    a: __m512d,
9486    b: __m512d,
9487    c: __m512d,
9488) -> __m512d {
9489    unsafe {
9490        static_assert_rounding!(ROUNDING);
9491        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9492        simd_select_bitmask(k, r, _mm512_setzero_pd())
9493    }
9494}
9495
9496/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9497///
9498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9504///
9505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9506#[inline]
9507#[target_feature(enable = "avx512f")]
9508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9509#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9510#[rustc_legacy_const_generics(4)]
9511pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9512    a: __m512d,
9513    b: __m512d,
9514    c: __m512d,
9515    k: __mmask8,
9516) -> __m512d {
9517    unsafe {
9518        static_assert_rounding!(ROUNDING);
9519        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
9520        simd_select_bitmask(k, r, c)
9521    }
9522}
9523
9524/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9525///
9526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9532///
9533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9534#[inline]
9535#[target_feature(enable = "avx512f")]
9536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9537#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9538#[rustc_legacy_const_generics(3)]
9539pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9540    unsafe {
9541        static_assert_rounding!(ROUNDING);
9542        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9543    }
9544}
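
// A minimal illustrative sketch (hypothetical helper, not part of the crate's test
// suite): `_mm512_fnmsub_round_ps` computes `-(a * b) - c` in every lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fnmsub_round_ps_sketch() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    // Every lane holds -(2.0 * 3.0) - 1.0 = -7.0.
    assert!(out.iter().all(|&v| v == -7.0));
}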
9545
9546/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9547///
9548/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9549/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9550/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9551/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9552/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9553/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9554///
9555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9556#[inline]
9557#[target_feature(enable = "avx512f")]
9558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9559#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9560#[rustc_legacy_const_generics(4)]
9561pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9562    a: __m512,
9563    k: __mmask16,
9564    b: __m512,
9565    c: __m512,
9566) -> __m512 {
9567    unsafe {
9568        static_assert_rounding!(ROUNDING);
9569        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9570        simd_select_bitmask(k, r, a)
9571    }
9572}
9573
9574/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9575///
9576/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9582///
9583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9584#[inline]
9585#[target_feature(enable = "avx512f")]
9586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9587#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9588#[rustc_legacy_const_generics(4)]
9589pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9590    k: __mmask16,
9591    a: __m512,
9592    b: __m512,
9593    c: __m512,
9594) -> __m512 {
9595    unsafe {
9596        static_assert_rounding!(ROUNDING);
9597        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9598        simd_select_bitmask(k, r, _mm512_setzero_ps())
9599    }
9600}
9601
9602/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9603///
9604/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9605/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9606/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9607/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9608/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9609/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9610///
9611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9612#[inline]
9613#[target_feature(enable = "avx512f")]
9614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9615#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9616#[rustc_legacy_const_generics(4)]
9617pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9618    a: __m512,
9619    b: __m512,
9620    c: __m512,
9621    k: __mmask16,
9622) -> __m512 {
9623    unsafe {
9624        static_assert_rounding!(ROUNDING);
9625        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
9626        simd_select_bitmask(k, r, c)
9627    }
9628}
9629
9630/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9631///
9632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9638///
9639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9640#[inline]
9641#[target_feature(enable = "avx512f")]
9642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9643#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9644#[rustc_legacy_const_generics(3)]
9645pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9646    unsafe {
9647        static_assert_rounding!(ROUNDING);
9648        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
9649    }
9650}
9651
9652/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9653///
9654/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9660///
9661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9662#[inline]
9663#[target_feature(enable = "avx512f")]
9664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9665#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9666#[rustc_legacy_const_generics(4)]
9667pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9668    a: __m512d,
9669    k: __mmask8,
9670    b: __m512d,
9671    c: __m512d,
9672) -> __m512d {
9673    unsafe {
9674        static_assert_rounding!(ROUNDING);
9675        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9676        simd_select_bitmask(k, r, a)
9677    }
9678}
9679
9680/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9681///
9682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9688///
9689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9690#[inline]
9691#[target_feature(enable = "avx512f")]
9692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9693#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9694#[rustc_legacy_const_generics(4)]
9695pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9696    k: __mmask8,
9697    a: __m512d,
9698    b: __m512d,
9699    c: __m512d,
9700) -> __m512d {
9701    unsafe {
9702        static_assert_rounding!(ROUNDING);
9703        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9704        simd_select_bitmask(k, r, _mm512_setzero_pd())
9705    }
9706}
9707
9708/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9709///
9710/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9711/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9712/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9713/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9714/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9715/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9718#[inline]
9719#[target_feature(enable = "avx512f")]
9720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9721#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9722#[rustc_legacy_const_generics(4)]
9723pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
9724    a: __m512d,
9725    b: __m512d,
9726    c: __m512d,
9727    k: __mmask8,
9728) -> __m512d {
9729    unsafe {
9730        static_assert_rounding!(ROUNDING);
9731        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9732        simd_select_bitmask(k, r, c)
9733    }
9734}
9735
9736/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9737/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
9740#[inline]
9741#[target_feature(enable = "avx512f")]
9742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9743#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9744#[rustc_legacy_const_generics(2)]
9745pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9746    unsafe {
9747        static_assert_sae!(SAE);
9748        let a = a.as_f32x16();
9749        let b = b.as_f32x16();
9750        let r = vmaxps(a, b, SAE);
9751        transmute(r)
9752    }
9753}
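
// A minimal illustrative sketch (hypothetical helper, not part of the crate's test
// suite): `_mm512_max_round_ps` with `_MM_FROUND_NO_EXC` takes the per-lane maximum
// while suppressing floating-point exceptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn max_round_ps_sketch() {
    let a = _mm512_set1_ps(1.5);
    let b = _mm512_set1_ps(-2.5);
    let r = _mm512_max_round_ps::<_MM_FROUND_NO_EXC>(a, b);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    // Every lane holds max(1.5, -2.5) = 1.5.
    assert!(out.iter().all(|&v| v == 1.5));
}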
9754
9755/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9756/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9762#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_mask_max_round_ps<const SAE: i32>(
9765    src: __m512,
9766    k: __mmask16,
9767    a: __m512,
9768    b: __m512,
9769) -> __m512 {
9770    unsafe {
9771        static_assert_sae!(SAE);
9772        let a = a.as_f32x16();
9773        let b = b.as_f32x16();
9774        let r = vmaxps(a, b, SAE);
9775        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9776    }
9777}
9778
9779/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9781///
9782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
9783#[inline]
9784#[target_feature(enable = "avx512f")]
9785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9786#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9787#[rustc_legacy_const_generics(3)]
9788pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9789    unsafe {
9790        static_assert_sae!(SAE);
9791        let a = a.as_f32x16();
9792        let b = b.as_f32x16();
9793        let r = vmaxps(a, b, SAE);
9794        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9795    }
9796}
9797
9798/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9799/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9800///
9801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
9802#[inline]
9803#[target_feature(enable = "avx512f")]
9804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9805#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9806#[rustc_legacy_const_generics(2)]
9807pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9808    unsafe {
9809        static_assert_sae!(SAE);
9810        let a = a.as_f64x8();
9811        let b = b.as_f64x8();
9812        let r = vmaxpd(a, b, SAE);
9813        transmute(r)
9814    }
9815}
9816
9817/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9818/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9819///
9820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
9821#[inline]
9822#[target_feature(enable = "avx512f")]
9823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9824#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9825#[rustc_legacy_const_generics(4)]
9826pub fn _mm512_mask_max_round_pd<const SAE: i32>(
9827    src: __m512d,
9828    k: __mmask8,
9829    a: __m512d,
9830    b: __m512d,
9831) -> __m512d {
9832    unsafe {
9833        static_assert_sae!(SAE);
9834        let a = a.as_f64x8();
9835        let b = b.as_f64x8();
9836        let r = vmaxpd(a, b, SAE);
9837        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9838    }
9839}
9840
9841/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9842/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9843///
9844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
9845#[inline]
9846#[target_feature(enable = "avx512f")]
9847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9848#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9849#[rustc_legacy_const_generics(3)]
9850pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9851    unsafe {
9852        static_assert_sae!(SAE);
9853        let a = a.as_f64x8();
9854        let b = b.as_f64x8();
9855        let r = vmaxpd(a, b, SAE);
9856        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9857    }
9858}
9859
9860/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
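///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation),
/// assuming `avx512f` is enabled at compile time:
///
/// ```ignore
/// let a = _mm512_set1_ps(3.0);
/// let b = _mm512_set1_ps(-3.0);
/// // Element-wise minimum with floating-point exceptions suppressed (SAE).
/// let r = _mm512_min_round_ps::<_MM_FROUND_NO_EXC>(a, b);
/// // Every lane of `r` is -3.0.
/// ```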
9864#[inline]
9865#[target_feature(enable = "avx512f")]
9866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9867#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9868#[rustc_legacy_const_generics(2)]
9869pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9870    unsafe {
9871        static_assert_sae!(SAE);
9872        let a = a.as_f32x16();
9873        let b = b.as_f32x16();
9874        let r = vminps(a, b, SAE);
9875        transmute(r)
9876    }
9877}
9878
9879/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9880/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9881///
9882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
9883#[inline]
9884#[target_feature(enable = "avx512f")]
9885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9886#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9887#[rustc_legacy_const_generics(4)]
9888pub fn _mm512_mask_min_round_ps<const SAE: i32>(
9889    src: __m512,
9890    k: __mmask16,
9891    a: __m512,
9892    b: __m512,
9893) -> __m512 {
9894    unsafe {
9895        static_assert_sae!(SAE);
9896        let a = a.as_f32x16();
9897        let b = b.as_f32x16();
9898        let r = vminps(a, b, SAE);
9899        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9900    }
9901}
9902
9903/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9904/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9905///
9906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
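///
/// A minimal zeromask sketch (illustrative only, not part of Intel's documentation),
/// assuming `avx512f` is enabled at compile time:
///
/// ```ignore
/// let a = _mm512_set1_ps(4.0);
/// let b = _mm512_set1_ps(1.0);
/// // Lanes whose mask bit is clear are zeroed instead of being copied from a source.
/// let r = _mm512_maskz_min_round_ps::<_MM_FROUND_NO_EXC>(0b11111111_00000000, a, b);
/// // Lanes 0..8 are 0.0, lanes 8..16 are 1.0.
/// ```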
9907#[inline]
9908#[target_feature(enable = "avx512f")]
9909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9910#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9911#[rustc_legacy_const_generics(3)]
9912pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9913    unsafe {
9914        static_assert_sae!(SAE);
9915        let a = a.as_f32x16();
9916        let b = b.as_f32x16();
9917        let r = vminps(a, b, SAE);
9918        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9919    }
9920}
9921
9922/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9923/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9924///
9925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
9926#[inline]
9927#[target_feature(enable = "avx512f")]
9928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9929#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9930#[rustc_legacy_const_generics(2)]
9931pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9932    unsafe {
9933        static_assert_sae!(SAE);
9934        let a = a.as_f64x8();
9935        let b = b.as_f64x8();
9936        let r = vminpd(a, b, SAE);
9937        transmute(r)
9938    }
9939}
9940
9941/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9943///
9944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
9945#[inline]
9946#[target_feature(enable = "avx512f")]
9947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9948#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9949#[rustc_legacy_const_generics(4)]
9950pub fn _mm512_mask_min_round_pd<const SAE: i32>(
9951    src: __m512d,
9952    k: __mmask8,
9953    a: __m512d,
9954    b: __m512d,
9955) -> __m512d {
9956    unsafe {
9957        static_assert_sae!(SAE);
9958        let a = a.as_f64x8();
9959        let b = b.as_f64x8();
9960        let r = vminpd(a, b, SAE);
9961        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9962    }
9963}
9964
9965/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9967///
9968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
9969#[inline]
9970#[target_feature(enable = "avx512f")]
9971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9972#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9973#[rustc_legacy_const_generics(3)]
9974pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9975    unsafe {
9976        static_assert_sae!(SAE);
9977        let a = a.as_f64x8();
9978        let b = b.as_f64x8();
9979        let r = vminpd(a, b, SAE);
9980        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9981    }
9982}
9983
9984/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9985/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9986///
9987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
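///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation),
/// assuming `avx512f` is enabled at compile time:
///
/// ```ignore
/// let a = _mm512_set1_ps(8.0);
/// // Extract the exponent as a float; floor(log2(8.0)) == 3.
/// let r = _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a);
/// // Every lane of `r` is 3.0.
/// ```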
9988#[inline]
9989#[target_feature(enable = "avx512f")]
9990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9991#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9992#[rustc_legacy_const_generics(1)]
9993pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
9994    unsafe {
9995        static_assert_sae!(SAE);
9996        let a = a.as_f32x16();
9997        let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
9998        transmute(r)
9999    }
10000}
10001
10002/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10003/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10004///
10005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10006#[inline]
10007#[target_feature(enable = "avx512f")]
10008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10009#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10010#[rustc_legacy_const_generics(3)]
10011pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10012    unsafe {
10013        static_assert_sae!(SAE);
10014        let a = a.as_f32x16();
10015        let src = src.as_f32x16();
10016        let r = vgetexpps(a, src, k, SAE);
10017        transmute(r)
10018    }
10019}
10020
10021/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10023///
10024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10025#[inline]
10026#[target_feature(enable = "avx512f")]
10027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10028#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10029#[rustc_legacy_const_generics(2)]
10030pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10031    unsafe {
10032        static_assert_sae!(SAE);
10033        let a = a.as_f32x16();
10034        let r = vgetexpps(a, f32x16::ZERO, k, SAE);
10035        transmute(r)
10036    }
10037}
10038
10039/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10041///
10042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10043#[inline]
10044#[target_feature(enable = "avx512f")]
10045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10046#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10047#[rustc_legacy_const_generics(1)]
10048pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10049    unsafe {
10050        static_assert_sae!(SAE);
10051        let a = a.as_f64x8();
10052        let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
10053        transmute(r)
10054    }
10055}
10056
10057/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10059///
10060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10061#[inline]
10062#[target_feature(enable = "avx512f")]
10063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10064#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10065#[rustc_legacy_const_generics(3)]
10066pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10067    src: __m512d,
10068    k: __mmask8,
10069    a: __m512d,
10070) -> __m512d {
10071    unsafe {
10072        static_assert_sae!(SAE);
10073        let a = a.as_f64x8();
10074        let src = src.as_f64x8();
10075        let r = vgetexppd(a, src, k, SAE);
10076        transmute(r)
10077    }
10078}
10079
10080/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10081/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10082///
10083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10084#[inline]
10085#[target_feature(enable = "avx512f")]
10086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10087#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10088#[rustc_legacy_const_generics(2)]
10089pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10090    unsafe {
10091        static_assert_sae!(SAE);
10092        let a = a.as_f64x8();
10093        let r = vgetexppd(a, f64x8::ZERO, k, SAE);
10094        transmute(r)
10095    }
10096}
10097
10098/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10099/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
10106/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
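///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation),
/// assuming `avx512f` is enabled at compile time:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.7);
/// // IMM8 = 0 keeps zero fraction bits and rounds to the nearest integer.
/// let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_NO_EXC>(a);
/// // Every lane of `r` is 3.0.
/// ```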
10108#[inline]
10109#[target_feature(enable = "avx512f")]
10110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10111#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10112#[rustc_legacy_const_generics(1, 2)]
10113pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10114    unsafe {
10115        static_assert_uimm_bits!(IMM8, 8);
10116        static_assert_mantissas_sae!(SAE);
10117        let a = a.as_f32x16();
10118        let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
10119        transmute(r)
10120    }
10121}
10122
10123/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10124/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10125/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10126/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10127/// * [`_MM_FROUND_TO_POS_INF`] : round up
10128/// * [`_MM_FROUND_TO_ZERO`] : truncate
10129/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10130///
10131/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10133#[inline]
10134#[target_feature(enable = "avx512f")]
10135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10136#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10137#[rustc_legacy_const_generics(3, 4)]
10138pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10139    src: __m512,
10140    k: __mmask16,
10141    a: __m512,
10142) -> __m512 {
10143    unsafe {
10144        static_assert_uimm_bits!(IMM8, 8);
10145        static_assert_mantissas_sae!(SAE);
10146        let a = a.as_f32x16();
10147        let src = src.as_f32x16();
10148        let r = vrndscaleps(a, IMM8, src, k, SAE);
10149        transmute(r)
10150    }
10151}
10152
10153/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10154/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10155/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10156/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10157/// * [`_MM_FROUND_TO_POS_INF`] : round up
10158/// * [`_MM_FROUND_TO_ZERO`] : truncate
10159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10160///
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10163#[inline]
10164#[target_feature(enable = "avx512f")]
10165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10166#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10167#[rustc_legacy_const_generics(2, 3)]
10168pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10169    k: __mmask16,
10170    a: __m512,
10171) -> __m512 {
10172    unsafe {
10173        static_assert_uimm_bits!(IMM8, 8);
10174        static_assert_mantissas_sae!(SAE);
10175        let a = a.as_f32x16();
10176        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
10177        transmute(r)
10178    }
10179}
10180
10181/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10182/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10183/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10184/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10185/// * [`_MM_FROUND_TO_POS_INF`] : round up
10186/// * [`_MM_FROUND_TO_ZERO`] : truncate
10187/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10188///
10189/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10191#[inline]
10192#[target_feature(enable = "avx512f")]
10193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10194#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10195#[rustc_legacy_const_generics(1, 2)]
10196pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10197    unsafe {
10198        static_assert_uimm_bits!(IMM8, 8);
10199        static_assert_mantissas_sae!(SAE);
10200        let a = a.as_f64x8();
10201        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
10202        transmute(r)
10203    }
10204}
10205
10206/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10207/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10208/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10209/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10210/// * [`_MM_FROUND_TO_POS_INF`] : round up
10211/// * [`_MM_FROUND_TO_ZERO`] : truncate
10212/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10213///
10214/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10216#[inline]
10217#[target_feature(enable = "avx512f")]
10218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10219#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10220#[rustc_legacy_const_generics(3, 4)]
10221pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10222    src: __m512d,
10223    k: __mmask8,
10224    a: __m512d,
10225) -> __m512d {
10226    unsafe {
10227        static_assert_uimm_bits!(IMM8, 8);
10228        static_assert_mantissas_sae!(SAE);
10229        let a = a.as_f64x8();
10230        let src = src.as_f64x8();
10231        let r = vrndscalepd(a, IMM8, src, k, SAE);
10232        transmute(r)
10233    }
10234}
10235
10236/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10237/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10238/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10239/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10240/// * [`_MM_FROUND_TO_POS_INF`] : round up
10241/// * [`_MM_FROUND_TO_ZERO`] : truncate
10242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10243///
10244/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10246#[inline]
10247#[target_feature(enable = "avx512f")]
10248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10249#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10250#[rustc_legacy_const_generics(2, 3)]
10251pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10252    k: __mmask8,
10253    a: __m512d,
10254) -> __m512d {
10255    unsafe {
10256        static_assert_uimm_bits!(IMM8, 8);
10257        static_assert_mantissas_sae!(SAE);
10258        let a = a.as_f64x8();
10259        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
10260        transmute(r)
10261    }
10262}
10263
10264/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10265///
10266/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10267/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10268/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10269/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10270/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10272///
10273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
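///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation),
/// assuming `avx512f` is enabled at compile time:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// let b = _mm512_set1_ps(3.0);
/// // Each lane computes a * 2^floor(b), here 1.5 * 8.0 = 12.0.
/// let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```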
10274#[inline]
10275#[target_feature(enable = "avx512f")]
10276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10277#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10278#[rustc_legacy_const_generics(2)]
10279pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10280    unsafe {
10281        static_assert_rounding!(ROUNDING);
10282        let a = a.as_f32x16();
10283        let b = b.as_f32x16();
10284        let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
10285        transmute(r)
10286    }
10287}
10288
10289/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10290///
10291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10297///
10298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10299#[inline]
10300#[target_feature(enable = "avx512f")]
10301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10302#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10303#[rustc_legacy_const_generics(4)]
10304pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10305    src: __m512,
10306    k: __mmask16,
10307    a: __m512,
10308    b: __m512,
10309) -> __m512 {
10310    unsafe {
10311        static_assert_rounding!(ROUNDING);
10312        let a = a.as_f32x16();
10313        let b = b.as_f32x16();
10314        let src = src.as_f32x16();
10315        let r = vscalefps(a, b, src, k, ROUNDING);
10316        transmute(r)
10317    }
10318}
10319
10320/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10321///
10322/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10330#[inline]
10331#[target_feature(enable = "avx512f")]
10332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10333#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10334#[rustc_legacy_const_generics(3)]
10335pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10336    k: __mmask16,
10337    a: __m512,
10338    b: __m512,
10339) -> __m512 {
10340    unsafe {
10341        static_assert_rounding!(ROUNDING);
10342        let a = a.as_f32x16();
10343        let b = b.as_f32x16();
10344        let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
10345        transmute(r)
10346    }
10347}
10348
10349/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10350///
10351/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10352/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10353/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10354/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10355/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10359#[inline]
10360#[target_feature(enable = "avx512f")]
10361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10362#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10363#[rustc_legacy_const_generics(2)]
10364pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10365    unsafe {
10366        static_assert_rounding!(ROUNDING);
10367        let a = a.as_f64x8();
10368        let b = b.as_f64x8();
10369        let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
10370        transmute(r)
10371    }
10372}
10373
10374/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10375///
10376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10382///
10383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10384#[inline]
10385#[target_feature(enable = "avx512f")]
10386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10387#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10388#[rustc_legacy_const_generics(4)]
10389pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10390    src: __m512d,
10391    k: __mmask8,
10392    a: __m512d,
10393    b: __m512d,
10394) -> __m512d {
10395    unsafe {
10396        static_assert_rounding!(ROUNDING);
10397        let a = a.as_f64x8();
10398        let b = b.as_f64x8();
10399        let src = src.as_f64x8();
10400        let r = vscalefpd(a, b, src, k, ROUNDING);
10401        transmute(r)
10402    }
10403}
10404
10405/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10406///
10407/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10408/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10409/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10410/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10411/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10415#[inline]
10416#[target_feature(enable = "avx512f")]
10417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10418#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10419#[rustc_legacy_const_generics(3)]
10420pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10421    k: __mmask8,
10422    a: __m512d,
10423    b: __m512d,
10424) -> __m512d {
10425    unsafe {
10426        static_assert_rounding!(ROUNDING);
10427        let a = a.as_f64x8();
10428        let b = b.as_f64x8();
10429        let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
10430        transmute(r)
10431    }
10432}
10433
10434/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to select which exception flags are reported.\
10435///
10436/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
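///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation),
/// assuming `avx512f` is enabled at compile time. With an all-zero token table in
/// `c`, every element of `a` is passed through unchanged:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(-1.0);
/// let c = _mm512_setzero_si512();
/// // Token 0 for every input class means "keep the element from `a`".
/// let r = _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, c);
/// ```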
10438#[inline]
10439#[target_feature(enable = "avx512f")]
10440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10441#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10442#[rustc_legacy_const_generics(3, 4)]
10443pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10444    a: __m512,
10445    b: __m512,
10446    c: __m512i,
10447) -> __m512 {
10448    unsafe {
10449        static_assert_uimm_bits!(IMM8, 8);
10450        static_assert_mantissas_sae!(SAE);
10451        let a = a.as_f32x16();
10452        let b = b.as_f32x16();
10453        let c = c.as_i32x16();
10454        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
10455        transmute(r)
10456    }
10457}
10458
10459/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to select which exception flags are reported.\
10460///
10461/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10463#[inline]
10464#[target_feature(enable = "avx512f")]
10465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10466#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10467#[rustc_legacy_const_generics(4, 5)]
10468pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10469    a: __m512,
10470    k: __mmask16,
10471    b: __m512,
10472    c: __m512i,
10473) -> __m512 {
10474    unsafe {
10475        static_assert_uimm_bits!(IMM8, 8);
10476        static_assert_mantissas_sae!(SAE);
10477        let a = a.as_f32x16();
10478        let b = b.as_f32x16();
10479        let c = c.as_i32x16();
10480        let r = vfixupimmps(a, b, c, IMM8, k, SAE);
10481        transmute(r)
10482    }
10483}
10484
10485/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to select which exception flags are reported.\
10486///
10487/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10489#[inline]
10490#[target_feature(enable = "avx512f")]
10491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10492#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10493#[rustc_legacy_const_generics(4, 5)]
10494pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10495    k: __mmask16,
10496    a: __m512,
10497    b: __m512,
10498    c: __m512i,
10499) -> __m512 {
10500    unsafe {
10501        static_assert_uimm_bits!(IMM8, 8);
10502        static_assert_mantissas_sae!(SAE);
10503        let a = a.as_f32x16();
10504        let b = b.as_f32x16();
10505        let c = c.as_i32x16();
10506        let r = vfixupimmpsz(a, b, c, IMM8, k, SAE);
10507        transmute(r)
10508    }
10509}
10510
10511/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to select which exception flags are reported.\
10512///
10513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10515#[inline]
10516#[target_feature(enable = "avx512f")]
10517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10518#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10519#[rustc_legacy_const_generics(3, 4)]
10520pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10521    a: __m512d,
10522    b: __m512d,
10523    c: __m512i,
10524) -> __m512d {
10525    unsafe {
10526        static_assert_uimm_bits!(IMM8, 8);
10527        static_assert_mantissas_sae!(SAE);
10528        let a = a.as_f64x8();
10529        let b = b.as_f64x8();
10530        let c = c.as_i64x8();
10531        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
10532        transmute(r)
10533    }
10534}
10535
10536/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to select which exception flags are reported.\
10537///
10538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10540#[inline]
10541#[target_feature(enable = "avx512f")]
10542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10543#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10544#[rustc_legacy_const_generics(4, 5)]
10545pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10546    a: __m512d,
10547    k: __mmask8,
10548    b: __m512d,
10549    c: __m512i,
10550) -> __m512d {
10551    unsafe {
10552        static_assert_uimm_bits!(IMM8, 8);
10553        static_assert_mantissas_sae!(SAE);
10554        let a = a.as_f64x8();
10555        let b = b.as_f64x8();
10556        let c = c.as_i64x8();
10557        let r = vfixupimmpd(a, b, c, IMM8, k, SAE);
10558        transmute(r)
10559    }
10560}
10561
10562/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to select which exception flags are reported.\
10563///
10564/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10566#[inline]
10567#[target_feature(enable = "avx512f")]
10568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10569#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10570#[rustc_legacy_const_generics(4, 5)]
10571pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10572    k: __mmask8,
10573    a: __m512d,
10574    b: __m512d,
10575    c: __m512i,
10576) -> __m512d {
10577    unsafe {
10578        static_assert_uimm_bits!(IMM8, 8);
10579        static_assert_mantissas_sae!(SAE);
10580        let a = a.as_f64x8();
10581        let b = b.as_f64x8();
10582        let c = c.as_i64x8();
10583        let r = vfixupimmpdz(a, b, c, IMM8, k, SAE);
10584        transmute(r)
10585    }
10586}
10587
10588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10590///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10591///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10592///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10593///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10594/// The sign is determined by sc which can take the following values:\
10595///    _MM_MANT_SIGN_src     // sign = sign(src)\
10596///    _MM_MANT_SIGN_zero    // sign = 0\
10597///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10598/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10599///
10600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
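///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation),
/// assuming `avx512f` is enabled at compile time and using the interval and sign
/// constants named in the list above:
///
/// ```ignore
/// let a = _mm512_set1_ps(10.0);
/// // 10.0 == 1.25 * 2^3, so the mantissa normalized to [1, 2) is 1.25 in every lane.
/// let r = _mm512_getmant_round_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC>(a);
/// ```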
10601#[inline]
10602#[target_feature(enable = "avx512f")]
10603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10604#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10605#[rustc_legacy_const_generics(1, 2, 3)]
10606pub fn _mm512_getmant_round_ps<
10607    const NORM: _MM_MANTISSA_NORM_ENUM,
10608    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10609    const SAE: i32,
10610>(
10611    a: __m512,
10612) -> __m512 {
10613    unsafe {
10614        static_assert_uimm_bits!(NORM, 4);
10615        static_assert_uimm_bits!(SIGN, 2);
10616        static_assert_mantissas_sae!(SAE);
10617        let a = a.as_f32x16();
10618        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
10619        transmute(r)
10620    }
10621}
10622
10623/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10624/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10625///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10626///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10627///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10628///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10629/// The sign is determined by sc which can take the following values:\
10630///    _MM_MANT_SIGN_src     // sign = sign(src)\
10631///    _MM_MANT_SIGN_zero    // sign = 0\
10632///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10633/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10634///
10635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10636#[inline]
10637#[target_feature(enable = "avx512f")]
10638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10639#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10640#[rustc_legacy_const_generics(3, 4, 5)]
10641pub fn _mm512_mask_getmant_round_ps<
10642    const NORM: _MM_MANTISSA_NORM_ENUM,
10643    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10644    const SAE: i32,
10645>(
10646    src: __m512,
10647    k: __mmask16,
10648    a: __m512,
10649) -> __m512 {
10650    unsafe {
10651        static_assert_uimm_bits!(NORM, 4);
10652        static_assert_uimm_bits!(SIGN, 2);
10653        static_assert_mantissas_sae!(SAE);
10654        let a = a.as_f32x16();
10655        let src = src.as_f32x16();
10656        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
10657        transmute(r)
10658    }
10659}
10660
10661/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10662/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10663///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10664///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10665///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10666///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10667/// The sign is determined by sc which can take the following values:\
10668///    _MM_MANT_SIGN_src     // sign = sign(src)\
10669///    _MM_MANT_SIGN_zero    // sign = 0\
10670///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10671/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10672///
10673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10674#[inline]
10675#[target_feature(enable = "avx512f")]
10676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10677#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10678#[rustc_legacy_const_generics(2, 3, 4)]
10679pub fn _mm512_maskz_getmant_round_ps<
10680    const NORM: _MM_MANTISSA_NORM_ENUM,
10681    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10682    const SAE: i32,
10683>(
10684    k: __mmask16,
10685    a: __m512,
10686) -> __m512 {
10687    unsafe {
10688        static_assert_uimm_bits!(NORM, 4);
10689        static_assert_uimm_bits!(SIGN, 2);
10690        static_assert_mantissas_sae!(SAE);
10691        let a = a.as_f32x16();
10692        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
10693        transmute(r)
10694    }
10695}
10696
10697/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10698/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10699///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10700///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10701///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10702///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10703/// The sign is determined by sc which can take the following values:\
10704///    _MM_MANT_SIGN_src     // sign = sign(src)\
10705///    _MM_MANT_SIGN_zero    // sign = 0\
10706///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10707/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10708///
10709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10710#[inline]
10711#[target_feature(enable = "avx512f")]
10712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10713#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10714#[rustc_legacy_const_generics(1, 2, 3)]
10715pub fn _mm512_getmant_round_pd<
10716    const NORM: _MM_MANTISSA_NORM_ENUM,
10717    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10718    const SAE: i32,
10719>(
10720    a: __m512d,
10721) -> __m512d {
10722    unsafe {
10723        static_assert_uimm_bits!(NORM, 4);
10724        static_assert_uimm_bits!(SIGN, 2);
10725        static_assert_mantissas_sae!(SAE);
10726        let a = a.as_f64x8();
10727        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
10728        transmute(r)
10729    }
10730}
10731
10732/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10733/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10734///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10735///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10736///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10737///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10738/// The sign is determined by sc which can take the following values:\
10739///    _MM_MANT_SIGN_src     // sign = sign(src)\
10740///    _MM_MANT_SIGN_zero    // sign = 0\
10741///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743///
10744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
10745#[inline]
10746#[target_feature(enable = "avx512f")]
10747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10748#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10749#[rustc_legacy_const_generics(3, 4, 5)]
10750pub fn _mm512_mask_getmant_round_pd<
10751    const NORM: _MM_MANTISSA_NORM_ENUM,
10752    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10753    const SAE: i32,
10754>(
10755    src: __m512d,
10756    k: __mmask8,
10757    a: __m512d,
10758) -> __m512d {
10759    unsafe {
10760        static_assert_uimm_bits!(NORM, 4);
10761        static_assert_uimm_bits!(SIGN, 2);
10762        static_assert_mantissas_sae!(SAE);
10763        let a = a.as_f64x8();
10764        let src = src.as_f64x8();
10765        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
10766        transmute(r)
10767    }
10768}
10769
10770/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10771/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10772///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10773///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10774///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10775///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10776/// The sign is determined by sc which can take the following values:\
10777///    _MM_MANT_SIGN_src     // sign = sign(src)\
10778///    _MM_MANT_SIGN_zero    // sign = 0\
10779///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781///
10782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
10783#[inline]
10784#[target_feature(enable = "avx512f")]
10785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10786#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10787#[rustc_legacy_const_generics(2, 3, 4)]
10788pub fn _mm512_maskz_getmant_round_pd<
10789    const NORM: _MM_MANTISSA_NORM_ENUM,
10790    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10791    const SAE: i32,
10792>(
10793    k: __mmask8,
10794    a: __m512d,
10795) -> __m512d {
10796    unsafe {
10797        static_assert_uimm_bits!(NORM, 4);
10798        static_assert_uimm_bits!(SIGN, 2);
10799        static_assert_mantissas_sae!(SAE);
10800        let a = a.as_f64x8();
10801        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
10802        transmute(r)
10803    }
10804}
10805
10806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10807///
10808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
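///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation),
/// assuming `avx512f` is enabled at compile time:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// // Uses the current MXCSR rounding mode (round-to-nearest-even by default),
/// // so 1.5 converts to 2 in every lane.
/// let r = _mm512_cvtps_epi32(a);
/// ```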
10809#[inline]
10810#[target_feature(enable = "avx512f")]
10811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10812#[cfg_attr(test, assert_instr(vcvtps2dq))]
10813pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
10814    unsafe {
10815        transmute(vcvtps2dq(
10816            a.as_f32x16(),
10817            i32x16::ZERO,
10818            0b11111111_11111111,
10819            _MM_FROUND_CUR_DIRECTION,
10820        ))
10821    }
10822}
10823
10824/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10825///
10826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
10827#[inline]
10828#[target_feature(enable = "avx512f")]
10829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10830#[cfg_attr(test, assert_instr(vcvtps2dq))]
10831pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10832    unsafe {
10833        transmute(vcvtps2dq(
10834            a.as_f32x16(),
10835            src.as_i32x16(),
10836            k,
10837            _MM_FROUND_CUR_DIRECTION,
10838        ))
10839    }
10840}
10841
10842/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10843///
10844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
10845#[inline]
10846#[target_feature(enable = "avx512f")]
10847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10848#[cfg_attr(test, assert_instr(vcvtps2dq))]
10849pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
10850    unsafe {
10851        transmute(vcvtps2dq(
10852            a.as_f32x16(),
10853            i32x16::ZERO,
10854            k,
10855            _MM_FROUND_CUR_DIRECTION,
10856        ))
10857    }
10858}
10859
10860/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10861///
10862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
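///
/// A minimal writemask sketch (illustrative only, not part of Intel's documentation),
/// assuming `avx512f` and `avx512vl` are enabled at compile time:
///
/// ```ignore
/// let src = _mm256_set1_epi32(-1);
/// let a = _mm256_set1_ps(3.0);
/// // Lanes 0..4 are converted to 3; lanes 4..8 keep the -1 from `src`.
/// let r = _mm256_mask_cvtps_epi32(src, 0b0000_1111, a);
/// ```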
10863#[inline]
10864#[target_feature(enable = "avx512f,avx512vl")]
10865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10866#[cfg_attr(test, assert_instr(vcvtps2dq))]
10867pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10868    unsafe {
10869        let convert = _mm256_cvtps_epi32(a);
10870        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
10871    }
10872}
10873
10874/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
10877#[inline]
10878#[target_feature(enable = "avx512f,avx512vl")]
10879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10880#[cfg_attr(test, assert_instr(vcvtps2dq))]
10881pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
10882    unsafe {
10883        let convert = _mm256_cvtps_epi32(a);
10884        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
10885    }
10886}
10887
10888/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10889///
10890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
10891#[inline]
10892#[target_feature(enable = "avx512f,avx512vl")]
10893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10894#[cfg_attr(test, assert_instr(vcvtps2dq))]
10895pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10896    unsafe {
10897        let convert = _mm_cvtps_epi32(a);
10898        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10899    }
10900}
10901
10902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10903///
10904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
10905#[inline]
10906#[target_feature(enable = "avx512f,avx512vl")]
10907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10908#[cfg_attr(test, assert_instr(vcvtps2dq))]
10909pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
10910    unsafe {
10911        let convert = _mm_cvtps_epi32(a);
10912        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10913    }
10914}
10915
10916/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10917///
10918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
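///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation),
/// assuming `avx512f` is enabled at compile time:
///
/// ```ignore
/// let a = _mm512_set1_ps(7.25);
/// // Converts to unsigned 32-bit integers with the current rounding mode; 7.25 becomes 7.
/// let r = _mm512_cvtps_epu32(a);
/// ```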
10919#[inline]
10920#[target_feature(enable = "avx512f")]
10921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10922#[cfg_attr(test, assert_instr(vcvtps2udq))]
10923pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
10924    unsafe {
10925        transmute(vcvtps2udq(
10926            a.as_f32x16(),
10927            u32x16::ZERO,
10928            0b11111111_11111111,
10929            _MM_FROUND_CUR_DIRECTION,
10930        ))
10931    }
10932}
10933
10934/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10935///
10936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
10937#[inline]
10938#[target_feature(enable = "avx512f")]
10939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10940#[cfg_attr(test, assert_instr(vcvtps2udq))]
10941pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10942    unsafe {
10943        transmute(vcvtps2udq(
10944            a.as_f32x16(),
10945            src.as_u32x16(),
10946            k,
10947            _MM_FROUND_CUR_DIRECTION,
10948        ))
10949    }
10950}
10951
10952/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1757)
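///
/// # Example
///
/// A minimal usage sketch, assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm512_set1_ps(7.0);
/// // Only the lanes selected by the mask are converted; all other lanes
/// // of `r` are zeroed.
/// let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
/// ```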
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10958#[cfg_attr(test, assert_instr(vcvtps2udq))]
10959pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
10960    unsafe {
10961        transmute(vcvtps2udq(
10962            a.as_f32x16(),
10963            u32x16::ZERO,
10964            k,
10965            _MM_FROUND_CUR_DIRECTION,
10966        ))
10967    }
10968}
10969
10970/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10971///
10972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
10973#[inline]
10974#[target_feature(enable = "avx512f,avx512vl")]
10975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10976#[cfg_attr(test, assert_instr(vcvtps2udq))]
10977pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
10978    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
10979}
10980
10981/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10982///
10983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
10984#[inline]
10985#[target_feature(enable = "avx512f,avx512vl")]
10986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10987#[cfg_attr(test, assert_instr(vcvtps2udq))]
10988pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10989    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
10990}
10991
10992/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10993///
10994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
10995#[inline]
10996#[target_feature(enable = "avx512f,avx512vl")]
10997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10998#[cfg_attr(test, assert_instr(vcvtps2udq))]
10999pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
11000    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11001}
11002
11003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11004///
11005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11006#[inline]
11007#[target_feature(enable = "avx512f,avx512vl")]
11008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11009#[cfg_attr(test, assert_instr(vcvtps2udq))]
11010pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
11011    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11012}
11013
11014/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11015///
11016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11017#[inline]
11018#[target_feature(enable = "avx512f,avx512vl")]
11019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11020#[cfg_attr(test, assert_instr(vcvtps2udq))]
11021pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11022    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
11023}
11024
11025/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11026///
11027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11028#[inline]
11029#[target_feature(enable = "avx512f,avx512vl")]
11030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11031#[cfg_attr(test, assert_instr(vcvtps2udq))]
11032pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11033    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
11034}
11035
11036/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11037///
11038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
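///
/// # Example
///
/// A minimal usage sketch, assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm256_set1_ps(0.5);
/// // Each of the eight f32 lanes widens losslessly to an f64 lane.
/// let r = _mm512_cvtps_pd(a);
/// ```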
11039#[inline]
11040#[target_feature(enable = "avx512f")]
11041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11042#[cfg_attr(test, assert_instr(vcvtps2pd))]
11043pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11044    unsafe {
11045        transmute(vcvtps2pd(
11046            a.as_f32x8(),
11047            f64x8::ZERO,
11048            0b11111111,
11049            _MM_FROUND_CUR_DIRECTION,
11050        ))
11051    }
11052}
11053
11054/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11055///
11056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11057#[inline]
11058#[target_feature(enable = "avx512f")]
11059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11060#[cfg_attr(test, assert_instr(vcvtps2pd))]
11061pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11062    unsafe {
11063        transmute(vcvtps2pd(
11064            a.as_f32x8(),
11065            src.as_f64x8(),
11066            k,
11067            _MM_FROUND_CUR_DIRECTION,
11068        ))
11069    }
11070}
11071
11072/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11073///
11074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11075#[inline]
11076#[target_feature(enable = "avx512f")]
11077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11078#[cfg_attr(test, assert_instr(vcvtps2pd))]
11079pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11080    unsafe {
11081        transmute(vcvtps2pd(
11082            a.as_f32x8(),
11083            f64x8::ZERO,
11084            k,
11085            _MM_FROUND_CUR_DIRECTION,
11086        ))
11087    }
11088}
11089
11090/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11091///
11092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
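///
/// # Example
///
/// A minimal usage sketch, assuming `avx512f` is enabled:
///
/// ```ignore
/// let v2 = _mm512_set1_ps(2.0);
/// // Only the lower eight f32 lanes of `v2` are converted; the upper half
/// // of the input is ignored.
/// let r = _mm512_cvtpslo_pd(v2);
/// ```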
11093#[inline]
11094#[target_feature(enable = "avx512f")]
11095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11096#[cfg_attr(test, assert_instr(vcvtps2pd))]
11097pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11098    unsafe {
11099        transmute(vcvtps2pd(
11100            _mm512_castps512_ps256(v2).as_f32x8(),
11101            f64x8::ZERO,
11102            0b11111111,
11103            _MM_FROUND_CUR_DIRECTION,
11104        ))
11105    }
11106}
11107
11108/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11109///
11110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11111#[inline]
11112#[target_feature(enable = "avx512f")]
11113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11114#[cfg_attr(test, assert_instr(vcvtps2pd))]
11115pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11116    unsafe {
11117        transmute(vcvtps2pd(
11118            _mm512_castps512_ps256(v2).as_f32x8(),
11119            src.as_f64x8(),
11120            k,
11121            _MM_FROUND_CUR_DIRECTION,
11122        ))
11123    }
11124}
11125
11126/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11127///
11128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
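///
/// # Example
///
/// A minimal usage sketch, assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0e300);
/// // Narrowing can overflow: values outside the f32 range become infinity.
/// let r = _mm512_cvtpd_ps(a);
/// ```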
11129#[inline]
11130#[target_feature(enable = "avx512f")]
11131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11132#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11133pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11134    unsafe {
11135        transmute(vcvtpd2ps(
11136            a.as_f64x8(),
11137            f32x8::ZERO,
11138            0b11111111,
11139            _MM_FROUND_CUR_DIRECTION,
11140        ))
11141    }
11142}
11143
11144/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11145///
11146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11147#[inline]
11148#[target_feature(enable = "avx512f")]
11149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11150#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11151pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11152    unsafe {
11153        transmute(vcvtpd2ps(
11154            a.as_f64x8(),
11155            src.as_f32x8(),
11156            k,
11157            _MM_FROUND_CUR_DIRECTION,
11158        ))
11159    }
11160}
11161
11162/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11163///
11164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11165#[inline]
11166#[target_feature(enable = "avx512f")]
11167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11168#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11169pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11170    unsafe {
11171        transmute(vcvtpd2ps(
11172            a.as_f64x8(),
11173            f32x8::ZERO,
11174            k,
11175            _MM_FROUND_CUR_DIRECTION,
11176        ))
11177    }
11178}
11179
11180/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11181///
11182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11183#[inline]
11184#[target_feature(enable = "avx512f,avx512vl")]
11185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11186#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11187pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11188    unsafe {
11189        let convert = _mm256_cvtpd_ps(a);
11190        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11191    }
11192}
11193
11194/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11197#[inline]
11198#[target_feature(enable = "avx512f,avx512vl")]
11199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11200#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11201pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11202    unsafe {
11203        let convert = _mm256_cvtpd_ps(a);
11204        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11205    }
11206}
11207
11208/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11209///
11210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11211#[inline]
11212#[target_feature(enable = "avx512f,avx512vl")]
11213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11214#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11215pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11216    unsafe {
11217        let convert = _mm_cvtpd_ps(a);
11218        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11219    }
11220}
11221
11222/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11223///
11224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11225#[inline]
11226#[target_feature(enable = "avx512f,avx512vl")]
11227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11228#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11229pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11230    unsafe {
11231        let convert = _mm_cvtpd_ps(a);
11232        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11233    }
11234}
11235
11236/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11237///
11238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
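///
/// # Example
///
/// A minimal usage sketch, assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm512_set1_pd(-2.5);
/// // Converts with the current rounding mode; -2.5 rounds to -2 under the
/// // default round-to-nearest-even mode.
/// let r = _mm512_cvtpd_epi32(a);
/// ```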
11239#[inline]
11240#[target_feature(enable = "avx512f")]
11241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11242#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11243pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11244    unsafe {
11245        transmute(vcvtpd2dq(
11246            a.as_f64x8(),
11247            i32x8::ZERO,
11248            0b11111111,
11249            _MM_FROUND_CUR_DIRECTION,
11250        ))
11251    }
11252}
11253
11254/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11255///
11256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11257#[inline]
11258#[target_feature(enable = "avx512f")]
11259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11260#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11261pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11262    unsafe {
11263        transmute(vcvtpd2dq(
11264            a.as_f64x8(),
11265            src.as_i32x8(),
11266            k,
11267            _MM_FROUND_CUR_DIRECTION,
11268        ))
11269    }
11270}
11271
11272/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11273///
11274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11275#[inline]
11276#[target_feature(enable = "avx512f")]
11277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11278#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11279pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11280    unsafe {
11281        transmute(vcvtpd2dq(
11282            a.as_f64x8(),
11283            i32x8::ZERO,
11284            k,
11285            _MM_FROUND_CUR_DIRECTION,
11286        ))
11287    }
11288}
11289
11290/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11291///
11292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11293#[inline]
11294#[target_feature(enable = "avx512f,avx512vl")]
11295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11296#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11297pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11298    unsafe {
11299        let convert = _mm256_cvtpd_epi32(a);
11300        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11301    }
11302}
11303
11304/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11305///
11306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11307#[inline]
11308#[target_feature(enable = "avx512f,avx512vl")]
11309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11310#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11311pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11312    unsafe {
11313        let convert = _mm256_cvtpd_epi32(a);
11314        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11315    }
11316}
11317
11318/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11319///
11320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11321#[inline]
11322#[target_feature(enable = "avx512f,avx512vl")]
11323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11324#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11325pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11326    unsafe {
11327        let convert = _mm_cvtpd_epi32(a);
11328        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11329    }
11330}
11331
11332/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11333///
11334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11335#[inline]
11336#[target_feature(enable = "avx512f,avx512vl")]
11337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11338#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11339pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11340    unsafe {
11341        let convert = _mm_cvtpd_epi32(a);
11342        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11343    }
11344}
11345
11346/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11347///
11348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
11349#[inline]
11350#[target_feature(enable = "avx512f")]
11351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11352#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11353pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11354    unsafe {
11355        transmute(vcvtpd2udq(
11356            a.as_f64x8(),
11357            u32x8::ZERO,
11358            0b11111111,
11359            _MM_FROUND_CUR_DIRECTION,
11360        ))
11361    }
11362}
11363
11364/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11365///
11366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11367#[inline]
11368#[target_feature(enable = "avx512f")]
11369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11370#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11371pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11372    unsafe {
11373        transmute(vcvtpd2udq(
11374            a.as_f64x8(),
11375            src.as_u32x8(),
11376            k,
11377            _MM_FROUND_CUR_DIRECTION,
11378        ))
11379    }
11380}
11381
11382/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11383///
11384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11385#[inline]
11386#[target_feature(enable = "avx512f")]
11387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11388#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11389pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11390    unsafe {
11391        transmute(vcvtpd2udq(
11392            a.as_f64x8(),
11393            u32x8::ZERO,
11394            k,
11395            _MM_FROUND_CUR_DIRECTION,
11396        ))
11397    }
11398}
11399
11400/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11401///
11402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11403#[inline]
11404#[target_feature(enable = "avx512f,avx512vl")]
11405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11406#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11407pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
11408    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
11409}
11410
11411/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11412///
11413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11414#[inline]
11415#[target_feature(enable = "avx512f,avx512vl")]
11416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11417#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11418pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11419    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
11420}
11421
11422/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11423///
11424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11425#[inline]
11426#[target_feature(enable = "avx512f,avx512vl")]
11427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11428#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11429pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
11430    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
11431}
11432
11433/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11434///
11435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11436#[inline]
11437#[target_feature(enable = "avx512f,avx512vl")]
11438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11439#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11440pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
11441    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
11442}
11443
11444/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11445///
11446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11447#[inline]
11448#[target_feature(enable = "avx512f,avx512vl")]
11449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11450#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11451pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11452    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
11453}
11454
11455/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11458#[inline]
11459#[target_feature(enable = "avx512f,avx512vl")]
11460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11461#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11462pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
11463    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
11464}
11465
11466/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11467///
11468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
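///
/// # Example
///
/// A minimal usage sketch, assuming `avx512f` is enabled:
///
/// ```ignore
/// let v2 = _mm512_set1_pd(4.0);
/// // The eight converted f32 values land in the lower half of `r`; the
/// // upper eight lanes are set to zero.
/// let r = _mm512_cvtpd_pslo(v2);
/// ```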
11469#[inline]
11470#[target_feature(enable = "avx512f")]
11471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11472#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11473pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11474    unsafe {
11475        let r: f32x8 = vcvtpd2ps(
11476            v2.as_f64x8(),
11477            f32x8::ZERO,
11478            0b11111111,
11479            _MM_FROUND_CUR_DIRECTION,
11480        );
11481        simd_shuffle!(
11482            r,
11483            f32x8::ZERO,
11484            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11485        )
11486    }
11487}
11488
11489/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11492#[inline]
11493#[target_feature(enable = "avx512f")]
11494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11497    unsafe {
11498        let r: f32x8 = vcvtpd2ps(
11499            v2.as_f64x8(),
11500            _mm512_castps512_ps256(src).as_f32x8(),
11501            k,
11502            _MM_FROUND_CUR_DIRECTION,
11503        );
11504        simd_shuffle!(
11505            r,
11506            f32x8::ZERO,
11507            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11508        )
11509    }
11510}
11511
11512/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11513///
11514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
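///
/// # Example
///
/// A minimal usage sketch, assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm_set1_epi8(-1);
/// // Sign extension preserves the value: every i32 lane of `r` is -1.
/// let r = _mm512_cvtepi8_epi32(a);
/// ```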
11515#[inline]
11516#[target_feature(enable = "avx512f")]
11517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11518#[cfg_attr(test, assert_instr(vpmovsxbd))]
11519pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11520    unsafe {
11521        let a = a.as_i8x16();
11522        transmute::<i32x16, _>(simd_cast(a))
11523    }
11524}
11525
11526/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11527///
11528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11529#[inline]
11530#[target_feature(enable = "avx512f")]
11531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11532#[cfg_attr(test, assert_instr(vpmovsxbd))]
11533pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11534    unsafe {
11535        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11536        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11537    }
11538}
11539
11540/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11541///
11542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11543#[inline]
11544#[target_feature(enable = "avx512f")]
11545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11546#[cfg_attr(test, assert_instr(vpmovsxbd))]
11547pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11548    unsafe {
11549        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
11550        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11551    }
11552}
11553
11554/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11555///
11556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11557#[inline]
11558#[target_feature(enable = "avx512f,avx512vl")]
11559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11560#[cfg_attr(test, assert_instr(vpmovsxbd))]
11561pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11562    unsafe {
11563        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11564        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11565    }
11566}
11567
11568/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11574#[cfg_attr(test, assert_instr(vpmovsxbd))]
11575pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11576    unsafe {
11577        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
11578        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11579    }
11580}
11581
11582/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11588#[cfg_attr(test, assert_instr(vpmovsxbd))]
11589pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11590    unsafe {
11591        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11592        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11593    }
11594}
11595
11596/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11602#[cfg_attr(test, assert_instr(vpmovsxbd))]
11603pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11604    unsafe {
11605        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
11606        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11607    }
11608}
11609
11610/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11611///
11612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
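///
/// # Example
///
/// A minimal usage sketch, assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm_set1_epi8(-128);
/// // Only the low 8 bytes of `a` are used; each becomes an i64 lane of -128.
/// let r = _mm512_cvtepi8_epi64(a);
/// ```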
11613#[inline]
11614#[target_feature(enable = "avx512f")]
11615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11616#[cfg_attr(test, assert_instr(vpmovsxbq))]
11617pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11618    unsafe {
11619        let a = a.as_i8x16();
11620        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11621        transmute::<i64x8, _>(simd_cast(v64))
11622    }
11623}
11624
11625/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11626///
11627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11628#[inline]
11629#[target_feature(enable = "avx512f")]
11630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11631#[cfg_attr(test, assert_instr(vpmovsxbq))]
11632pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11633    unsafe {
11634        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11635        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11636    }
11637}
11638
11639/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11645#[cfg_attr(test, assert_instr(vpmovsxbq))]
11646pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11647    unsafe {
11648        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
11649        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11650    }
11651}
11652
11653/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11654///
11655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11656#[inline]
11657#[target_feature(enable = "avx512f,avx512vl")]
11658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11659#[cfg_attr(test, assert_instr(vpmovsxbq))]
11660pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11661    unsafe {
11662        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11663        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11664    }
11665}
11666
11667/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11668///
11669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11670#[inline]
11671#[target_feature(enable = "avx512f,avx512vl")]
11672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11673#[cfg_attr(test, assert_instr(vpmovsxbq))]
11674pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11675    unsafe {
11676        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
11677        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11678    }
11679}
11680
11681/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11684#[inline]
11685#[target_feature(enable = "avx512f,avx512vl")]
11686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11687#[cfg_attr(test, assert_instr(vpmovsxbq))]
11688pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689    unsafe {
11690        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11691        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11692    }
11693}
11694
11695/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11696///
11697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11698#[inline]
11699#[target_feature(enable = "avx512f,avx512vl")]
11700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11701#[cfg_attr(test, assert_instr(vpmovsxbq))]
11702pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11703    unsafe {
11704        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
11705        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11706    }
11707}
11708
11709/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11710///
11711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
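///
/// # Example
///
/// A minimal usage sketch, assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm_set1_epi8(-1);
/// // The bytes are treated as unsigned, so every i32 lane of `r` is 255.
/// let r = _mm512_cvtepu8_epi32(a);
/// ```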
11712#[inline]
11713#[target_feature(enable = "avx512f")]
11714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11715#[cfg_attr(test, assert_instr(vpmovzxbd))]
11716pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
11717    unsafe {
11718        let a = a.as_u8x16();
11719        transmute::<i32x16, _>(simd_cast(a))
11720    }
11721}
11722
11723/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11724///
11725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
11726#[inline]
11727#[target_feature(enable = "avx512f")]
11728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11729#[cfg_attr(test, assert_instr(vpmovzxbd))]
11730pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11731    unsafe {
11732        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
11733        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11734    }
11735}
11736
11737/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11738///
11739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
11740#[inline]
11741#[target_feature(enable = "avx512f")]
11742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11743#[cfg_attr(test, assert_instr(vpmovzxbd))]
11744pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11745    unsafe {
11746        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
11747        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11748    }
11749}
11750
11751/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11752///
11753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
11754#[inline]
11755#[target_feature(enable = "avx512f,avx512vl")]
11756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11757#[cfg_attr(test, assert_instr(vpmovzxbd))]
11758pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11759    unsafe {
11760        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
11761        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11762    }
11763}
11764
11765/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11766///
11767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
11768#[inline]
11769#[target_feature(enable = "avx512f,avx512vl")]
11770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11771#[cfg_attr(test, assert_instr(vpmovzxbd))]
11772pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11773    unsafe {
11774        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
11775        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11776    }
11777}
11778
11779/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11780///
11781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
11782#[inline]
11783#[target_feature(enable = "avx512f,avx512vl")]
11784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11785#[cfg_attr(test, assert_instr(vpmovzxbd))]
11786pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11787    unsafe {
11788        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
11789        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11790    }
11791}
11792
11793/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11794///
11795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
11796#[inline]
11797#[target_feature(enable = "avx512f,avx512vl")]
11798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11799#[cfg_attr(test, assert_instr(vpmovzxbd))]
11800pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11801    unsafe {
11802        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
11803        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11804    }
11805}
11806
11807/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11808///
11809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
11810#[inline]
11811#[target_feature(enable = "avx512f")]
11812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11813#[cfg_attr(test, assert_instr(vpmovzxbq))]
11814pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
11815    unsafe {
11816        let a = a.as_u8x16();
11817        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11818        transmute::<i64x8, _>(simd_cast(v64))
11819    }
11820}
11821
11822/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11823///
11824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
11825#[inline]
11826#[target_feature(enable = "avx512f")]
11827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11828#[cfg_attr(test, assert_instr(vpmovzxbq))]
11829pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11830    unsafe {
11831        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
11832        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11833    }
11834}
11835
11836/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11837///
11838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
11839#[inline]
11840#[target_feature(enable = "avx512f")]
11841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11842#[cfg_attr(test, assert_instr(vpmovzxbq))]
11843pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11844    unsafe {
11845        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
11846        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11847    }
11848}
11849
11850/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11851///
11852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
11853#[inline]
11854#[target_feature(enable = "avx512f,avx512vl")]
11855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11856#[cfg_attr(test, assert_instr(vpmovzxbq))]
11857pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11858    unsafe {
11859        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11860        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11861    }
11862}
11863
11864/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11865///
11866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
11867#[inline]
11868#[target_feature(enable = "avx512f,avx512vl")]
11869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11870#[cfg_attr(test, assert_instr(vpmovzxbq))]
11871pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11872    unsafe {
11873        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11874        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11875    }
11876}
11877
11878/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11879///
11880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
11881#[inline]
11882#[target_feature(enable = "avx512f,avx512vl")]
11883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11884#[cfg_attr(test, assert_instr(vpmovzxbq))]
11885pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11886    unsafe {
11887        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11888        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11889    }
11890}
11891
11892/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
11895#[inline]
11896#[target_feature(enable = "avx512f,avx512vl")]
11897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11898#[cfg_attr(test, assert_instr(vpmovzxbq))]
11899pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11900    unsafe {
11901        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11902        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11903    }
11904}
11905
11906/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
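///
/// # Example
///
/// A minimal usage sketch, assuming `avx512f` is enabled:
///
/// ```ignore
/// let a = _mm256_set1_epi16(-300);
/// // All sixteen i16 lanes are sign extended, so every i32 lane of `r` is -300.
/// let r = _mm512_cvtepi16_epi32(a);
/// ```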
11909#[inline]
11910#[target_feature(enable = "avx512f")]
11911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11912#[cfg_attr(test, assert_instr(vpmovsxwd))]
11913pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11914    unsafe {
11915        let a = a.as_i16x16();
11916        transmute::<i32x16, _>(simd_cast(a))
11917    }
11918}
11919
11920/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11921///
11922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11923#[inline]
11924#[target_feature(enable = "avx512f")]
11925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11926#[cfg_attr(test, assert_instr(vpmovsxwd))]
11927pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11928    unsafe {
11929        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11930        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11931    }
11932}
11933
11934/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11935///
11936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11937#[inline]
11938#[target_feature(enable = "avx512f")]
11939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11940#[cfg_attr(test, assert_instr(vpmovsxwd))]
11941pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11942    unsafe {
11943        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11944        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11945    }
11946}
11947
11948/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11954#[cfg_attr(test, assert_instr(vpmovsxwd))]
11955pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11956    unsafe {
11957        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11958        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11959    }
11960}
11961
11962/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11963///
11964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11965#[inline]
11966#[target_feature(enable = "avx512f,avx512vl")]
11967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11968#[cfg_attr(test, assert_instr(vpmovsxwd))]
11969pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11970    unsafe {
11971        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11972        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11973    }
11974}
11975
11976/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11977///
11978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11979#[inline]
11980#[target_feature(enable = "avx512f,avx512vl")]
11981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11982#[cfg_attr(test, assert_instr(vpmovsxwd))]
11983pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11984    unsafe {
11985        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
11986        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11987    }
11988}
11989
11990/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11993#[inline]
11994#[target_feature(enable = "avx512f,avx512vl")]
11995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11996#[cfg_attr(test, assert_instr(vpmovsxwd))]
11997pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11998    unsafe {
11999        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
12000        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12001    }
12002}
12003
12004/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12007#[inline]
12008#[target_feature(enable = "avx512f")]
12009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12010#[cfg_attr(test, assert_instr(vpmovsxwq))]
12011pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12012    unsafe {
12013        let a = a.as_i16x8();
12014        transmute::<i64x8, _>(simd_cast(a))
12015    }
12016}
12017
12018/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12019///
12020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12021#[inline]
12022#[target_feature(enable = "avx512f")]
12023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12024#[cfg_attr(test, assert_instr(vpmovsxwq))]
12025pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12026    unsafe {
12027        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12028        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12029    }
12030}
12031
12032/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12033///
12034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12035#[inline]
12036#[target_feature(enable = "avx512f")]
12037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12038#[cfg_attr(test, assert_instr(vpmovsxwq))]
12039pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12040    unsafe {
12041        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
12042        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12043    }
12044}
12045
12046/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12047///
12048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12049#[inline]
12050#[target_feature(enable = "avx512f,avx512vl")]
12051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12052#[cfg_attr(test, assert_instr(vpmovsxwq))]
12053pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12054    unsafe {
12055        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12056        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12057    }
12058}
12059
12060/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12061///
12062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12063#[inline]
12064#[target_feature(enable = "avx512f,avx512vl")]
12065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12066#[cfg_attr(test, assert_instr(vpmovsxwq))]
12067pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12068    unsafe {
12069        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
12070        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12071    }
12072}
12073
12074/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12075///
12076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12077#[inline]
12078#[target_feature(enable = "avx512f,avx512vl")]
12079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12080#[cfg_attr(test, assert_instr(vpmovsxwq))]
12081pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12082    unsafe {
12083        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12084        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12085    }
12086}
12087
12088/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12089///
12090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12091#[inline]
12092#[target_feature(enable = "avx512f,avx512vl")]
12093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12094#[cfg_attr(test, assert_instr(vpmovsxwq))]
12095pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12096    unsafe {
12097        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
12098        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12099    }
12100}
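
// Illustrative sketch, not part of the original source: the 128-bit form reads
// only the two lowest 16-bit lanes (the low 4 bytes) of `a`. With mask bit 0
// set and bit 1 clear, lane 0 is sign extended and lane 1 is copied from
// `src`. The helper name and mask constant are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _demo_mask_cvtepi16_epi64_128(src: __m128i, a: __m128i) -> __m128i {
    _mm_mask_cvtepi16_epi64(src, 0b01, a)
}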
12101
12102/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12103///
12104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
12105#[inline]
12106#[target_feature(enable = "avx512f")]
12107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12108#[cfg_attr(test, assert_instr(vpmovzxwd))]
12109pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12110    unsafe {
12111        let a = a.as_u16x16();
12112        transmute::<i32x16, _>(simd_cast(a))
12113    }
12114}
12115
12116/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12117///
12118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12119#[inline]
12120#[target_feature(enable = "avx512f")]
12121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12122#[cfg_attr(test, assert_instr(vpmovzxwd))]
12123pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12124    unsafe {
12125        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12126        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12127    }
12128}
12129
12130/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12131///
12132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12133#[inline]
12134#[target_feature(enable = "avx512f")]
12135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12136#[cfg_attr(test, assert_instr(vpmovzxwd))]
12137pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12138    unsafe {
12139        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
12140        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12141    }
12142}
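
// Illustrative sketch, not part of the original source: the signed and
// unsigned widenings differ only for lanes whose top bit is set. A 16-bit lane
// holding 0xFFFF becomes -1 under `_mm512_cvtepi16_epi32` but 65535 under
// `_mm512_cvtepu16_epi32`. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_widen_16_to_32(a: __m256i) -> [__m512i; 2] {
    [_mm512_cvtepi16_epi32(a), _mm512_cvtepu16_epi32(a)]
}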
12143
12144/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12145///
12146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12147#[inline]
12148#[target_feature(enable = "avx512f,avx512vl")]
12149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12150#[cfg_attr(test, assert_instr(vpmovzxwd))]
12151pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12152    unsafe {
12153        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12154        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12155    }
12156}
12157
12158/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12159///
12160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12161#[inline]
12162#[target_feature(enable = "avx512f,avx512vl")]
12163#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12164#[cfg_attr(test, assert_instr(vpmovzxwd))]
12165pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12166    unsafe {
12167        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
12168        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12169    }
12170}
12171
12172/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12173///
12174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12175#[inline]
12176#[target_feature(enable = "avx512f,avx512vl")]
12177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12178#[cfg_attr(test, assert_instr(vpmovzxwd))]
12179pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12180    unsafe {
12181        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12182        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12183    }
12184}
12185
12186/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12187///
12188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12189#[inline]
12190#[target_feature(enable = "avx512f,avx512vl")]
12191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12192#[cfg_attr(test, assert_instr(vpmovzxwd))]
12193pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12194    unsafe {
12195        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
12196        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12197    }
12198}
12199
12200/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12201///
12202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12203#[inline]
12204#[target_feature(enable = "avx512f")]
12205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12206#[cfg_attr(test, assert_instr(vpmovzxwq))]
12207pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12208    unsafe {
12209        let a = a.as_u16x8();
12210        transmute::<i64x8, _>(simd_cast(a))
12211    }
12212}
12213
12214/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12215///
12216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12217#[inline]
12218#[target_feature(enable = "avx512f")]
12219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12220#[cfg_attr(test, assert_instr(vpmovzxwq))]
12221pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12222    unsafe {
12223        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12224        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12225    }
12226}
12227
12228/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12229///
12230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12231#[inline]
12232#[target_feature(enable = "avx512f")]
12233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12234#[cfg_attr(test, assert_instr(vpmovzxwq))]
12235pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12236    unsafe {
12237        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
12238        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12239    }
12240}
12241
12242/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12243///
12244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12245#[inline]
12246#[target_feature(enable = "avx512f,avx512vl")]
12247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12248#[cfg_attr(test, assert_instr(vpmovzxwq))]
12249pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12250    unsafe {
12251        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12252        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12253    }
12254}
12255
12256/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12257///
12258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12259#[inline]
12260#[target_feature(enable = "avx512f,avx512vl")]
12261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12262#[cfg_attr(test, assert_instr(vpmovzxwq))]
12263pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12264    unsafe {
12265        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
12266        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12267    }
12268}
12269
12270/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12271///
12272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12273#[inline]
12274#[target_feature(enable = "avx512f,avx512vl")]
12275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12276#[cfg_attr(test, assert_instr(vpmovzxwq))]
12277pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12278    unsafe {
12279        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12280        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12281    }
12282}
12283
12284/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12290#[cfg_attr(test, assert_instr(vpmovzxwq))]
12291pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12292    unsafe {
12293        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
12294        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12295    }
12296}
12297
12298/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12299///
12300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
12301#[inline]
12302#[target_feature(enable = "avx512f")]
12303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12304#[cfg_attr(test, assert_instr(vpmovsxdq))]
12305pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12306    unsafe {
12307        let a = a.as_i32x8();
12308        transmute::<i64x8, _>(simd_cast(a))
12309    }
12310}
12311
12312/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12313///
12314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12315#[inline]
12316#[target_feature(enable = "avx512f")]
12317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12318#[cfg_attr(test, assert_instr(vpmovsxdq))]
12319pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12320    unsafe {
12321        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12322        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12323    }
12324}
12325
12326/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12327///
12328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12329#[inline]
12330#[target_feature(enable = "avx512f")]
12331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12332#[cfg_attr(test, assert_instr(vpmovsxdq))]
12333pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12334    unsafe {
12335        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
12336        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12337    }
12338}
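
// Illustrative sketch, not part of the original source: widen eight i32 lanes
// to i64, merging into `src` under an alternating mask. The helper name and
// mask constant are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_mask_cvtepi32_epi64(src: __m512i, a: __m256i) -> __m512i {
    // Even-numbered lanes are sign extended from `a`; odd-numbered lanes keep
    // the corresponding 64-bit lane of `src`.
    _mm512_mask_cvtepi32_epi64(src, 0b0101_0101, a)
}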
12339
12340/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12341///
12342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12343#[inline]
12344#[target_feature(enable = "avx512f,avx512vl")]
12345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12346#[cfg_attr(test, assert_instr(vpmovsxdq))]
12347pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12348    unsafe {
12349        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12350        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12351    }
12352}
12353
12354/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12355///
12356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12357#[inline]
12358#[target_feature(enable = "avx512f,avx512vl")]
12359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12360#[cfg_attr(test, assert_instr(vpmovsxdq))]
12361pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12362    unsafe {
12363        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
12364        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12365    }
12366}
12367
12368/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12369///
12370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12371#[inline]
12372#[target_feature(enable = "avx512f,avx512vl")]
12373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12374#[cfg_attr(test, assert_instr(vpmovsxdq))]
12375pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12376    unsafe {
12377        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12378        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12379    }
12380}
12381
12382/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12383///
12384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12385#[inline]
12386#[target_feature(enable = "avx512f,avx512vl")]
12387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12388#[cfg_attr(test, assert_instr(vpmovsxdq))]
12389pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12390    unsafe {
12391        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
12392        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12393    }
12394}
12395
12396/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12397///
12398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
12399#[inline]
12400#[target_feature(enable = "avx512f")]
12401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12402#[cfg_attr(test, assert_instr(vpmovzxdq))]
12403pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12404    unsafe {
12405        let a = a.as_u32x8();
12406        transmute::<i64x8, _>(simd_cast(a))
12407    }
12408}
12409
12410/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12411///
12412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12413#[inline]
12414#[target_feature(enable = "avx512f")]
12415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12416#[cfg_attr(test, assert_instr(vpmovzxdq))]
12417pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12418    unsafe {
12419        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12420        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12421    }
12422}
12423
12424/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12425///
12426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12427#[inline]
12428#[target_feature(enable = "avx512f")]
12429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12430#[cfg_attr(test, assert_instr(vpmovzxdq))]
12431pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12432    unsafe {
12433        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12434        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12435    }
12436}
12437
12438/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12439///
12440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12441#[inline]
12442#[target_feature(enable = "avx512f,avx512vl")]
12443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12444#[cfg_attr(test, assert_instr(vpmovzxdq))]
12445pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12446    unsafe {
12447        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12448        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12449    }
12450}
12451
12452/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12453///
12454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12455#[inline]
12456#[target_feature(enable = "avx512f,avx512vl")]
12457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12458#[cfg_attr(test, assert_instr(vpmovzxdq))]
12459pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12460    unsafe {
12461        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
12462        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12463    }
12464}
12465
12466/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12467///
12468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12469#[inline]
12470#[target_feature(enable = "avx512f,avx512vl")]
12471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12472#[cfg_attr(test, assert_instr(vpmovzxdq))]
12473pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12474    unsafe {
12475        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12476        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12477    }
12478}
12479
12480/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12481///
12482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12483#[inline]
12484#[target_feature(enable = "avx512f,avx512vl")]
12485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12486#[cfg_attr(test, assert_instr(vpmovzxdq))]
12487pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12488    unsafe {
12489        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
12490        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12491    }
12492}
12493
12494/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
12497#[inline]
12498#[target_feature(enable = "avx512f")]
12499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12500#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12501pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12502    unsafe {
12503        let a = a.as_i32x16();
12504        transmute::<f32x16, _>(simd_cast(a))
12505    }
12506}
12507
12508/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12509///
12510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12511#[inline]
12512#[target_feature(enable = "avx512f")]
12513#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12514#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12515pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12516    unsafe {
12517        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12518        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12519    }
12520}
12521
12522/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12523///
12524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12525#[inline]
12526#[target_feature(enable = "avx512f")]
12527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12528#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12529pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12530    unsafe {
12531        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
12532        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12533    }
12534}
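
// Illustrative sketch, not part of the original source: convert sixteen i32
// lanes to f32, keeping only the lanes selected by `k` and zeroing the rest.
// Values with magnitude above 2^24 may round, since not every i32 is exactly
// representable as an f32. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
    _mm512_maskz_cvtepi32_ps(k, a)
}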
12535
12536/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12537///
12538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12539#[inline]
12540#[target_feature(enable = "avx512f,avx512vl")]
12541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12542#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12543pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12544    unsafe {
12545        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12546        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
12547    }
12548}
12549
12550/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12551///
12552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12553#[inline]
12554#[target_feature(enable = "avx512f,avx512vl")]
12555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12556#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12557pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12558    unsafe {
12559        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
12560        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
12561    }
12562}
12563
12564/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12565///
12566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12567#[inline]
12568#[target_feature(enable = "avx512f,avx512vl")]
12569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12570#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12571pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12572    unsafe {
12573        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12574        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
12575    }
12576}
12577
12578/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12579///
12580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12581#[inline]
12582#[target_feature(enable = "avx512f,avx512vl")]
12583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12584#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12585pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12586    unsafe {
12587        let convert = _mm_cvtepi32_ps(a).as_f32x4();
12588        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
12589    }
12590}
12591
12592/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12593///
12594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
12595#[inline]
12596#[target_feature(enable = "avx512f")]
12597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12598#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12599pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12600    unsafe {
12601        let a = a.as_i32x8();
12602        transmute::<f64x8, _>(simd_cast(a))
12603    }
12604}
12605
12606/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12607///
12608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12609#[inline]
12610#[target_feature(enable = "avx512f")]
12611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12612#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12613pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12614    unsafe {
12615        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12616        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12617    }
12618}
12619
12620/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12621///
12622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12623#[inline]
12624#[target_feature(enable = "avx512f")]
12625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12626#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12627pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12628    unsafe {
12629        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
12630        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12631    }
12632}
12633
12634/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12635///
12636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12637#[inline]
12638#[target_feature(enable = "avx512f,avx512vl")]
12639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12640#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12641pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12642    unsafe {
12643        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
12644        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12645    }
12646}
12647
12648/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12649///
12650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
12651#[inline]
12652#[target_feature(enable = "avx512f,avx512vl")]
12653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12654#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12655pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
12656    unsafe {
12657        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
12658        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12659    }
12660}
12661
12662/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12663///
12664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
12665#[inline]
12666#[target_feature(enable = "avx512f,avx512vl")]
12667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12668#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12669pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12670    unsafe {
12671        let convert = _mm_cvtepi32_pd(a).as_f64x2();
12672        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12673    }
12674}
12675
12676/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12677///
12678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
12679#[inline]
12680#[target_feature(enable = "avx512f,avx512vl")]
12681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12682#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12683pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
12684    unsafe {
12685        let convert = _mm_cvtepi32_pd(a).as_f64x2();
12686        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12687    }
12688}
12689
12690/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12691///
12692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
12693#[inline]
12694#[target_feature(enable = "avx512f")]
12695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12696#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12697pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
12698    unsafe {
12699        let a = a.as_u32x16();
12700        transmute::<f32x16, _>(simd_cast(a))
12701    }
12702}
12703
12704/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
12707#[inline]
12708#[target_feature(enable = "avx512f")]
12709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12710#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12711pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12712    unsafe {
12713        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
12714        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12715    }
12716}
12717
12718/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12719///
12720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
12721#[inline]
12722#[target_feature(enable = "avx512f")]
12723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12724#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12725pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
12726    unsafe {
12727        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
12728        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12729    }
12730}
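
// Illustrative sketch, not part of the original source: the signed and
// unsigned conversions diverge for lanes with the top bit set. A lane holding
// the bit pattern 0x8000_0000 becomes -2147483648.0 via `_mm512_cvtepi32_ps`
// but 2147483648.0 via `_mm512_cvtepu32_ps`. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_cvt_i32_vs_u32_to_ps(a: __m512i) -> [__m512; 2] {
    [_mm512_cvtepi32_ps(a), _mm512_cvtepu32_ps(a)]
}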
12731
12732/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12733///
12734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
12735#[inline]
12736#[target_feature(enable = "avx512f")]
12737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12738#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12739pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
12740    unsafe {
12741        let a = a.as_u32x8();
12742        transmute::<f64x8, _>(simd_cast(a))
12743    }
12744}
12745
12746/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12747///
12748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
12749#[inline]
12750#[target_feature(enable = "avx512f")]
12751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12752#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12753pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12754    unsafe {
12755        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
12756        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12757    }
12758}
12759
12760/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12761///
12762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
12763#[inline]
12764#[target_feature(enable = "avx512f")]
12765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12766#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12767pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
12768    unsafe {
12769        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
12770        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12771    }
12772}
12773
12774/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12775///
12776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
12777#[inline]
12778#[target_feature(enable = "avx512f,avx512vl")]
12779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12780#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12781pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
12782    unsafe {
12783        let a = a.as_u32x4();
12784        transmute::<f64x4, _>(simd_cast(a))
12785    }
12786}
12787
12788/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12789///
12790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
12791#[inline]
12792#[target_feature(enable = "avx512f,avx512vl")]
12793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12794#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12795pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12796    unsafe {
12797        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
12798        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12799    }
12800}
12801
12802/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12803///
12804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
12805#[inline]
12806#[target_feature(enable = "avx512f,avx512vl")]
12807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12808#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12809pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
12810    unsafe {
12811        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
12812        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12813    }
12814}
12815
12816/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12817///
12818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
12819#[inline]
12820#[target_feature(enable = "avx512f,avx512vl")]
12821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12822#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12823pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
12824    unsafe {
12825        let a = a.as_u32x4();
12826        let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
12827        transmute::<f64x2, _>(simd_cast(u64))
12828    }
12829}
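
// Illustrative sketch, not part of the original source: only the two lowest
// unsigned 32-bit lanes of `a` are converted to f64; the upper two lanes are
// ignored, mirroring the `simd_shuffle!` of lanes [0, 1] above. The helper
// name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _demo_cvtepu32_pd_low_lanes(a: __m128i) -> __m128d {
    _mm_cvtepu32_pd(a)
}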
12830
12831/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12832///
12833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
12834#[inline]
12835#[target_feature(enable = "avx512f,avx512vl")]
12836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12837#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12838pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12839    unsafe {
12840        let convert = _mm_cvtepu32_pd(a).as_f64x2();
12841        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12842    }
12843}
12844
12845/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12846///
12847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
12848#[inline]
12849#[target_feature(enable = "avx512f,avx512vl")]
12850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12851#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12852pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
12853    unsafe {
12854        let convert = _mm_cvtepu32_pd(a).as_f64x2();
12855        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12856    }
12857}
12858
12859/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12860///
12861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
12862#[inline]
12863#[target_feature(enable = "avx512f")]
12864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12865#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12866pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
12867    unsafe {
12868        let v2 = v2.as_i32x16();
12869        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
12870        transmute::<f64x8, _>(simd_cast(v256))
12871    }
12872}
12873
12874/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12875///
12876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
12877#[inline]
12878#[target_feature(enable = "avx512f")]
12879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12880#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12881pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12882    unsafe {
12883        let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
12884        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12885    }
12886}
12887
12888/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12889///
12890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
12891#[inline]
12892#[target_feature(enable = "avx512f")]
12893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12894#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12895pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
12896    unsafe {
12897        let v2 = v2.as_u32x16();
12898        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
12899        transmute::<f64x8, _>(simd_cast(v256))
12900    }
12901}
12902
12903/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12904///
12905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
12906#[inline]
12907#[target_feature(enable = "avx512f")]
12908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12909#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12910pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12911    unsafe {
12912        let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
12913        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12914    }
12915}
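
// Illustrative sketch, not part of the original source: the `lo` variants read
// only the lower eight 32-bit lanes of the 512-bit input and widen them to
// eight f64 lanes; the upper eight lanes of `v2` are ignored. The helper name
// is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_cvt_epi32lo_epu32lo_pd(v2: __m512i) -> [__m512d; 2] {
    [_mm512_cvtepi32lo_pd(v2), _mm512_cvtepu32lo_pd(v2)]
}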
12916
12917/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12918///
12919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
12920#[inline]
12921#[target_feature(enable = "avx512f")]
12922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12923#[cfg_attr(test, assert_instr(vpmovdw))]
12924pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
12925    unsafe {
12926        let a = a.as_i32x16();
12927        transmute::<i16x16, _>(simd_cast(a))
12928    }
12929}
12930
12931/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12932///
12933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
12934#[inline]
12935#[target_feature(enable = "avx512f")]
12936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12937#[cfg_attr(test, assert_instr(vpmovdw))]
12938pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
12939    unsafe {
12940        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
12941        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
12942    }
12943}
12944
12945/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12946///
12947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
12948#[inline]
12949#[target_feature(enable = "avx512f")]
12950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12951#[cfg_attr(test, assert_instr(vpmovdw))]
12952pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
12953    unsafe {
12954        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
12955        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
12956    }
12957}
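
// Illustrative sketch, not part of the original source: the narrowing keeps
// only the low 16 bits of each 32-bit lane, so a lane holding 0x0001_FFFF
// becomes 0xFFFF (-1 as i16) with no saturation. With the mask below, the low
// eight lanes of the result are copied from `src` and the high eight lanes are
// truncated from `a`. The helper name and mask constant are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _demo_mask_cvtepi32_epi16(src: __m256i, a: __m512i) -> __m256i {
    _mm512_mask_cvtepi32_epi16(src, 0xFF00, a)
}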
12958
12959/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
12962#[inline]
12963#[target_feature(enable = "avx512f,avx512vl")]
12964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12965#[cfg_attr(test, assert_instr(vpmovdw))]
12966pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
12967    unsafe {
12968        let a = a.as_i32x8();
12969        transmute::<i16x8, _>(simd_cast(a))
12970    }
12971}
12972
12973/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12974///
12975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
12976#[inline]
12977#[target_feature(enable = "avx512f,avx512vl")]
12978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12979#[cfg_attr(test, assert_instr(vpmovdw))]
12980pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12981    unsafe {
12982        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
12983        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12984    }
12985}
12986
12987/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12988///
12989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
12990#[inline]
12991#[target_feature(enable = "avx512f,avx512vl")]
12992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12993#[cfg_attr(test, assert_instr(vpmovdw))]
12994pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
12995    unsafe {
12996        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
12997        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12998    }
12999}
13000
13001/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13002///
13003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
13004#[inline]
13005#[target_feature(enable = "avx512f,avx512vl")]
13006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13007#[cfg_attr(test, assert_instr(vpmovdw))]
13008pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
13009    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13010}
13011
13012/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13013///
13014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
13015#[inline]
13016#[target_feature(enable = "avx512f,avx512vl")]
13017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13018#[cfg_attr(test, assert_instr(vpmovdw))]
13019pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13020    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13021}
13022
13023/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13024///
13025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
13026#[inline]
13027#[target_feature(enable = "avx512f,avx512vl")]
13028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13029#[cfg_attr(test, assert_instr(vpmovdw))]
13030pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13031    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13032}
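
// Illustrative sketch (values and mask chosen here for exposition, not taken from the
// source): how the plain, writemask, and zeromask forms of the 32-to-16-bit truncating
// conversion relate. Assumes a CPU with AVX-512F and the unstable `stdarch_x86_avx512`
// feature enabled.
//
//     let a = _mm512_set1_epi32(0x0001_0002);               // every i32 lane = 0x0001_0002
//     let src = _mm256_set1_epi16(-1);
//     let t = _mm512_cvtepi32_epi16(a);                      // every i16 lane = 0x0002 (low half kept)
//     let m = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
//     //   lanes 0..8 = 0x0002 (converted), lanes 8..16 = -1 (copied from `src`)
//     let z = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
//     //   lanes 0..8 = 0x0002 (converted), lanes 8..16 = 0 (zeroed)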
13033
13034/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
13037#[inline]
13038#[target_feature(enable = "avx512f")]
13039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13040#[cfg_attr(test, assert_instr(vpmovdb))]
13041pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13042    unsafe {
13043        let a = a.as_i32x16();
13044        transmute::<i8x16, _>(simd_cast(a))
13045    }
13046}
13047
13048/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13049///
13050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13051#[inline]
13052#[target_feature(enable = "avx512f")]
13053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13054#[cfg_attr(test, assert_instr(vpmovdb))]
13055pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13056    unsafe {
13057        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13058        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
13059    }
13060}
13061
13062/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13063///
13064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13065#[inline]
13066#[target_feature(enable = "avx512f")]
13067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13068#[cfg_attr(test, assert_instr(vpmovdb))]
13069pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13070    unsafe {
13071        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
13072        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
13073    }
13074}
13075
13076/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13077///
13078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
13079#[inline]
13080#[target_feature(enable = "avx512f,avx512vl")]
13081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13082#[cfg_attr(test, assert_instr(vpmovdb))]
13083pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
13084    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13085}
13086
13087/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13088///
13089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
13090#[inline]
13091#[target_feature(enable = "avx512f,avx512vl")]
13092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13093#[cfg_attr(test, assert_instr(vpmovdb))]
13094pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13095    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13096}
13097
13098/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13099///
13100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
13101#[inline]
13102#[target_feature(enable = "avx512f,avx512vl")]
13103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13104#[cfg_attr(test, assert_instr(vpmovdb))]
13105pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13106    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13107}
13108
13109/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
13112#[inline]
13113#[target_feature(enable = "avx512f,avx512vl")]
13114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13115#[cfg_attr(test, assert_instr(vpmovdb))]
13116pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
13117    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13118}
13119
13120/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13121///
13122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
13123#[inline]
13124#[target_feature(enable = "avx512f,avx512vl")]
13125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13126#[cfg_attr(test, assert_instr(vpmovdb))]
13127pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13128    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13129}
13130
13131/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13132///
13133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
13134#[inline]
13135#[target_feature(enable = "avx512f,avx512vl")]
13136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13137#[cfg_attr(test, assert_instr(vpmovdb))]
13138pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13139    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13140}
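
// Illustrative sketch (hypothetical input): truncation to 8 bits keeps only the low byte
// of each lane, so out-of-range values wrap rather than saturate (contrast with
// `_mm_cvtsepi32_epi8` further below).
//
//     let a = _mm_set_epi32(300, -1, 128, 7);
//     let r = _mm_cvtepi32_epi8(a);
//     // low four i8 lanes of `r` (element 0 first): 7, -128, -1, 44
//     //   (128 -> 0x80 = -128, 300 & 0xFF = 44); the upper twelve lanes are zeroed.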
13141
13142/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13143///
13144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
13145#[inline]
13146#[target_feature(enable = "avx512f")]
13147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13148#[cfg_attr(test, assert_instr(vpmovqd))]
13149pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13150    unsafe {
13151        let a = a.as_i64x8();
13152        transmute::<i32x8, _>(simd_cast(a))
13153    }
13154}
13155
13156/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13157///
13158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
13159#[inline]
13160#[target_feature(enable = "avx512f")]
13161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13162#[cfg_attr(test, assert_instr(vpmovqd))]
13163pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13164    unsafe {
13165        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13166        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
13167    }
13168}
13169
13170/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13171///
13172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13173#[inline]
13174#[target_feature(enable = "avx512f")]
13175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13176#[cfg_attr(test, assert_instr(vpmovqd))]
13177pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13178    unsafe {
13179        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
13180        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
13181    }
13182}
13183
13184/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13190#[cfg_attr(test, assert_instr(vpmovqd))]
13191pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13192    unsafe {
13193        let a = a.as_i64x4();
13194        transmute::<i32x4, _>(simd_cast(a))
13195    }
13196}
13197
13198/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13199///
13200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13201#[inline]
13202#[target_feature(enable = "avx512f,avx512vl")]
13203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13204#[cfg_attr(test, assert_instr(vpmovqd))]
13205pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13206    unsafe {
13207        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13208        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
13209    }
13210}
13211
13212/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13213///
13214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13215#[inline]
13216#[target_feature(enable = "avx512f,avx512vl")]
13217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13218#[cfg_attr(test, assert_instr(vpmovqd))]
13219pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13220    unsafe {
13221        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
13222        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
13223    }
13224}
13225
13226/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13227///
13228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
13229#[inline]
13230#[target_feature(enable = "avx512f,avx512vl")]
13231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13232#[cfg_attr(test, assert_instr(vpmovqd))]
13233pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
13234    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13235}
13236
13237/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13238///
13239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
13240#[inline]
13241#[target_feature(enable = "avx512f,avx512vl")]
13242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13243#[cfg_attr(test, assert_instr(vpmovqd))]
13244pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13245    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13246}
13247
13248/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13249///
13250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
13251#[inline]
13252#[target_feature(enable = "avx512f,avx512vl")]
13253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13254#[cfg_attr(test, assert_instr(vpmovqd))]
13255pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13256    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13257}
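
// Illustrative sketch (hypothetical input): 64-to-32-bit truncation keeps the low 32 bits
// of each lane; the upper half of the destination vector is zeroed.
//
//     let a = _mm_set_epi64x(0x1_0000_0005, -1);
//     let r = _mm_cvtepi64_epi32(a);
//     // i32 lanes of `r` (element 0 first): -1, 5, 0, 0
//     //   (-1 keeps its low 32 bits; 0x1_0000_0005 truncates to 5)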
13258
13259/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13260///
13261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
13262#[inline]
13263#[target_feature(enable = "avx512f")]
13264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13265#[cfg_attr(test, assert_instr(vpmovqw))]
13266pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13267    unsafe {
13268        let a = a.as_i64x8();
13269        transmute::<i16x8, _>(simd_cast(a))
13270    }
13271}
13272
13273/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13274///
13275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13276#[inline]
13277#[target_feature(enable = "avx512f")]
13278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13279#[cfg_attr(test, assert_instr(vpmovqw))]
13280pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13281    unsafe {
13282        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13283        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
13284    }
13285}
13286
13287/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13288///
13289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13290#[inline]
13291#[target_feature(enable = "avx512f")]
13292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13293#[cfg_attr(test, assert_instr(vpmovqw))]
13294pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13295    unsafe {
13296        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13297        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
13298    }
13299}
13300
13301/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13302///
13303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13304#[inline]
13305#[target_feature(enable = "avx512f,avx512vl")]
13306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13307#[cfg_attr(test, assert_instr(vpmovqw))]
13308pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13309    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13310}
13311
13312/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13313///
13314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13315#[inline]
13316#[target_feature(enable = "avx512f,avx512vl")]
13317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13318#[cfg_attr(test, assert_instr(vpmovqw))]
13319pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13320    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13321}
13322
13323/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13324///
13325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13326#[inline]
13327#[target_feature(enable = "avx512f,avx512vl")]
13328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13329#[cfg_attr(test, assert_instr(vpmovqw))]
13330pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13331    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13332}
13333
13334/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13335///
13336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13337#[inline]
13338#[target_feature(enable = "avx512f,avx512vl")]
13339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13340#[cfg_attr(test, assert_instr(vpmovqw))]
13341pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13342    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13343}
13344
13345/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13346///
13347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13348#[inline]
13349#[target_feature(enable = "avx512f,avx512vl")]
13350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13351#[cfg_attr(test, assert_instr(vpmovqw))]
13352pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13353    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13354}
13355
13356/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13357///
13358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13359#[inline]
13360#[target_feature(enable = "avx512f,avx512vl")]
13361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13362#[cfg_attr(test, assert_instr(vpmovqw))]
13363pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13364    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13365}
13366
13367/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13370#[inline]
13371#[target_feature(enable = "avx512f")]
13372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13373#[cfg_attr(test, assert_instr(vpmovqb))]
13374pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13375    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13376}
13377
13378/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13379///
13380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13381#[inline]
13382#[target_feature(enable = "avx512f")]
13383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13384#[cfg_attr(test, assert_instr(vpmovqb))]
13385pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13386    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
13387}
13388
13389/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13390///
13391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
13392#[inline]
13393#[target_feature(enable = "avx512f")]
13394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13395#[cfg_attr(test, assert_instr(vpmovqb))]
13396pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13397    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
13398}
13399
13400/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13401///
13402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13403#[inline]
13404#[target_feature(enable = "avx512f,avx512vl")]
13405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13406#[cfg_attr(test, assert_instr(vpmovqb))]
13407pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13408    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13409}
13410
13411/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13412///
13413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13414#[inline]
13415#[target_feature(enable = "avx512f,avx512vl")]
13416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13417#[cfg_attr(test, assert_instr(vpmovqb))]
13418pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13419    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13420}
13421
13422/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13423///
13424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13425#[inline]
13426#[target_feature(enable = "avx512f,avx512vl")]
13427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13428#[cfg_attr(test, assert_instr(vpmovqb))]
13429pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13430    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13431}
13432
13433/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13434///
13435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13436#[inline]
13437#[target_feature(enable = "avx512f,avx512vl")]
13438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13439#[cfg_attr(test, assert_instr(vpmovqb))]
13440pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13441    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13442}
13443
13444/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13445///
13446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13447#[inline]
13448#[target_feature(enable = "avx512f,avx512vl")]
13449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13450#[cfg_attr(test, assert_instr(vpmovqb))]
13451pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13452    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13453}
13454
13455/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13456///
13457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13458#[inline]
13459#[target_feature(enable = "avx512f,avx512vl")]
13460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13461#[cfg_attr(test, assert_instr(vpmovqb))]
13462pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13463    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13464}
13465
13466/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13467///
13468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13469#[inline]
13470#[target_feature(enable = "avx512f")]
13471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13472#[cfg_attr(test, assert_instr(vpmovsdw))]
13473pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13474    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
13475}
13476
13477/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13478///
13479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13480#[inline]
13481#[target_feature(enable = "avx512f")]
13482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13483#[cfg_attr(test, assert_instr(vpmovsdw))]
13484pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13485    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
13486}
13487
13488/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13489///
13490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1819)
13491#[inline]
13492#[target_feature(enable = "avx512f")]
13493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13494#[cfg_attr(test, assert_instr(vpmovsdw))]
13495pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13496    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
13497}
13498
13499/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13500///
13501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13502#[inline]
13503#[target_feature(enable = "avx512f,avx512vl")]
13504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13505#[cfg_attr(test, assert_instr(vpmovsdw))]
13506pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13507    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
13508}
13509
13510/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13511///
13512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13513#[inline]
13514#[target_feature(enable = "avx512f,avx512vl")]
13515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13516#[cfg_attr(test, assert_instr(vpmovsdw))]
13517pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13518    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
13519}
13520
13521/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13522///
13523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13524#[inline]
13525#[target_feature(enable = "avx512f,avx512vl")]
13526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13527#[cfg_attr(test, assert_instr(vpmovsdw))]
13528pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13529    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
13530}
13531
13532/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13533///
13534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13535#[inline]
13536#[target_feature(enable = "avx512f,avx512vl")]
13537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13538#[cfg_attr(test, assert_instr(vpmovsdw))]
13539pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13540    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13541}
13542
13543/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13544///
13545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13546#[inline]
13547#[target_feature(enable = "avx512f,avx512vl")]
13548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13549#[cfg_attr(test, assert_instr(vpmovsdw))]
13550pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13551    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13552}
13553
13554/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13555///
13556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13557#[inline]
13558#[target_feature(enable = "avx512f,avx512vl")]
13559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13560#[cfg_attr(test, assert_instr(vpmovsdw))]
13561pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13562    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13563}
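
// Illustrative sketch (hypothetical input): unlike the plain `cvtepi32_epi16` truncation
// above, the `cvtsepi32_epi16` family clamps each lane to the i16 range before narrowing.
//
//     let a = _mm_set_epi32(40_000, -40_000, 1, i32::MIN);
//     let r = _mm_cvtsepi32_epi16(a);
//     // i16 lanes of `r` (element 0 first): -32768, 1, -32768, 32767
//     //   (i32::MIN and -40_000 clamp to i16::MIN, 40_000 clamps to i16::MAX); upper lanes zeroed.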
13564
13565/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13566///
13567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13568#[inline]
13569#[target_feature(enable = "avx512f")]
13570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13571#[cfg_attr(test, assert_instr(vpmovsdb))]
13572pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13573    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
13574}
13575
13576/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13577///
13578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13579#[inline]
13580#[target_feature(enable = "avx512f")]
13581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13582#[cfg_attr(test, assert_instr(vpmovsdb))]
13583pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13584    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
13585}
13586
13587/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13588///
13589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13590#[inline]
13591#[target_feature(enable = "avx512f")]
13592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13593#[cfg_attr(test, assert_instr(vpmovsdb))]
13594pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13595    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
13596}
13597
13598/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13599///
13600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13601#[inline]
13602#[target_feature(enable = "avx512f,avx512vl")]
13603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13604#[cfg_attr(test, assert_instr(vpmovsdb))]
13605pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13606    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13607}
13608
13609/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13610///
13611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
13612#[inline]
13613#[target_feature(enable = "avx512f,avx512vl")]
13614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13615#[cfg_attr(test, assert_instr(vpmovsdb))]
13616pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13617    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13618}
13619
13620/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13621///
13622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
13623#[inline]
13624#[target_feature(enable = "avx512f,avx512vl")]
13625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13626#[cfg_attr(test, assert_instr(vpmovsdb))]
13627pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13628    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13629}
13630
13631/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13632///
13633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
13634#[inline]
13635#[target_feature(enable = "avx512f,avx512vl")]
13636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13637#[cfg_attr(test, assert_instr(vpmovsdb))]
13638pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
13639    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13640}
13641
13642/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13643///
13644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
13645#[inline]
13646#[target_feature(enable = "avx512f,avx512vl")]
13647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13648#[cfg_attr(test, assert_instr(vpmovsdb))]
13649pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13650    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13651}
13652
13653/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13654///
13655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
13656#[inline]
13657#[target_feature(enable = "avx512f,avx512vl")]
13658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13659#[cfg_attr(test, assert_instr(vpmovsdb))]
13660pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13661    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13662}
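
// Illustrative sketch (hypothetical input): the same value narrowed with truncation versus
// signed saturation, showing where `_mm_cvtepi32_epi8` and `_mm_cvtsepi32_epi8` differ.
//
//     let a = _mm_set1_epi32(200);
//     let t = _mm_cvtepi32_epi8(a);     // low i8 lanes: -56  (200 wraps modulo 256)
//     let s = _mm_cvtsepi32_epi8(a);    // low i8 lanes: 127  (200 saturates to i8::MAX)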
13663
13664/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13670#[cfg_attr(test, assert_instr(vpmovsqd))]
13671pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
13672    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
13673}
13674
13675/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13676///
13677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
13678#[inline]
13679#[target_feature(enable = "avx512f")]
13680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13681#[cfg_attr(test, assert_instr(vpmovsqd))]
13682pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13683    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
13684}
13685
13686/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13687///
13688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
13689#[inline]
13690#[target_feature(enable = "avx512f")]
13691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13692#[cfg_attr(test, assert_instr(vpmovsqd))]
13693pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13694    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
13695}
13696
13697/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13698///
13699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
13700#[inline]
13701#[target_feature(enable = "avx512f,avx512vl")]
13702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13703#[cfg_attr(test, assert_instr(vpmovsqd))]
13704pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
13705    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
13706}
13707
13708/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13709///
13710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
13711#[inline]
13712#[target_feature(enable = "avx512f,avx512vl")]
13713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13714#[cfg_attr(test, assert_instr(vpmovsqd))]
13715pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13716    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
13717}
13718
13719/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13720///
13721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
13722#[inline]
13723#[target_feature(enable = "avx512f,avx512vl")]
13724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13725#[cfg_attr(test, assert_instr(vpmovsqd))]
13726pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13727    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
13728}
13729
13730/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13731///
13732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
13733#[inline]
13734#[target_feature(enable = "avx512f,avx512vl")]
13735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13736#[cfg_attr(test, assert_instr(vpmovsqd))]
13737pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
13738    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
13739}
13740
13741/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13742///
13743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
13744#[inline]
13745#[target_feature(enable = "avx512f,avx512vl")]
13746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13747#[cfg_attr(test, assert_instr(vpmovsqd))]
13748pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13749    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
13750}
13751
13752/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13753///
13754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
13755#[inline]
13756#[target_feature(enable = "avx512f,avx512vl")]
13757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13758#[cfg_attr(test, assert_instr(vpmovsqd))]
13759pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13760    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
13761}
13762
13763/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13764///
13765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
13766#[inline]
13767#[target_feature(enable = "avx512f")]
13768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13769#[cfg_attr(test, assert_instr(vpmovsqw))]
13770pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
13771    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
13772}
13773
13774/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13775///
13776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
13777#[inline]
13778#[target_feature(enable = "avx512f")]
13779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13780#[cfg_attr(test, assert_instr(vpmovsqw))]
13781pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13782    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
13783}
13784
13785/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13786///
13787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
13788#[inline]
13789#[target_feature(enable = "avx512f")]
13790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13791#[cfg_attr(test, assert_instr(vpmovsqw))]
13792pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13793    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
13794}
13795
13796/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13797///
13798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
13799#[inline]
13800#[target_feature(enable = "avx512f,avx512vl")]
13801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13802#[cfg_attr(test, assert_instr(vpmovsqw))]
13803pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
13804    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13805}
13806
13807/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13808///
13809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
13810#[inline]
13811#[target_feature(enable = "avx512f,avx512vl")]
13812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13813#[cfg_attr(test, assert_instr(vpmovsqw))]
13814pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13815    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13816}
13817
13818/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13819///
13820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
13821#[inline]
13822#[target_feature(enable = "avx512f,avx512vl")]
13823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13824#[cfg_attr(test, assert_instr(vpmovsqw))]
13825pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13826    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13827}
13828
13829/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13830///
13831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
13832#[inline]
13833#[target_feature(enable = "avx512f,avx512vl")]
13834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13835#[cfg_attr(test, assert_instr(vpmovsqw))]
13836pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
13837    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13838}
13839
13840/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13841///
13842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
13843#[inline]
13844#[target_feature(enable = "avx512f,avx512vl")]
13845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13846#[cfg_attr(test, assert_instr(vpmovsqw))]
13847pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13848    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13849}
13850
13851/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13852///
13853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
13854#[inline]
13855#[target_feature(enable = "avx512f,avx512vl")]
13856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13857#[cfg_attr(test, assert_instr(vpmovsqw))]
13858pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13859    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13860}
13861
13862/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13863///
13864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
13865#[inline]
13866#[target_feature(enable = "avx512f")]
13867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13868#[cfg_attr(test, assert_instr(vpmovsqb))]
13869pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
13870    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13871}
13872
13873/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13874///
13875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
13876#[inline]
13877#[target_feature(enable = "avx512f")]
13878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13879#[cfg_attr(test, assert_instr(vpmovsqb))]
13880pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13881    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
13882}
13883
13884/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13885///
13886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
13887#[inline]
13888#[target_feature(enable = "avx512f")]
13889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13890#[cfg_attr(test, assert_instr(vpmovsqb))]
13891pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13892    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
13893}
13894
13895/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13896///
13897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
13898#[inline]
13899#[target_feature(enable = "avx512f,avx512vl")]
13900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13901#[cfg_attr(test, assert_instr(vpmovsqb))]
13902pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
13903    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13904}
13905
13906/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13907///
13908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
13909#[inline]
13910#[target_feature(enable = "avx512f,avx512vl")]
13911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13912#[cfg_attr(test, assert_instr(vpmovsqb))]
13913pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13914    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13915}
13916
13917/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13918///
13919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
13920#[inline]
13921#[target_feature(enable = "avx512f,avx512vl")]
13922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13923#[cfg_attr(test, assert_instr(vpmovsqb))]
13924pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13925    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13926}
13927
13928/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13929///
13930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
13931#[inline]
13932#[target_feature(enable = "avx512f,avx512vl")]
13933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13934#[cfg_attr(test, assert_instr(vpmovsqb))]
13935pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
13936    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13937}
13938
13939/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13940///
13941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
13942#[inline]
13943#[target_feature(enable = "avx512f,avx512vl")]
13944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13945#[cfg_attr(test, assert_instr(vpmovsqb))]
13946pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13947    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13948}
13949
13950/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13951///
13952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
13953#[inline]
13954#[target_feature(enable = "avx512f,avx512vl")]
13955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13956#[cfg_attr(test, assert_instr(vpmovsqb))]
13957pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13958    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13959}
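
// Illustrative usage sketch (the helper below is ours, not an stdarch item): lanes
// outside the i8 range saturate to i8::MIN / i8::MAX, and the eight converted bytes
// occupy the low half of the 128-bit result while the upper half is zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvtsepi64_epi8_sketch() {
    unsafe {
        let a = _mm512_setr_epi64(300, -300, 5, -5, 0, 1, -1, 127);
        let r: [i8; 16] = transmute(_mm512_cvtsepi64_epi8(a));
        assert_eq!(r, [127, -128, 5, -5, 0, 1, -1, 127, 0, 0, 0, 0, 0, 0, 0, 0]);
    }
}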
13960
13961/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13962///
13963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
13964#[inline]
13965#[target_feature(enable = "avx512f")]
13966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13967#[cfg_attr(test, assert_instr(vpmovusdw))]
13968pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
13969    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
13970}
13971
13972/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13973///
13974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
13975#[inline]
13976#[target_feature(enable = "avx512f")]
13977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13978#[cfg_attr(test, assert_instr(vpmovusdw))]
13979pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13980    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
13981}
13982
13983/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13984///
13985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
13986#[inline]
13987#[target_feature(enable = "avx512f")]
13988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13989#[cfg_attr(test, assert_instr(vpmovusdw))]
13990pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13991    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
13992}
13993
13994/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13995///
13996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
13997#[inline]
13998#[target_feature(enable = "avx512f,avx512vl")]
13999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14000#[cfg_attr(test, assert_instr(vpmovusdw))]
14001pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14002    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
14003}
14004
14005/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14006///
14007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14008#[inline]
14009#[target_feature(enable = "avx512f,avx512vl")]
14010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14011#[cfg_attr(test, assert_instr(vpmovusdw))]
14012pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14013    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
14014}
14015
14016/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14017///
14018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14019#[inline]
14020#[target_feature(enable = "avx512f,avx512vl")]
14021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14022#[cfg_attr(test, assert_instr(vpmovusdw))]
14023pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14024    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
14025}
14026
14027/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14028///
14029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14030#[inline]
14031#[target_feature(enable = "avx512f,avx512vl")]
14032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14033#[cfg_attr(test, assert_instr(vpmovusdw))]
14034pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14035    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
14036}
14037
14038/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14039///
14040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14041#[inline]
14042#[target_feature(enable = "avx512f,avx512vl")]
14043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14044#[cfg_attr(test, assert_instr(vpmovusdw))]
14045pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14046    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
14047}
14048
14049/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14050///
14051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14052#[inline]
14053#[target_feature(enable = "avx512f,avx512vl")]
14054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14055#[cfg_attr(test, assert_instr(vpmovusdw))]
14056pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14057    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14058}
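
// Illustrative usage sketch (the helper below is ours, not an stdarch item): source
// lanes are treated as unsigned, so 0xFFFF_FFFF clamps to u16::MAX, and the writemask
// variant keeps the `src` value in every lane whose mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvtusepi32_epi16_sketch() {
    unsafe {
        let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF when read as unsigned
        let full: [u16; 16] = transmute(_mm512_cvtusepi32_epi16(a));
        assert_eq!(full, [u16::MAX; 16]);

        // Only the low eight lanes are converted; the rest are copied from `src`.
        let src = _mm256_set1_epi16(7);
        let masked: [u16; 16] = transmute(_mm512_mask_cvtusepi32_epi16(src, 0x00FF, a));
        assert_eq!(&masked[..8], &[u16::MAX; 8]);
        assert_eq!(&masked[8..], &[7u16; 8]);
    }
}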
14059
14060/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14061///
14062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
14063#[inline]
14064#[target_feature(enable = "avx512f")]
14065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14066#[cfg_attr(test, assert_instr(vpmovusdb))]
14067pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14068    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
14069}
14070
14071/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14072///
14073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14074#[inline]
14075#[target_feature(enable = "avx512f")]
14076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14077#[cfg_attr(test, assert_instr(vpmovusdb))]
14078pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14079    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
14080}
14081
14082/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14083///
14084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14085#[inline]
14086#[target_feature(enable = "avx512f")]
14087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14088#[cfg_attr(test, assert_instr(vpmovusdb))]
14089pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14090    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
14091}
14092
14093/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14094///
14095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14096#[inline]
14097#[target_feature(enable = "avx512f,avx512vl")]
14098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14099#[cfg_attr(test, assert_instr(vpmovusdb))]
14100pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14101    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
14102}
14103
14104/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14105///
14106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14107#[inline]
14108#[target_feature(enable = "avx512f,avx512vl")]
14109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14110#[cfg_attr(test, assert_instr(vpmovusdb))]
14111pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14112    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
14113}
14114
14115/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14116///
14117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14118#[inline]
14119#[target_feature(enable = "avx512f,avx512vl")]
14120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14121#[cfg_attr(test, assert_instr(vpmovusdb))]
14122pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14123    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
14124}
14125
14126/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14127///
14128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14129#[inline]
14130#[target_feature(enable = "avx512f,avx512vl")]
14131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14132#[cfg_attr(test, assert_instr(vpmovusdb))]
14133pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14134    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
14135}
14136
14137/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14138///
14139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14140#[inline]
14141#[target_feature(enable = "avx512f,avx512vl")]
14142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14143#[cfg_attr(test, assert_instr(vpmovusdb))]
14144pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14145    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
14146}
14147
14148/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14149///
14150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14151#[inline]
14152#[target_feature(enable = "avx512f,avx512vl")]
14153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14154#[cfg_attr(test, assert_instr(vpmovusdb))]
14155pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14156    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
14157}
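
// Illustrative usage sketch (the helper below is ours, not an stdarch item): sixteen
// unsigned 32-bit lanes narrow to sixteen bytes with unsigned saturation, and the
// zeromask form zeroes every byte whose mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvtusepi32_epi8_sketch() {
    unsafe {
        let a = _mm512_set1_epi32(1000); // 1000 > u8::MAX, so selected lanes clamp to 255
        let r: [u8; 16] = transmute(_mm512_maskz_cvtusepi32_epi8(0b01010101_01010101, a));
        for (i, &byte) in r.iter().enumerate() {
            assert_eq!(byte, if i % 2 == 0 { u8::MAX } else { 0 });
        }
    }
}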
14158
14159/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14160///
14161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
14162#[inline]
14163#[target_feature(enable = "avx512f")]
14164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14165#[cfg_attr(test, assert_instr(vpmovusqd))]
14166pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14167    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
14168}
14169
14170/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14171///
14172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14173#[inline]
14174#[target_feature(enable = "avx512f")]
14175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14176#[cfg_attr(test, assert_instr(vpmovusqd))]
14177pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14178    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
14179}
14180
14181/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14182///
14183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14184#[inline]
14185#[target_feature(enable = "avx512f")]
14186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14187#[cfg_attr(test, assert_instr(vpmovusqd))]
14188pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14189    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
14190}
14191
14192/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14193///
14194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14195#[inline]
14196#[target_feature(enable = "avx512f,avx512vl")]
14197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14198#[cfg_attr(test, assert_instr(vpmovusqd))]
14199pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14200    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
14201}
14202
14203/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14204///
14205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14206#[inline]
14207#[target_feature(enable = "avx512f,avx512vl")]
14208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14209#[cfg_attr(test, assert_instr(vpmovusqd))]
14210pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14211    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
14212}
14213
14214/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14215///
14216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14217#[inline]
14218#[target_feature(enable = "avx512f,avx512vl")]
14219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14220#[cfg_attr(test, assert_instr(vpmovusqd))]
14221pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14222    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
14223}
14224
14225/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14226///
14227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14228#[inline]
14229#[target_feature(enable = "avx512f,avx512vl")]
14230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14231#[cfg_attr(test, assert_instr(vpmovusqd))]
14232pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14233    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
14234}
14235
14236/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14237///
14238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14239#[inline]
14240#[target_feature(enable = "avx512f,avx512vl")]
14241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14242#[cfg_attr(test, assert_instr(vpmovusqd))]
14243pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14244    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
14245}
14246
14247/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14248///
14249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14250#[inline]
14251#[target_feature(enable = "avx512f,avx512vl")]
14252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14253#[cfg_attr(test, assert_instr(vpmovusqd))]
14254pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14255    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
14256}
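
// Illustrative usage sketch (the helper below is ours, not an stdarch item): 64-bit
// lanes that fit in 32 bits pass through unchanged, anything larger clamps to u32::MAX.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvtusepi64_epi32_sketch() {
    unsafe {
        let a = _mm512_setr_epi64(1, 2, 3, 4, 1i64 << 40, 1i64 << 40, 1i64 << 40, 1i64 << 40);
        let r: [u32; 8] = transmute(_mm512_cvtusepi64_epi32(a));
        assert_eq!(r, [1, 2, 3, 4, u32::MAX, u32::MAX, u32::MAX, u32::MAX]);
    }
}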
14257
14258/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14259///
14260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
14261#[inline]
14262#[target_feature(enable = "avx512f")]
14263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14264#[cfg_attr(test, assert_instr(vpmovusqw))]
14265pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14266    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
14267}
14268
14269/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14270///
14271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14272#[inline]
14273#[target_feature(enable = "avx512f")]
14274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14275#[cfg_attr(test, assert_instr(vpmovusqw))]
14276pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14277    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
14278}
14279
14280/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14281///
14282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14283#[inline]
14284#[target_feature(enable = "avx512f")]
14285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14286#[cfg_attr(test, assert_instr(vpmovusqw))]
14287pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14288    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
14289}
14290
14291/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14292///
14293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14294#[inline]
14295#[target_feature(enable = "avx512f,avx512vl")]
14296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14297#[cfg_attr(test, assert_instr(vpmovusqw))]
14298pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14299    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
14300}
14301
14302/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14303///
14304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14305#[inline]
14306#[target_feature(enable = "avx512f,avx512vl")]
14307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14308#[cfg_attr(test, assert_instr(vpmovusqw))]
14309pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14310    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
14311}
14312
14313/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14314///
14315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14316#[inline]
14317#[target_feature(enable = "avx512f,avx512vl")]
14318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14319#[cfg_attr(test, assert_instr(vpmovusqw))]
14320pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14321    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
14322}
14323
14324/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14325///
14326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14327#[inline]
14328#[target_feature(enable = "avx512f,avx512vl")]
14329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14330#[cfg_attr(test, assert_instr(vpmovusqw))]
14331pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14332    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
14333}
14334
14335/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14336///
14337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14338#[inline]
14339#[target_feature(enable = "avx512f,avx512vl")]
14340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14341#[cfg_attr(test, assert_instr(vpmovusqw))]
14342pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14343    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
14344}
14345
14346/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14347///
14348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14349#[inline]
14350#[target_feature(enable = "avx512f,avx512vl")]
14351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14352#[cfg_attr(test, assert_instr(vpmovusqw))]
14353pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14354    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
14355}
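
// Illustrative usage sketch (the helper below is ours, not an stdarch item): in the
// 256-bit VL form only four lanes exist, so the converted 16-bit values fill the low
// 64 bits of the result and the remaining lanes are zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _example_cvtusepi64_epi16_sketch() {
    unsafe {
        let a = _mm256_set1_epi64x(-1); // u64::MAX in every lane, clamps to u16::MAX
        let r: [u16; 8] = transmute(_mm256_cvtusepi64_epi16(a));
        assert_eq!(r, [u16::MAX, u16::MAX, u16::MAX, u16::MAX, 0, 0, 0, 0]);
    }
}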
14356
14357/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14358///
14359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14360#[inline]
14361#[target_feature(enable = "avx512f")]
14362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14363#[cfg_attr(test, assert_instr(vpmovusqb))]
14364pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14365    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
14366}
14367
14368/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14369///
14370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14371#[inline]
14372#[target_feature(enable = "avx512f")]
14373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14374#[cfg_attr(test, assert_instr(vpmovusqb))]
14375pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14376    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
14377}
14378
14379/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14380///
14381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14382#[inline]
14383#[target_feature(enable = "avx512f")]
14384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14385#[cfg_attr(test, assert_instr(vpmovusqb))]
14386pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14387    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
14388}
14389
14390/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14391///
14392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14393#[inline]
14394#[target_feature(enable = "avx512f,avx512vl")]
14395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14396#[cfg_attr(test, assert_instr(vpmovusqb))]
14397pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14398    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
14399}
14400
14401/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14402///
14403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14404#[inline]
14405#[target_feature(enable = "avx512f,avx512vl")]
14406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14407#[cfg_attr(test, assert_instr(vpmovusqb))]
14408pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14409    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
14410}
14411
14412/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14413///
14414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14415#[inline]
14416#[target_feature(enable = "avx512f,avx512vl")]
14417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14418#[cfg_attr(test, assert_instr(vpmovusqb))]
14419pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14420    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
14421}
14422
14423/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14424///
14425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14426#[inline]
14427#[target_feature(enable = "avx512f,avx512vl")]
14428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14429#[cfg_attr(test, assert_instr(vpmovusqb))]
14430pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14431    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
14432}
14433
14434/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14435///
14436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14437#[inline]
14438#[target_feature(enable = "avx512f,avx512vl")]
14439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14440#[cfg_attr(test, assert_instr(vpmovusqb))]
14441pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14442    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
14443}
14444
14445/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14446///
14447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14448#[inline]
14449#[target_feature(enable = "avx512f,avx512vl")]
14450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14451#[cfg_attr(test, assert_instr(vpmovusqb))]
14452pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14453    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
14454}
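
// Illustrative usage sketch (the helper below is ours, not an stdarch item): the
// 128-bit form converts just two lanes, so only the first two result bytes are
// meaningful and the rest are zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _example_cvtusepi64_epi8_sketch() {
    unsafe {
        let a = _mm_set_epi64x(500, 3); // lane 0 = 3, lane 1 = 500 (clamps to 255)
        let r: [u8; 16] = transmute(_mm_cvtusepi64_epi8(a));
        assert_eq!(r[0], 3);
        assert_eq!(r[1], u8::MAX);
        assert!(r[2..].iter().all(|&b| b == 0));
    }
}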
14455
14456/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
14457///
14458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
14459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14464///
14465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
14466#[inline]
14467#[target_feature(enable = "avx512f")]
14468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14469#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14470#[rustc_legacy_const_generics(1)]
14471pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14472    unsafe {
14473        static_assert_rounding!(ROUNDING);
14474        let a = a.as_f32x16();
14475        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
14476        transmute(r)
14477    }
14478}
14479
14480/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14481///
14482/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14488///
14489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14490#[inline]
14491#[target_feature(enable = "avx512f")]
14492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14493#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14494#[rustc_legacy_const_generics(3)]
14495pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
14496    src: __m512i,
14497    k: __mmask16,
14498    a: __m512,
14499) -> __m512i {
14500    unsafe {
14501        static_assert_rounding!(ROUNDING);
14502        let a = a.as_f32x16();
14503        let src = src.as_i32x16();
14504        let r = vcvtps2dq(a, src, k, ROUNDING);
14505        transmute(r)
14506    }
14507}
14508
14509/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14510///
14511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14517///
14518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14519#[inline]
14520#[target_feature(enable = "avx512f")]
14521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14522#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14523#[rustc_legacy_const_generics(2)]
14524pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14525    unsafe {
14526        static_assert_rounding!(ROUNDING);
14527        let a = a.as_f32x16();
14528        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
14529        transmute(r)
14530    }
14531}
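
// Illustrative usage sketch (the helper below is ours, not an stdarch item): the
// ROUNDING const selects the mode for the whole vector, so 1.5 becomes 1 when rounding
// toward negative infinity and 2 when rounding toward positive infinity.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvt_roundps_epi32_sketch() {
    unsafe {
        const DOWN: i32 = _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
        const UP: i32 = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
        let a = _mm512_set1_ps(1.5);
        let down: [i32; 16] = transmute(_mm512_cvt_roundps_epi32::<DOWN>(a));
        let up: [i32; 16] = transmute(_mm512_cvt_roundps_epi32::<UP>(a));
        assert_eq!(down, [1; 16]);
        assert_eq!(up, [2; 16]);
    }
}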
14532
14533/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14534///
14535/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14536/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14537/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14538/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14539/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14540/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14541///
14542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
14543#[inline]
14544#[target_feature(enable = "avx512f")]
14545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14546#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14547#[rustc_legacy_const_generics(1)]
14548pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
14549    unsafe {
14550        static_assert_rounding!(ROUNDING);
14551        let a = a.as_f32x16();
14552        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
14553        transmute(r)
14554    }
14555}
14556
14557/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14558///
14559/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14565///
14566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14567#[inline]
14568#[target_feature(enable = "avx512f")]
14569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14570#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14571#[rustc_legacy_const_generics(3)]
14572pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
14573    src: __m512i,
14574    k: __mmask16,
14575    a: __m512,
14576) -> __m512i {
14577    unsafe {
14578        static_assert_rounding!(ROUNDING);
14579        let a = a.as_f32x16();
14580        let src = src.as_u32x16();
14581        let r = vcvtps2udq(a, src, k, ROUNDING);
14582        transmute(r)
14583    }
14584}
14585
14586/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14587///
14588/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14594///
14595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14596#[inline]
14597#[target_feature(enable = "avx512f")]
14598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14599#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14600#[rustc_legacy_const_generics(2)]
14601pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14602    unsafe {
14603        static_assert_rounding!(ROUNDING);
14604        let a = a.as_f32x16();
14605        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
14606        transmute(r)
14607    }
14608}
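
// Illustrative usage sketch (the helper below is ours, not an stdarch item): the
// unsigned form covers values above i32::MAX; 3e9 is exactly representable as an f32
// (it is a multiple of 256), so it converts without rounding error.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvt_roundps_epu32_sketch() {
    unsafe {
        const RM: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
        let a = _mm512_set1_ps(3_000_000_000.0);
        let r: [u32; 16] = transmute(_mm512_cvt_roundps_epu32::<RM>(a));
        assert_eq!(r, [3_000_000_000; 16]);
    }
}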
14609
14610/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
14611/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14612///
14613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
14614#[inline]
14615#[target_feature(enable = "avx512f")]
14616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14617#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14618#[rustc_legacy_const_generics(1)]
14619pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
14620    unsafe {
14621        static_assert_sae!(SAE);
14622        let a = a.as_f32x8();
14623        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
14624        transmute(r)
14625    }
14626}
14627
14628/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14629/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1348)
14632#[inline]
14633#[target_feature(enable = "avx512f")]
14634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14635#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14636#[rustc_legacy_const_generics(3)]
14637pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
14638    unsafe {
14639        static_assert_sae!(SAE);
14640        let a = a.as_f32x8();
14641        let src = src.as_f64x8();
14642        let r = vcvtps2pd(a, src, k, SAE);
14643        transmute(r)
14644    }
14645}
14646
14647/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14648/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
14649///
14650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1349)
14651#[inline]
14652#[target_feature(enable = "avx512f")]
14653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14654#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14655#[rustc_legacy_const_generics(2)]
14656pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
14657    unsafe {
14658        static_assert_sae!(SAE);
14659        let a = a.as_f32x8();
14660        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
14661        transmute(r)
14662    }
14663}
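
// Illustrative usage sketch (the helper below is ours, not an stdarch item): widening
// f32 -> f64 is exact, so SAE only controls exception suppression here and
// _MM_FROUND_CUR_DIRECTION keeps the default behaviour.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvt_roundps_pd_sketch() {
    unsafe {
        let a = _mm256_set1_ps(0.1);
        let r: [f64; 8] = transmute(_mm512_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(a));
        assert_eq!(r, [0.1f32 as f64; 8]);
    }
}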
14664
14665/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14666///
14667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14673///
14674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
14675#[inline]
14676#[target_feature(enable = "avx512f")]
14677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14678#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14679#[rustc_legacy_const_generics(1)]
14680pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14681    unsafe {
14682        static_assert_rounding!(ROUNDING);
14683        let a = a.as_f64x8();
14684        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
14685        transmute(r)
14686    }
14687}
14688
14689/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14690///
14691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14697///
14698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
14699#[inline]
14700#[target_feature(enable = "avx512f")]
14701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14702#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14703#[rustc_legacy_const_generics(3)]
14704pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
14705    src: __m256i,
14706    k: __mmask8,
14707    a: __m512d,
14708) -> __m256i {
14709    unsafe {
14710        static_assert_rounding!(ROUNDING);
14711        let a = a.as_f64x8();
14712        let src = src.as_i32x8();
14713        let r = vcvtpd2dq(a, src, k, ROUNDING);
14714        transmute(r)
14715    }
14716}
14717
14718/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14719///
14720/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14726///
14727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
14728#[inline]
14729#[target_feature(enable = "avx512f")]
14730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14731#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14732#[rustc_legacy_const_generics(2)]
14733pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14734    unsafe {
14735        static_assert_rounding!(ROUNDING);
14736        let a = a.as_f64x8();
14737        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
14738        transmute(r)
14739    }
14740}
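
// Illustrative usage sketch (the helper below is ours, not an stdarch item): eight f64
// lanes narrow to eight i32 lanes; with _MM_FROUND_TO_ZERO the fractional part is
// simply dropped.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvt_roundpd_epi32_sketch() {
    unsafe {
        const RM: i32 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
        let a = _mm512_set1_pd(-2.75);
        let r: [i32; 8] = transmute(_mm512_cvt_roundpd_epi32::<RM>(a));
        assert_eq!(r, [-2; 8]);
    }
}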
14741
14742/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14743///
14744/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14745/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14746/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14747/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14748/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14749/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14750///
14751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
14752#[inline]
14753#[target_feature(enable = "avx512f")]
14754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14755#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14756#[rustc_legacy_const_generics(1)]
14757pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14758    unsafe {
14759        static_assert_rounding!(ROUNDING);
14760        let a = a.as_f64x8();
14761        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
14762        transmute(r)
14763    }
14764}
14765
14766/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14767///
14768/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14769/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14770/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14771/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14772/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14773/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14774///
14775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
14776#[inline]
14777#[target_feature(enable = "avx512f")]
14778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14779#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14780#[rustc_legacy_const_generics(3)]
14781pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
14782    src: __m256i,
14783    k: __mmask8,
14784    a: __m512d,
14785) -> __m256i {
14786    unsafe {
14787        static_assert_rounding!(ROUNDING);
14788        let a = a.as_f64x8();
14789        let src = src.as_u32x8();
14790        let r = vcvtpd2udq(a, src, k, ROUNDING);
14791        transmute(r)
14792    }
14793}
14794
14795/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14796///
14797/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14798/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14799/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14800/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14801/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14802/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14803///
14804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
14805#[inline]
14806#[target_feature(enable = "avx512f")]
14807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14808#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14809#[rustc_legacy_const_generics(2)]
14810pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14811    unsafe {
14812        static_assert_rounding!(ROUNDING);
14813        let a = a.as_f64x8();
14814        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
14815        transmute(r)
14816    }
14817}
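
// Illustrative usage sketch (the helper below is ours, not an stdarch item): the
// zeromask form converts only the selected f64 lanes to unsigned 32-bit values and
// zeroes the rest.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_cvt_roundpd_epu32_sketch() {
    unsafe {
        const RM: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
        let a = _mm512_set1_pd(4_000_000_000.0); // above i32::MAX but within u32 range
        let r: [u32; 8] = transmute(_mm512_maskz_cvt_roundpd_epu32::<RM>(0b0000_1111, a));
        assert_eq!(&r[..4], &[4_000_000_000u32; 4]);
        assert_eq!(&r[4..], &[0u32; 4]);
    }
}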
14818
14819/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14820///
14821/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14822/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14823/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14824/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14825/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14826/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14827///
14828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
14829#[inline]
14830#[target_feature(enable = "avx512f")]
14831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14832#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14833#[rustc_legacy_const_generics(1)]
14834pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
14835    unsafe {
14836        static_assert_rounding!(ROUNDING);
14837        let a = a.as_f64x8();
14838        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
14839        transmute(r)
14840    }
14841}
14842
14843/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14844///
14845/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14846/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14847/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14848/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14849/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14850/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14851///
14852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
14853#[inline]
14854#[target_feature(enable = "avx512f")]
14855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14856#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14857#[rustc_legacy_const_generics(3)]
14858pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
14859    src: __m256,
14860    k: __mmask8,
14861    a: __m512d,
14862) -> __m256 {
14863    unsafe {
14864        static_assert_rounding!(ROUNDING);
14865        let a = a.as_f64x8();
14866        let src = src.as_f32x8();
14867        let r = vcvtpd2ps(a, src, k, ROUNDING);
14868        transmute(r)
14869    }
14870}
14871
14872/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14873///
14874/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14880///
14881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
14882#[inline]
14883#[target_feature(enable = "avx512f")]
14884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14885#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14886#[rustc_legacy_const_generics(2)]
14887pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
14888    unsafe {
14889        static_assert_rounding!(ROUNDING);
14890        let a = a.as_f64x8();
14891        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
14892        transmute(r)
14893    }
14894}
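
// A minimal usage sketch (test-only; the helper name is made up for illustration): the
// writemask variant converts only the lanes whose mask bit is set and copies the rest
// from `src` unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_cvt_roundpd_ps_sketch() {
    let a: __m512d = unsafe { mem::transmute([2.5f64; 8]) };
    let src: __m256 = unsafe { mem::transmute([-1.0f32; 8]) };
    // Even lanes are converted to 2.5f32; odd lanes keep the -1.0 from `src`.
    let r = _mm512_mask_cvt_roundpd_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        src,
        0b0101_0101,
        a,
    );
    let r: [f32; 8] = unsafe { mem::transmute(r) };
    assert_eq!(r, [2.5, -1.0, 2.5, -1.0, 2.5, -1.0, 2.5, -1.0]);
}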
14895
14896/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14897///
14898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14904///
14905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
14906#[inline]
14907#[target_feature(enable = "avx512f")]
14908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14909#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14910#[rustc_legacy_const_generics(1)]
14911pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14912    unsafe {
14913        static_assert_rounding!(ROUNDING);
14914        let a = a.as_i32x16();
14915        let r = vcvtdq2ps(a, ROUNDING);
14916        transmute(r)
14917    }
14918}
14919
14920/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14921///
14922/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14928///
14929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
14930#[inline]
14931#[target_feature(enable = "avx512f")]
14932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14933#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14934#[rustc_legacy_const_generics(3)]
14935pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
14936    src: __m512,
14937    k: __mmask16,
14938    a: __m512i,
14939) -> __m512 {
14940    unsafe {
14941        static_assert_rounding!(ROUNDING);
14942        let a = a.as_i32x16();
14943        let r = vcvtdq2ps(a, ROUNDING);
14944        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
14945    }
14946}
14947
14948/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14949///
14950/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14951/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14952/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14953/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14954/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14955/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14956///
14957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
14958#[inline]
14959#[target_feature(enable = "avx512f")]
14960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14961#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14962#[rustc_legacy_const_generics(2)]
14963pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
14964    unsafe {
14965        static_assert_rounding!(ROUNDING);
14966        let a = a.as_i32x16();
14967        let r = vcvtdq2ps(a, ROUNDING);
14968        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
14969    }
14970}
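
// A minimal usage sketch (test-only; the helper name is made up for illustration) of why
// the rounding mode matters here: an i32 with more than 24 significant bits cannot be
// represented exactly in f32, so the chosen mode picks which neighbouring f32 you get.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundepi32_ps_rounding_sketch() {
    // 2^24 + 1 lies halfway between the representable values 16_777_216.0 and 16_777_218.0.
    let a: __m512i = unsafe { mem::transmute([16_777_217i32; 16]) };
    let up = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    let down = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    let up: [f32; 16] = unsafe { mem::transmute(up) };
    let down: [f32; 16] = unsafe { mem::transmute(down) };
    assert_eq!(up, [16_777_218.0; 16]);
    assert_eq!(down, [16_777_216.0; 16]);
}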
14971
14972/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14973///
14974/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14980///
14981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
14982#[inline]
14983#[target_feature(enable = "avx512f")]
14984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14985#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14986#[rustc_legacy_const_generics(1)]
14987pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14988    unsafe {
14989        static_assert_rounding!(ROUNDING);
14990        let a = a.as_u32x16();
14991        let r = vcvtudq2ps(a, ROUNDING);
14992        transmute(r)
14993    }
14994}
14995
14996/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14997///
14998/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14999/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15000/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15001/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15002/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15003/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15004///
15005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15006#[inline]
15007#[target_feature(enable = "avx512f")]
15008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15009#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15010#[rustc_legacy_const_generics(3)]
15011pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15012    src: __m512,
15013    k: __mmask16,
15014    a: __m512i,
15015) -> __m512 {
15016    unsafe {
15017        static_assert_rounding!(ROUNDING);
15018        let a = a.as_u32x16();
15019        let r = vcvtudq2ps(a, ROUNDING);
15020        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
15021    }
15022}
15023
15024/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15025///
15026/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15027/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15028/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15029/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15030/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15031/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15032///
15033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15034#[inline]
15035#[target_feature(enable = "avx512f")]
15036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15037#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15038#[rustc_legacy_const_generics(2)]
15039pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15040    unsafe {
15041        static_assert_rounding!(ROUNDING);
15042        let a = a.as_u32x16();
15043        let r = vcvtudq2ps(a, ROUNDING);
15044        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
15045    }
15046}
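
// A minimal usage sketch (test-only; the helper name is made up for illustration): the
// epu32 variant treats the input lanes as unsigned, so bit patterns above i32::MAX
// convert to their large positive value rather than to a negative one.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundepu32_ps_sketch() {
    // 4_000_000_000 is above i32::MAX but exactly representable in f32 (15_625_000 * 2^8).
    let a: __m512i = unsafe { mem::transmute([4_000_000_000u32; 16]) };
    let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let r: [f32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(r, [4_000_000_000.0; 16]);
}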
15047
15048/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15049/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15050///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15051///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15052///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15053///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15054///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15055///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15056///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15057///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15058///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15059///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15060///
15061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
15062#[inline]
15063#[target_feature(enable = "avx512f")]
15064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15065#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15066#[rustc_legacy_const_generics(1)]
15067pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15068    unsafe {
15069        static_assert_extended_rounding!(ROUNDING);
15070        let a = a.as_f32x16();
15071        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
15072        transmute(r)
15073    }
15074}
15075
15076/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15077/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15078///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15079///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15080///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15081///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15082///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15083///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15084///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15085///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15086///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15087///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15088///
15089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15090#[inline]
15091#[target_feature(enable = "avx512f")]
15092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15093#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15094#[rustc_legacy_const_generics(3)]
15095pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
15096    src: __m256i,
15097    k: __mmask16,
15098    a: __m512,
15099) -> __m256i {
15100    unsafe {
15101        static_assert_extended_rounding!(ROUNDING);
15102        let a = a.as_f32x16();
15103        let src = src.as_i16x16();
15104        let r = vcvtps2ph(a, ROUNDING, src, k);
15105        transmute(r)
15106    }
15107}
15108
15109/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15110/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15111///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15112///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15113///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15114///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15115///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15116///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15117///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15118///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15119///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15120///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15121///
15122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15123#[inline]
15124#[target_feature(enable = "avx512f")]
15125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15126#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15127#[rustc_legacy_const_generics(2)]
15128pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15129    unsafe {
15130        static_assert_extended_rounding!(ROUNDING);
15131        let a = a.as_f32x16();
15132        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
15133        transmute(r)
15134    }
15135}
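
// A minimal usage sketch (test-only; the helper name is made up for illustration): the
// ps -> ph conversion packs sixteen binary16 bit patterns into the returned __m256i, so
// the result is most naturally inspected as sixteen u16 lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundps_ph_sketch() {
    let a: __m512 = unsafe { mem::transmute([1.0f32; 16]) };
    let r = _mm512_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let r: [u16; 16] = unsafe { mem::transmute(r) };
    // 1.0 in IEEE 754 binary16 is 0x3C00.
    assert_eq!(r, [0x3C00; 16]);
}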
15136
15137/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15138/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15139/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15140/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15141/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15142/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15143/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15144///
15145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
15146#[inline]
15147#[target_feature(enable = "avx512f,avx512vl")]
15148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15149#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15150#[rustc_legacy_const_generics(3)]
15151pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15152    src: __m128i,
15153    k: __mmask8,
15154    a: __m256,
15155) -> __m128i {
15156    unsafe {
15157        static_assert_uimm_bits!(IMM8, 8);
15158        let a = a.as_f32x8();
15159        let src = src.as_i16x8();
15160        let r = vcvtps2ph256(a, IMM8, src, k);
15161        transmute(r)
15162    }
15163}
15164
15165/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15166/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15167/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15168/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15169/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15170/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15171/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15172///
15173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15174#[inline]
15175#[target_feature(enable = "avx512f,avx512vl")]
15176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15177#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15178#[rustc_legacy_const_generics(2)]
15179pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15180    unsafe {
15181        static_assert_uimm_bits!(IMM8, 8);
15182        let a = a.as_f32x8();
15183        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
15184        transmute(r)
15185    }
15186}
15187
15188/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15189/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15190/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15191/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15192/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15193/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15194/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15195///
15196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15197#[inline]
15198#[target_feature(enable = "avx512f,avx512vl")]
15199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15200#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15201#[rustc_legacy_const_generics(3)]
15202pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15203    unsafe {
15204        static_assert_uimm_bits!(IMM8, 8);
15205        let a = a.as_f32x4();
15206        let src = src.as_i16x8();
15207        let r = vcvtps2ph128(a, IMM8, src, k);
15208        transmute(r)
15209    }
15210}
15211
15212/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15213/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15214/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15215/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15216/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15217/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15218/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15219///
15220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15221#[inline]
15222#[target_feature(enable = "avx512f,avx512vl")]
15223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15224#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15225#[rustc_legacy_const_generics(2)]
15226pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15227    unsafe {
15228        static_assert_uimm_bits!(IMM8, 8);
15229        let a = a.as_f32x4();
15230        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
15231        transmute(r)
15232    }
15233}
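
// A minimal usage sketch (test-only; the helper name is made up for illustration): the
// 128-bit zeromasked form converts the four f32 lanes of `a` into four binary16 values
// in the low half of the result; masked-off lanes come back as zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn maskz_cvt_roundps_ph_128_sketch() {
    let a: __m128 = unsafe { mem::transmute([1.0f32; 4]) };
    let r = _mm_maskz_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        0b0011, a,
    );
    let r: [u16; 8] = unsafe { mem::transmute(r) };
    // Lanes 0 and 1 hold 1.0 as binary16 (0x3C00); lanes 2 and 3 are zeroed by the mask.
    assert_eq!((r[0], r[1], r[2], r[3]), (0x3C00, 0x3C00, 0, 0));
}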
15234
15235/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15236/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15237///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15238///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15239///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15240///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15241///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15242///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15243///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15244///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15245///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15246///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15247///
15248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15249#[inline]
15250#[target_feature(enable = "avx512f")]
15251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15252#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15253#[rustc_legacy_const_generics(1)]
15254pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15255    unsafe {
15256        static_assert_extended_rounding!(ROUNDING);
15257        let a = a.as_f32x16();
15258        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
15259        transmute(r)
15260    }
15261}
15262
15263/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15264/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15265///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15266///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15267///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15268///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15269///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15270///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15271///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15272///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15273///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15274///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15275///
15276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15277#[inline]
15278#[target_feature(enable = "avx512f")]
15279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15280#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15281#[rustc_legacy_const_generics(3)]
15282pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15283    unsafe {
15284        static_assert_extended_rounding!(ROUNDING);
15285        let a = a.as_f32x16();
15286        let src = src.as_i16x16();
15287        let r = vcvtps2ph(a, ROUNDING, src, k);
15288        transmute(r)
15289    }
15290}
15291
15292/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15293/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15294///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15295///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15296///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15297///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15298///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15299///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15300///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15301///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15302///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15303///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15304///
15305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15306#[inline]
15307#[target_feature(enable = "avx512f")]
15308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15309#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15310#[rustc_legacy_const_generics(2)]
15311pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15312    unsafe {
15313        static_assert_extended_rounding!(ROUNDING);
15314        let a = a.as_f32x16();
15315        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
15316        transmute(r)
15317    }
15318}
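
// A minimal usage sketch (test-only; the helper name is made up for illustration): with a
// writemask, only the selected lanes receive converted binary16 values; the others keep
// whatever 16-bit pattern `src` already held.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_cvtps_ph_sketch() {
    let a: __m512 = unsafe { mem::transmute([0.5f32; 16]) };
    let src: __m256i = unsafe { mem::transmute([0xFFFFu16; 16]) };
    // Convert only the upper eight lanes (0.5 is 0x3800 in binary16); keep `src` in the lower eight.
    let r = _mm512_mask_cvtps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        src,
        0b11111111_00000000,
        a,
    );
    let r: [u16; 16] = unsafe { mem::transmute(r) };
    assert_eq!(&r[..8], &[0xFFFF; 8]);
    assert_eq!(&r[8..], &[0x3800; 8]);
}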
15319
15320/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15321/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15322/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15323/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15324/// * [`_MM_FROUND_TO_POS_INF`] : round up
15325/// * [`_MM_FROUND_TO_ZERO`] : truncate
15326/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15327///
15328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15329#[inline]
15330#[target_feature(enable = "avx512f,avx512vl")]
15331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15332#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15333#[rustc_legacy_const_generics(3)]
15334pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15335    unsafe {
15336        static_assert_uimm_bits!(IMM8, 8);
15337        let a = a.as_f32x8();
15338        let src = src.as_i16x8();
15339        let r = vcvtps2ph256(a, IMM8, src, k);
15340        transmute(r)
15341    }
15342}
15343
15344/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15345/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15346/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15347/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15348/// * [`_MM_FROUND_TO_POS_INF`] : round up
15349/// * [`_MM_FROUND_TO_ZERO`] : truncate
15350/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15351///
15352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15353#[inline]
15354#[target_feature(enable = "avx512f,avx512vl")]
15355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15356#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15357#[rustc_legacy_const_generics(2)]
15358pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15359    unsafe {
15360        static_assert_uimm_bits!(IMM8, 8);
15361        let a = a.as_f32x8();
15362        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
15363        transmute(r)
15364    }
15365}
15366
15367/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15368/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15369/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15370/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15371/// * [`_MM_FROUND_TO_POS_INF`] : round up
15372/// * [`_MM_FROUND_TO_ZERO`] : truncate
15373/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15374///
15375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15376#[inline]
15377#[target_feature(enable = "avx512f,avx512vl")]
15378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15379#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15380#[rustc_legacy_const_generics(3)]
15381pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15382    unsafe {
15383        static_assert_uimm_bits!(IMM8, 8);
15384        let a = a.as_f32x4();
15385        let src = src.as_i16x8();
15386        let r = vcvtps2ph128(a, IMM8, src, k);
15387        transmute(r)
15388    }
15389}
15390
15391/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15392/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15393/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15394/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15395/// * [`_MM_FROUND_TO_POS_INF`] : round up
15396/// * [`_MM_FROUND_TO_ZERO`] : truncate
15397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15398///
15399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15400#[inline]
15401#[target_feature(enable = "avx512f,avx512vl")]
15402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15403#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15404#[rustc_legacy_const_generics(2)]
15405pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15406    unsafe {
15407        static_assert_uimm_bits!(IMM8, 8);
15408        let a = a.as_f32x4();
15409        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
15410        transmute(r)
15411    }
15412}
15413
15414/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15415/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15416///
15417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
15418#[inline]
15419#[target_feature(enable = "avx512f")]
15420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15421#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15422#[rustc_legacy_const_generics(1)]
15423pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15424    unsafe {
15425        static_assert_sae!(SAE);
15426        let a = a.as_i16x16();
15427        let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
15428        transmute(r)
15429    }
15430}
15431
15432/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15433/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15434///
15435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15436#[inline]
15437#[target_feature(enable = "avx512f")]
15438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15439#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15440#[rustc_legacy_const_generics(3)]
15441pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15442    unsafe {
15443        static_assert_sae!(SAE);
15444        let a = a.as_i16x16();
15445        let src = src.as_f32x16();
15446        let r = vcvtph2ps(a, src, k, SAE);
15447        transmute(r)
15448    }
15449}
15450
15451/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15452/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15453///
15454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15455#[inline]
15456#[target_feature(enable = "avx512f")]
15457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15458#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15459#[rustc_legacy_const_generics(2)]
15460pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15461    unsafe {
15462        static_assert_sae!(SAE);
15463        let a = a.as_i16x16();
15464        let r = vcvtph2ps(a, f32x16::ZERO, k, SAE);
15465        transmute(r)
15466    }
15467}
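
// A minimal usage sketch (test-only; the helper name is made up for illustration): going
// the other way, each u16 lane of `a` is read as a binary16 value and widened to f32.
// Every binary16 value is exactly representable in f32, so SAE only controls exceptions.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvt_roundph_ps_sketch() {
    // 0x3C00 is 1.0 in binary16.
    let a: __m256i = unsafe { mem::transmute([0x3C00u16; 16]) };
    let r = _mm512_cvt_roundph_ps::<{ _MM_FROUND_NO_EXC }>(a);
    let r: [f32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(r, [1.0; 16]);
}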
15468
15469/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15470///
15471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
15472#[inline]
15473#[target_feature(enable = "avx512f")]
15474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15475#[cfg_attr(test, assert_instr(vcvtph2ps))]
15476pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15477    unsafe {
15478        transmute(vcvtph2ps(
15479            a.as_i16x16(),
15480            f32x16::ZERO,
15481            0b11111111_11111111,
15482            _MM_FROUND_NO_EXC,
15483        ))
15484    }
15485}
15486
15487/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15488///
15489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15490#[inline]
15491#[target_feature(enable = "avx512f")]
15492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15493#[cfg_attr(test, assert_instr(vcvtph2ps))]
15494pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15495    unsafe {
15496        transmute(vcvtph2ps(
15497            a.as_i16x16(),
15498            src.as_f32x16(),
15499            k,
15500            _MM_FROUND_NO_EXC,
15501        ))
15502    }
15503}
15504
15505/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15506///
15507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15508#[inline]
15509#[target_feature(enable = "avx512f")]
15510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15511#[cfg_attr(test, assert_instr(vcvtph2ps))]
15512pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
15513    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
15514}
15515
15516/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15517///
15518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15519#[inline]
15520#[target_feature(enable = "avx512f,avx512vl")]
15521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15522#[cfg_attr(test, assert_instr(vcvtph2ps))]
15523pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15524    unsafe {
15525        let convert = _mm256_cvtph_ps(a);
15526        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
15527    }
15528}
15529
15530/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15531///
15532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15533#[inline]
15534#[target_feature(enable = "avx512f,avx512vl")]
15535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15536#[cfg_attr(test, assert_instr(vcvtph2ps))]
15537pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15538    unsafe {
15539        let convert = _mm256_cvtph_ps(a);
15540        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
15541    }
15542}
15543
15544/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15545///
15546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15547#[inline]
15548#[target_feature(enable = "avx512f,avx512vl")]
15549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15550#[cfg_attr(test, assert_instr(vcvtph2ps))]
15551pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15552    unsafe {
15553        let convert = _mm_cvtph_ps(a);
15554        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
15555    }
15556}
15557
15558/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15559///
15560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15561#[inline]
15562#[target_feature(enable = "avx512f,avx512vl")]
15563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15564#[cfg_attr(test, assert_instr(vcvtph2ps))]
15565pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15566    unsafe {
15567        let convert = _mm_cvtph_ps(a);
15568        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
15569    }
15570}
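
// A minimal usage sketch (test-only; the helper name is made up for illustration): the
// 128-bit zeromasked form reads four binary16 values from the low 64 bits of `a` and
// widens them to f32, zeroing the lanes whose mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn maskz_cvtph_ps_128_sketch() {
    // 0x4000 is 2.0 in binary16; only the low four u16 lanes of `a` are used.
    let a: __m128i = unsafe { mem::transmute([0x4000u16, 0x4000, 0x4000, 0x4000, 0, 0, 0, 0]) };
    let r = _mm_maskz_cvtph_ps(0b0011, a);
    let r: [f32; 4] = unsafe { mem::transmute(r) };
    assert_eq!(r, [2.0, 2.0, 0.0, 0.0]);
}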
15571
15572/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15573/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15574///
15575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
15576#[inline]
15577#[target_feature(enable = "avx512f")]
15578#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15579#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15580#[rustc_legacy_const_generics(1)]
15581pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15582    unsafe {
15583        static_assert_sae!(SAE);
15584        let a = a.as_f32x16();
15585        let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
15586        transmute(r)
15587    }
15588}
15589
15590/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15591/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15592///
15593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15594#[inline]
15595#[target_feature(enable = "avx512f")]
15596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15597#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15598#[rustc_legacy_const_generics(3)]
15599pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15600    src: __m512i,
15601    k: __mmask16,
15602    a: __m512,
15603) -> __m512i {
15604    unsafe {
15605        static_assert_sae!(SAE);
15606        let a = a.as_f32x16();
15607        let src = src.as_i32x16();
15608        let r = vcvttps2dq(a, src, k, SAE);
15609        transmute(r)
15610    }
15611}
15612
15613/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15614/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15615///
15616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
15617#[inline]
15618#[target_feature(enable = "avx512f")]
15619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15620#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15621#[rustc_legacy_const_generics(2)]
15622pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15623    unsafe {
15624        static_assert_sae!(SAE);
15625        let a = a.as_f32x16();
15626        let r = vcvttps2dq(a, i32x16::ZERO, k, SAE);
15627        transmute(r)
15628    }
15629}
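
// A minimal usage sketch (test-only; the helper name is made up for illustration): the
// "tt" conversions always truncate toward zero regardless of MXCSR.RC; the SAE parameter
// here only decides whether floating-point exceptions are suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtt_roundps_epi32_sketch() {
    let a: __m512 = unsafe { mem::transmute([-1.9f32; 16]) };
    let r = _mm512_cvtt_roundps_epi32::<{ _MM_FROUND_NO_EXC }>(a);
    let r: [i32; 16] = unsafe { mem::transmute(r) };
    // -1.9 truncates toward zero to -1 (a round-to-nearest conversion would give -2 instead).
    assert_eq!(r, [-1; 16]);
}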
15630
15631/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15632/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15633///
15634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
15635#[inline]
15636#[target_feature(enable = "avx512f")]
15637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15638#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15639#[rustc_legacy_const_generics(1)]
15640pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
15641    unsafe {
15642        static_assert_sae!(SAE);
15643        let a = a.as_f32x16();
15644        let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
15645        transmute(r)
15646    }
15647}
15648
15649/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15650/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15651///
15652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
15653#[inline]
15654#[target_feature(enable = "avx512f")]
15655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15656#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15657#[rustc_legacy_const_generics(3)]
15658pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
15659    src: __m512i,
15660    k: __mmask16,
15661    a: __m512,
15662) -> __m512i {
15663    unsafe {
15664        static_assert_sae!(SAE);
15665        let a = a.as_f32x16();
15666        let src = src.as_u32x16();
15667        let r = vcvttps2udq(a, src, k, SAE);
15668        transmute(r)
15669    }
15670}
15671
15672/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15673/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15674///
15675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
15676#[inline]
15677#[target_feature(enable = "avx512f")]
15678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15679#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15680#[rustc_legacy_const_generics(2)]
15681pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15682    unsafe {
15683        static_assert_sae!(SAE);
15684        let a = a.as_f32x16();
15685        let r = vcvttps2udq(a, u32x16::ZERO, k, SAE);
15686        transmute(r)
15687    }
15688}
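
// A minimal usage sketch (test-only; the helper name is made up for illustration): the
// unsigned truncating conversion combined with a zeromask; kept lanes hold the truncated
// unsigned value and masked-off lanes become zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn maskz_cvtt_roundps_epu32_sketch() {
    let a: __m512 = unsafe { mem::transmute([3.7f32; 16]) };
    let r = _mm512_maskz_cvtt_roundps_epu32::<{ _MM_FROUND_NO_EXC }>(
        0b00000000_11111111,
        a,
    );
    let r: [u32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(&r[..8], &[3; 8]);
    assert_eq!(&r[8..], &[0; 8]);
}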
15689
15690/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15691/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15692///
15693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
15694#[inline]
15695#[target_feature(enable = "avx512f")]
15696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15697#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15698#[rustc_legacy_const_generics(1)]
15699pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
15700    unsafe {
15701        static_assert_sae!(SAE);
15702        let a = a.as_f64x8();
15703        let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
15704        transmute(r)
15705    }
15706}
15707
15708/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15709/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15710///
15711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
15712#[inline]
15713#[target_feature(enable = "avx512f")]
15714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15715#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15716#[rustc_legacy_const_generics(3)]
15717pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
15718    src: __m256i,
15719    k: __mmask8,
15720    a: __m512d,
15721) -> __m256i {
15722    unsafe {
15723        static_assert_sae!(SAE);
15724        let a = a.as_f64x8();
15725        let src = src.as_i32x8();
15726        let r = vcvttpd2dq(a, src, k, SAE);
15727        transmute(r)
15728    }
15729}
15730
15731/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15732/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15733///
15734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1906)
15735#[inline]
15736#[target_feature(enable = "avx512f")]
15737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15738#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15739#[rustc_legacy_const_generics(2)]
15740pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15741    unsafe {
15742        static_assert_sae!(SAE);
15743        let a = a.as_f64x8();
15744        let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
15745        transmute(r)
15746    }
15747}
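
// A minimal usage sketch (test-only; the helper name is made up for illustration): eight
// f64 lanes truncate toward zero into eight i32 lanes, so the 512-bit input narrows to a
// 256-bit integer result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cvtt_roundpd_epi32_sketch() {
    let a: __m512d = unsafe { mem::transmute([-2.9f64; 8]) };
    let r = _mm512_cvtt_roundpd_epi32::<{ _MM_FROUND_NO_EXC }>(a);
    let r: [i32; 8] = unsafe { mem::transmute(r) };
    assert_eq!(r, [-2; 8]);
}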
15748
15749/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15750/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15751///
15752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
15753#[inline]
15754#[target_feature(enable = "avx512f")]
15755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15756#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15757#[rustc_legacy_const_generics(1)]
15758pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
15759    unsafe {
15760        static_assert_sae!(SAE);
15761        let a = a.as_f64x8();
15762        let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
15763        transmute(r)
15764    }
15765}
15766
15767/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15769///
15770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
15771#[inline]
15772#[target_feature(enable = "avx512f")]
15773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15774#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15775#[rustc_legacy_const_generics(3)]
15776pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
15777    src: __m256i,
15778    k: __mmask8,
15779    a: __m512d,
15780) -> __m256i {
15781    unsafe {
15782        static_assert_sae!(SAE);
15783        let a = a.as_f64x8();
15784        let src = src.as_i32x8();
15785        let r = vcvttpd2udq(a, src, k, SAE);
15786        transmute(r)
15787    }
15788}
15789
15790/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15791///
15792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
15793#[inline]
15794#[target_feature(enable = "avx512f")]
15795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15796#[cfg_attr(test, assert_instr(vcvttps2dq))]
15797pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
15798    unsafe {
15799        transmute(vcvttps2dq(
15800            a.as_f32x16(),
15801            i32x16::ZERO,
15802            0b11111111_11111111,
15803            _MM_FROUND_CUR_DIRECTION,
15804        ))
15805    }
15806}
15807
15808/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15809///
15810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
15811#[inline]
15812#[target_feature(enable = "avx512f")]
15813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15814#[cfg_attr(test, assert_instr(vcvttps2dq))]
15815pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15816    unsafe {
15817        transmute(vcvttps2dq(
15818            a.as_f32x16(),
15819            src.as_i32x16(),
15820            k,
15821            _MM_FROUND_CUR_DIRECTION,
15822        ))
15823    }
15824}
15825
15826/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15827///
15828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
15829#[inline]
15830#[target_feature(enable = "avx512f")]
15831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15832#[cfg_attr(test, assert_instr(vcvttps2dq))]
15833pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
15834    unsafe {
15835        transmute(vcvttps2dq(
15836            a.as_f32x16(),
15837            i32x16::ZERO,
15838            k,
15839            _MM_FROUND_CUR_DIRECTION,
15840        ))
15841    }
15842}
15843
15844/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15845///
15846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
15847#[inline]
15848#[target_feature(enable = "avx512f,avx512vl")]
15849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15850#[cfg_attr(test, assert_instr(vcvttps2dq))]
15851pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15852    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
15853}
15854
15855/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15856///
15857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
15858#[inline]
15859#[target_feature(enable = "avx512f,avx512vl")]
15860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15861#[cfg_attr(test, assert_instr(vcvttps2dq))]
15862pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
15863    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
15864}
15865
15866/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15867///
15868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
15869#[inline]
15870#[target_feature(enable = "avx512f,avx512vl")]
15871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15872#[cfg_attr(test, assert_instr(vcvttps2dq))]
15873pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15874    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
15875}
15876
15877/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15878///
15879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
15880#[inline]
15881#[target_feature(enable = "avx512f,avx512vl")]
15882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15883#[cfg_attr(test, assert_instr(vcvttps2dq))]
15884pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
15885    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
15886}
15887
15888/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15889///
15890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
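///
/// An illustrative sketch (not part of the original source; assumes `avx512f`
/// is available) showing why the unsigned conversion matters:
///
/// ```ignore
/// unsafe {
///     // 4_294_967_040.0 == 2^32 - 256 is exactly representable as an f32 and
///     // fits in a u32, but is far outside the i32 range.
///     let a = _mm512_set1_ps(4_294_967_040.0);
///     let r = _mm512_cvttps_epu32(a);
///     // Each 32-bit lane of `r`, read as a u32, equals 4_294_967_040.
/// }
/// ```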
15891#[inline]
15892#[target_feature(enable = "avx512f")]
15893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15894#[cfg_attr(test, assert_instr(vcvttps2udq))]
15895pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
15896    unsafe {
15897        transmute(vcvttps2udq(
15898            a.as_f32x16(),
15899            u32x16::ZERO,
15900            0b11111111_11111111,
15901            _MM_FROUND_CUR_DIRECTION,
15902        ))
15903    }
15904}
15905
15906/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15907///
15908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
15909#[inline]
15910#[target_feature(enable = "avx512f")]
15911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15912#[cfg_attr(test, assert_instr(vcvttps2udq))]
15913pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15914    unsafe {
15915        transmute(vcvttps2udq(
15916            a.as_f32x16(),
15917            src.as_u32x16(),
15918            k,
15919            _MM_FROUND_CUR_DIRECTION,
15920        ))
15921    }
15922}
15923
15924/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15925///
15926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
15927#[inline]
15928#[target_feature(enable = "avx512f")]
15929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15930#[cfg_attr(test, assert_instr(vcvttps2udq))]
15931pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
15932    unsafe {
15933        transmute(vcvttps2udq(
15934            a.as_f32x16(),
15935            u32x16::ZERO,
15936            k,
15937            _MM_FROUND_CUR_DIRECTION,
15938        ))
15939    }
15940}
15941
15942/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15943///
15944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
15945#[inline]
15946#[target_feature(enable = "avx512f,avx512vl")]
15947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15948#[cfg_attr(test, assert_instr(vcvttps2udq))]
15949pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
15950    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
15951}
15952
15953/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15954///
15955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
15956#[inline]
15957#[target_feature(enable = "avx512f,avx512vl")]
15958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15959#[cfg_attr(test, assert_instr(vcvttps2udq))]
15960pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15961    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
15962}
15963
15964/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15965///
15966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
15967#[inline]
15968#[target_feature(enable = "avx512f,avx512vl")]
15969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15970#[cfg_attr(test, assert_instr(vcvttps2udq))]
15971pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
15972    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
15973}
15974
15975/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15976///
15977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
15978#[inline]
15979#[target_feature(enable = "avx512f,avx512vl")]
15980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15981#[cfg_attr(test, assert_instr(vcvttps2udq))]
15982pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
15983    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
15984}
15985
15986/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15987///
15988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15989#[inline]
15990#[target_feature(enable = "avx512f,avx512vl")]
15991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15992#[cfg_attr(test, assert_instr(vcvttps2udq))]
15993pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15994    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
15995}
15996
15997/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15998///
15999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
16000#[inline]
16001#[target_feature(enable = "avx512f,avx512vl")]
16002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16003#[cfg_attr(test, assert_instr(vcvttps2udq))]
16004pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
16005    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
16006}
16007
16008/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16009/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16010///
16011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
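///
/// A brief illustrative sketch (not from the original source; assumes
/// `avx512f` is available) of the SAE parameter and the zeromask:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(3.9);
///     // _MM_FROUND_NO_EXC suppresses floating-point exceptions during the
///     // truncating conversion; only the low four lanes are kept by the mask.
///     let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0b0000_1111, a);
///     // Lanes 0..4 hold 3; lanes 4..8 are zeroed.
/// }
/// ```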
16012#[inline]
16013#[target_feature(enable = "avx512f")]
16014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16015#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16016#[rustc_legacy_const_generics(2)]
16017pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
16018    unsafe {
16019        static_assert_sae!(SAE);
16020        let a = a.as_f64x8();
16021        let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
16022        transmute(r)
16023    }
16024}
16025
16026/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16027///
16028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
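///
/// A minimal sketch (not part of the original source; assumes `avx512f` is
/// available) showing the narrowing from eight doubles to a 256-bit result:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_setr_pd(0.5, 1.5, 2.5, 3.5, -0.5, -1.5, -2.5, -3.5);
///     let r: __m256i = _mm512_cvttpd_epi32(a);
///     // Truncation toward zero yields the lanes [0, 1, 2, 3, 0, -1, -2, -3].
/// }
/// ```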
16029#[inline]
16030#[target_feature(enable = "avx512f")]
16031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16032#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16033pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
16034    unsafe {
16035        transmute(vcvttpd2dq(
16036            a.as_f64x8(),
16037            i32x8::ZERO,
16038            0b11111111,
16039            _MM_FROUND_CUR_DIRECTION,
16040        ))
16041    }
16042}
16043
16044/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16045///
16046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
16047#[inline]
16048#[target_feature(enable = "avx512f")]
16049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16050#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16051pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16052    unsafe {
16053        transmute(vcvttpd2dq(
16054            a.as_f64x8(),
16055            src.as_i32x8(),
16056            k,
16057            _MM_FROUND_CUR_DIRECTION,
16058        ))
16059    }
16060}
16061
16062/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16063///
16064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16065#[inline]
16066#[target_feature(enable = "avx512f")]
16067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16068#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16069pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16070    unsafe {
16071        transmute(vcvttpd2dq(
16072            a.as_f64x8(),
16073            i32x8::ZERO,
16074            k,
16075            _MM_FROUND_CUR_DIRECTION,
16076        ))
16077    }
16078}
16079
16080/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16081///
16082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16083#[inline]
16084#[target_feature(enable = "avx512f,avx512vl")]
16085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16086#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16087pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16088    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16089}
16090
16091/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16092///
16093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16094#[inline]
16095#[target_feature(enable = "avx512f,avx512vl")]
16096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16097#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16098pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
16099    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16100}
16101
16102/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16103///
16104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16105#[inline]
16106#[target_feature(enable = "avx512f,avx512vl")]
16107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16108#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16109pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16110    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16111}
16112
16113/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16114///
16115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16116#[inline]
16117#[target_feature(enable = "avx512f,avx512vl")]
16118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16119#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16120pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
16121    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16122}
16123
16124/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16125///
16126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16127#[inline]
16128#[target_feature(enable = "avx512f")]
16129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16130#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16131pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16132    unsafe {
16133        transmute(vcvttpd2udq(
16134            a.as_f64x8(),
16135            i32x8::ZERO,
16136            0b11111111,
16137            _MM_FROUND_CUR_DIRECTION,
16138        ))
16139    }
16140}
16141
16142/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16143///
16144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16145#[inline]
16146#[target_feature(enable = "avx512f")]
16147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16148#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16149pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16150    unsafe {
16151        transmute(vcvttpd2udq(
16152            a.as_f64x8(),
16153            src.as_i32x8(),
16154            k,
16155            _MM_FROUND_CUR_DIRECTION,
16156        ))
16157    }
16158}
16159
16160/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16161///
16162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16163#[inline]
16164#[target_feature(enable = "avx512f")]
16165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16166#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16167pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16168    unsafe {
16169        transmute(vcvttpd2udq(
16170            a.as_f64x8(),
16171            i32x8::ZERO,
16172            k,
16173            _MM_FROUND_CUR_DIRECTION,
16174        ))
16175    }
16176}
16177
16178/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16179///
16180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16181#[inline]
16182#[target_feature(enable = "avx512f,avx512vl")]
16183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16184#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16185pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
16186    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
16187}
16188
16189/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16190///
16191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16192#[inline]
16193#[target_feature(enable = "avx512f,avx512vl")]
16194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16195#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16196pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
16197    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
16198}
16199
16200/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16201///
16202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16203#[inline]
16204#[target_feature(enable = "avx512f,avx512vl")]
16205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16206#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16207pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
16208    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
16209}
16210
16211/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16212///
16213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16214#[inline]
16215#[target_feature(enable = "avx512f,avx512vl")]
16216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16217#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16218pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
16219    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
16220}
16221
16222/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16223///
16224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16225#[inline]
16226#[target_feature(enable = "avx512f,avx512vl")]
16227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16228#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16229pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
16230    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
16231}
16232
16233/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16234///
16235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16236#[inline]
16237#[target_feature(enable = "avx512f,avx512vl")]
16238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16239#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16240pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
16241    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
16242}
16243
16244/// Returns vector of type `__m512d` with all elements set to zero.
16245///
16246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16247#[inline]
16248#[target_feature(enable = "avx512f")]
16249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16250#[cfg_attr(test, assert_instr(vxorps))]
16251pub fn _mm512_setzero_pd() -> __m512d {
16252    // All-0 is a properly initialized __m512d
16253    unsafe { const { mem::zeroed() } }
16254}
16255
16256/// Returns vector of type `__m512` with all elements set to zero.
16257///
16258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16259#[inline]
16260#[target_feature(enable = "avx512f")]
16261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16262#[cfg_attr(test, assert_instr(vxorps))]
16263pub fn _mm512_setzero_ps() -> __m512 {
16264    // All-0 is a properly initialized __m512
16265    unsafe { const { mem::zeroed() } }
16266}
16267
16268/// Returns vector of type `__m512` with all elements set to zero.
16269///
16270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16271#[inline]
16272#[target_feature(enable = "avx512f")]
16273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16274#[cfg_attr(test, assert_instr(vxorps))]
16275pub fn _mm512_setzero() -> __m512 {
16276    // All-0 is a properly initialized __m512
16277    unsafe { const { mem::zeroed() } }
16278}
16279
16280/// Returns vector of type `__m512i` with all elements set to zero.
16281///
16282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
16283#[inline]
16284#[target_feature(enable = "avx512f")]
16285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16286#[cfg_attr(test, assert_instr(vxorps))]
16287pub fn _mm512_setzero_si512() -> __m512i {
16288    // All-0 is a properly initialized __m512i
16289    unsafe { const { mem::zeroed() } }
16290}
16291
16292/// Returns vector of type `__m512i` with all elements set to zero.
16293///
16294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16295#[inline]
16296#[target_feature(enable = "avx512f")]
16297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16298#[cfg_attr(test, assert_instr(vxorps))]
16299pub fn _mm512_setzero_epi32() -> __m512i {
16300    // All-0 is a properly initialized __m512i
16301    unsafe { const { mem::zeroed() } }
16302}
16303
16304/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16305/// order.
16306///
16307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
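///
/// A small illustrative sketch (not from the original source; assumes
/// `avx512f` is available) of the argument ordering:
///
/// ```ignore
/// unsafe {
///     // `setr` takes arguments in memory order (first argument -> element 0),
///     // while `set` takes them highest-element first, so these are equal:
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
///     assert_eq!(_mm512_cmpeq_epi32_mask(a, b), 0xFFFF);
/// }
/// ```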
16308#[inline]
16309#[target_feature(enable = "avx512f")]
16310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16311pub fn _mm512_setr_epi32(
16312    e15: i32,
16313    e14: i32,
16314    e13: i32,
16315    e12: i32,
16316    e11: i32,
16317    e10: i32,
16318    e9: i32,
16319    e8: i32,
16320    e7: i32,
16321    e6: i32,
16322    e5: i32,
16323    e4: i32,
16324    e3: i32,
16325    e2: i32,
16326    e1: i32,
16327    e0: i32,
16328) -> __m512i {
16329    unsafe {
16330        let r = i32x16::new(
16331            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
16332        );
16333        transmute(r)
16334    }
16335}
16336
16337/// Set packed 8-bit integers in dst with the supplied values.
16338///
16339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16340#[inline]
16341#[target_feature(enable = "avx512f")]
16342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16343pub fn _mm512_set_epi8(
16344    e63: i8,
16345    e62: i8,
16346    e61: i8,
16347    e60: i8,
16348    e59: i8,
16349    e58: i8,
16350    e57: i8,
16351    e56: i8,
16352    e55: i8,
16353    e54: i8,
16354    e53: i8,
16355    e52: i8,
16356    e51: i8,
16357    e50: i8,
16358    e49: i8,
16359    e48: i8,
16360    e47: i8,
16361    e46: i8,
16362    e45: i8,
16363    e44: i8,
16364    e43: i8,
16365    e42: i8,
16366    e41: i8,
16367    e40: i8,
16368    e39: i8,
16369    e38: i8,
16370    e37: i8,
16371    e36: i8,
16372    e35: i8,
16373    e34: i8,
16374    e33: i8,
16375    e32: i8,
16376    e31: i8,
16377    e30: i8,
16378    e29: i8,
16379    e28: i8,
16380    e27: i8,
16381    e26: i8,
16382    e25: i8,
16383    e24: i8,
16384    e23: i8,
16385    e22: i8,
16386    e21: i8,
16387    e20: i8,
16388    e19: i8,
16389    e18: i8,
16390    e17: i8,
16391    e16: i8,
16392    e15: i8,
16393    e14: i8,
16394    e13: i8,
16395    e12: i8,
16396    e11: i8,
16397    e10: i8,
16398    e9: i8,
16399    e8: i8,
16400    e7: i8,
16401    e6: i8,
16402    e5: i8,
16403    e4: i8,
16404    e3: i8,
16405    e2: i8,
16406    e1: i8,
16407    e0: i8,
16408) -> __m512i {
16409    unsafe {
16410        let r = i8x64::new(
16411            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
16412            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
16413            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
16414            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
16415        );
16416        transmute(r)
16417    }
16418}
16419
16420/// Set packed 16-bit integers in dst with the supplied values.
16421///
16422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16423#[inline]
16424#[target_feature(enable = "avx512f")]
16425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16426pub fn _mm512_set_epi16(
16427    e31: i16,
16428    e30: i16,
16429    e29: i16,
16430    e28: i16,
16431    e27: i16,
16432    e26: i16,
16433    e25: i16,
16434    e24: i16,
16435    e23: i16,
16436    e22: i16,
16437    e21: i16,
16438    e20: i16,
16439    e19: i16,
16440    e18: i16,
16441    e17: i16,
16442    e16: i16,
16443    e15: i16,
16444    e14: i16,
16445    e13: i16,
16446    e12: i16,
16447    e11: i16,
16448    e10: i16,
16449    e9: i16,
16450    e8: i16,
16451    e7: i16,
16452    e6: i16,
16453    e5: i16,
16454    e4: i16,
16455    e3: i16,
16456    e2: i16,
16457    e1: i16,
16458    e0: i16,
16459) -> __m512i {
16460    unsafe {
16461        let r = i16x32::new(
16462            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
16463            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
16464        );
16465        transmute(r)
16466    }
16467}
16468
16469/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16470///
16471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
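///
/// An illustrative sketch (not part of the original source; assumes `avx512f`
/// is available) of how the four values repeat:
///
/// ```ignore
/// unsafe {
///     // Element 0 receives `a`, element 1 `b`, element 2 `c`, element 3 `d`,
///     // and the pattern repeats across all sixteen lanes.
///     let v = _mm512_set4_epi32(4, 3, 2, 1);
///     let expected = _mm512_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4);
///     assert_eq!(_mm512_cmpeq_epi32_mask(v, expected), 0xFFFF);
/// }
/// ```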
16472#[inline]
16473#[target_feature(enable = "avx512f")]
16474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16475pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16476    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16477}
16478
16479/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16480///
16481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16482#[inline]
16483#[target_feature(enable = "avx512f")]
16484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16485pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16486    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16487}
16488
16489/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16490///
16491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16492#[inline]
16493#[target_feature(enable = "avx512f")]
16494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16495pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16496    _mm512_set_pd(d, c, b, a, d, c, b, a)
16497}
16498
16499/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16500///
16501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16502#[inline]
16503#[target_feature(enable = "avx512f")]
16504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16505pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
16506    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16507}
16508
16509/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16510///
16511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16512#[inline]
16513#[target_feature(enable = "avx512f")]
16514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16515pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
16516    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16517}
16518
16519/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16520///
16521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16522#[inline]
16523#[target_feature(enable = "avx512f")]
16524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16525pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
16526    _mm512_set_pd(a, b, c, d, a, b, c, d)
16527}
16528
16529/// Set packed 64-bit integers in dst with the supplied values.
16530///
16531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
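///
/// A short illustrative sketch (not from the original source; assumes
/// `avx512f` is available) relating `set` and `setr` ordering:
///
/// ```ignore
/// unsafe {
///     // `set` takes the highest element first, so its last argument lands in
///     // element 0; `setr` is the memory-order counterpart.
///     let a = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
///     let b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     assert_eq!(_mm512_cmpeq_epi64_mask(a, b), 0xFF);
/// }
/// ```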
16532#[inline]
16533#[target_feature(enable = "avx512f")]
16534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16535pub fn _mm512_set_epi64(
16536    e0: i64,
16537    e1: i64,
16538    e2: i64,
16539    e3: i64,
16540    e4: i64,
16541    e5: i64,
16542    e6: i64,
16543    e7: i64,
16544) -> __m512i {
16545    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
16546}
16547
16548/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16549///
16550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16551#[inline]
16552#[target_feature(enable = "avx512f")]
16553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16554pub fn _mm512_setr_epi64(
16555    e0: i64,
16556    e1: i64,
16557    e2: i64,
16558    e3: i64,
16559    e4: i64,
16560    e5: i64,
16561    e6: i64,
16562    e7: i64,
16563) -> __m512i {
16564    unsafe {
16565        let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
16566        transmute(r)
16567    }
16568}
16569
16570/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16571///
16572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
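///
/// A minimal usage sketch (not part of the original source; assumes `avx512f`
/// is available and that every computed address stays inside `data`):
///
/// ```ignore
/// unsafe {
///     let data: [f64; 16] = core::array::from_fn(|i| i as f64);
///     // The indices are element indices, so SCALE = 8 (the byte size of an
///     // f64) turns them into byte offsets from `data.as_ptr()`.
///     let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///     let r = _mm512_i32gather_pd::<8>(idx, data.as_ptr());
///     // `r` now holds [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0].
/// }
/// ```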
16573#[inline]
16574#[target_feature(enable = "avx512f")]
16575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16576#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16577#[rustc_legacy_const_generics(2)]
16578pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
16579    offsets: __m256i,
16580    slice: *const f64,
16581) -> __m512d {
16582    static_assert_imm8_scale!(SCALE);
16583    let zero = f64x8::ZERO;
16584    let neg_one = -1;
16585    let slice = slice as *const i8;
16586    let offsets = offsets.as_i32x8();
16587    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
16588    transmute(r)
16589}
16590
16591/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16592///
16593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
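///
/// A brief illustrative sketch of the masked gather (not from the original
/// source; assumes `avx512f` is available and the indices stay in bounds):
///
/// ```ignore
/// unsafe {
///     let data: [f64; 8] = [10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0];
///     let src = _mm512_set1_pd(-1.0);
///     let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///     // Only lanes whose mask bit is set perform a load; the rest keep `src`.
///     let r = _mm512_mask_i32gather_pd::<8>(src, 0b0000_1111, idx, data.as_ptr());
///     // Lanes 0..4 hold [10.0, 11.0, 12.0, 13.0]; lanes 4..8 hold -1.0.
/// }
/// ```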
16594#[inline]
16595#[target_feature(enable = "avx512f")]
16596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16597#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16598#[rustc_legacy_const_generics(4)]
16599pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
16600    src: __m512d,
16601    mask: __mmask8,
16602    offsets: __m256i,
16603    slice: *const f64,
16604) -> __m512d {
16605    static_assert_imm8_scale!(SCALE);
16606    let src = src.as_f64x8();
16607    let slice = slice as *const i8;
16608    let offsets = offsets.as_i32x8();
16609    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
16610    transmute(r)
16611}
16612
16613/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16614///
16615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
16616#[inline]
16617#[target_feature(enable = "avx512f")]
16618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16619#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16620#[rustc_legacy_const_generics(2)]
16621pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
16622    offsets: __m512i,
16623    slice: *const f64,
16624) -> __m512d {
16625    static_assert_imm8_scale!(SCALE);
16626    let zero = f64x8::ZERO;
16627    let neg_one = -1;
16628    let slice = slice as *const i8;
16629    let offsets = offsets.as_i64x8();
16630    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
16631    transmute(r)
16632}
16633
16634/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16635///
16636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
16637#[inline]
16638#[target_feature(enable = "avx512f")]
16639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16640#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16641#[rustc_legacy_const_generics(4)]
16642pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
16643    src: __m512d,
16644    mask: __mmask8,
16645    offsets: __m512i,
16646    slice: *const f64,
16647) -> __m512d {
16648    static_assert_imm8_scale!(SCALE);
16649    let src = src.as_f64x8();
16650    let slice = slice as *const i8;
16651    let offsets = offsets.as_i64x8();
16652    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
16653    transmute(r)
16654}
16655
16656/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16657///
16658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
16659#[inline]
16660#[target_feature(enable = "avx512f")]
16661#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16662#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16663#[rustc_legacy_const_generics(2)]
16664pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
16665    static_assert_imm8_scale!(SCALE);
16666    let zero = f32x8::ZERO;
16667    let neg_one = -1;
16668    let slice = slice as *const i8;
16669    let offsets = offsets.as_i64x8();
16670    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
16671    transmute(r)
16672}
16673
16674/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16675///
16676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
16677#[inline]
16678#[target_feature(enable = "avx512f")]
16679#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16680#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16681#[rustc_legacy_const_generics(4)]
16682pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
16683    src: __m256,
16684    mask: __mmask8,
16685    offsets: __m512i,
16686    slice: *const f32,
16687) -> __m256 {
16688    static_assert_imm8_scale!(SCALE);
16689    let src = src.as_f32x8();
16690    let slice = slice as *const i8;
16691    let offsets = offsets.as_i64x8();
16692    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
16693    transmute(r)
16694}
16695
16696/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16697///
16698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
16699#[inline]
16700#[target_feature(enable = "avx512f")]
16701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16702#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16703#[rustc_legacy_const_generics(2)]
16704pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
16705    static_assert_imm8_scale!(SCALE);
16706    let zero = f32x16::ZERO;
16707    let neg_one = -1;
16708    let slice = slice as *const i8;
16709    let offsets = offsets.as_i32x16();
16710    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
16711    transmute(r)
16712}
16713
16714/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16715///
16716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
16717#[inline]
16718#[target_feature(enable = "avx512f")]
16719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16720#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16721#[rustc_legacy_const_generics(4)]
16722pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
16723    src: __m512,
16724    mask: __mmask16,
16725    offsets: __m512i,
16726    slice: *const f32,
16727) -> __m512 {
16728    static_assert_imm8_scale!(SCALE);
16729    let src = src.as_f32x16();
16730    let slice = slice as *const i8;
16731    let offsets = offsets.as_i32x16();
16732    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
16733    transmute(r)
16734}
16735
16736/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16737///
16738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
16739#[inline]
16740#[target_feature(enable = "avx512f")]
16741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16742#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16743#[rustc_legacy_const_generics(2)]
16744pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
16745    offsets: __m512i,
16746    slice: *const i32,
16747) -> __m512i {
16748    static_assert_imm8_scale!(SCALE);
16749    let zero = i32x16::ZERO;
16750    let neg_one = -1;
16751    let slice = slice as *const i8;
16752    let offsets = offsets.as_i32x16();
16753    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
16754    transmute(r)
16755}
16756
16757/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16758///
16759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
16760#[inline]
16761#[target_feature(enable = "avx512f")]
16762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16763#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16764#[rustc_legacy_const_generics(4)]
16765pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
16766    src: __m512i,
16767    mask: __mmask16,
16768    offsets: __m512i,
16769    slice: *const i32,
16770) -> __m512i {
16771    static_assert_imm8_scale!(SCALE);
16772    let src = src.as_i32x16();
16773    let mask = mask as i16;
16774    let slice = slice as *const i8;
16775    let offsets = offsets.as_i32x16();
16776    let r = vpgatherdd(src, slice, offsets, mask, SCALE);
16777    transmute(r)
16778}
16779
16780/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16781///
16782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
16783#[inline]
16784#[target_feature(enable = "avx512f")]
16785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16786#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16787#[rustc_legacy_const_generics(2)]
16788pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
16789    offsets: __m256i,
16790    slice: *const i64,
16791) -> __m512i {
16792    static_assert_imm8_scale!(SCALE);
16793    let zero = i64x8::ZERO;
16794    let neg_one = -1;
16795    let slice = slice as *const i8;
16796    let offsets = offsets.as_i32x8();
16797    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
16798    transmute(r)
16799}
16800
16801/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16802///
16803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
16804#[inline]
16805#[target_feature(enable = "avx512f")]
16806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16807#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16808#[rustc_legacy_const_generics(4)]
16809pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
16810    src: __m512i,
16811    mask: __mmask8,
16812    offsets: __m256i,
16813    slice: *const i64,
16814) -> __m512i {
16815    static_assert_imm8_scale!(SCALE);
16816    let src = src.as_i64x8();
16817    let mask = mask as i8;
16818    let slice = slice as *const i8;
16819    let offsets = offsets.as_i32x8();
16820    let r = vpgatherdq(src, slice, offsets, mask, SCALE);
16821    transmute(r)
16822}
16823
16824/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16825///
16826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
16827#[inline]
16828#[target_feature(enable = "avx512f")]
16829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16830#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16831#[rustc_legacy_const_generics(2)]
16832pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
16833    offsets: __m512i,
16834    slice: *const i64,
16835) -> __m512i {
16836    static_assert_imm8_scale!(SCALE);
16837    let zero = i64x8::ZERO;
16838    let neg_one = -1;
16839    let slice = slice as *const i8;
16840    let offsets = offsets.as_i64x8();
16841    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
16842    transmute(r)
16843}
16844
16845/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16846///
16847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
16848#[inline]
16849#[target_feature(enable = "avx512f")]
16850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16851#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16852#[rustc_legacy_const_generics(4)]
16853pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
16854    src: __m512i,
16855    mask: __mmask8,
16856    offsets: __m512i,
16857    slice: *const i64,
16858) -> __m512i {
16859    static_assert_imm8_scale!(SCALE);
16860    let src = src.as_i64x8();
16861    let mask = mask as i8;
16862    let slice = slice as *const i8;
16863    let offsets = offsets.as_i64x8();
16864    let r = vpgatherqq(src, slice, offsets, mask, SCALE);
16865    transmute(r)
16866}
16867
16868/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16869///
16870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
16871#[inline]
16872#[target_feature(enable = "avx512f")]
16873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16874#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16875#[rustc_legacy_const_generics(2)]
16876pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
16877    offsets: __m512i,
16878    slice: *const i32,
16879) -> __m256i {
16880    static_assert_imm8_scale!(SCALE);
16881    let zeros = i32x8::ZERO;
16882    let neg_one = -1;
16883    let slice = slice as *const i8;
16884    let offsets = offsets.as_i64x8();
16885    let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
16886    transmute(r)
16887}
16888
16889/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16890///
16891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
16892#[inline]
16893#[target_feature(enable = "avx512f")]
16894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16895#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16896#[rustc_legacy_const_generics(4)]
16897pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
16898    src: __m256i,
16899    mask: __mmask8,
16900    offsets: __m512i,
16901    slice: *const i32,
16902) -> __m256i {
16903    static_assert_imm8_scale!(SCALE);
16904    let src = src.as_i32x8();
16905    let mask = mask as i8;
16906    let slice = slice as *const i8;
16907    let offsets = offsets.as_i64x8();
16908    let r = vpgatherqd(src, slice, offsets, mask, SCALE);
16909    transmute(r)
16910}
16911
16912/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16913///
16914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
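///
/// A minimal usage sketch (not part of the original source; assumes `avx512f`
/// is available and all computed addresses stay inside `out`):
///
/// ```ignore
/// unsafe {
///     let mut out = [0.0f64; 16];
///     let src = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///     // SCALE = 8 converts the element indices into byte offsets for f64 stores.
///     let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///     _mm512_i32scatter_pd::<8>(out.as_mut_ptr(), idx, src);
///     // The even slots of `out` now hold 1.0..=8.0; the odd slots stay 0.0.
/// }
/// ```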
16915#[inline]
16916#[target_feature(enable = "avx512f")]
16917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16918#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16919#[rustc_legacy_const_generics(3)]
16920pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
16921    slice: *mut f64,
16922    offsets: __m256i,
16923    src: __m512d,
16924) {
16925    static_assert_imm8_scale!(SCALE);
16926    let src = src.as_f64x8();
16927    let neg_one = -1;
16928    let slice = slice as *mut i8;
16929    let offsets = offsets.as_i32x8();
16930    vscatterdpd(slice, neg_one, offsets, src, SCALE);
16931}
16932
16933/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16934///
16935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
16936#[inline]
16937#[target_feature(enable = "avx512f")]
16938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16939#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16940#[rustc_legacy_const_generics(4)]
16941pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
16942    slice: *mut f64,
16943    mask: __mmask8,
16944    offsets: __m256i,
16945    src: __m512d,
16946) {
16947    static_assert_imm8_scale!(SCALE);
16948    let src = src.as_f64x8();
16949    let slice = slice as *mut i8;
16950    let offsets = offsets.as_i32x8();
16951    vscatterdpd(slice, mask as i8, offsets, src, SCALE);
16952}
16953
16954/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16955///
16956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16957#[inline]
16958#[target_feature(enable = "avx512f")]
16959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16960#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16961#[rustc_legacy_const_generics(3)]
16962pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16963    slice: *mut f64,
16964    offsets: __m512i,
16965    src: __m512d,
16966) {
16967    static_assert_imm8_scale!(SCALE);
16968    let src = src.as_f64x8();
16969    let neg_one = -1;
16970    let slice = slice as *mut i8;
16971    let offsets = offsets.as_i64x8();
16972    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16973}
16974
16975/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16976///
16977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16978#[inline]
16979#[target_feature(enable = "avx512f")]
16980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16981#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16982#[rustc_legacy_const_generics(4)]
16983pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16984    slice: *mut f64,
16985    mask: __mmask8,
16986    offsets: __m512i,
16987    src: __m512d,
16988) {
16989    static_assert_imm8_scale!(SCALE);
16990    let src = src.as_f64x8();
16991    let slice = slice as *mut i8;
16992    let offsets = offsets.as_i64x8();
16993    vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16994}
16995
16996/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16997///
16998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16999#[inline]
17000#[target_feature(enable = "avx512f")]
17001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17002#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17003#[rustc_legacy_const_generics(3)]
17004pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
17005    slice: *mut f32,
17006    offsets: __m512i,
17007    src: __m512,
17008) {
17009    static_assert_imm8_scale!(SCALE);
17010    let src = src.as_f32x16();
17011    let neg_one = -1;
17012    let slice = slice as *mut i8;
17013    let offsets = offsets.as_i32x16();
17014    vscatterdps(slice, neg_one, offsets, src, SCALE);
17015}
17016
17017/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17018///
17019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
17020#[inline]
17021#[target_feature(enable = "avx512f")]
17022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17023#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17024#[rustc_legacy_const_generics(4)]
17025pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
17026    slice: *mut f32,
17027    mask: __mmask16,
17028    offsets: __m512i,
17029    src: __m512,
17030) {
17031    static_assert_imm8_scale!(SCALE);
17032    let src = src.as_f32x16();
17033    let slice = slice as *mut i8;
17034    let offsets = offsets.as_i32x16();
17035    vscatterdps(slice, mask as i16, offsets, src, SCALE);
17036}
17037
/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17039///
17040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
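///
/// # Examples
///
/// A minimal illustrative sketch (names and sizes are arbitrary); note that only eight
/// `f32` values are stored because the eight 64-bit indices occupy a full `__m512i`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn scatter_f32_by_i64(dst: &mut [f32; 8]) {
///     let idx = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     let vals = _mm256_set1_ps(3.0);
///     // SCALE = 4: each index is multiplied by the size of an f32 in bytes.
///     _mm512_i64scatter_ps::<4>(dst.as_mut_ptr(), idx, vals);
/// }
/// ```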
17041#[inline]
17042#[target_feature(enable = "avx512f")]
17043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17044#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17045#[rustc_legacy_const_generics(3)]
17046pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
17047    slice: *mut f32,
17048    offsets: __m512i,
17049    src: __m256,
17050) {
17051    static_assert_imm8_scale!(SCALE);
17052    let src = src.as_f32x8();
17053    let neg_one = -1;
17054    let slice = slice as *mut i8;
17055    let offsets = offsets.as_i64x8();
17056    vscatterqps(slice, neg_one, offsets, src, SCALE);
17057}
17058
17059/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17060///
17061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
17062#[inline]
17063#[target_feature(enable = "avx512f")]
17064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17065#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17066#[rustc_legacy_const_generics(4)]
17067pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17068    slice: *mut f32,
17069    mask: __mmask8,
17070    offsets: __m512i,
17071    src: __m256,
17072) {
17073    static_assert_imm8_scale!(SCALE);
17074    let src = src.as_f32x8();
17075    let slice = slice as *mut i8;
17076    let offsets = offsets.as_i64x8();
17077    vscatterqps(slice, mask as i8, offsets, src, SCALE);
17078}
17079
17080/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17081///
17082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17083#[inline]
17084#[target_feature(enable = "avx512f")]
17085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17086#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17087#[rustc_legacy_const_generics(3)]
17088pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17089    slice: *mut i64,
17090    offsets: __m256i,
17091    src: __m512i,
17092) {
17093    static_assert_imm8_scale!(SCALE);
17094    let src = src.as_i64x8();
17095    let neg_one = -1;
17096    let slice = slice as *mut i8;
17097    let offsets = offsets.as_i32x8();
17098    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17099}
17100
17101/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17102///
17103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17104#[inline]
17105#[target_feature(enable = "avx512f")]
17106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17107#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17108#[rustc_legacy_const_generics(4)]
17109pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17110    slice: *mut i64,
17111    mask: __mmask8,
17112    offsets: __m256i,
17113    src: __m512i,
17114) {
17115    static_assert_imm8_scale!(SCALE);
17116    let src = src.as_i64x8();
17117    let mask = mask as i8;
17118    let slice = slice as *mut i8;
17119    let offsets = offsets.as_i32x8();
17120    vpscatterdq(slice, mask, offsets, src, SCALE);
17121}
17122
17123/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17124///
17125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17126#[inline]
17127#[target_feature(enable = "avx512f")]
17128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17129#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17130#[rustc_legacy_const_generics(3)]
17131pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17132    slice: *mut i64,
17133    offsets: __m512i,
17134    src: __m512i,
17135) {
17136    static_assert_imm8_scale!(SCALE);
17137    let src = src.as_i64x8();
17138    let neg_one = -1;
17139    let slice = slice as *mut i8;
17140    let offsets = offsets.as_i64x8();
17141    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17142}
17143
17144/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17145///
17146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17147#[inline]
17148#[target_feature(enable = "avx512f")]
17149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17150#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17151#[rustc_legacy_const_generics(4)]
17152pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17153    slice: *mut i64,
17154    mask: __mmask8,
17155    offsets: __m512i,
17156    src: __m512i,
17157) {
17158    static_assert_imm8_scale!(SCALE);
17159    let src = src.as_i64x8();
17160    let mask = mask as i8;
17161    let slice = slice as *mut i8;
17162    let offsets = offsets.as_i64x8();
17163    vpscatterqq(slice, mask, offsets, src, SCALE);
17164}
17165
17166/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17167///
17168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
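///
/// # Examples
///
/// A minimal illustrative sketch (names and values are arbitrary); sixteen 32-bit
/// integers are written to the slots selected by the sixteen indices:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn scatter_i32(dst: &mut [i32; 16]) {
///     // Reverse order: lane 0 goes to slot 15, lane 15 to slot 0.
///     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
///     let vals = _mm512_set1_epi32(7);
///     _mm512_i32scatter_epi32::<4>(dst.as_mut_ptr(), idx, vals);
/// }
/// ```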
17169#[inline]
17170#[target_feature(enable = "avx512f")]
17171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17172#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17173#[rustc_legacy_const_generics(3)]
17174pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17175    slice: *mut i32,
17176    offsets: __m512i,
17177    src: __m512i,
17178) {
17179    static_assert_imm8_scale!(SCALE);
17180    let src = src.as_i32x16();
17181    let neg_one = -1;
17182    let slice = slice as *mut i8;
17183    let offsets = offsets.as_i32x16();
17184    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17185}
17186
17187/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17188///
17189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17190#[inline]
17191#[target_feature(enable = "avx512f")]
17192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17193#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17194#[rustc_legacy_const_generics(4)]
17195pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17196    slice: *mut i32,
17197    mask: __mmask16,
17198    offsets: __m512i,
17199    src: __m512i,
17200) {
17201    static_assert_imm8_scale!(SCALE);
17202    let src = src.as_i32x16();
17203    let mask = mask as i16;
17204    let slice = slice as *mut i8;
17205    let offsets = offsets.as_i32x16();
17206    vpscatterdd(slice, mask, offsets, src, SCALE);
17207}
17208
17209/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17210///
17211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17212#[inline]
17213#[target_feature(enable = "avx512f")]
17214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17215#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17216#[rustc_legacy_const_generics(3)]
17217pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17218    slice: *mut i32,
17219    offsets: __m512i,
17220    src: __m256i,
17221) {
17222    static_assert_imm8_scale!(SCALE);
17223    let src = src.as_i32x8();
17224    let neg_one = -1;
17225    let slice = slice as *mut i8;
17226    let offsets = offsets.as_i64x8();
17227    vpscatterqd(slice, neg_one, offsets, src, SCALE);
17228}
17229
17230/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17231///
17232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17233#[inline]
17234#[target_feature(enable = "avx512f")]
17235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17236#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17237#[rustc_legacy_const_generics(4)]
17238pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17239    slice: *mut i32,
17240    mask: __mmask8,
17241    offsets: __m512i,
17242    src: __m256i,
17243) {
17244    static_assert_imm8_scale!(SCALE);
17245    let src = src.as_i32x8();
17246    let mask = mask as i8;
17247    let slice = slice as *mut i8;
17248    let offsets = offsets.as_i64x8();
17249    vpscatterqd(slice, mask, offsets, src, SCALE);
17250}
17251
17252/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17253/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17254///
17255/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
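///
/// # Examples
///
/// A minimal illustrative sketch (names and values are arbitrary); only the eight
/// 32-bit indices in the lower half of `vindex` are used, the upper half is ignored:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn gather_lo(src: &[i64; 8]) -> __m512i {
///     // Lower eight lanes hold the indices; the upper eight lanes are ignored.
///     let vindex = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
///     _mm512_i32logather_epi64::<8>(vindex, src.as_ptr())
/// }
/// ```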
17256#[inline]
17257#[target_feature(enable = "avx512f")]
17258#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17259#[rustc_legacy_const_generics(2)]
17260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17261pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17262    vindex: __m512i,
17263    base_addr: *const i64,
17264) -> __m512i {
17265    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17266}
17267
17268/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17269/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17270/// (elements are copied from src when the corresponding mask bit is not set).
17271///
17272/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17273#[inline]
17274#[target_feature(enable = "avx512f")]
17275#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17276#[rustc_legacy_const_generics(4)]
17277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17278pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17279    src: __m512i,
17280    k: __mmask8,
17281    vindex: __m512i,
17282    base_addr: *const i64,
17283) -> __m512i {
17284    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17285}
17286
17287/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17288/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17289///
17290/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17291#[inline]
17292#[target_feature(enable = "avx512f")]
17293#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17294#[rustc_legacy_const_generics(2)]
17295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17296pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17297    vindex: __m512i,
17298    base_addr: *const f64,
17299) -> __m512d {
17300    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17301}
17302
17303/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17304/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17305/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17306///
17307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17308#[inline]
17309#[target_feature(enable = "avx512f")]
17310#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17311#[rustc_legacy_const_generics(4)]
17312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17313pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17314    src: __m512d,
17315    k: __mmask8,
17316    vindex: __m512i,
17317    base_addr: *const f64,
17318) -> __m512d {
17319    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17320}
17321
17322/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17323/// indices stored in the lower half of vindex scaled by scale.
17324///
17325/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
17326#[inline]
17327#[target_feature(enable = "avx512f")]
17328#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17329#[rustc_legacy_const_generics(3)]
17330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17331pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17332    base_addr: *mut i64,
17333    vindex: __m512i,
17334    a: __m512i,
17335) {
17336    _mm512_i32scatter_epi64::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17337}
17338
17339/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17340/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17341/// mask bit is not set are not written to memory).
17342///
17343/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17344#[inline]
17345#[target_feature(enable = "avx512f")]
17346#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17347#[rustc_legacy_const_generics(4)]
17348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17349pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17350    base_addr: *mut i64,
17351    k: __mmask8,
17352    vindex: __m512i,
17353    a: __m512i,
17354) {
17355    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17356}
17357
17358/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17359/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17360///
17361/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17362#[inline]
17363#[target_feature(enable = "avx512f")]
17364#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17365#[rustc_legacy_const_generics(3)]
17366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17367pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17368    base_addr: *mut f64,
17369    vindex: __m512i,
17370    a: __m512d,
17371) {
17372    _mm512_i32scatter_pd::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17373}
17374
17375/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17376/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17377/// (elements whose corresponding mask bit is not set are not written to memory).
17378///
17379/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17380#[inline]
17381#[target_feature(enable = "avx512f")]
17382#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17383#[rustc_legacy_const_generics(4)]
17384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17385pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17386    base_addr: *mut f64,
17387    k: __mmask8,
17388    vindex: __m512i,
17389    a: __m512d,
17390) {
17391    _mm512_mask_i32scatter_pd::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17392}
17393
17394/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale.
17396///
17397/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
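///
/// # Examples
///
/// A minimal illustrative sketch (names and values are arbitrary) for the 256-bit
/// `avx512vl` form, which stores eight 32-bit integers:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn scatter_avx512vl(dst: &mut [i32; 8]) {
///     let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///     let vals = _mm256_set1_epi32(-1);
///     _mm256_i32scatter_epi32::<4>(dst.as_mut_ptr(), idx, vals);
/// }
/// ```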
17398#[inline]
17399#[target_feature(enable = "avx512f,avx512vl")]
17400#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17401#[rustc_legacy_const_generics(3)]
17402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17403pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17404    base_addr: *mut i32,
17405    vindex: __m256i,
17406    a: __m256i,
17407) {
17408    static_assert_imm8_scale!(SCALE);
17409    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17410}
17411
17412/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17413/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17414/// are not written to memory).
17415///
17416/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17417#[inline]
17418#[target_feature(enable = "avx512f,avx512vl")]
17419#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17420#[rustc_legacy_const_generics(4)]
17421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17422pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17423    base_addr: *mut i32,
17424    k: __mmask8,
17425    vindex: __m256i,
17426    a: __m256i,
17427) {
17428    static_assert_imm8_scale!(SCALE);
17429    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17430}
17431
17432/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17433///
17434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17435#[inline]
17436#[target_feature(enable = "avx512f,avx512vl")]
17437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17438#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17439#[rustc_legacy_const_generics(3)]
17440pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17441    slice: *mut i64,
17442    offsets: __m128i,
17443    src: __m256i,
17444) {
17445    static_assert_imm8_scale!(SCALE);
17446    let src = src.as_i64x4();
17447    let slice = slice as *mut i8;
17448    let offsets = offsets.as_i32x4();
17449    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
17450}
17451
17452/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17453/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17454/// are not written to memory).
17455///
17456/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17457#[inline]
17458#[target_feature(enable = "avx512f,avx512vl")]
17459#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17460#[rustc_legacy_const_generics(4)]
17461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17462pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17463    base_addr: *mut i64,
17464    k: __mmask8,
17465    vindex: __m128i,
17466    a: __m256i,
17467) {
17468    static_assert_imm8_scale!(SCALE);
17469    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
17470}
17471
17472/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale.
17474///
17475/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17476#[inline]
17477#[target_feature(enable = "avx512f,avx512vl")]
17478#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17479#[rustc_legacy_const_generics(3)]
17480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17481pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17482    base_addr: *mut f64,
17483    vindex: __m128i,
17484    a: __m256d,
17485) {
17486    static_assert_imm8_scale!(SCALE);
17487    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17488}
17489
17490/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17491/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17492/// mask bit is not set are not written to memory).
17493///
17494/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17495#[inline]
17496#[target_feature(enable = "avx512f,avx512vl")]
17497#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17498#[rustc_legacy_const_generics(4)]
17499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17500pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17501    base_addr: *mut f64,
17502    k: __mmask8,
17503    vindex: __m128i,
17504    a: __m256d,
17505) {
17506    static_assert_imm8_scale!(SCALE);
17507    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17508}
17509
17510/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale.
17512///
17513/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17514#[inline]
17515#[target_feature(enable = "avx512f,avx512vl")]
17516#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17517#[rustc_legacy_const_generics(3)]
17518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17519pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17520    base_addr: *mut f32,
17521    vindex: __m256i,
17522    a: __m256,
17523) {
17524    static_assert_imm8_scale!(SCALE);
17525    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17526}
17527
17528/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17529/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17530/// mask bit is not set are not written to memory).
17531///
17532/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17533#[inline]
17534#[target_feature(enable = "avx512f,avx512vl")]
17535#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17536#[rustc_legacy_const_generics(4)]
17537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17538pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17539    base_addr: *mut f32,
17540    k: __mmask8,
17541    vindex: __m256i,
17542    a: __m256,
17543) {
17544    static_assert_imm8_scale!(SCALE);
17545    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17546}
17547
17548/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale.
17550///
17551/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17552#[inline]
17553#[target_feature(enable = "avx512f,avx512vl")]
17554#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17555#[rustc_legacy_const_generics(3)]
17556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17557pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17558    base_addr: *mut i32,
17559    vindex: __m256i,
17560    a: __m128i,
17561) {
17562    static_assert_imm8_scale!(SCALE);
17563    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17564}
17565
17566/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17567/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17568/// are not written to memory).
17569///
17570/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17571#[inline]
17572#[target_feature(enable = "avx512f,avx512vl")]
17573#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17574#[rustc_legacy_const_generics(4)]
17575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17576pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17577    base_addr: *mut i32,
17578    k: __mmask8,
17579    vindex: __m256i,
17580    a: __m128i,
17581) {
17582    static_assert_imm8_scale!(SCALE);
17583    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17584}
17585
17586/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale.
17588///
17589/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17590#[inline]
17591#[target_feature(enable = "avx512f,avx512vl")]
17592#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17593#[rustc_legacy_const_generics(3)]
17594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17595pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
17596    base_addr: *mut i64,
17597    vindex: __m256i,
17598    a: __m256i,
17599) {
17600    static_assert_imm8_scale!(SCALE);
17601    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17602}
17603
17604/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17605/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17606/// are not written to memory).
17607///
17608/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
17609#[inline]
17610#[target_feature(enable = "avx512f,avx512vl")]
17611#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17612#[rustc_legacy_const_generics(4)]
17613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17614pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
17615    base_addr: *mut i64,
17616    k: __mmask8,
17617    vindex: __m256i,
17618    a: __m256i,
17619) {
17620    static_assert_imm8_scale!(SCALE);
17621    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17622}
17623
17624/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale.
17626///
17627/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
17628#[inline]
17629#[target_feature(enable = "avx512f,avx512vl")]
17630#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17631#[rustc_legacy_const_generics(3)]
17632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17633pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
17634    base_addr: *mut f64,
17635    vindex: __m256i,
17636    a: __m256d,
17637) {
17638    static_assert_imm8_scale!(SCALE);
17639    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17640}
17641
17642/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17643/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17644/// mask bit is not set are not written to memory).
17645///
17646/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
17647#[inline]
17648#[target_feature(enable = "avx512f,avx512vl")]
17649#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17650#[rustc_legacy_const_generics(4)]
17651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17652pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
17653    base_addr: *mut f64,
17654    k: __mmask8,
17655    vindex: __m256i,
17656    a: __m256d,
17657) {
17658    static_assert_imm8_scale!(SCALE);
17659    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17660}
17661
17662/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale.
17664///
17665/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
17666#[inline]
17667#[target_feature(enable = "avx512f,avx512vl")]
17668#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17669#[rustc_legacy_const_generics(3)]
17670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17671pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
17672    base_addr: *mut f32,
17673    vindex: __m256i,
17674    a: __m128,
17675) {
17676    static_assert_imm8_scale!(SCALE);
17677    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17678}
17679
17680/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17681/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17682/// mask bit is not set are not written to memory).
17683///
17684/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
17685#[inline]
17686#[target_feature(enable = "avx512f,avx512vl")]
17687#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17688#[rustc_legacy_const_generics(4)]
17689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17690pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
17691    base_addr: *mut f32,
17692    k: __mmask8,
17693    vindex: __m256i,
17694    a: __m128,
17695) {
17696    static_assert_imm8_scale!(SCALE);
17697    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17698}
17699
17700/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17701/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17702/// mask bit is not set).
17703///
17704/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
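///
/// # Examples
///
/// A minimal illustrative sketch (names and values are arbitrary); lanes whose mask bit
/// is clear keep the corresponding element of `src` instead of loading from memory:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn gather_or_default(table: &[i32; 8]) -> __m256i {
///     let fallback = _mm256_set1_epi32(-1);
///     let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///     // Only the even lanes are gathered; the odd lanes stay at -1.
///     _mm256_mmask_i32gather_epi32::<4>(fallback, 0b0101_0101, idx, table.as_ptr())
/// }
/// ```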
17705#[inline]
17706#[target_feature(enable = "avx512f,avx512vl")]
17707#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17708#[rustc_legacy_const_generics(4)]
17709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17710pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
17711    src: __m256i,
17712    k: __mmask8,
17713    vindex: __m256i,
17714    base_addr: *const i32,
17715) -> __m256i {
17716    static_assert_imm8_scale!(SCALE);
17717    transmute(vpgatherdd_256(
17718        src.as_i32x8(),
17719        base_addr as _,
17720        vindex.as_i32x8(),
17721        k,
17722        SCALE,
17723    ))
17724}
17725
17726/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17727/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17728/// mask bit is not set).
17729///
17730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
17731#[inline]
17732#[target_feature(enable = "avx512f,avx512vl")]
17733#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17734#[rustc_legacy_const_generics(4)]
17735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17736pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
17737    src: __m256i,
17738    k: __mmask8,
17739    vindex: __m128i,
17740    base_addr: *const i64,
17741) -> __m256i {
17742    static_assert_imm8_scale!(SCALE);
17743    transmute(vpgatherdq_256(
17744        src.as_i64x4(),
17745        base_addr as _,
17746        vindex.as_i32x4(),
17747        k,
17748        SCALE,
17749    ))
17750}
17751
17752/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17753/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17754/// from src when the corresponding mask bit is not set).
17755///
17756/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
17757#[inline]
17758#[target_feature(enable = "avx512f,avx512vl")]
17759#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17760#[rustc_legacy_const_generics(4)]
17761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17762pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
17763    src: __m256d,
17764    k: __mmask8,
17765    vindex: __m128i,
17766    base_addr: *const f64,
17767) -> __m256d {
17768    static_assert_imm8_scale!(SCALE);
17769    transmute(vgatherdpd_256(
17770        src.as_f64x4(),
17771        base_addr as _,
17772        vindex.as_i32x4(),
17773        k,
17774        SCALE,
17775    ))
17776}
17777
17778/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17779/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17780/// from src when the corresponding mask bit is not set).
17781///
17782/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
17783#[inline]
17784#[target_feature(enable = "avx512f,avx512vl")]
17785#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17786#[rustc_legacy_const_generics(4)]
17787#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17788pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
17789    src: __m256,
17790    k: __mmask8,
17791    vindex: __m256i,
17792    base_addr: *const f32,
17793) -> __m256 {
17794    static_assert_imm8_scale!(SCALE);
17795    transmute(vgatherdps_256(
17796        src.as_f32x8(),
17797        base_addr as _,
17798        vindex.as_i32x8(),
17799        k,
17800        SCALE,
17801    ))
17802}
17803
17804/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17805/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17806/// mask bit is not set).
17807///
17808/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
17809#[inline]
17810#[target_feature(enable = "avx512f,avx512vl")]
17811#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17812#[rustc_legacy_const_generics(4)]
17813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17814pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
17815    src: __m128i,
17816    k: __mmask8,
17817    vindex: __m256i,
17818    base_addr: *const i32,
17819) -> __m128i {
17820    static_assert_imm8_scale!(SCALE);
17821    transmute(vpgatherqd_256(
17822        src.as_i32x4(),
17823        base_addr as _,
17824        vindex.as_i64x4(),
17825        k,
17826        SCALE,
17827    ))
17828}
17829
/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17831/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17832/// mask bit is not set).
17833///
17834/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
17835#[inline]
17836#[target_feature(enable = "avx512f,avx512vl")]
17837#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17838#[rustc_legacy_const_generics(4)]
17839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17840pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
17841    src: __m256i,
17842    k: __mmask8,
17843    vindex: __m256i,
17844    base_addr: *const i64,
17845) -> __m256i {
17846    static_assert_imm8_scale!(SCALE);
17847    transmute(vpgatherqq_256(
17848        src.as_i64x4(),
17849        base_addr as _,
17850        vindex.as_i64x4(),
17851        k,
17852        SCALE,
17853    ))
17854}
17855
17856/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17858/// from src when the corresponding mask bit is not set).
17859///
17860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
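///
/// # Examples
///
/// A minimal illustrative sketch (names and values are arbitrary); four `f64` values are
/// gathered through 64-bit indices, with masked-off lanes copied from `src`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn gather_pd(table: &[f64; 4]) -> __m256d {
///     let fallback = _mm256_set1_pd(f64::NAN);
///     let idx = _mm256_setr_epi64x(3, 2, 1, 0);
///     // Gather the two low lanes; the two high lanes keep the NaN fallback.
///     _mm256_mmask_i64gather_pd::<8>(fallback, 0b0011, idx, table.as_ptr())
/// }
/// ```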
17861#[inline]
17862#[target_feature(enable = "avx512f,avx512vl")]
17863#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17864#[rustc_legacy_const_generics(4)]
17865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17866pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
17867    src: __m256d,
17868    k: __mmask8,
17869    vindex: __m256i,
17870    base_addr: *const f64,
17871) -> __m256d {
17872    static_assert_imm8_scale!(SCALE);
17873    transmute(vgatherqpd_256(
17874        src.as_f64x4(),
17875        base_addr as _,
17876        vindex.as_i64x4(),
17877        k,
17878        SCALE,
17879    ))
17880}
17881
17882/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17884/// from src when the corresponding mask bit is not set).
17885///
17886/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
17887#[inline]
17888#[target_feature(enable = "avx512f,avx512vl")]
17889#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17890#[rustc_legacy_const_generics(4)]
17891#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17892pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
17893    src: __m128,
17894    k: __mmask8,
17895    vindex: __m256i,
17896    base_addr: *const f32,
17897) -> __m128 {
17898    static_assert_imm8_scale!(SCALE);
17899    transmute(vgatherqps_256(
17900        src.as_f32x4(),
17901        base_addr as _,
17902        vindex.as_i64x4(),
17903        k,
17904        SCALE,
17905    ))
17906}
17907
17908/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale.
17910///
17911/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
17912#[inline]
17913#[target_feature(enable = "avx512f,avx512vl")]
17914#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17915#[rustc_legacy_const_generics(3)]
17916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17917pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
17918    base_addr: *mut i32,
17919    vindex: __m128i,
17920    a: __m128i,
17921) {
17922    static_assert_imm8_scale!(SCALE);
17923    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17924}
17925
17926/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17927/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17928/// are not written to memory).
17929///
17930/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
17931#[inline]
17932#[target_feature(enable = "avx512f,avx512vl")]
17933#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17934#[rustc_legacy_const_generics(4)]
17935#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17936pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
17937    base_addr: *mut i32,
17938    k: __mmask8,
17939    vindex: __m128i,
17940    a: __m128i,
17941) {
17942    static_assert_imm8_scale!(SCALE);
17943    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17944}
17945
17946/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale.
17948///
17949/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
17950#[inline]
17951#[target_feature(enable = "avx512f,avx512vl")]
17952#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17953#[rustc_legacy_const_generics(3)]
17954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17955pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
17956    base_addr: *mut i64,
17957    vindex: __m128i,
17958    a: __m128i,
17959) {
17960    static_assert_imm8_scale!(SCALE);
17961    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17962}
17963
17964/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17965/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17966/// are not written to memory).
17967///
17968/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
17969#[inline]
17970#[target_feature(enable = "avx512f,avx512vl")]
17971#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17972#[rustc_legacy_const_generics(4)]
17973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17974pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
17975    base_addr: *mut i64,
17976    k: __mmask8,
17977    vindex: __m128i,
17978    a: __m128i,
17979) {
17980    static_assert_imm8_scale!(SCALE);
17981    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17982}
17983
17984/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale.
17986///
17987/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
17988#[inline]
17989#[target_feature(enable = "avx512f,avx512vl")]
17990#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17991#[rustc_legacy_const_generics(3)]
17992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17993pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
17994    base_addr: *mut f64,
17995    vindex: __m128i,
17996    a: __m128d,
17997) {
17998    static_assert_imm8_scale!(SCALE);
17999    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18000}
18001
18002/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18003/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18004/// mask bit is not set are not written to memory).
18005///
18006/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
18007#[inline]
18008#[target_feature(enable = "avx512f,avx512vl")]
18009#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18010#[rustc_legacy_const_generics(4)]
18011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18012pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
18013    base_addr: *mut f64,
18014    k: __mmask8,
18015    vindex: __m128i,
18016    a: __m128d,
18017) {
18018    static_assert_imm8_scale!(SCALE);
18019    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18020}
18021
18022/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale.
18024///
18025/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
18026#[inline]
18027#[target_feature(enable = "avx512f,avx512vl")]
18028#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18029#[rustc_legacy_const_generics(3)]
18030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18031pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18032    static_assert_imm8_scale!(SCALE);
18033    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18034}
18035
18036/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18037/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18038/// mask bit is not set are not written to memory).
18039///
18040/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
18041#[inline]
18042#[target_feature(enable = "avx512f,avx512vl")]
18043#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18044#[rustc_legacy_const_generics(4)]
18045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18046pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
18047    base_addr: *mut f32,
18048    k: __mmask8,
18049    vindex: __m128i,
18050    a: __m128,
18051) {
18052    static_assert_imm8_scale!(SCALE);
18053    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18054}
18055
18056/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale.
18058///
18059/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
18060#[inline]
18061#[target_feature(enable = "avx512f,avx512vl")]
18062#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18063#[rustc_legacy_const_generics(3)]
18064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18065pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
18066    base_addr: *mut i32,
18067    vindex: __m128i,
18068    a: __m128i,
18069) {
18070    static_assert_imm8_scale!(SCALE);
18071    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18072}
18073
18074/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18075/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18076/// are not written to memory).
18077///
18078/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18079#[inline]
18080#[target_feature(enable = "avx512f,avx512vl")]
18081#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18082#[rustc_legacy_const_generics(4)]
18083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18084pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18085    base_addr: *mut i32,
18086    k: __mmask8,
18087    vindex: __m128i,
18088    a: __m128i,
18089) {
18090    static_assert_imm8_scale!(SCALE);
18091    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18092}
18093
18094/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale.
18096///
18097/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18098#[inline]
18099#[target_feature(enable = "avx512f,avx512vl")]
18100#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18101#[rustc_legacy_const_generics(3)]
18102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18103pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18104    base_addr: *mut i64,
18105    vindex: __m128i,
18106    a: __m128i,
18107) {
18108    static_assert_imm8_scale!(SCALE);
18109    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18110}
18111
18112/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18113/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18114/// are not written to memory).
18115///
18116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18117#[inline]
18118#[target_feature(enable = "avx512f,avx512vl")]
18119#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18120#[rustc_legacy_const_generics(4)]
18121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18122pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18123    base_addr: *mut i64,
18124    k: __mmask8,
18125    vindex: __m128i,
18126    a: __m128i,
18127) {
18128    static_assert_imm8_scale!(SCALE);
18129    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18130}
18131
18132/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale.
18134///
18135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
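///
/// # Examples
///
/// A minimal illustrative sketch (names and values are arbitrary); the two 64-bit
/// indices select which two `f64` slots receive the two lanes of `a`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe fn scatter_two(dst: &mut [f64; 4]) {
///     // _mm_set_epi64x takes the high element first, so the indices are [0, 3] in lane order.
///     let idx = _mm_set_epi64x(3, 0);
///     let vals = _mm_set1_pd(9.0);
///     _mm_i64scatter_pd::<8>(dst.as_mut_ptr(), idx, vals);
/// }
/// ```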
18136#[inline]
18137#[target_feature(enable = "avx512f,avx512vl")]
18138#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18139#[rustc_legacy_const_generics(3)]
18140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18141pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
18142    base_addr: *mut f64,
18143    vindex: __m128i,
18144    a: __m128d,
18145) {
18146    static_assert_imm8_scale!(SCALE);
18147    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18148}
18149
18150/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18151/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18152/// mask bit is not set are not written to memory).
18153///
18154/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
18155#[inline]
18156#[target_feature(enable = "avx512f,avx512vl")]
18157#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18158#[rustc_legacy_const_generics(4)]
18159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18160pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
18161    base_addr: *mut f64,
18162    k: __mmask8,
18163    vindex: __m128i,
18164    a: __m128d,
18165) {
18166    static_assert_imm8_scale!(SCALE);
18167    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18168}
18169
18170/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18171/// at packed 64-bit integer indices stored in vindex scaled by scale.
18172///
18173/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
18174#[inline]
18175#[target_feature(enable = "avx512f,avx512vl")]
18176#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18177#[rustc_legacy_const_generics(3)]
18178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18179pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18180    static_assert_imm8_scale!(SCALE);
18181    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18182}
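
// Illustrative usage sketch (same assumptions as the scatter example above): the unmasked
// form always stores both of the low single-precision lanes of `a`; with `SCALE = 4` each
// 64-bit index is an element offset into an `f32` buffer:
//
//     let mut buf = [0.0f32; 4];
//     let vindex = _mm_set_epi64x(2, 0);        // lane 1 -> buf[2], lane 0 -> buf[0]
//     let a = _mm_setr_ps(1.5, 2.5, 0.0, 0.0);  // the two upper lanes are ignored
//     unsafe { _mm_i64scatter_ps::<4>(buf.as_mut_ptr(), vindex, a) };
//     assert_eq!(buf, [1.5, 0.0, 2.5, 0.0]);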
18183
18184/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18185/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set are not written to memory).
18186///
18187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
18188#[inline]
18189#[target_feature(enable = "avx512f,avx512vl")]
18190#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18191#[rustc_legacy_const_generics(4)]
18192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18193pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
18194    base_addr: *mut f32,
18195    k: __mmask8,
18196    vindex: __m128i,
18197    a: __m128,
18198) {
18199    static_assert_imm8_scale!(SCALE);
18200    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18201}
18202
18203/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18204/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18205/// mask bit is not set).
18206///
18207/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18208#[inline]
18209#[target_feature(enable = "avx512f,avx512vl")]
18210#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18211#[rustc_legacy_const_generics(4)]
18212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18213pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18214    src: __m128i,
18215    k: __mmask8,
18216    vindex: __m128i,
18217    base_addr: *const i32,
18218) -> __m128i {
18219    static_assert_imm8_scale!(SCALE);
18220    transmute(vpgatherdd_128(
18221        src.as_i32x4(),
18222        base_addr as _,
18223        vindex.as_i32x4(),
18224        k,
18225        SCALE,
18226    ))
18227}
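
// Illustrative usage sketch (nightly + `stdarch_x86_avx512`, AVX-512F/VL assumed): a masked
// 32-bit gather with `SCALE = 4`. Lanes whose mask bit is clear keep the corresponding lane
// of `src`; the table contents here are made-up example data:
//
//     let table = [10i32, 20, 30, 40, 50, 60, 70, 80];
//     let src = _mm_set1_epi32(-1);
//     let vindex = _mm_setr_epi32(7, 5, 3, 1); // element offsets into `table`
//     let r = unsafe { _mm_mmask_i32gather_epi32::<4>(src, 0b0101, vindex, table.as_ptr()) };
//     // r == [80, -1, 40, -1]: only lanes 0 and 2 were gathered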
18228
18229/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18230/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18231/// mask bit is not set).
18232///
18233/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18234#[inline]
18235#[target_feature(enable = "avx512f,avx512vl")]
18236#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18237#[rustc_legacy_const_generics(4)]
18238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18239pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18240    src: __m128i,
18241    k: __mmask8,
18242    vindex: __m128i,
18243    base_addr: *const i64,
18244) -> __m128i {
18245    static_assert_imm8_scale!(SCALE);
18246    transmute(vpgatherdq_128(
18247        src.as_i64x2(),
18248        base_addr as _,
18249        vindex.as_i32x4(),
18250        k,
18251        SCALE,
18252    ))
18253}
18254
18255/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18256/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18257/// from src when the corresponding mask bit is not set).
18258///
18259/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18260#[inline]
18261#[target_feature(enable = "avx512f,avx512vl")]
18262#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18263#[rustc_legacy_const_generics(4)]
18264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18265pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18266    src: __m128d,
18267    k: __mmask8,
18268    vindex: __m128i,
18269    base_addr: *const f64,
18270) -> __m128d {
18271    static_assert_imm8_scale!(SCALE);
18272    transmute(vgatherdpd_128(
18273        src.as_f64x2(),
18274        base_addr as _,
18275        vindex.as_i32x4(),
18276        k,
18277        SCALE,
18278    ))
18279}
18280
18281/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18282/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18283/// from src when the corresponding mask bit is not set).
18284///
18285/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18286#[inline]
18287#[target_feature(enable = "avx512f,avx512vl")]
18288#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18289#[rustc_legacy_const_generics(4)]
18290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18291pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18292    src: __m128,
18293    k: __mmask8,
18294    vindex: __m128i,
18295    base_addr: *const f32,
18296) -> __m128 {
18297    static_assert_imm8_scale!(SCALE);
18298    transmute(vgatherdps_128(
18299        src.as_f32x4(),
18300        base_addr as _,
18301        vindex.as_i32x4(),
18302        k,
18303        SCALE,
18304    ))
18305}
18306
18307/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18308/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18309/// mask bit is not set).
18310///
18311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
18312#[inline]
18313#[target_feature(enable = "avx512f,avx512vl")]
18314#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18315#[rustc_legacy_const_generics(4)]
18316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18317pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18318    src: __m128i,
18319    k: __mmask8,
18320    vindex: __m128i,
18321    base_addr: *const i32,
18322) -> __m128i {
18323    static_assert_imm8_scale!(SCALE);
18324    transmute(vpgatherqd_128(
18325        src.as_i32x4(),
18326        base_addr as _,
18327        vindex.as_i64x2(),
18328        k,
18329        SCALE,
18330    ))
18331}
18332
18333/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18334/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18335/// mask bit is not set).
18336///
18337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
18338#[inline]
18339#[target_feature(enable = "avx512f,avx512vl")]
18340#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18341#[rustc_legacy_const_generics(4)]
18342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18343pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18344    src: __m128i,
18345    k: __mmask8,
18346    vindex: __m128i,
18347    base_addr: *const i64,
18348) -> __m128i {
18349    static_assert_imm8_scale!(SCALE);
18350    transmute(vpgatherqq_128(
18351        src.as_i64x2(),
18352        base_addr as _,
18353        vindex.as_i64x2(),
18354        k,
18355        SCALE,
18356    ))
18357}
18358
18359/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18360/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18361/// from src when the corresponding mask bit is not set).
18362///
18363/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
18364#[inline]
18365#[target_feature(enable = "avx512f,avx512vl")]
18366#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18367#[rustc_legacy_const_generics(4)]
18368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18369pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18370    src: __m128d,
18371    k: __mmask8,
18372    vindex: __m128i,
18373    base_addr: *const f64,
18374) -> __m128d {
18375    static_assert_imm8_scale!(SCALE);
18376    transmute(vgatherqpd_128(
18377        src.as_f64x2(),
18378        base_addr as _,
18379        vindex.as_i64x2(),
18380        k,
18381        SCALE,
18382    ))
18383}
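
// Illustrative usage sketch (same assumptions): the double-precision gather works the same
// way, but only the two low lanes participate and `SCALE = 8` indexes whole `f64` elements:
//
//     let table = [0.25f64, 0.5, 0.75, 1.0];
//     let src = _mm_set1_pd(-1.0);
//     let vindex = _mm_set_epi64x(3, 1);  // lane 1 -> table[3], lane 0 -> table[1]
//     let r = unsafe { _mm_mmask_i64gather_pd::<8>(src, 0b01, vindex, table.as_ptr()) };
//     // r == [0.5, -1.0]: lane 1's mask bit is clear, so it keeps the value from `src`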
18384
18385/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18386/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18387/// from src when the corresponding mask bit is not set).
18388///
18389/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
18390#[inline]
18391#[target_feature(enable = "avx512f,avx512vl")]
18392#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18393#[rustc_legacy_const_generics(4)]
18394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18395pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18396    src: __m128,
18397    k: __mmask8,
18398    vindex: __m128i,
18399    base_addr: *const f32,
18400) -> __m128 {
18401    static_assert_imm8_scale!(SCALE);
18402    transmute(vgatherqps_128(
18403        src.as_f32x4(),
18404        base_addr as _,
18405        vindex.as_i64x2(),
18406        k,
18407        SCALE,
18408    ))
18409}
18410
18411/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18412///
18413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
18414#[inline]
18415#[target_feature(enable = "avx512f")]
18416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18417#[cfg_attr(test, assert_instr(vpcompressd))]
18418pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18419    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
18420}
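
// Illustrative usage sketch (nightly + `stdarch_x86_avx512`; the call is assumed to run
// inside a `#[target_feature(enable = "avx512f")]` function or behind runtime detection):
// compress packs the selected lanes of `a` into the low lanes of the result and fills the
// remaining upper lanes from `src`:
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let src = _mm512_set1_epi32(-1);
//     let r = _mm512_mask_compress_epi32(src, 0b0101_0101_0101_0101, a);
//     // r == [0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1]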
18421
18422/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18423///
18424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18425#[inline]
18426#[target_feature(enable = "avx512f")]
18427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18428#[cfg_attr(test, assert_instr(vpcompressd))]
18429pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18430    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
18431}
18432
18433/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18434///
18435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18436#[inline]
18437#[target_feature(enable = "avx512f,avx512vl")]
18438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18439#[cfg_attr(test, assert_instr(vpcompressd))]
18440pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18441    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18442}
18443
18444/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18445///
18446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18447#[inline]
18448#[target_feature(enable = "avx512f,avx512vl")]
18449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18450#[cfg_attr(test, assert_instr(vpcompressd))]
18451pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
18452    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18453}
18454
18455/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18456///
18457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18458#[inline]
18459#[target_feature(enable = "avx512f,avx512vl")]
18460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18461#[cfg_attr(test, assert_instr(vpcompressd))]
18462pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18463    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18464}
18465
18466/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18467///
18468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18469#[inline]
18470#[target_feature(enable = "avx512f,avx512vl")]
18471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18472#[cfg_attr(test, assert_instr(vpcompressd))]
18473pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
18474    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18475}
18476
18477/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18478///
18479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18480#[inline]
18481#[target_feature(enable = "avx512f")]
18482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18483#[cfg_attr(test, assert_instr(vpcompressq))]
18484pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18485    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18486}
18487
18488/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18489///
18490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18491#[inline]
18492#[target_feature(enable = "avx512f")]
18493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18494#[cfg_attr(test, assert_instr(vpcompressq))]
18495pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
18496    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18497}
18498
18499/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18500///
18501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18502#[inline]
18503#[target_feature(enable = "avx512f,avx512vl")]
18504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18505#[cfg_attr(test, assert_instr(vpcompressq))]
18506pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18507    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18508}
18509
18510/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18511///
18512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18513#[inline]
18514#[target_feature(enable = "avx512f,avx512vl")]
18515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18516#[cfg_attr(test, assert_instr(vpcompressq))]
18517pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
18518    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18519}
18520
18521/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18522///
18523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18524#[inline]
18525#[target_feature(enable = "avx512f,avx512vl")]
18526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18527#[cfg_attr(test, assert_instr(vpcompressq))]
18528pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18529    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18530}
18531
18532/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18533///
18534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18535#[inline]
18536#[target_feature(enable = "avx512f,avx512vl")]
18537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18538#[cfg_attr(test, assert_instr(vpcompressq))]
18539pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
18540    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18541}
18542
18543/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18544///
18545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18546#[inline]
18547#[target_feature(enable = "avx512f")]
18548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18549#[cfg_attr(test, assert_instr(vcompressps))]
18550pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18551    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18552}
18553
18554/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18555///
18556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18557#[inline]
18558#[target_feature(enable = "avx512f")]
18559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18560#[cfg_attr(test, assert_instr(vcompressps))]
18561pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
18562    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18563}
18564
18565/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18566///
18567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18568#[inline]
18569#[target_feature(enable = "avx512f,avx512vl")]
18570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18571#[cfg_attr(test, assert_instr(vcompressps))]
18572pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18573    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18574}
18575
18576/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18577///
18578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18579#[inline]
18580#[target_feature(enable = "avx512f,avx512vl")]
18581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18582#[cfg_attr(test, assert_instr(vcompressps))]
18583pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
18584    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18585}
18586
18587/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18588///
18589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18590#[inline]
18591#[target_feature(enable = "avx512f,avx512vl")]
18592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18593#[cfg_attr(test, assert_instr(vcompressps))]
18594pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
18595    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
18596}
18597
18598/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18599///
18600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
18601#[inline]
18602#[target_feature(enable = "avx512f,avx512vl")]
18603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18604#[cfg_attr(test, assert_instr(vcompressps))]
18605pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
18606    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
18607}
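
// Illustrative usage sketch (same assumptions): the zero-masked form packs the selected
// lanes to the front and zeroes the rest instead of passing elements through from a `src`
// vector:
//
//     let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
//     let r = _mm_maskz_compress_ps(0b1010, a);
//     // r == [2.0, 4.0, 0.0, 0.0]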
18608
18609/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18610///
18611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
18612#[inline]
18613#[target_feature(enable = "avx512f")]
18614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18615#[cfg_attr(test, assert_instr(vcompresspd))]
18616pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
18617    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
18618}
18619
18620/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18621///
18622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
18623#[inline]
18624#[target_feature(enable = "avx512f")]
18625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18626#[cfg_attr(test, assert_instr(vcompresspd))]
18627pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
18628    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
18629}
18630
18631/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18632///
18633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
18634#[inline]
18635#[target_feature(enable = "avx512f,avx512vl")]
18636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18637#[cfg_attr(test, assert_instr(vcompresspd))]
18638pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
18639    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
18640}
18641
18642/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18643///
18644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
18645#[inline]
18646#[target_feature(enable = "avx512f,avx512vl")]
18647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18648#[cfg_attr(test, assert_instr(vcompresspd))]
18649pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
18650    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
18651}
18652
18653/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18654///
18655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
18656#[inline]
18657#[target_feature(enable = "avx512f,avx512vl")]
18658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18659#[cfg_attr(test, assert_instr(vcompresspd))]
18660pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
18661    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
18662}
18663
18664/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18665///
18666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
18667#[inline]
18668#[target_feature(enable = "avx512f,avx512vl")]
18669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18670#[cfg_attr(test, assert_instr(vcompresspd))]
18671pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
18672    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
18673}
18674
18675/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18676///
18677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
18678#[inline]
18679#[target_feature(enable = "avx512f")]
18680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18681#[cfg_attr(test, assert_instr(vpcompressd))]
18682pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
18683    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
18684}
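
// Illustrative usage sketch (same assumptions): the store form writes only the selected
// lanes, contiguously and unaligned, so exactly `k.count_ones()` elements reach memory;
// `out` here is made-up example storage:
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let mut out = [0i32; 16];
//     let k: __mmask16 = 0b0000_0000_1111_0000;
//     unsafe { _mm512_mask_compressstoreu_epi32(out.as_mut_ptr(), k, a) };
//     assert_eq!(&out[..4], &[4, 5, 6, 7]); // four active lanes, stored back to back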
18685
18686/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18687///
18688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
18689#[inline]
18690#[target_feature(enable = "avx512f,avx512vl")]
18691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18692#[cfg_attr(test, assert_instr(vpcompressd))]
18693pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
18694    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
18695}
18696
18697/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18698///
18699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
18700#[inline]
18701#[target_feature(enable = "avx512f,avx512vl")]
18702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18703#[cfg_attr(test, assert_instr(vpcompressd))]
18704pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
18705    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
18706}
18707
18708/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18709///
18710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
18711#[inline]
18712#[target_feature(enable = "avx512f")]
18713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18714#[cfg_attr(test, assert_instr(vpcompressq))]
18715pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
18716    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
18717}
18718
18719/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18720///
18721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
18722#[inline]
18723#[target_feature(enable = "avx512f,avx512vl")]
18724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18725#[cfg_attr(test, assert_instr(vpcompressq))]
18726pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
18727    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
18728}
18729
18730/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18731///
18732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
18733#[inline]
18734#[target_feature(enable = "avx512f,avx512vl")]
18735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18736#[cfg_attr(test, assert_instr(vpcompressq))]
18737pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
18738    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
18739}
18740
18741/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18742///
18743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
18744#[inline]
18745#[target_feature(enable = "avx512f")]
18746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18747#[cfg_attr(test, assert_instr(vcompressps))]
18748pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
18749    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
18750}
18751
18752/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18753///
18754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
18755#[inline]
18756#[target_feature(enable = "avx512f,avx512vl")]
18757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18758#[cfg_attr(test, assert_instr(vcompressps))]
18759pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
18760    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
18761}
18762
18763/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18764///
18765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
18766#[inline]
18767#[target_feature(enable = "avx512f,avx512vl")]
18768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18769#[cfg_attr(test, assert_instr(vcompressps))]
18770pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
18771    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
18772}
18773
18774/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18775///
18776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
18777#[inline]
18778#[target_feature(enable = "avx512f")]
18779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18780#[cfg_attr(test, assert_instr(vcompresspd))]
18781pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
18782    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
18783}
18784
18785/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18786///
18787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
18788#[inline]
18789#[target_feature(enable = "avx512f,avx512vl")]
18790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18791#[cfg_attr(test, assert_instr(vcompresspd))]
18792pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
18793    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
18794}
18795
18796/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18797///
18798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
18799#[inline]
18800#[target_feature(enable = "avx512f,avx512vl")]
18801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18802#[cfg_attr(test, assert_instr(vcompresspd))]
18803pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
18804    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
18805}
18806
18807/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18808///
18809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
18810#[inline]
18811#[target_feature(enable = "avx512f")]
18812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18813#[cfg_attr(test, assert_instr(vpexpandd))]
18814pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18815    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
18816}
18817
18818/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18819///
18820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
18821#[inline]
18822#[target_feature(enable = "avx512f")]
18823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18824#[cfg_attr(test, assert_instr(vpexpandd))]
18825pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
18826    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
18827}
18828
18829/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18830///
18831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
18832#[inline]
18833#[target_feature(enable = "avx512f,avx512vl")]
18834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18835#[cfg_attr(test, assert_instr(vpexpandd))]
18836pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18837    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
18838}
18839
18840/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18841///
18842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
18843#[inline]
18844#[target_feature(enable = "avx512f,avx512vl")]
18845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18846#[cfg_attr(test, assert_instr(vpexpandd))]
18847pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
18848    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
18849}
18850
18851/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18852///
18853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
18854#[inline]
18855#[target_feature(enable = "avx512f,avx512vl")]
18856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18857#[cfg_attr(test, assert_instr(vpexpandd))]
18858pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18859    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
18860}
18861
18862/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18863///
18864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
18865#[inline]
18866#[target_feature(enable = "avx512f,avx512vl")]
18867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18868#[cfg_attr(test, assert_instr(vpexpandd))]
18869pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
18870    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
18871}
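
// Illustrative usage sketch (same assumptions): expand is the inverse of compress. It
// consumes contiguous elements from the low lanes of `a` and places them in the lanes
// selected by `k`; in the `maskz` form the unselected lanes become zero:
//
//     let a = _mm_setr_epi32(10, 20, 30, 40); // only the first two lanes are consumed
//     let r = _mm_maskz_expand_epi32(0b1010, a);
//     // r == [0, 10, 0, 20]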
18872
18873/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18874///
18875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
18876#[inline]
18877#[target_feature(enable = "avx512f")]
18878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18879#[cfg_attr(test, assert_instr(vpexpandq))]
18880pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18881    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
18882}
18883
18884/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18885///
18886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
18887#[inline]
18888#[target_feature(enable = "avx512f")]
18889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18890#[cfg_attr(test, assert_instr(vpexpandq))]
18891pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
18892    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
18893}
18894
18895/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18896///
18897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
18898#[inline]
18899#[target_feature(enable = "avx512f,avx512vl")]
18900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18901#[cfg_attr(test, assert_instr(vpexpandq))]
18902pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18903    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
18904}
18905
18906/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18907///
18908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
18909#[inline]
18910#[target_feature(enable = "avx512f,avx512vl")]
18911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18912#[cfg_attr(test, assert_instr(vpexpandq))]
18913pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
18914    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
18915}
18916
18917/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18918///
18919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
18920#[inline]
18921#[target_feature(enable = "avx512f,avx512vl")]
18922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18923#[cfg_attr(test, assert_instr(vpexpandq))]
18924pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18925    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
18926}
18927
18928/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18929///
18930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
18931#[inline]
18932#[target_feature(enable = "avx512f,avx512vl")]
18933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18934#[cfg_attr(test, assert_instr(vpexpandq))]
18935pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
18936    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
18937}
18938
18939/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18940///
18941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
18942#[inline]
18943#[target_feature(enable = "avx512f")]
18944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18945#[cfg_attr(test, assert_instr(vexpandps))]
18946pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18947    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
18948}
18949
18950/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18951///
18952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
18953#[inline]
18954#[target_feature(enable = "avx512f")]
18955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18956#[cfg_attr(test, assert_instr(vexpandps))]
18957pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
18958    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
18959}
18960
18961/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18962///
18963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
18964#[inline]
18965#[target_feature(enable = "avx512f,avx512vl")]
18966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18967#[cfg_attr(test, assert_instr(vexpandps))]
18968pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18969    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
18970}
18971
18972/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18973///
18974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
18975#[inline]
18976#[target_feature(enable = "avx512f,avx512vl")]
18977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18978#[cfg_attr(test, assert_instr(vexpandps))]
18979pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
18980    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
18981}
18982
18983/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18984///
18985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
18986#[inline]
18987#[target_feature(enable = "avx512f,avx512vl")]
18988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18989#[cfg_attr(test, assert_instr(vexpandps))]
18990pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
18991    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
18992}
18993
18994/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18995///
18996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
18997#[inline]
18998#[target_feature(enable = "avx512f,avx512vl")]
18999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19000#[cfg_attr(test, assert_instr(vexpandps))]
19001pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
19002    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
19003}
19004
19005/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19006///
19007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
19008#[inline]
19009#[target_feature(enable = "avx512f")]
19010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19011#[cfg_attr(test, assert_instr(vexpandpd))]
19012pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
19013    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
19014}
19015
19016/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19017///
19018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
19019#[inline]
19020#[target_feature(enable = "avx512f")]
19021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19022#[cfg_attr(test, assert_instr(vexpandpd))]
19023pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
19024    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
19025}
19026
19027/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19028///
19029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
19030#[inline]
19031#[target_feature(enable = "avx512f,avx512vl")]
19032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19033#[cfg_attr(test, assert_instr(vexpandpd))]
19034pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
19035    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
19036}
19037
19038/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19039///
19040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
19041#[inline]
19042#[target_feature(enable = "avx512f,avx512vl")]
19043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19044#[cfg_attr(test, assert_instr(vexpandpd))]
19045pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
19046    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
19047}
19048
19049/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19050///
19051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
19052#[inline]
19053#[target_feature(enable = "avx512f,avx512vl")]
19054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19055#[cfg_attr(test, assert_instr(vexpandpd))]
19056pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
19057    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
19058}
19059
19060/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19061///
19062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
19063#[inline]
19064#[target_feature(enable = "avx512f,avx512vl")]
19065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19066#[cfg_attr(test, assert_instr(vexpandpd))]
19067pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
19068    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
19069}
19070
19071/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19072///
19073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19074#[inline]
19075#[target_feature(enable = "avx512f")]
19076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19077#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19078#[rustc_legacy_const_generics(1)]
19079pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19080    unsafe {
19081        static_assert_uimm_bits!(IMM8, 8);
19082        let a = a.as_i32x16();
19083        let r = vprold(a, IMM8);
19084        transmute(r)
19085    }
19086}
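
// Illustrative usage sketch (same assumptions): the rotate count is a compile-time
// immediate, and bits shifted out of the top of each 32-bit lane re-enter at the bottom:
//
//     let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
//     let r = _mm512_rol_epi32::<1>(a);
//     // every lane of r == 0x0000_0003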
19087
19088/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19089///
19090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19091#[inline]
19092#[target_feature(enable = "avx512f")]
19093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19094#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19095#[rustc_legacy_const_generics(3)]
19096pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19097    unsafe {
19098        static_assert_uimm_bits!(IMM8, 8);
19099        let a = a.as_i32x16();
19100        let r = vprold(a, IMM8);
19101        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19102    }
19103}
19104
19105/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19106///
19107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19108#[inline]
19109#[target_feature(enable = "avx512f")]
19110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19111#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19112#[rustc_legacy_const_generics(2)]
19113pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19114    unsafe {
19115        static_assert_uimm_bits!(IMM8, 8);
19116        let a = a.as_i32x16();
19117        let r = vprold(a, IMM8);
19118        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19119    }
19120}
19121
19122/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19123///
19124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19125#[inline]
19126#[target_feature(enable = "avx512f,avx512vl")]
19127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19128#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19129#[rustc_legacy_const_generics(1)]
19130pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19131    unsafe {
19132        static_assert_uimm_bits!(IMM8, 8);
19133        let a = a.as_i32x8();
19134        let r = vprold256(a, IMM8);
19135        transmute(r)
19136    }
19137}
19138
19139/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19140///
19141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19142#[inline]
19143#[target_feature(enable = "avx512f,avx512vl")]
19144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19145#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19146#[rustc_legacy_const_generics(3)]
19147pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19148    unsafe {
19149        static_assert_uimm_bits!(IMM8, 8);
19150        let a = a.as_i32x8();
19151        let r = vprold256(a, IMM8);
19152        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19153    }
19154}
19155
19156/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19157///
19158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19159#[inline]
19160#[target_feature(enable = "avx512f,avx512vl")]
19161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19162#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19163#[rustc_legacy_const_generics(2)]
19164pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19165    unsafe {
19166        static_assert_uimm_bits!(IMM8, 8);
19167        let a = a.as_i32x8();
19168        let r = vprold256(a, IMM8);
19169        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19170    }
19171}
19172
19173/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19174///
19175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19176#[inline]
19177#[target_feature(enable = "avx512f,avx512vl")]
19178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19179#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19180#[rustc_legacy_const_generics(1)]
19181pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19182    unsafe {
19183        static_assert_uimm_bits!(IMM8, 8);
19184        let a = a.as_i32x4();
19185        let r = vprold128(a, IMM8);
19186        transmute(r)
19187    }
19188}
19189
19190/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19191///
19192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19193#[inline]
19194#[target_feature(enable = "avx512f,avx512vl")]
19195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19196#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19197#[rustc_legacy_const_generics(3)]
19198pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19199    unsafe {
19200        static_assert_uimm_bits!(IMM8, 8);
19201        let a = a.as_i32x4();
19202        let r = vprold128(a, IMM8);
19203        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19204    }
19205}
19206
19207/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19208///
19209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19210#[inline]
19211#[target_feature(enable = "avx512f,avx512vl")]
19212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19213#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19214#[rustc_legacy_const_generics(2)]
19215pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19216    unsafe {
19217        static_assert_uimm_bits!(IMM8, 8);
19218        let a = a.as_i32x4();
19219        let r = vprold128(a, IMM8);
19220        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19221    }
19222}
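
// A minimal illustrative sketch, added editorially: the helper below is hypothetical
// and not part of the existing test suite. With `IMM8 = 1`, bit 31 of each selected
// lane wraps around to bit 0; lanes whose mask bit is clear keep the value from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_rol_epi32_sketch() {
    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_epi32(i32::MIN); // only bit 31 set in each lane
    let r = _mm512_mask_rol_epi32::<1>(src, 0b1010_1010_1010_1010, a);
    // Odd lanes hold the rotated value 1; even lanes are copied from `src`.
    let e = _mm512_set_epi32(1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}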
19223
19224/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19225///
19226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19227#[inline]
19228#[target_feature(enable = "avx512f")]
19229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19230#[cfg_attr(test, assert_instr(vprord, IMM8 = 1))]
19231#[rustc_legacy_const_generics(1)]
19232pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19233    unsafe {
19234        static_assert_uimm_bits!(IMM8, 8);
19235        let a = a.as_i32x16();
19236        let r = vprord(a, IMM8);
19237        transmute(r)
19238    }
19239}
19240
19241/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19242///
19243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19244#[inline]
19245#[target_feature(enable = "avx512f")]
19246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19247#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
19248#[rustc_legacy_const_generics(3)]
19249pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19250    unsafe {
19251        static_assert_uimm_bits!(IMM8, 8);
19252        let a = a.as_i32x16();
19253        let r = vprord(a, IMM8);
19254        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19255    }
19256}
19257
19258/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19259///
19260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19261#[inline]
19262#[target_feature(enable = "avx512f")]
19263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19264#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
19265#[rustc_legacy_const_generics(2)]
19266pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19267    unsafe {
19268        static_assert_uimm_bits!(IMM8, 8);
19269        let a = a.as_i32x16();
19270        let r = vprord(a, IMM8);
19271        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19272    }
19273}
19274
19275/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19276///
19277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19278#[inline]
19279#[target_feature(enable = "avx512f,avx512vl")]
19280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19281#[cfg_attr(test, assert_instr(vprord, IMM8 = 1))]
19282#[rustc_legacy_const_generics(1)]
19283pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19284    unsafe {
19285        static_assert_uimm_bits!(IMM8, 8);
19286        let a = a.as_i32x8();
19287        let r = vprord256(a, IMM8);
19288        transmute(r)
19289    }
19290}
19291
19292/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19293///
19294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19295#[inline]
19296#[target_feature(enable = "avx512f,avx512vl")]
19297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19298#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
19299#[rustc_legacy_const_generics(3)]
19300pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19301    unsafe {
19302        static_assert_uimm_bits!(IMM8, 8);
19303        let a = a.as_i32x8();
19304        let r = vprord256(a, IMM8);
19305        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19306    }
19307}
19308
19309/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19310///
19311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19312#[inline]
19313#[target_feature(enable = "avx512f,avx512vl")]
19314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19315#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
19316#[rustc_legacy_const_generics(2)]
19317pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19318    unsafe {
19319        static_assert_uimm_bits!(IMM8, 8);
19320        let a = a.as_i32x8();
19321        let r = vprord256(a, IMM8);
19322        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19323    }
19324}
19325
19326/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19327///
19328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19329#[inline]
19330#[target_feature(enable = "avx512f,avx512vl")]
19331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19332#[cfg_attr(test, assert_instr(vprord, IMM8 = 1))]
19333#[rustc_legacy_const_generics(1)]
19334pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19335    unsafe {
19336        static_assert_uimm_bits!(IMM8, 8);
19337        let a = a.as_i32x4();
19338        let r = vprord128(a, IMM8);
19339        transmute(r)
19340    }
19341}
19342
19343/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19344///
19345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19346#[inline]
19347#[target_feature(enable = "avx512f,avx512vl")]
19348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19349#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
19350#[rustc_legacy_const_generics(3)]
19351pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19352    unsafe {
19353        static_assert_uimm_bits!(IMM8, 8);
19354        let a = a.as_i32x4();
19355        let r = vprord128(a, IMM8);
19356        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19357    }
19358}
19359
19360/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19361///
19362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19363#[inline]
19364#[target_feature(enable = "avx512f,avx512vl")]
19365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19366#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
19367#[rustc_legacy_const_generics(2)]
19368pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19369    unsafe {
19370        static_assert_uimm_bits!(IMM8, 8);
19371        let a = a.as_i32x4();
19372        let r = vprord128(a, IMM8);
19373        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19374    }
19375}
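
// Editorial sketch (hypothetical helper, not part of the existing test suite):
// for 32-bit lanes, rotating right by N produces the same result as rotating
// left by 32 - N, which is a convenient sanity check for the two families above.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_ror_rol_epi32_sketch() {
    let a = _mm512_set1_epi32(0x1234_5678);
    let ror = _mm512_ror_epi32::<7>(a);
    let rol = _mm512_rol_epi32::<25>(a);
    assert_eq!(_mm512_cmpeq_epi32_mask(ror, rol), 0xffff);
}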
19376
19377/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19378///
19379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19380#[inline]
19381#[target_feature(enable = "avx512f")]
19382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19383#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19384#[rustc_legacy_const_generics(1)]
19385pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19386    unsafe {
19387        static_assert_uimm_bits!(IMM8, 8);
19388        let a = a.as_i64x8();
19389        let r = vprolq(a, IMM8);
19390        transmute(r)
19391    }
19392}
19393
19394/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19395///
19396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19397#[inline]
19398#[target_feature(enable = "avx512f")]
19399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19400#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19401#[rustc_legacy_const_generics(3)]
19402pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19403    unsafe {
19404        static_assert_uimm_bits!(IMM8, 8);
19405        let a = a.as_i64x8();
19406        let r = vprolq(a, IMM8);
19407        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19408    }
19409}
19410
19411/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19412///
19413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19414#[inline]
19415#[target_feature(enable = "avx512f")]
19416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19417#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19418#[rustc_legacy_const_generics(2)]
19419pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19420    unsafe {
19421        static_assert_uimm_bits!(IMM8, 8);
19422        let a = a.as_i64x8();
19423        let r = vprolq(a, IMM8);
19424        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19425    }
19426}
19427
19428/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19429///
19430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19431#[inline]
19432#[target_feature(enable = "avx512f,avx512vl")]
19433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19434#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19435#[rustc_legacy_const_generics(1)]
19436pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19437    unsafe {
19438        static_assert_uimm_bits!(IMM8, 8);
19439        let a = a.as_i64x4();
19440        let r = vprolq256(a, IMM8);
19441        transmute(r)
19442    }
19443}
19444
19445/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19446///
19447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19448#[inline]
19449#[target_feature(enable = "avx512f,avx512vl")]
19450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19451#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19452#[rustc_legacy_const_generics(3)]
19453pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19454    unsafe {
19455        static_assert_uimm_bits!(IMM8, 8);
19456        let a = a.as_i64x4();
19457        let r = vprolq256(a, IMM8);
19458        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19459    }
19460}
19461
19462/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19463///
19464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19465#[inline]
19466#[target_feature(enable = "avx512f,avx512vl")]
19467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19468#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19469#[rustc_legacy_const_generics(2)]
19470pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19471    unsafe {
19472        static_assert_uimm_bits!(IMM8, 8);
19473        let a = a.as_i64x4();
19474        let r = vprolq256(a, IMM8);
19475        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19476    }
19477}
19478
19479/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19480///
19481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19482#[inline]
19483#[target_feature(enable = "avx512f,avx512vl")]
19484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19485#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19486#[rustc_legacy_const_generics(1)]
19487pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19488    unsafe {
19489        static_assert_uimm_bits!(IMM8, 8);
19490        let a = a.as_i64x2();
19491        let r = vprolq128(a, IMM8);
19492        transmute(r)
19493    }
19494}
19495
19496/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19497///
19498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19499#[inline]
19500#[target_feature(enable = "avx512f,avx512vl")]
19501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19502#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19503#[rustc_legacy_const_generics(3)]
19504pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19505    unsafe {
19506        static_assert_uimm_bits!(IMM8, 8);
19507        let a = a.as_i64x2();
19508        let r = vprolq128(a, IMM8);
19509        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19510    }
19511}
19512
19513/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19514///
19515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19516#[inline]
19517#[target_feature(enable = "avx512f,avx512vl")]
19518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19519#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19520#[rustc_legacy_const_generics(2)]
19521pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19522    unsafe {
19523        static_assert_uimm_bits!(IMM8, 8);
19524        let a = a.as_i64x2();
19525        let r = vprolq128(a, IMM8);
19526        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19527    }
19528}
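
// Editorial sketch (hypothetical helper, not part of the existing test suite):
// with a zeromask, unselected 64-bit lanes become 0 rather than being copied
// from a source vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_maskz_rol_epi64_sketch() {
    let a = _mm512_set1_epi64(i64::MIN); // only bit 63 set in each lane
    let r = _mm512_maskz_rol_epi64::<1>(0b0000_1111, a);
    // Lanes 0..=3 hold the rotated value 1; lanes 4..=7 are zeroed.
    let e = _mm512_set_epi64(0, 0, 0, 0, 1, 1, 1, 1);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xff);
}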
19529
19530/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19531///
19532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19533#[inline]
19534#[target_feature(enable = "avx512f")]
19535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19536#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
19537#[rustc_legacy_const_generics(1)]
19538pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19539    unsafe {
19540        static_assert_uimm_bits!(IMM8, 8);
19541        let a = a.as_i64x8();
19542        let r = vprorq(a, IMM8);
19543        transmute(r)
19544    }
19545}
19546
19547/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19548///
19549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19550#[inline]
19551#[target_feature(enable = "avx512f")]
19552#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19553#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
19554#[rustc_legacy_const_generics(3)]
19555pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19556    unsafe {
19557        static_assert_uimm_bits!(IMM8, 8);
19558        let a = a.as_i64x8();
19559        let r = vprorq(a, IMM8);
19560        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19561    }
19562}
19563
19564/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19565///
19566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19567#[inline]
19568#[target_feature(enable = "avx512f")]
19569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19570#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
19571#[rustc_legacy_const_generics(2)]
19572pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19573    unsafe {
19574        static_assert_uimm_bits!(IMM8, 8);
19575        let a = a.as_i64x8();
19576        let r = vprorq(a, IMM8);
19577        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19578    }
19579}
19580
19581/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19582///
19583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19584#[inline]
19585#[target_feature(enable = "avx512f,avx512vl")]
19586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19587#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
19588#[rustc_legacy_const_generics(1)]
19589pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19590    unsafe {
19591        static_assert_uimm_bits!(IMM8, 8);
19592        let a = a.as_i64x4();
19593        let r = vprorq256(a, IMM8);
19594        transmute(r)
19595    }
19596}
19597
19598/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19599///
19600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19601#[inline]
19602#[target_feature(enable = "avx512f,avx512vl")]
19603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19604#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
19605#[rustc_legacy_const_generics(3)]
19606pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19607    unsafe {
19608        static_assert_uimm_bits!(IMM8, 8);
19609        let a = a.as_i64x4();
19610        let r = vprorq256(a, IMM8);
19611        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19612    }
19613}
19614
19615/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19616///
19617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19618#[inline]
19619#[target_feature(enable = "avx512f,avx512vl")]
19620#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19621#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
19622#[rustc_legacy_const_generics(2)]
19623pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19624    unsafe {
19625        static_assert_uimm_bits!(IMM8, 8);
19626        let a = a.as_i64x4();
19627        let r = vprorq256(a, IMM8);
19628        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19629    }
19630}
19631
19632/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19633///
19634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19635#[inline]
19636#[target_feature(enable = "avx512f,avx512vl")]
19637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19638#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
19639#[rustc_legacy_const_generics(1)]
19640pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19641    unsafe {
19642        static_assert_uimm_bits!(IMM8, 8);
19643        let a = a.as_i64x2();
19644        let r = vprorq128(a, IMM8);
19645        transmute(r)
19646    }
19647}
19648
19649/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19650///
19651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19652#[inline]
19653#[target_feature(enable = "avx512f,avx512vl")]
19654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19655#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
19656#[rustc_legacy_const_generics(3)]
19657pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19658    unsafe {
19659        static_assert_uimm_bits!(IMM8, 8);
19660        let a = a.as_i64x2();
19661        let r = vprorq128(a, IMM8);
19662        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19663    }
19664}
19665
19666/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19667///
19668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
19669#[inline]
19670#[target_feature(enable = "avx512f,avx512vl")]
19671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19672#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
19673#[rustc_legacy_const_generics(2)]
19674pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19675    unsafe {
19676        static_assert_uimm_bits!(IMM8, 8);
19677        let a = a.as_i64x2();
19678        let r = vprorq128(a, IMM8);
19679        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19680    }
19681}
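
// Editorial sketch (hypothetical helper, not part of the existing test suite):
// the 128-bit VL forms have the same per-lane semantics, and rotating a 64-bit
// lane right by N matches rotating it left by 64 - N.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _example_ror_rol_epi64_vl_sketch() {
    let a = _mm_set1_epi64x(0x0123_4567_89ab_cdef);
    let ror = _mm_ror_epi64::<12>(a);
    let rol = _mm_rol_epi64::<52>(a);
    assert_eq!(_mm_cmpeq_epi64_mask(ror, rol), 0b11);
}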
19682
19683/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19684///
19685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
19686#[inline]
19687#[target_feature(enable = "avx512f")]
19688#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19689#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19690#[rustc_legacy_const_generics(1)]
19691pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19692    unsafe {
19693        static_assert_uimm_bits!(IMM8, 8);
19694        if IMM8 >= 32 {
19695            _mm512_setzero_si512()
19696        } else {
19697            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
19698        }
19699    }
19700}
19701
19702/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19703///
19704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
19705#[inline]
19706#[target_feature(enable = "avx512f")]
19707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19708#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19709#[rustc_legacy_const_generics(3)]
19710pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19711    unsafe {
19712        static_assert_uimm_bits!(IMM8, 8);
19713        let shf = if IMM8 >= 32 {
19714            u32x16::ZERO
19715        } else {
19716            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
19717        };
19718        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19719    }
19720}
19721
19722/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19723///
19724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
19725#[inline]
19726#[target_feature(enable = "avx512f")]
19727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19728#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19729#[rustc_legacy_const_generics(2)]
19730pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19731    unsafe {
19732        static_assert_uimm_bits!(IMM8, 8);
19733        if IMM8 >= 32 {
19734            _mm512_setzero_si512()
19735        } else {
19736            let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
19737            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19738        }
19739    }
19740}
19741
19742/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19743///
19744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
19745#[inline]
19746#[target_feature(enable = "avx512f,avx512vl")]
19747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19748#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19749#[rustc_legacy_const_generics(3)]
19750pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19751    unsafe {
19752        static_assert_uimm_bits!(IMM8, 8);
19753        let r = if IMM8 >= 32 {
19754            u32x8::ZERO
19755        } else {
19756            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
19757        };
19758        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19759    }
19760}
19761
19762/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19763///
19764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
19765#[inline]
19766#[target_feature(enable = "avx512f,avx512vl")]
19767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19768#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19769#[rustc_legacy_const_generics(2)]
19770pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19771    unsafe {
19772        static_assert_uimm_bits!(IMM8, 8);
19773        if IMM8 >= 32 {
19774            _mm256_setzero_si256()
19775        } else {
19776            let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
19777            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19778        }
19779    }
19780}
19781
19782/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19783///
19784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
19785#[inline]
19786#[target_feature(enable = "avx512f,avx512vl")]
19787#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19788#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19789#[rustc_legacy_const_generics(3)]
19790pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19791    unsafe {
19792        static_assert_uimm_bits!(IMM8, 8);
19793        let r = if IMM8 >= 32 {
19794            u32x4::ZERO
19795        } else {
19796            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
19797        };
19798        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19799    }
19800}
19801
19802/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19803///
19804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
19805#[inline]
19806#[target_feature(enable = "avx512f,avx512vl")]
19807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19808#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19809#[rustc_legacy_const_generics(2)]
19810pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19811    unsafe {
19812        static_assert_uimm_bits!(IMM8, 8);
19813        if IMM8 >= 32 {
19814            _mm_setzero_si128()
19815        } else {
19816            let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
19817            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19818        }
19819    }
19820}
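
// Editorial sketch (hypothetical helper, not part of the existing test suite):
// unlike Rust's scalar `<<`, a shift count of 32 or more does not overflow;
// every 32-bit lane simply becomes 0, as the guard above implements.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_slli_epi32_out_of_range_sketch() {
    let a = _mm512_set1_epi32(-1);
    let r = _mm512_slli_epi32::<32>(a);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_setzero_si512()), 0xffff);
}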
19821
19822/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
19823///
19824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
19825#[inline]
19826#[target_feature(enable = "avx512f")]
19827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19828#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19829#[rustc_legacy_const_generics(1)]
19830pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19831    unsafe {
19832        static_assert_uimm_bits!(IMM8, 8);
19833        if IMM8 >= 32 {
19834            _mm512_setzero_si512()
19835        } else {
19836            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
19837        }
19838    }
19839}
19840
19841/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19842///
19843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
19844#[inline]
19845#[target_feature(enable = "avx512f")]
19846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19847#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19848#[rustc_legacy_const_generics(3)]
19849pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19850    unsafe {
19851        static_assert_uimm_bits!(IMM8, 8);
19852        let shf = if IMM8 >= 32 {
19853            u32x16::ZERO
19854        } else {
19855            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
19856        };
19857        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19858    }
19859}
19860
19861/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19862///
19863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
19864#[inline]
19865#[target_feature(enable = "avx512f")]
19866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19867#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19868#[rustc_legacy_const_generics(2)]
19869pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19870    unsafe {
19871        static_assert_uimm_bits!(IMM8, 8);
19872        if IMM8 >= 32 {
19873            _mm512_setzero_si512()
19874        } else {
19875            let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
19876            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19877        }
19878    }
19879}
19880
19881/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19882///
19883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
19884#[inline]
19885#[target_feature(enable = "avx512f,avx512vl")]
19886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19887#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19888#[rustc_legacy_const_generics(3)]
19889pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19890    unsafe {
19891        static_assert_uimm_bits!(IMM8, 8);
19892        let r = if IMM8 >= 32 {
19893            u32x8::ZERO
19894        } else {
19895            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
19896        };
19897        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19898    }
19899}
19900
19901/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19902///
19903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
19904#[inline]
19905#[target_feature(enable = "avx512f,avx512vl")]
19906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19907#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19908#[rustc_legacy_const_generics(2)]
19909pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19910    unsafe {
19911        static_assert_uimm_bits!(IMM8, 8);
19912        if IMM8 >= 32 {
19913            _mm256_setzero_si256()
19914        } else {
19915            let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
19916            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19917        }
19918    }
19919}
19920
19921/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19922///
19923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
19924#[inline]
19925#[target_feature(enable = "avx512f,avx512vl")]
19926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19927#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19928#[rustc_legacy_const_generics(3)]
19929pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19930    unsafe {
19931        static_assert_uimm_bits!(IMM8, 8);
19932        let r = if IMM8 >= 32 {
19933            u32x4::ZERO
19934        } else {
19935            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
19936        };
19937        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19938    }
19939}
19940
19941/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19942///
19943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
19944#[inline]
19945#[target_feature(enable = "avx512f,avx512vl")]
19946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19947#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19948#[rustc_legacy_const_generics(2)]
19949pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19950    unsafe {
19951        static_assert_uimm_bits!(IMM8, 8);
19952        if IMM8 >= 32 {
19953            _mm_setzero_si128()
19954        } else {
19955            let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
19956            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19957        }
19958    }
19959}
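
// Editorial sketch (hypothetical helper, not part of the existing test suite):
// `srli` is a logical shift, so zeros (not copies of the sign bit) are shifted
// in from the left even for negative lane values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_srli_epi32_logical_sketch() {
    let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF in every lane
    let r = _mm512_srli_epi32::<28>(a);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0xf)), 0xffff);
}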
19960
19961/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19962///
19963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
19964#[inline]
19965#[target_feature(enable = "avx512f")]
19966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19967#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19968#[rustc_legacy_const_generics(1)]
19969pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19970    unsafe {
19971        static_assert_uimm_bits!(IMM8, 8);
19972        if IMM8 >= 64 {
19973            _mm512_setzero_si512()
19974        } else {
19975            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
19976        }
19977    }
19978}
19979
19980/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19981///
19982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
19983#[inline]
19984#[target_feature(enable = "avx512f")]
19985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19986#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19987#[rustc_legacy_const_generics(3)]
19988pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19989    unsafe {
19990        static_assert_uimm_bits!(IMM8, 8);
19991        let shf = if IMM8 >= 64 {
19992            u64x8::ZERO
19993        } else {
19994            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
19995        };
19996        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
19997    }
19998}
19999
20000/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20001///
20002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
20003#[inline]
20004#[target_feature(enable = "avx512f")]
20005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20006#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20007#[rustc_legacy_const_generics(2)]
20008pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20009    unsafe {
20010        static_assert_uimm_bits!(IMM8, 8);
20011        if IMM8 >= 64 {
20012            _mm512_setzero_si512()
20013        } else {
20014            let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
20015            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20016        }
20017    }
20018}
20019
20020/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20021///
20022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
20023#[inline]
20024#[target_feature(enable = "avx512f,avx512vl")]
20025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20026#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20027#[rustc_legacy_const_generics(3)]
20028pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20029    unsafe {
20030        static_assert_uimm_bits!(IMM8, 8);
20031        let r = if IMM8 >= 64 {
20032            u64x4::ZERO
20033        } else {
20034            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
20035        };
20036        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20037    }
20038}
20039
20040/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20041///
20042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
20043#[inline]
20044#[target_feature(enable = "avx512f,avx512vl")]
20045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20046#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20047#[rustc_legacy_const_generics(2)]
20048pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20049    unsafe {
20050        static_assert_uimm_bits!(IMM8, 8);
20051        if IMM8 >= 64 {
20052            _mm256_setzero_si256()
20053        } else {
20054            let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
20055            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20056        }
20057    }
20058}
20059
20060/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20061///
20062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
20063#[inline]
20064#[target_feature(enable = "avx512f,avx512vl")]
20065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20066#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20067#[rustc_legacy_const_generics(3)]
20068pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20069    unsafe {
20070        static_assert_uimm_bits!(IMM8, 8);
20071        let r = if IMM8 >= 64 {
20072            u64x2::ZERO
20073        } else {
20074            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20075        };
20076        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20077    }
20078}
20079
20080/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20081///
20082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20083#[inline]
20084#[target_feature(enable = "avx512f,avx512vl")]
20085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20086#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20087#[rustc_legacy_const_generics(2)]
20088pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20089    unsafe {
20090        static_assert_uimm_bits!(IMM8, 8);
20091        if IMM8 >= 64 {
20092            _mm_setzero_si128()
20093        } else {
20094            let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20095            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20096        }
20097    }
20098}
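
// Editorial sketch (hypothetical helper, not part of the existing test suite):
// with a writemask, only the selected 64-bit lanes receive the shifted value;
// the remaining lanes are copied from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_slli_epi64_sketch() {
    let src = _mm512_set1_epi64(-1);
    let a = _mm512_set1_epi64(3);
    let r = _mm512_mask_slli_epi64::<4>(src, 0b0101_0101, a);
    // Even lanes hold 3 << 4 = 48; odd lanes keep the -1 from `src`.
    let e = _mm512_set_epi64(-1, 48, -1, 48, -1, 48, -1, 48);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xff);
}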
20099
20100/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20101///
20102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20103#[inline]
20104#[target_feature(enable = "avx512f")]
20105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20106#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20107#[rustc_legacy_const_generics(1)]
20108pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20109    unsafe {
20110        static_assert_uimm_bits!(IMM8, 8);
20111        if IMM8 >= 64 {
20112            _mm512_setzero_si512()
20113        } else {
20114            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20115        }
20116    }
20117}
20118
20119/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20120///
20121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20122#[inline]
20123#[target_feature(enable = "avx512f")]
20124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20125#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20126#[rustc_legacy_const_generics(3)]
20127pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20128    unsafe {
20129        static_assert_uimm_bits!(IMM8, 8);
20130        let shf = if IMM8 >= 64 {
20131            u64x8::ZERO
20132        } else {
20133            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
20134        };
20135        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20136    }
20137}
20138
20139/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20140///
20141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20142#[inline]
20143#[target_feature(enable = "avx512f")]
20144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20145#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20146#[rustc_legacy_const_generics(2)]
20147pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20148    unsafe {
20149        static_assert_uimm_bits!(IMM8, 8);
20150        if IMM8 >= 64 {
20151            _mm512_setzero_si512()
20152        } else {
20153            let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
20154            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20155        }
20156    }
20157}
20158
20159/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20160///
20161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20162#[inline]
20163#[target_feature(enable = "avx512f,avx512vl")]
20164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20165#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20166#[rustc_legacy_const_generics(3)]
20167pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20168    unsafe {
20169        static_assert_uimm_bits!(IMM8, 8);
20170        let r = if IMM8 >= 64 {
20171            u64x4::ZERO
20172        } else {
20173            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
20174        };
20175        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20176    }
20177}
20178
20179/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20180///
20181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20182#[inline]
20183#[target_feature(enable = "avx512f,avx512vl")]
20184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20185#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20186#[rustc_legacy_const_generics(2)]
20187pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20188    unsafe {
20189        static_assert_uimm_bits!(IMM8, 8);
20190        if IMM8 >= 64 {
20191            _mm256_setzero_si256()
20192        } else {
20193            let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
20194            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20195        }
20196    }
20197}
20198
20199/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20200///
20201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20202#[inline]
20203#[target_feature(enable = "avx512f,avx512vl")]
20204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20205#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20206#[rustc_legacy_const_generics(3)]
20207pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20208    unsafe {
20209        static_assert_uimm_bits!(IMM8, 8);
20210        let r = if IMM8 >= 64 {
20211            u64x2::ZERO
20212        } else {
20213            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
20214        };
20215        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20216    }
20217}
20218
20219/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20220///
20221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20222#[inline]
20223#[target_feature(enable = "avx512f,avx512vl")]
20224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20225#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20226#[rustc_legacy_const_generics(2)]
20227pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20228    unsafe {
20229        static_assert_uimm_bits!(IMM8, 8);
20230        if IMM8 >= 64 {
20231            _mm_setzero_si128()
20232        } else {
20233            let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
20234            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20235        }
20236    }
20237}
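
// Editorial sketch (hypothetical helper, not part of the existing test suite):
// an out-of-range count zeroes every lane before the zeromask is applied, so
// the result is all zeros regardless of the mask.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_maskz_srli_epi64_out_of_range_sketch() {
    let a = _mm512_set1_epi64(i64::MIN);
    let r = _mm512_maskz_srli_epi64::<64>(0xff, a);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_setzero_si512()), 0xff);
}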
20238
20239/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20240///
20241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20242#[inline]
20243#[target_feature(enable = "avx512f")]
20244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20245#[cfg_attr(test, assert_instr(vpslld))]
20246pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
20247    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20248}
20249
20250/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20251///
20252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20253#[inline]
20254#[target_feature(enable = "avx512f")]
20255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20256#[cfg_attr(test, assert_instr(vpslld))]
20257pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20258    unsafe {
20259        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20260        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20261    }
20262}
20263
20264/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20265///
20266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20267#[inline]
20268#[target_feature(enable = "avx512f")]
20269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20270#[cfg_attr(test, assert_instr(vpslld))]
20271pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20272    unsafe {
20273        let shf = _mm512_sll_epi32(a, count).as_i32x16();
20274        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20275    }
20276}
20277
20278/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20279///
20280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20281#[inline]
20282#[target_feature(enable = "avx512f,avx512vl")]
20283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20284#[cfg_attr(test, assert_instr(vpslld))]
20285pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20286    unsafe {
20287        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20288        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20289    }
20290}
20291
20292/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20293///
20294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20295#[inline]
20296#[target_feature(enable = "avx512f,avx512vl")]
20297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20298#[cfg_attr(test, assert_instr(vpslld))]
20299pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20300    unsafe {
20301        let shf = _mm256_sll_epi32(a, count).as_i32x8();
20302        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20303    }
20304}
20305
20306/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20307///
20308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20309#[inline]
20310#[target_feature(enable = "avx512f,avx512vl")]
20311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20312#[cfg_attr(test, assert_instr(vpslld))]
20313pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20314    unsafe {
20315        let shf = _mm_sll_epi32(a, count).as_i32x4();
20316        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20317    }
20318}
20319
20320/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20321///
20322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20323#[inline]
20324#[target_feature(enable = "avx512f,avx512vl")]
20325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20326#[cfg_attr(test, assert_instr(vpslld))]
20327pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20328    unsafe {
20329        let shf = _mm_sll_epi32(a, count).as_i32x4();
20330        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20331    }
20332}
20333
20334/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20335///
20336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
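///
/// # Examples
///
/// A sketch showing the zero fill of the logical shift (illustration only;
/// assumes runtime `avx512f` support and the unstable `stdarch_x86_avx512`
/// feature):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi32(-1); // all bits set in every lane
///         let r = _mm512_srl_epi32(a, _mm_set_epi64x(0, 31));
///         // Zeros are shifted in, so every lane ends up as 1.
///         assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(1)), 0xffff);
///     }
/// }
/// ```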
20337#[inline]
20338#[target_feature(enable = "avx512f")]
20339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20340#[cfg_attr(test, assert_instr(vpsrld))]
20341pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
20342    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20343}
20344
20345/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20346///
20347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20348#[inline]
20349#[target_feature(enable = "avx512f")]
20350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20351#[cfg_attr(test, assert_instr(vpsrld))]
20352pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20353    unsafe {
20354        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20355        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20356    }
20357}
20358
20359/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20360///
20361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20362#[inline]
20363#[target_feature(enable = "avx512f")]
20364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20365#[cfg_attr(test, assert_instr(vpsrld))]
20366pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20367    unsafe {
20368        let shf = _mm512_srl_epi32(a, count).as_i32x16();
20369        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20370    }
20371}
20372
20373/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20374///
20375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20376#[inline]
20377#[target_feature(enable = "avx512f,avx512vl")]
20378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20379#[cfg_attr(test, assert_instr(vpsrld))]
20380pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20381    unsafe {
20382        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20383        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20384    }
20385}
20386
20387/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20388///
20389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20390#[inline]
20391#[target_feature(enable = "avx512f,avx512vl")]
20392#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20393#[cfg_attr(test, assert_instr(vpsrld))]
20394pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20395    unsafe {
20396        let shf = _mm256_srl_epi32(a, count).as_i32x8();
20397        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20398    }
20399}
20400
20401/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20402///
20403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20404#[inline]
20405#[target_feature(enable = "avx512f,avx512vl")]
20406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20407#[cfg_attr(test, assert_instr(vpsrld))]
20408pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20409    unsafe {
20410        let shf = _mm_srl_epi32(a, count).as_i32x4();
20411        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20412    }
20413}
20414
20415/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20416///
20417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20418#[inline]
20419#[target_feature(enable = "avx512f,avx512vl")]
20420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20421#[cfg_attr(test, assert_instr(vpsrld))]
20422pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20423    unsafe {
20424        let shf = _mm_srl_epi32(a, count).as_i32x4();
20425        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20426    }
20427}
20428
20429/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20430///
20431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
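///
/// # Examples
///
/// An illustrative sketch under the same assumptions as the 32-bit examples
/// above (runtime `avx512f` support, unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi64(3);
///         let r = _mm512_sll_epi64(a, _mm_set_epi64x(0, 5));
///         // Every lane becomes 3 << 5 == 96.
///         assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(96)), 0xff);
///     }
/// }
/// ```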
20432#[inline]
20433#[target_feature(enable = "avx512f")]
20434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20435#[cfg_attr(test, assert_instr(vpsllq))]
20436pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
20437    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20438}
20439
20440/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20441///
20442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20443#[inline]
20444#[target_feature(enable = "avx512f")]
20445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20446#[cfg_attr(test, assert_instr(vpsllq))]
20447pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20448    unsafe {
20449        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20450        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20451    }
20452}
20453
20454/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20455///
20456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20457#[inline]
20458#[target_feature(enable = "avx512f")]
20459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20460#[cfg_attr(test, assert_instr(vpsllq))]
20461pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20462    unsafe {
20463        let shf = _mm512_sll_epi64(a, count).as_i64x8();
20464        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20465    }
20466}
20467
20468/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20469///
20470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20471#[inline]
20472#[target_feature(enable = "avx512f,avx512vl")]
20473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20474#[cfg_attr(test, assert_instr(vpsllq))]
20475pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20476    unsafe {
20477        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20478        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20479    }
20480}
20481
20482/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20483///
20484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20485#[inline]
20486#[target_feature(enable = "avx512f,avx512vl")]
20487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20488#[cfg_attr(test, assert_instr(vpsllq))]
20489pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20490    unsafe {
20491        let shf = _mm256_sll_epi64(a, count).as_i64x4();
20492        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20493    }
20494}
20495
20496/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20497///
20498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20499#[inline]
20500#[target_feature(enable = "avx512f,avx512vl")]
20501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20502#[cfg_attr(test, assert_instr(vpsllq))]
20503pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20504    unsafe {
20505        let shf = _mm_sll_epi64(a, count).as_i64x2();
20506        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20507    }
20508}
20509
20510/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20511///
20512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20513#[inline]
20514#[target_feature(enable = "avx512f,avx512vl")]
20515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20516#[cfg_attr(test, assert_instr(vpsllq))]
20517pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20518    unsafe {
20519        let shf = _mm_sll_epi64(a, count).as_i64x2();
20520        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20521    }
20522}
20523
20524/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20525///
20526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
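///
/// # Examples
///
/// A sketch added for illustration (assumes runtime `avx512f` support and the
/// unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi64(-2); // 0xffff_ffff_ffff_fffe in every lane
///         let r = _mm512_srl_epi64(a, _mm_set_epi64x(0, 63));
///         // The logical shift brings in zeros, so every lane is 1.
///         assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(1)), 0xff);
///     }
/// }
/// ```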
20527#[inline]
20528#[target_feature(enable = "avx512f")]
20529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20530#[cfg_attr(test, assert_instr(vpsrlq))]
20531pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
20532    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20533}
20534
20535/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20536///
20537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20538#[inline]
20539#[target_feature(enable = "avx512f")]
20540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20541#[cfg_attr(test, assert_instr(vpsrlq))]
20542pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20543    unsafe {
20544        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20545        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20546    }
20547}
20548
20549/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20550///
20551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20552#[inline]
20553#[target_feature(enable = "avx512f")]
20554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20555#[cfg_attr(test, assert_instr(vpsrlq))]
20556pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20557    unsafe {
20558        let shf = _mm512_srl_epi64(a, count).as_i64x8();
20559        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20560    }
20561}
20562
20563/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20564///
20565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20566#[inline]
20567#[target_feature(enable = "avx512f,avx512vl")]
20568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20569#[cfg_attr(test, assert_instr(vpsrlq))]
20570pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20571    unsafe {
20572        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20573        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20574    }
20575}
20576
20577/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20578///
20579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20580#[inline]
20581#[target_feature(enable = "avx512f,avx512vl")]
20582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20583#[cfg_attr(test, assert_instr(vpsrlq))]
20584pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20585    unsafe {
20586        let shf = _mm256_srl_epi64(a, count).as_i64x4();
20587        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20588    }
20589}
20590
20591/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20592///
20593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
20594#[inline]
20595#[target_feature(enable = "avx512f,avx512vl")]
20596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20597#[cfg_attr(test, assert_instr(vpsrlq))]
20598pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20599    unsafe {
20600        let shf = _mm_srl_epi64(a, count).as_i64x2();
20601        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20602    }
20603}
20604
20605/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20606///
20607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
20608#[inline]
20609#[target_feature(enable = "avx512f,avx512vl")]
20610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20611#[cfg_attr(test, assert_instr(vpsrlq))]
20612pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20613    unsafe {
20614        let shf = _mm_srl_epi64(a, count).as_i64x2();
20615        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20616    }
20617}
20618
20619/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20620///
20621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
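///
/// # Examples
///
/// A sketch contrasting the arithmetic shift with the logical shift above
/// (illustration only; assumes runtime `avx512f` support and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi32(-64);
///         let r = _mm512_sra_epi32(a, _mm_set_epi64x(0, 4));
///         // Sign bits are shifted in, so -64 >> 4 stays negative: -4.
///         assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(-4)), 0xffff);
///     }
/// }
/// ```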
20622#[inline]
20623#[target_feature(enable = "avx512f")]
20624#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20625#[cfg_attr(test, assert_instr(vpsrad))]
20626pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
20627    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
20628}
20629
20630/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20631///
20632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
20633#[inline]
20634#[target_feature(enable = "avx512f")]
20635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20636#[cfg_attr(test, assert_instr(vpsrad))]
20637pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20638    unsafe {
20639        let shf = _mm512_sra_epi32(a, count).as_i32x16();
20640        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20641    }
20642}
20643
20644/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20645///
20646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
20647#[inline]
20648#[target_feature(enable = "avx512f")]
20649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20650#[cfg_attr(test, assert_instr(vpsrad))]
20651pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20652    unsafe {
20653        let shf = _mm512_sra_epi32(a, count).as_i32x16();
20654        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20655    }
20656}
20657
20658/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20659///
20660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
20661#[inline]
20662#[target_feature(enable = "avx512f,avx512vl")]
20663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20664#[cfg_attr(test, assert_instr(vpsrad))]
20665pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20666    unsafe {
20667        let shf = _mm256_sra_epi32(a, count).as_i32x8();
20668        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20669    }
20670}
20671
20672/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20673///
20674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
20675#[inline]
20676#[target_feature(enable = "avx512f,avx512vl")]
20677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20678#[cfg_attr(test, assert_instr(vpsrad))]
20679pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20680    unsafe {
20681        let shf = _mm256_sra_epi32(a, count).as_i32x8();
20682        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20683    }
20684}
20685
20686/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20687///
20688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
20689#[inline]
20690#[target_feature(enable = "avx512f,avx512vl")]
20691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20692#[cfg_attr(test, assert_instr(vpsrad))]
20693pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20694    unsafe {
20695        let shf = _mm_sra_epi32(a, count).as_i32x4();
20696        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20697    }
20698}
20699
20700/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20701///
20702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
20703#[inline]
20704#[target_feature(enable = "avx512f,avx512vl")]
20705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20706#[cfg_attr(test, assert_instr(vpsrad))]
20707pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20708    unsafe {
20709        let shf = _mm_sra_epi32(a, count).as_i32x4();
20710        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20711    }
20712}
20713
20714/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20715///
20716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
20717#[inline]
20718#[target_feature(enable = "avx512f")]
20719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20720#[cfg_attr(test, assert_instr(vpsraq))]
20721pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
20722    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
20723}
20724
20725/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20726///
20727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
20728#[inline]
20729#[target_feature(enable = "avx512f")]
20730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20731#[cfg_attr(test, assert_instr(vpsraq))]
20732pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20733    unsafe {
20734        let shf = _mm512_sra_epi64(a, count).as_i64x8();
20735        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20736    }
20737}
20738
20739/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20740///
20741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
20742#[inline]
20743#[target_feature(enable = "avx512f")]
20744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20745#[cfg_attr(test, assert_instr(vpsraq))]
20746pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20747    unsafe {
20748        let shf = _mm512_sra_epi64(a, count).as_i64x8();
20749        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20750    }
20751}
20752
20753/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20754///
20755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
20756#[inline]
20757#[target_feature(enable = "avx512f,avx512vl")]
20758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20759#[cfg_attr(test, assert_instr(vpsraq))]
20760pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
20761    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
20762}
20763
20764/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20765///
20766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
20767#[inline]
20768#[target_feature(enable = "avx512f,avx512vl")]
20769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20770#[cfg_attr(test, assert_instr(vpsraq))]
20771pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20772    unsafe {
20773        let shf = _mm256_sra_epi64(a, count).as_i64x4();
20774        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20775    }
20776}
20777
20778/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20779///
20780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
20781#[inline]
20782#[target_feature(enable = "avx512f,avx512vl")]
20783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20784#[cfg_attr(test, assert_instr(vpsraq))]
20785pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20786    unsafe {
20787        let shf = _mm256_sra_epi64(a, count).as_i64x4();
20788        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20789    }
20790}
20791
20792/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20793///
20794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
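///
/// # Examples
///
/// A sketch added for illustration; 64-bit arithmetic right shifts on 128-bit
/// vectors need both `avx512f` and `avx512vl` at run time (and, at the time of
/// writing, the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f")
///     && std::arch::is_x86_feature_detected!("avx512vl")
/// {
///     unsafe {
///         let a = _mm_set_epi64x(-128, 128);
///         let r = _mm_sra_epi64(a, _mm_set_epi64x(0, 3));
///         // The sign is preserved per lane: -128 >> 3 == -16 and 128 >> 3 == 16.
///         assert_eq!(_mm_cmpeq_epi64_mask(r, _mm_set_epi64x(-16, 16)), 0b11);
///     }
/// }
/// ```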
20795#[inline]
20796#[target_feature(enable = "avx512f,avx512vl")]
20797#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20798#[cfg_attr(test, assert_instr(vpsraq))]
20799pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
20800    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
20801}
20802
20803/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20804///
20805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
20806#[inline]
20807#[target_feature(enable = "avx512f,avx512vl")]
20808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20809#[cfg_attr(test, assert_instr(vpsraq))]
20810pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20811    unsafe {
20812        let shf = _mm_sra_epi64(a, count).as_i64x2();
20813        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20814    }
20815}
20816
20817/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20818///
20819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
20820#[inline]
20821#[target_feature(enable = "avx512f,avx512vl")]
20822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20823#[cfg_attr(test, assert_instr(vpsraq))]
20824pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20825    unsafe {
20826        let shf = _mm_sra_epi64(a, count).as_i64x2();
20827        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20828    }
20829}
20830
20831/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20832///
20833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
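///
/// # Examples
///
/// A sketch added for illustration (assumes runtime `avx512f` support and the
/// unstable `stdarch_x86_avx512` feature). Shift counts of 32 or more behave
/// like a shift by 31:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi32(-16);
///         let r = _mm512_srai_epi32::<2>(a);
///         // -16 >> 2 with sign fill is -4 in every lane.
///         assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(-4)), 0xffff);
///     }
/// }
/// ```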
20834#[inline]
20835#[target_feature(enable = "avx512f")]
20836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20837#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20838#[rustc_legacy_const_generics(1)]
20839pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20840    unsafe {
20841        static_assert_uimm_bits!(IMM8, 8);
20842        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
20843    }
20844}
20845
20846/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20847///
20848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
20849#[inline]
20850#[target_feature(enable = "avx512f")]
20851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20852#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20853#[rustc_legacy_const_generics(3)]
20854pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
20855    unsafe {
20856        static_assert_uimm_bits!(IMM8, 8);
20857        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
20858        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
20859    }
20860}
20861
20862/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20863///
20864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
20865#[inline]
20866#[target_feature(enable = "avx512f")]
20867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20868#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20869#[rustc_legacy_const_generics(2)]
20870pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20871    unsafe {
20872        static_assert_uimm_bits!(IMM8, 8);
20873        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
20874        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
20875    }
20876}
20877
20878/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20879///
20880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
20881#[inline]
20882#[target_feature(enable = "avx512f,avx512vl")]
20883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20884#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20885#[rustc_legacy_const_generics(3)]
20886pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20887    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20888        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
20889        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
20890    }
20891}
20892
20893/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20894///
20895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
20896#[inline]
20897#[target_feature(enable = "avx512f,avx512vl")]
20898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20899#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20900#[rustc_legacy_const_generics(2)]
20901pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20902    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20903        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
20904        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
20905    }
20906}
20907
20908/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20909///
20910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
20911#[inline]
20912#[target_feature(enable = "avx512f,avx512vl")]
20913#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20914#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20915#[rustc_legacy_const_generics(3)]
20916pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20917    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20918        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
20919        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
20920    }
20921}
20922
20923/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20924///
20925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
20926#[inline]
20927#[target_feature(enable = "avx512f,avx512vl")]
20928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20929#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20930#[rustc_legacy_const_generics(2)]
20931pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20932    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
20933        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
20934        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
20935    }
20936}
20937
20938/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20939///
20940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
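///
/// # Examples
///
/// An illustrative sketch (assumes runtime `avx512f` support and the unstable
/// `stdarch_x86_avx512` feature). As with the 32-bit variant, counts of 64 or
/// more behave like a shift by 63:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi64(i64::MIN);
///         // Shifting by 63 replicates the sign bit across every lane.
///         let r = _mm512_srai_epi64::<63>(a);
///         assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(-1)), 0xff);
///     }
/// }
/// ```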
20941#[inline]
20942#[target_feature(enable = "avx512f")]
20943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20944#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20945#[rustc_legacy_const_generics(1)]
20946pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20947    unsafe {
20948        static_assert_uimm_bits!(IMM8, 8);
20949        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
20950    }
20951}
20952
20953/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20954///
20955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
20956#[inline]
20957#[target_feature(enable = "avx512f")]
20958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20959#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20960#[rustc_legacy_const_generics(3)]
20961pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20962    unsafe {
20963        static_assert_uimm_bits!(IMM8, 8);
20964        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
20965        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20966    }
20967}
20968
20969/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20970///
20971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
20972#[inline]
20973#[target_feature(enable = "avx512f")]
20974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20975#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20976#[rustc_legacy_const_generics(2)]
20977pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20978    unsafe {
20979        static_assert_uimm_bits!(IMM8, 8);
20980        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
20981        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20982    }
20983}
20984
20985/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20986///
20987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
20988#[inline]
20989#[target_feature(enable = "avx512f,avx512vl")]
20990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20991#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20992#[rustc_legacy_const_generics(1)]
20993pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
20994    unsafe {
20995        static_assert_uimm_bits!(IMM8, 8);
20996        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
20997    }
20998}
20999
21000/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21001///
21002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
21003#[inline]
21004#[target_feature(enable = "avx512f,avx512vl")]
21005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21006#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21007#[rustc_legacy_const_generics(3)]
21008pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
21009    unsafe {
21010        static_assert_uimm_bits!(IMM8, 8);
21011        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
21012        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21013    }
21014}
21015
21016/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21017///
21018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
21019#[inline]
21020#[target_feature(enable = "avx512f,avx512vl")]
21021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21022#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21023#[rustc_legacy_const_generics(2)]
21024pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21025    unsafe {
21026        static_assert_uimm_bits!(IMM8, 8);
21027        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
21028        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21029    }
21030}
21031
21032/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21033///
21034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
21035#[inline]
21036#[target_feature(enable = "avx512f,avx512vl")]
21037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21038#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21039#[rustc_legacy_const_generics(1)]
21040pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
21041    unsafe {
21042        static_assert_uimm_bits!(IMM8, 8);
21043        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
21044    }
21045}
21046
21047/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21048///
21049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
21050#[inline]
21051#[target_feature(enable = "avx512f,avx512vl")]
21052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21053#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21054#[rustc_legacy_const_generics(3)]
21055pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
21056    unsafe {
21057        static_assert_uimm_bits!(IMM8, 8);
21058        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
21059        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21060    }
21061}
21062
21063/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21064///
21065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
21066#[inline]
21067#[target_feature(enable = "avx512f,avx512vl")]
21068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21069#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21070#[rustc_legacy_const_generics(2)]
21071pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21072    unsafe {
21073        static_assert_uimm_bits!(IMM8, 8);
21074        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
21075        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21076    }
21077}
21078
21079/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21080///
21081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
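///
/// # Examples
///
/// A sketch of the per-lane shift counts (illustration only; assumes runtime
/// `avx512f` support and the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi32(-64);
///         // Each lane of `count` selects how far the matching lane of `a` is shifted.
///         let count = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1);
///         let r = _mm512_srav_epi32(a, count);
///         let expected = _mm512_setr_epi32(
///             -64, -32, -16, -8, -4, -2, -1, -64, -32, -16, -8, -4, -2, -1, -64, -32,
///         );
///         assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xffff);
///     }
/// }
/// ```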
21082#[inline]
21083#[target_feature(enable = "avx512f")]
21084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21085#[cfg_attr(test, assert_instr(vpsravd))]
21086pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
21087    unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
21088}
21089
21090/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21091///
21092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21093#[inline]
21094#[target_feature(enable = "avx512f")]
21095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21096#[cfg_attr(test, assert_instr(vpsravd))]
21097pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21098    unsafe {
21099        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21100        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21101    }
21102}
21103
21104/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21105///
21106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21107#[inline]
21108#[target_feature(enable = "avx512f")]
21109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21110#[cfg_attr(test, assert_instr(vpsravd))]
21111pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21112    unsafe {
21113        let shf = _mm512_srav_epi32(a, count).as_i32x16();
21114        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21115    }
21116}
21117
21118/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21119///
21120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21121#[inline]
21122#[target_feature(enable = "avx512f,avx512vl")]
21123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21124#[cfg_attr(test, assert_instr(vpsravd))]
21125pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21126    unsafe {
21127        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21128        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21129    }
21130}
21131
21132/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21133///
21134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21135#[inline]
21136#[target_feature(enable = "avx512f,avx512vl")]
21137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21138#[cfg_attr(test, assert_instr(vpsravd))]
21139pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21140    unsafe {
21141        let shf = _mm256_srav_epi32(a, count).as_i32x8();
21142        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21143    }
21144}
21145
21146/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21147///
21148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21149#[inline]
21150#[target_feature(enable = "avx512f,avx512vl")]
21151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21152#[cfg_attr(test, assert_instr(vpsravd))]
21153pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21154    unsafe {
21155        let shf = _mm_srav_epi32(a, count).as_i32x4();
21156        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21157    }
21158}
21159
21160/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21161///
21162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21163#[inline]
21164#[target_feature(enable = "avx512f,avx512vl")]
21165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21166#[cfg_attr(test, assert_instr(vpsravd))]
21167pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21168    unsafe {
21169        let shf = _mm_srav_epi32(a, count).as_i32x4();
21170        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21171    }
21172}
21173
21174/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21175///
21176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
21177#[inline]
21178#[target_feature(enable = "avx512f")]
21179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21180#[cfg_attr(test, assert_instr(vpsravq))]
21181pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
21182    unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
21183}
21184
21185/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21186///
21187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21188#[inline]
21189#[target_feature(enable = "avx512f")]
21190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21191#[cfg_attr(test, assert_instr(vpsravq))]
21192pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21193    unsafe {
21194        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21195        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21196    }
21197}
21198
21199/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21200///
21201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21202#[inline]
21203#[target_feature(enable = "avx512f")]
21204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21205#[cfg_attr(test, assert_instr(vpsravq))]
21206pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21207    unsafe {
21208        let shf = _mm512_srav_epi64(a, count).as_i64x8();
21209        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21210    }
21211}
21212
21213/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21214///
21215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21216#[inline]
21217#[target_feature(enable = "avx512f,avx512vl")]
21218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21219#[cfg_attr(test, assert_instr(vpsravq))]
21220pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
21221    unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
21222}
21223
21224/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21225///
21226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21227#[inline]
21228#[target_feature(enable = "avx512f,avx512vl")]
21229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21230#[cfg_attr(test, assert_instr(vpsravq))]
21231pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21232    unsafe {
21233        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21234        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21235    }
21236}
21237
21238/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21239///
21240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21241#[inline]
21242#[target_feature(enable = "avx512f,avx512vl")]
21243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21244#[cfg_attr(test, assert_instr(vpsravq))]
21245pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21246    unsafe {
21247        let shf = _mm256_srav_epi64(a, count).as_i64x4();
21248        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21249    }
21250}
21251
21252/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21253///
21254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
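///
/// # Examples
///
/// A sketch added for illustration; like the other 64-bit arithmetic shifts,
/// the 128-bit form needs `avx512f` and `avx512vl` at run time (plus the
/// unstable `stdarch_x86_avx512` feature at the time of writing):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f")
///     && std::arch::is_x86_feature_detected!("avx512vl")
/// {
///     unsafe {
///         let a = _mm_set_epi64x(-64, 64);
///         let count = _mm_set_epi64x(5, 3); // per-lane shift amounts
///         let r = _mm_srav_epi64(a, count);
///         // -64 >> 5 == -2 in the high lane, 64 >> 3 == 8 in the low lane.
///         assert_eq!(_mm_cmpeq_epi64_mask(r, _mm_set_epi64x(-2, 8)), 0b11);
///     }
/// }
/// ```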
21255#[inline]
21256#[target_feature(enable = "avx512f,avx512vl")]
21257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21258#[cfg_attr(test, assert_instr(vpsravq))]
21259pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
21260    unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
21261}
21262
21263/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21264///
21265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21266#[inline]
21267#[target_feature(enable = "avx512f,avx512vl")]
21268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21269#[cfg_attr(test, assert_instr(vpsravq))]
21270pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21271    unsafe {
21272        let shf = _mm_srav_epi64(a, count).as_i64x2();
21273        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21274    }
21275}
21276
21277/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21278///
21279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21280#[inline]
21281#[target_feature(enable = "avx512f,avx512vl")]
21282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21283#[cfg_attr(test, assert_instr(vpsravq))]
21284pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21285    unsafe {
21286        let shf = _mm_srav_epi64(a, count).as_i64x2();
21287        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21288    }
21289}
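
// Illustrative sketch (not part of the crate's API): how the VL forms of the
// per-element arithmetic right shift above behave, including the writemask and
// zeromask variants. The helper name and constants are invented for this
// example; a real caller must first verify avx512f and avx512vl support.
#[cfg(test)]
fn _srav_epi64_masking_sketch() {
    // SAFETY: illustration only; calling `#[target_feature]` intrinsics from a
    // function that does not enable those features requires unsafe, and this
    // sketch assumes the CPU supports AVX-512F and AVX-512VL.
    unsafe {
        // Lane 0 is listed first: a = [-64, 64, -9, 9], count = [3, 3, 65, 2].
        let a = _mm256_setr_epi64x(-64, 64, -9, 9);
        let count = _mm256_setr_epi64x(3, 3, 65, 2);
        // Arithmetic shift fills with sign bits; counts >= 64 saturate to the
        // sign, so lane 2 becomes -1: shifted = [-8, 8, -1, 2].
        let _shifted = _mm256_srav_epi64(a, count);
        // Writemask 0b0101 keeps lanes 0 and 2 from the shift and copies
        // lanes 1 and 3 from `src`: merged = [-8, 7, -1, 7].
        let src = _mm256_set1_epi64x(7);
        let _merged = _mm256_mask_srav_epi64(src, 0b0101, a, count);
        // Zeromask 0b0101 zeroes lanes 1 and 3 instead: zeroed = [-8, 0, -1, 0].
        let _zeroed = _mm256_maskz_srav_epi64(0b0101, a, count);
    }
}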
21290
21291/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21292///
21293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
21294#[inline]
21295#[target_feature(enable = "avx512f")]
21296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21297#[cfg_attr(test, assert_instr(vprolvd))]
21298pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
21299    unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) }
21300}
21301
21302/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21303///
21304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21305#[inline]
21306#[target_feature(enable = "avx512f")]
21307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21308#[cfg_attr(test, assert_instr(vprolvd))]
21309pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21310    unsafe {
21311        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
21312        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
21313    }
21314}
21315
21316/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21317///
21318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21319#[inline]
21320#[target_feature(enable = "avx512f")]
21321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21322#[cfg_attr(test, assert_instr(vprolvd))]
21323pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21324    unsafe {
21325        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
21326        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
21327    }
21328}
21329
21330/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21331///
21332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21333#[inline]
21334#[target_feature(enable = "avx512f,avx512vl")]
21335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21336#[cfg_attr(test, assert_instr(vprolvd))]
21337pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
21338    unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) }
21339}
21340
21341/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21342///
21343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21344#[inline]
21345#[target_feature(enable = "avx512f,avx512vl")]
21346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21347#[cfg_attr(test, assert_instr(vprolvd))]
21348pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21349    unsafe {
21350        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
21351        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
21352    }
21353}
21354
21355/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21356///
21357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21358#[inline]
21359#[target_feature(enable = "avx512f,avx512vl")]
21360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21361#[cfg_attr(test, assert_instr(vprolvd))]
21362pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21363    unsafe {
21364        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
21365        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
21366    }
21367}
21368
21369/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21370///
21371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
21372#[inline]
21373#[target_feature(enable = "avx512f,avx512vl")]
21374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21375#[cfg_attr(test, assert_instr(vprolvd))]
21376pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
21377    unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) }
21378}
21379
21380/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21381///
21382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21383#[inline]
21384#[target_feature(enable = "avx512f,avx512vl")]
21385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21386#[cfg_attr(test, assert_instr(vprolvd))]
21387pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21388    unsafe {
21389        let rol = _mm_rolv_epi32(a, b).as_i32x4();
21390        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
21391    }
21392}
21393
21394/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21395///
21396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21397#[inline]
21398#[target_feature(enable = "avx512f,avx512vl")]
21399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21400#[cfg_attr(test, assert_instr(vprolvd))]
21401pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21402    unsafe {
21403        let rol = _mm_rolv_epi32(a, b).as_i32x4();
21404        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
21405    }
21406}
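
// Illustrative sketch (not part of the crate's API): the variable left-rotate
// rotates each 32-bit lane by its own count, taken modulo 32. The helper name
// and constants are invented for this example; a real caller must first verify
// avx512f support.
#[cfg(test)]
fn _rolv_epi32_sketch() {
    // SAFETY: illustration only; assumes the CPU supports AVX-512F.
    unsafe {
        // Every lane holds 0x0000_00FF; the rotate counts differ per lane.
        let a = _mm512_set1_epi32(0xFF);
        let b = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 8, 4, 0);
        // Counts are taken modulo 32, so lane 3's count of 36 behaves like 4:
        // lanes 0..=3 become [0xFF, 0xFF0, 0xFF00, 0xFF0], the rest stay 0xFF.
        let _rot = _mm512_rolv_epi32(a, b);
        // Zeromask 0b0000_0000_0000_1111 keeps only the low four lanes.
        let _low = _mm512_maskz_rolv_epi32(0b0000_0000_0000_1111, a, b);
    }
}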
21407
21408/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21409///
21410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
21411#[inline]
21412#[target_feature(enable = "avx512f")]
21413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21414#[cfg_attr(test, assert_instr(vprorvd))]
21415pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
21416    unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) }
21417}
21418
21419/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21420///
21421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21422#[inline]
21423#[target_feature(enable = "avx512f")]
21424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21425#[cfg_attr(test, assert_instr(vprorvd))]
21426pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21427    unsafe {
21428        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21429        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
21430    }
21431}
21432
21433/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21434///
21435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21436#[inline]
21437#[target_feature(enable = "avx512f")]
21438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21439#[cfg_attr(test, assert_instr(vprorvd))]
21440pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21441    unsafe {
21442        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21443        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
21444    }
21445}
21446
21447/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21448///
21449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21450#[inline]
21451#[target_feature(enable = "avx512f,avx512vl")]
21452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21453#[cfg_attr(test, assert_instr(vprorvd))]
21454pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
21455    unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) }
21456}
21457
21458/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21459///
21460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
21461#[inline]
21462#[target_feature(enable = "avx512f,avx512vl")]
21463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21464#[cfg_attr(test, assert_instr(vprorvd))]
21465pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21466    unsafe {
21467        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
21468        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
21469    }
21470}
21471
21472/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21473///
21474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
21475#[inline]
21476#[target_feature(enable = "avx512f,avx512vl")]
21477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21478#[cfg_attr(test, assert_instr(vprorvd))]
21479pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21480    unsafe {
21481        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
21482        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
21483    }
21484}
21485
21486/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21487///
21488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
21489#[inline]
21490#[target_feature(enable = "avx512f,avx512vl")]
21491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21492#[cfg_attr(test, assert_instr(vprorvd))]
21493pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
21494    unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) }
21495}
21496
21497/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21498///
21499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
21500#[inline]
21501#[target_feature(enable = "avx512f,avx512vl")]
21502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21503#[cfg_attr(test, assert_instr(vprorvd))]
21504pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21505    unsafe {
21506        let ror = _mm_rorv_epi32(a, b).as_i32x4();
21507        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
21508    }
21509}
21510
21511/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21512///
21513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
21514#[inline]
21515#[target_feature(enable = "avx512f,avx512vl")]
21516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21517#[cfg_attr(test, assert_instr(vprorvd))]
21518pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21519    unsafe {
21520        let ror = _mm_rorv_epi32(a, b).as_i32x4();
21521        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
21522    }
21523}
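
// Illustrative sketch (not part of the crate's API): the variable right-rotate
// mirrors `_mm512_rolv_epi32`; rotating right by `n` equals rotating left by
// `32 - n`, with counts again taken modulo 32. The helper name and constants
// are invented for this example.
#[cfg(test)]
fn _rorv_epi32_sketch() {
    // SAFETY: illustration only; assumes the CPU supports AVX-512F.
    unsafe {
        let a = _mm512_set1_epi32(0b0110);
        let b = _mm512_set1_epi32(1);
        // Rotating 0b0110 right by one gives 0b0011 in every lane.
        let _ror = _mm512_rorv_epi32(a, b);
        // The writemask form overwrites only the even lanes (whose mask bits
        // are set); the odd lanes keep the value from `src`.
        let src = _mm512_set1_epi32(-1);
        let _merged = _mm512_mask_rorv_epi32(src, 0b0101_0101_0101_0101, a, b);
    }
}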
21524
21525/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21526///
21527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
21528#[inline]
21529#[target_feature(enable = "avx512f")]
21530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21531#[cfg_attr(test, assert_instr(vprolvq))]
21532pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
21533    unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) }
21534}
21535
21536/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21537///
21538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
21539#[inline]
21540#[target_feature(enable = "avx512f")]
21541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21542#[cfg_attr(test, assert_instr(vprolvq))]
21543pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21544    unsafe {
21545        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
21546        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
21547    }
21548}
21549
21550/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21551///
21552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
21553#[inline]
21554#[target_feature(enable = "avx512f")]
21555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21556#[cfg_attr(test, assert_instr(vprolvq))]
21557pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21558    unsafe {
21559        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
21560        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
21561    }
21562}
21563
21564/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21565///
21566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
21567#[inline]
21568#[target_feature(enable = "avx512f,avx512vl")]
21569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21570#[cfg_attr(test, assert_instr(vprolvq))]
21571pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
21572    unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) }
21573}
21574
21575/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21576///
21577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
21578#[inline]
21579#[target_feature(enable = "avx512f,avx512vl")]
21580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21581#[cfg_attr(test, assert_instr(vprolvq))]
21582pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21583    unsafe {
21584        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
21585        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
21586    }
21587}
21588
21589/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21590///
21591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
21592#[inline]
21593#[target_feature(enable = "avx512f,avx512vl")]
21594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21595#[cfg_attr(test, assert_instr(vprolvq))]
21596pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21597    unsafe {
21598        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
21599        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
21600    }
21601}
21602
21603/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21604///
21605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
21606#[inline]
21607#[target_feature(enable = "avx512f,avx512vl")]
21608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21609#[cfg_attr(test, assert_instr(vprolvq))]
21610pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
21611    unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) }
21612}
21613
21614/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21615///
21616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
21617#[inline]
21618#[target_feature(enable = "avx512f,avx512vl")]
21619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21620#[cfg_attr(test, assert_instr(vprolvq))]
21621pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21622    unsafe {
21623        let rol = _mm_rolv_epi64(a, b).as_i64x2();
21624        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
21625    }
21626}
21627
21628/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21629///
21630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
21631#[inline]
21632#[target_feature(enable = "avx512f,avx512vl")]
21633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21634#[cfg_attr(test, assert_instr(vprolvq))]
21635pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21636    unsafe {
21637        let rol = _mm_rolv_epi64(a, b).as_i64x2();
21638        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
21639    }
21640}
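
// Illustrative sketch (not part of the crate's API): the 64-bit variable
// left-rotate works like the 32-bit one, but counts are taken modulo 64. The
// helper name and constants are invented for this example; a real caller must
// first verify avx512f and avx512vl support.
#[cfg(test)]
fn _rolv_epi64_sketch() {
    // SAFETY: illustration only; assumes AVX-512F and, for the 256-bit form,
    // AVX-512VL.
    unsafe {
        // Rotate each lane of [1, 1, 1, 1] left by [0, 1, 63, 64] (lane 0 first).
        let a = _mm256_set1_epi64x(1);
        let b = _mm256_setr_epi64x(0, 1, 63, 64);
        // A count of 63 moves the bit into the sign position and a count of 64
        // wraps back to 0: rot = [1, 2, i64::MIN, 1].
        let _rot = _mm256_rolv_epi64(a, b);
    }
}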
21641
21642/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21643///
21644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
21645#[inline]
21646#[target_feature(enable = "avx512f")]
21647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21648#[cfg_attr(test, assert_instr(vprorvq))]
21649pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
21650    unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) }
21651}
21652
21653/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21654///
21655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
21656#[inline]
21657#[target_feature(enable = "avx512f")]
21658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21659#[cfg_attr(test, assert_instr(vprorvq))]
21660pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21661    unsafe {
21662        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
21663        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
21664    }
21665}
21666
21667/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21668///
21669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
21670#[inline]
21671#[target_feature(enable = "avx512f")]
21672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21673#[cfg_attr(test, assert_instr(vprorvq))]
21674pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21675    unsafe {
21676        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
21677        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
21678    }
21679}
21680
21681/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21682///
21683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
21684#[inline]
21685#[target_feature(enable = "avx512f,avx512vl")]
21686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21687#[cfg_attr(test, assert_instr(vprorvq))]
21688pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
21689    unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) }
21690}
21691
21692/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21693///
21694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
21695#[inline]
21696#[target_feature(enable = "avx512f,avx512vl")]
21697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21698#[cfg_attr(test, assert_instr(vprorvq))]
21699pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21700    unsafe {
21701        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
21702        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
21703    }
21704}
21705
21706/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21707///
21708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
21709#[inline]
21710#[target_feature(enable = "avx512f,avx512vl")]
21711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21712#[cfg_attr(test, assert_instr(vprorvq))]
21713pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21714    unsafe {
21715        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
21716        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
21717    }
21718}
21719
21720/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21721///
21722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
21723#[inline]
21724#[target_feature(enable = "avx512f,avx512vl")]
21725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21726#[cfg_attr(test, assert_instr(vprorvq))]
21727pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
21728    unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) }
21729}
21730
21731/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21732///
21733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
21734#[inline]
21735#[target_feature(enable = "avx512f,avx512vl")]
21736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21737#[cfg_attr(test, assert_instr(vprorvq))]
21738pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21739    unsafe {
21740        let ror = _mm_rorv_epi64(a, b).as_i64x2();
21741        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
21742    }
21743}
21744
21745/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21746///
21747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
21748#[inline]
21749#[target_feature(enable = "avx512f,avx512vl")]
21750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21751#[cfg_attr(test, assert_instr(vprorvq))]
21752pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21753    unsafe {
21754        let ror = _mm_rorv_epi64(a, b).as_i64x2();
21755        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
21756    }
21757}
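
// Illustrative sketch (not part of the crate's API): the 64-bit variable
// right-rotate, showing that a rotate (unlike a logical shift) never discards
// bits. The helper name and constants are invented for this example.
#[cfg(test)]
fn _rorv_epi64_sketch() {
    // SAFETY: illustration only; assumes the CPU supports AVX-512F.
    unsafe {
        let a = _mm512_set1_epi64(0b1011);
        let b = _mm512_set1_epi64(2);
        // The two low bits wrap around to the top: every lane becomes
        // (0b11 << 62) | 0b10, i.e. 0xC000_0000_0000_0002u64 reinterpreted as i64.
        let _ror = _mm512_rorv_epi64(a, b);
        // Zeromask 0b0000_0001 keeps only lane 0.
        let _lane0 = _mm512_maskz_rorv_epi64(0b0000_0001, a, b);
    }
}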
21758
21759/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21760///
21761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
21762#[inline]
21763#[target_feature(enable = "avx512f")]
21764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21765#[cfg_attr(test, assert_instr(vpsllvd))]
21766pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
21767    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
21768}
21769
21770/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21771///
21772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
21773#[inline]
21774#[target_feature(enable = "avx512f")]
21775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21776#[cfg_attr(test, assert_instr(vpsllvd))]
21777pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21778    unsafe {
21779        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
21780        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21781    }
21782}
21783
21784/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21785///
21786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
21787#[inline]
21788#[target_feature(enable = "avx512f")]
21789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21790#[cfg_attr(test, assert_instr(vpsllvd))]
21791pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21792    unsafe {
21793        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
21794        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21795    }
21796}
21797
21798/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21799///
21800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
21801#[inline]
21802#[target_feature(enable = "avx512f,avx512vl")]
21803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21804#[cfg_attr(test, assert_instr(vpsllvd))]
21805pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21806    unsafe {
21807        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
21808        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21809    }
21810}
21811
21812/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21813///
21814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
21815#[inline]
21816#[target_feature(enable = "avx512f,avx512vl")]
21817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21818#[cfg_attr(test, assert_instr(vpsllvd))]
21819pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21820    unsafe {
21821        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
21822        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21823    }
21824}
21825
21826/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21827///
21828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
21829#[inline]
21830#[target_feature(enable = "avx512f,avx512vl")]
21831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21832#[cfg_attr(test, assert_instr(vpsllvd))]
21833pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21834    unsafe {
21835        let shf = _mm_sllv_epi32(a, count).as_i32x4();
21836        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21837    }
21838}
21839
21840/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21841///
21842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
21843#[inline]
21844#[target_feature(enable = "avx512f,avx512vl")]
21845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21846#[cfg_attr(test, assert_instr(vpsllvd))]
21847pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21848    unsafe {
21849        let shf = _mm_sllv_epi32(a, count).as_i32x4();
21850        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21851    }
21852}
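
// Illustrative sketch (not part of the crate's API): the variable logical left
// shift. Unlike the rotates, a count of 32 or more does not wrap; it clears
// the lane to zero. The helper name and constants are invented for this example.
#[cfg(test)]
fn _sllv_epi32_sketch() {
    // SAFETY: illustration only; assumes the CPU supports AVX-512F.
    unsafe {
        let a = _mm512_set1_epi32(3);
        let count = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 4, 1, 0);
        // Lanes 0..=3 become [3, 6, 48, 0]; the count of 32 zeroes lane 3
        // instead of wrapping. The remaining lanes use a count of 0 and stay 3.
        let _shf = _mm512_sllv_epi32(a, count);
        // The writemask form merges unselected lanes from `src`, exactly as in
        // the rotate examples above.
        let src = _mm512_set1_epi32(-1);
        let _merged = _mm512_mask_sllv_epi32(src, 0b0000_0000_0000_1111, a, count);
    }
}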
21853
21854/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21855///
21856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
21857#[inline]
21858#[target_feature(enable = "avx512f")]
21859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21860#[cfg_attr(test, assert_instr(vpsrlvd))]
21861pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
21862    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
21863}
21864
21865/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21866///
21867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
21868#[inline]
21869#[target_feature(enable = "avx512f")]
21870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21871#[cfg_attr(test, assert_instr(vpsrlvd))]
21872pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21873    unsafe {
21874        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
21875        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21876    }
21877}
21878
21879/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21880///
21881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
21882#[inline]
21883#[target_feature(enable = "avx512f")]
21884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21885#[cfg_attr(test, assert_instr(vpsrlvd))]
21886pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21887    unsafe {
21888        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
21889        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21890    }
21891}
21892
21893/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21894///
21895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
21896#[inline]
21897#[target_feature(enable = "avx512f,avx512vl")]
21898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21899#[cfg_attr(test, assert_instr(vpsrlvd))]
21900pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21901    unsafe {
21902        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
21903        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21904    }
21905}
21906
21907/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21908///
21909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
21910#[inline]
21911#[target_feature(enable = "avx512f,avx512vl")]
21912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21913#[cfg_attr(test, assert_instr(vpsrlvd))]
21914pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21915    unsafe {
21916        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
21917        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21918    }
21919}
21920
21921/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21922///
21923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
21924#[inline]
21925#[target_feature(enable = "avx512f,avx512vl")]
21926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21927#[cfg_attr(test, assert_instr(vpsrlvd))]
21928pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21929    unsafe {
21930        let shf = _mm_srlv_epi32(a, count).as_i32x4();
21931        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21932    }
21933}
21934
21935/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21936///
21937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
21938#[inline]
21939#[target_feature(enable = "avx512f,avx512vl")]
21940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21941#[cfg_attr(test, assert_instr(vpsrlvd))]
21942pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21943    unsafe {
21944        let shf = _mm_srlv_epi32(a, count).as_i32x4();
21945        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21946    }
21947}
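
// Illustrative sketch (not part of the crate's API): the variable logical
// right shift shifts in zeros, so it differs from the arithmetic
// `_mm512_srav_epi32` only for negative inputs. The helper name and constants
// are invented for this example.
#[cfg(test)]
fn _srlv_epi32_sketch() {
    // SAFETY: illustration only; assumes the CPU supports AVX-512F.
    unsafe {
        let a = _mm512_set1_epi32(-16); // 0xFFFF_FFF0 in every lane
        let count = _mm512_set1_epi32(4);
        // Logical shift: 0xFFFF_FFF0 >> 4 = 0x0FFF_FFFF in every lane, a large
        // positive value rather than the -1 an arithmetic shift would give.
        let _shf = _mm512_srlv_epi32(a, count);
        // Zeromask form: only lane 0 survives; the other lanes are zeroed.
        let _lane0 = _mm512_maskz_srlv_epi32(0b0000_0000_0000_0001, a, count);
    }
}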
21948
21949/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21950///
21951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
21952#[inline]
21953#[target_feature(enable = "avx512f")]
21954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21955#[cfg_attr(test, assert_instr(vpsllvq))]
21956pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
21957    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
21958}
21959
21960/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21961///
21962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
21963#[inline]
21964#[target_feature(enable = "avx512f")]
21965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21966#[cfg_attr(test, assert_instr(vpsllvq))]
21967pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21968    unsafe {
21969        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
21970        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21971    }
21972}
21973
21974/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21975///
21976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
21977#[inline]
21978#[target_feature(enable = "avx512f")]
21979#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21980#[cfg_attr(test, assert_instr(vpsllvq))]
21981pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21982    unsafe {
21983        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
21984        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21985    }
21986}
21987
21988/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21989///
21990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
21991#[inline]
21992#[target_feature(enable = "avx512f,avx512vl")]
21993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21994#[cfg_attr(test, assert_instr(vpsllvq))]
21995pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21996    unsafe {
21997        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
21998        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21999    }
22000}
22001
22002/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22003///
22004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
22005#[inline]
22006#[target_feature(enable = "avx512f,avx512vl")]
22007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22008#[cfg_attr(test, assert_instr(vpsllvq))]
22009pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22010    unsafe {
22011        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
22012        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22013    }
22014}
22015
22016/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22017///
22018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
22019#[inline]
22020#[target_feature(enable = "avx512f,avx512vl")]
22021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22022#[cfg_attr(test, assert_instr(vpsllvq))]
22023pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22024    unsafe {
22025        let shf = _mm_sllv_epi64(a, count).as_i64x2();
22026        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22027    }
22028}
22029
22030/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22031///
22032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
22033#[inline]
22034#[target_feature(enable = "avx512f,avx512vl")]
22035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22036#[cfg_attr(test, assert_instr(vpsllvq))]
22037pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22038    unsafe {
22039        let shf = _mm_sllv_epi64(a, count).as_i64x2();
22040        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22041    }
22042}
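
// Illustrative sketch (not part of the crate's API): the 64-bit variable left
// shift and its masked 128-bit VL form. The helper name and constants are
// invented for this example; a real caller must first verify avx512f and
// avx512vl support.
#[cfg(test)]
fn _sllv_epi64_sketch() {
    // SAFETY: illustration only; assumes AVX-512F and AVX-512VL.
    unsafe {
        let a = _mm_set1_epi64x(1);
        // `_mm_set_epi64x` lists the high lane first: lane 0 gets a count of
        // 62, lane 1 a count of 70.
        let count = _mm_set_epi64x(70, 62);
        // 1 << 62 stays in range, but the count of 70 (>= 64) clears its lane:
        // shf = [1 << 62, 0].
        let _shf = _mm_sllv_epi64(a, count);
        // Writemask 0b01 keeps only lane 0 of the shift; lane 1 comes from `src`,
        // so merged = [1 << 62, -1].
        let src = _mm_set1_epi64x(-1);
        let _merged = _mm_mask_sllv_epi64(src, 0b01, a, count);
    }
}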
22043
22044/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22045///
22046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
22047#[inline]
22048#[target_feature(enable = "avx512f")]
22049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22050#[cfg_attr(test, assert_instr(vpsrlvq))]
22051pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
22052    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
22053}
22054
22055/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22056///
22057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
22058#[inline]
22059#[target_feature(enable = "avx512f")]
22060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22061#[cfg_attr(test, assert_instr(vpsrlvq))]
22062pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22063    unsafe {
22064        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22065        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
22066    }
22067}
22068
22069/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22070///
22071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
22072#[inline]
22073#[target_feature(enable = "avx512f")]
22074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22075#[cfg_attr(test, assert_instr(vpsrlvq))]
22076pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22077    unsafe {
22078        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
22079        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22080    }
22081}
22082
22083/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22084///
22085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22086#[inline]
22087#[target_feature(enable = "avx512f,avx512vl")]
22088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22089#[cfg_attr(test, assert_instr(vpsrlvq))]
22090pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22091    unsafe {
22092        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22093        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22094    }
22095}
22096
22097/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22098///
22099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22100#[inline]
22101#[target_feature(enable = "avx512f,avx512vl")]
22102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22103#[cfg_attr(test, assert_instr(vpsrlvq))]
22104pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22105    unsafe {
22106        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
22107        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22108    }
22109}
22110
22111/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22112///
22113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22114#[inline]
22115#[target_feature(enable = "avx512f,avx512vl")]
22116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22117#[cfg_attr(test, assert_instr(vpsrlvq))]
22118pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22119    unsafe {
22120        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22121        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22122    }
22123}
22124
22125/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22126///
22127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22128#[inline]
22129#[target_feature(enable = "avx512f,avx512vl")]
22130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22131#[cfg_attr(test, assert_instr(vpsrlvq))]
22132pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22133    unsafe {
22134        let shf = _mm_srlv_epi64(a, count).as_i64x2();
22135        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22136    }
22137}
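
// Illustrative sketch (not part of the crate's API): combining the 64-bit
// variable right shift with its zeromask form to pull different bit fields out
// of the same packed value. The helper name and constants are invented for
// this example.
#[cfg(test)]
fn _srlv_epi64_sketch() {
    // SAFETY: illustration only; assumes the CPU supports AVX-512F.
    unsafe {
        // Every lane holds the same packed value; each of the low four lanes
        // shifts a different 16-bit field down to the bottom.
        let a = _mm512_set1_epi64(0x1111_2222_3333_4444);
        let count = _mm512_set_epi64(0, 0, 0, 0, 48, 32, 16, 0);
        // Lanes 0..=3 become [0x1111_2222_3333_4444, 0x1111_2222_3333,
        // 0x1111_2222, 0x1111]; the upper four lanes are left unshifted.
        let _fields = _mm512_srlv_epi64(a, count);
        // Zeromask 0b0000_1111 discards the unshifted upper lanes.
        let _only_low = _mm512_maskz_srlv_epi64(0b0000_1111, a, count);
    }
}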
22138
22139/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22140///
22141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
22142#[inline]
22143#[target_feature(enable = "avx512f")]
22144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22145#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22146#[rustc_legacy_const_generics(1)]
22147pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
22148    unsafe {
22149        static_assert_uimm_bits!(MASK, 8);
22150        simd_shuffle!(
22151            a,
22152            a,
22153            [
22154                MASK as u32 & 0b11,
22155                (MASK as u32 >> 2) & 0b11,
22156                (MASK as u32 >> 4) & 0b11,
22157                (MASK as u32 >> 6) & 0b11,
22158                (MASK as u32 & 0b11) + 4,
22159                ((MASK as u32 >> 2) & 0b11) + 4,
22160                ((MASK as u32 >> 4) & 0b11) + 4,
22161                ((MASK as u32 >> 6) & 0b11) + 4,
22162                (MASK as u32 & 0b11) + 8,
22163                ((MASK as u32 >> 2) & 0b11) + 8,
22164                ((MASK as u32 >> 4) & 0b11) + 8,
22165                ((MASK as u32 >> 6) & 0b11) + 8,
22166                (MASK as u32 & 0b11) + 12,
22167                ((MASK as u32 >> 2) & 0b11) + 12,
22168                ((MASK as u32 >> 4) & 0b11) + 12,
22169                ((MASK as u32 >> 6) & 0b11) + 12,
22170            ],
22171        )
22172    }
22173}
22174
22175/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22176///
22177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22178#[inline]
22179#[target_feature(enable = "avx512f")]
22180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22181#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22182#[rustc_legacy_const_generics(3)]
22183pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
22184    unsafe {
22185        static_assert_uimm_bits!(MASK, 8);
22186        let r = _mm512_permute_ps::<MASK>(a);
22187        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22188    }
22189}
22190
22191/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22192///
22193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22194#[inline]
22195#[target_feature(enable = "avx512f")]
22196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22197#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22198#[rustc_legacy_const_generics(2)]
22199pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22200    unsafe {
22201        static_assert_uimm_bits!(MASK, 8);
22202        let r = _mm512_permute_ps::<MASK>(a);
22203        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22204    }
22205}
22206
22207/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22208///
22209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22210#[inline]
22211#[target_feature(enable = "avx512f,avx512vl")]
22212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22213#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22214#[rustc_legacy_const_generics(3)]
22215pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
22216    unsafe {
22217        let r = _mm256_permute_ps::<MASK>(a);
22218        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22219    }
22220}
22221
22222/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22223///
22224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22225#[inline]
22226#[target_feature(enable = "avx512f,avx512vl")]
22227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22228#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22229#[rustc_legacy_const_generics(2)]
22230pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22231    unsafe {
22232        let r = _mm256_permute_ps::<MASK>(a);
22233        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22234    }
22235}
22236
22237/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22238///
22239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22240#[inline]
22241#[target_feature(enable = "avx512f,avx512vl")]
22242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22243#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22244#[rustc_legacy_const_generics(3)]
22245pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22246    unsafe {
22247        let r = _mm_permute_ps::<MASK>(a);
22248        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22249    }
22250}
22251
22252/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22253///
22254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22255#[inline]
22256#[target_feature(enable = "avx512f,avx512vl")]
22257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22258#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22259#[rustc_legacy_const_generics(2)]
22260pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22261    unsafe {
22262        let r = _mm_permute_ps::<MASK>(a);
22263        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22264    }
22265}
22266
22267/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22268///
22269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
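///
/// A minimal illustrative sketch of the control byte (not from Intel's
/// documentation; assumes `avx512f` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
///     // One control bit per element: a set bit selects the upper value of
///     // that element's 128-bit pair, a clear bit selects the lower value.
///     let r = _mm512_permute_pd::<0b1111_1111>(a);
///     // r == [1., 1., 3., 3., 5., 5., 7., 7.]
/// }
/// ```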
22270#[inline]
22271#[target_feature(enable = "avx512f")]
22272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22273#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22274#[rustc_legacy_const_generics(1)]
22275pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
22276    unsafe {
22277        static_assert_uimm_bits!(MASK, 8);
22278        simd_shuffle!(
22279            a,
22280            a,
22281            [
22282                MASK as u32 & 0b1,
22283                ((MASK as u32 >> 1) & 0b1),
22284                ((MASK as u32 >> 2) & 0b1) + 2,
22285                ((MASK as u32 >> 3) & 0b1) + 2,
22286                ((MASK as u32 >> 4) & 0b1) + 4,
22287                ((MASK as u32 >> 5) & 0b1) + 4,
22288                ((MASK as u32 >> 6) & 0b1) + 6,
22289                ((MASK as u32 >> 7) & 0b1) + 6,
22290            ],
22291        )
22292    }
22293}
22294
22295/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22296///
22297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
22298#[inline]
22299#[target_feature(enable = "avx512f")]
22300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22301#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22302#[rustc_legacy_const_generics(3)]
22303pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22304    unsafe {
22305        static_assert_uimm_bits!(MASK, 8);
22306        let r = _mm512_permute_pd::<MASK>(a);
22307        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22308    }
22309}
22310
22311/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22312///
22313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
22314#[inline]
22315#[target_feature(enable = "avx512f")]
22316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22317#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22318#[rustc_legacy_const_generics(2)]
22319pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22320    unsafe {
22321        static_assert_uimm_bits!(MASK, 8);
22322        let r = _mm512_permute_pd::<MASK>(a);
22323        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22324    }
22325}
22326
22327/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22328///
22329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
22330#[inline]
22331#[target_feature(enable = "avx512f,avx512vl")]
22332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22333#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22334#[rustc_legacy_const_generics(3)]
22335pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22336    unsafe {
22337        static_assert_uimm_bits!(MASK, 4);
22338        let r = _mm256_permute_pd::<MASK>(a);
22339        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22340    }
22341}
22342
22343/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22344///
22345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
22346#[inline]
22347#[target_feature(enable = "avx512f,avx512vl")]
22348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22349#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22350#[rustc_legacy_const_generics(2)]
22351pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22352    unsafe {
22353        static_assert_uimm_bits!(MASK, 4);
22354        let r = _mm256_permute_pd::<MASK>(a);
22355        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22356    }
22357}
22358
22359/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22360///
22361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
22362#[inline]
22363#[target_feature(enable = "avx512f,avx512vl")]
22364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22365#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22366#[rustc_legacy_const_generics(3)]
22367pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
22368    unsafe {
22369        static_assert_uimm_bits!(IMM2, 2);
22370        let r = _mm_permute_pd::<IMM2>(a);
22371        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22372    }
22373}
22374
22375/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22376///
22377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
22378#[inline]
22379#[target_feature(enable = "avx512f,avx512vl")]
22380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22381#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22382#[rustc_legacy_const_generics(2)]
22383pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
22384    unsafe {
22385        static_assert_uimm_bits!(IMM2, 2);
22386        let r = _mm_permute_pd::<IMM2>(a);
22387        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
22388    }
22389}
22390
22391/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22392///
22393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
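///
/// A minimal illustrative sketch (not from Intel's documentation; assumes
/// `avx512f` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     // Two control bits per element, applied to each 256-bit half
///     // independently; 0b00_01_10_11 reverses the four elements of each half.
///     let r = _mm512_permutex_epi64::<0b00_01_10_11>(a);
///     // r == [3, 2, 1, 0, 7, 6, 5, 4]
/// }
/// ```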
22394#[inline]
22395#[target_feature(enable = "avx512f")]
22396#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22397#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22398#[rustc_legacy_const_generics(1)]
22399pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
22400    unsafe {
22401        static_assert_uimm_bits!(MASK, 8);
22402        simd_shuffle!(
22403            a,
22404            a,
22405            [
22406                MASK as u32 & 0b11,
22407                (MASK as u32 >> 2) & 0b11,
22408                ((MASK as u32 >> 4) & 0b11),
22409                ((MASK as u32 >> 6) & 0b11),
22410                (MASK as u32 & 0b11) + 4,
22411                ((MASK as u32 >> 2) & 0b11) + 4,
22412                ((MASK as u32 >> 4) & 0b11) + 4,
22413                ((MASK as u32 >> 6) & 0b11) + 4,
22414            ],
22415        )
22416    }
22417}
22418
22419/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22420///
22421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
22422#[inline]
22423#[target_feature(enable = "avx512f")]
22424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22425#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22426#[rustc_legacy_const_generics(3)]
22427pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
22428    src: __m512i,
22429    k: __mmask8,
22430    a: __m512i,
22431) -> __m512i {
22432    unsafe {
22433        static_assert_uimm_bits!(MASK, 8);
22434        let r = _mm512_permutex_epi64::<MASK>(a);
22435        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
22436    }
22437}
22438
22439/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22440///
22441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
22442#[inline]
22443#[target_feature(enable = "avx512f")]
22444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22445#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22446#[rustc_legacy_const_generics(2)]
22447pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
22448    unsafe {
22449        static_assert_uimm_bits!(MASK, 8);
22450        let r = _mm512_permutex_epi64::<MASK>(a);
22451        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
22452    }
22453}
22454
22455/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22456///
22457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
22458#[inline]
22459#[target_feature(enable = "avx512f,avx512vl")]
22460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22461#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22462#[rustc_legacy_const_generics(1)]
22463pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
22464    unsafe {
22465        static_assert_uimm_bits!(MASK, 8);
22466        simd_shuffle!(
22467            a,
22468            a,
22469            [
22470                MASK as u32 & 0b11,
22471                (MASK as u32 >> 2) & 0b11,
22472                ((MASK as u32 >> 4) & 0b11),
22473                ((MASK as u32 >> 6) & 0b11),
22474            ],
22475        )
22476    }
22477}
22478
22479/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22480///
22481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
22482#[inline]
22483#[target_feature(enable = "avx512f,avx512vl")]
22484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22485#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22486#[rustc_legacy_const_generics(3)]
22487pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
22488    src: __m256i,
22489    k: __mmask8,
22490    a: __m256i,
22491) -> __m256i {
22492    unsafe {
22493        static_assert_uimm_bits!(MASK, 8);
22494        let r = _mm256_permutex_epi64::<MASK>(a);
22495        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
22496    }
22497}
22498
22499/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22500///
22501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
22502#[inline]
22503#[target_feature(enable = "avx512f,avx512vl")]
22504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22505#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22506#[rustc_legacy_const_generics(2)]
22507pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
22508    unsafe {
22509        static_assert_uimm_bits!(MASK, 8);
22510        let r = _mm256_permutex_epi64::<MASK>(a);
22511        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
22512    }
22513}
22514
22515/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22516///
22517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
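///
/// A minimal illustrative sketch (not from Intel's documentation; assumes
/// `avx512f` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
///     // 0b01_01_01_01 broadcasts element 1 of each 256-bit half.
///     let r = _mm512_permutex_pd::<0b01_01_01_01>(a);
///     // r == [1., 1., 1., 1., 5., 5., 5., 5.]
/// }
/// ```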
22518#[inline]
22519#[target_feature(enable = "avx512f")]
22520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22521#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22522#[rustc_legacy_const_generics(1)]
22523pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
22524    unsafe {
22525        static_assert_uimm_bits!(MASK, 8);
22526        simd_shuffle!(
22527            a,
22528            a,
22529            [
22530                MASK as u32 & 0b11,
22531                (MASK as u32 >> 2) & 0b11,
22532                ((MASK as u32 >> 4) & 0b11),
22533                ((MASK as u32 >> 6) & 0b11),
22534                (MASK as u32 & 0b11) + 4,
22535                ((MASK as u32 >> 2) & 0b11) + 4,
22536                ((MASK as u32 >> 4) & 0b11) + 4,
22537                ((MASK as u32 >> 6) & 0b11) + 4,
22538            ],
22539        )
22540    }
22541}
22542
22543/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22544///
22545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
22546#[inline]
22547#[target_feature(enable = "avx512f")]
22548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22549#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22550#[rustc_legacy_const_generics(3)]
22551pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22552    unsafe {
22553        let r = _mm512_permutex_pd::<MASK>(a);
22554        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22555    }
22556}
22557
22558/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22559///
22560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
22561#[inline]
22562#[target_feature(enable = "avx512f")]
22563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22564#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22565#[rustc_legacy_const_generics(2)]
22566pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22567    unsafe {
22568        let r = _mm512_permutex_pd::<MASK>(a);
22569        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22570    }
22571}
22572
22573/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22574///
22575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
22576#[inline]
22577#[target_feature(enable = "avx512f,avx512vl")]
22578#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22579#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22580#[rustc_legacy_const_generics(1)]
22581pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
22582    unsafe {
22583        static_assert_uimm_bits!(MASK, 8);
22584        simd_shuffle!(
22585            a,
22586            a,
22587            [
22588                MASK as u32 & 0b11,
22589                (MASK as u32 >> 2) & 0b11,
22590                ((MASK as u32 >> 4) & 0b11),
22591                ((MASK as u32 >> 6) & 0b11),
22592            ],
22593        )
22594    }
22595}
22596
22597/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22598///
22599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
22600#[inline]
22601#[target_feature(enable = "avx512f,avx512vl")]
22602#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22603#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22604#[rustc_legacy_const_generics(3)]
22605pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22606    unsafe {
22607        static_assert_uimm_bits!(MASK, 8);
22608        let r = _mm256_permutex_pd::<MASK>(a);
22609        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22610    }
22611}
22612
22613/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22614///
22615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
22616#[inline]
22617#[target_feature(enable = "avx512f,avx512vl")]
22618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22619#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22620#[rustc_legacy_const_generics(2)]
22621pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22622    unsafe {
22623        static_assert_uimm_bits!(MASK, 8);
22624        let r = _mm256_permutex_pd::<MASK>(a);
22625        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22626    }
22627}
22628
22629/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22630///
22631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
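///
/// A minimal illustrative sketch (not from Intel's documentation; assumes
/// `avx512f` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     // Each index selects one of the sixteen 32-bit elements of `a`, so a
///     // descending index vector reverses the whole register.
///     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
///     let r = _mm512_permutevar_epi32(idx, a);
///     // r == [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
/// }
/// ```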
22632#[inline]
22633#[target_feature(enable = "avx512f")]
22634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22635#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22636pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
22637    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22638}
22639
22640/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22641///
22642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
22643#[inline]
22644#[target_feature(enable = "avx512f")]
22645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22646#[cfg_attr(test, assert_instr(vpermd))]
22647pub fn _mm512_mask_permutevar_epi32(
22648    src: __m512i,
22649    k: __mmask16,
22650    idx: __m512i,
22651    a: __m512i,
22652) -> __m512i {
22653    unsafe {
22654        let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
22655        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22656    }
22657}
22658
22659/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22660///
22661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
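///
/// A minimal illustrative sketch of the per-lane control (not from Intel's
/// documentation; assumes `avx512f` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_ps(
///         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
///     );
///     // The low two bits of each 32-bit element of `b` pick an element from
///     // the corresponding 128-bit lane of `a`; 3 selects the top of each lane.
///     let b = _mm512_set1_epi32(3);
///     let r = _mm512_permutevar_ps(a, b);
///     // r == [3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.]
/// }
/// ```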
22662#[inline]
22663#[target_feature(enable = "avx512f")]
22664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22665#[cfg_attr(test, assert_instr(vpermilps))]
22666pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
22667    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
22668}
22669
22670/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22671///
22672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
22673#[inline]
22674#[target_feature(enable = "avx512f")]
22675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22676#[cfg_attr(test, assert_instr(vpermilps))]
22677pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22678    unsafe {
22679        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
22680        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22681    }
22682}
22683
22684/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22685///
22686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
22687#[inline]
22688#[target_feature(enable = "avx512f")]
22689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22690#[cfg_attr(test, assert_instr(vpermilps))]
22691pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22692    unsafe {
22693        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
22694        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22695    }
22696}
22697
22698/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22699///
22700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_ps&expand=4195)
22701#[inline]
22702#[target_feature(enable = "avx512f,avx512vl")]
22703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22704#[cfg_attr(test, assert_instr(vpermilps))]
22705pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22706    unsafe {
22707        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
22708        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
22709    }
22710}
22711
22712/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22713///
22714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
22715#[inline]
22716#[target_feature(enable = "avx512f,avx512vl")]
22717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22718#[cfg_attr(test, assert_instr(vpermilps))]
22719pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22720    unsafe {
22721        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
22722        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
22723    }
22724}
22725
22726/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22727///
22728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
22729#[inline]
22730#[target_feature(enable = "avx512f,avx512vl")]
22731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22732#[cfg_attr(test, assert_instr(vpermilps))]
22733pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22734    unsafe {
22735        let permute = _mm_permutevar_ps(a, b).as_f32x4();
22736        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
22737    }
22738}
22739
22740/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22741///
22742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
22743#[inline]
22744#[target_feature(enable = "avx512f,avx512vl")]
22745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22746#[cfg_attr(test, assert_instr(vpermilps))]
22747pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22748    unsafe {
22749        let permute = _mm_permutevar_ps(a, b).as_f32x4();
22750        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
22751    }
22752}
22753
22754/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22755///
22756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
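///
/// A minimal illustrative sketch of the per-lane control (not from Intel's
/// documentation; assumes `avx512f` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
///     // For `vpermilpd` the selector is bit 1 of each 64-bit element of `b`,
///     // so a value of 2 picks the upper element of every 128-bit pair.
///     let b = _mm512_set1_epi64(2);
///     let r = _mm512_permutevar_pd(a, b);
///     // r == [1., 1., 3., 3., 5., 5., 7., 7.]
/// }
/// ```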
22757#[inline]
22758#[target_feature(enable = "avx512f")]
22759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22760#[cfg_attr(test, assert_instr(vpermilpd))]
22761pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
22762    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
22763}
22764
22765/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22766///
22767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
22768#[inline]
22769#[target_feature(enable = "avx512f")]
22770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22771#[cfg_attr(test, assert_instr(vpermilpd))]
22772pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22773    unsafe {
22774        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
22775        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
22776    }
22777}
22778
22779/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22780///
22781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
22782#[inline]
22783#[target_feature(enable = "avx512f")]
22784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22785#[cfg_attr(test, assert_instr(vpermilpd))]
22786pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22787    unsafe {
22788        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
22789        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
22790    }
22791}
22792
22793/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22794///
22795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
22796#[inline]
22797#[target_feature(enable = "avx512f,avx512vl")]
22798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22799#[cfg_attr(test, assert_instr(vpermilpd))]
22800pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22801    unsafe {
22802        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
22803        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
22804    }
22805}
22806
22807/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22808///
22809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
22810#[inline]
22811#[target_feature(enable = "avx512f,avx512vl")]
22812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22813#[cfg_attr(test, assert_instr(vpermilpd))]
22814pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22815    unsafe {
22816        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
22817        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
22818    }
22819}
22820
22821/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22822///
22823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
22824#[inline]
22825#[target_feature(enable = "avx512f,avx512vl")]
22826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22827#[cfg_attr(test, assert_instr(vpermilpd))]
22828pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22829    unsafe {
22830        let permute = _mm_permutevar_pd(a, b).as_f64x2();
22831        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
22832    }
22833}
22834
22835/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22836///
22837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
22838#[inline]
22839#[target_feature(enable = "avx512f,avx512vl")]
22840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22841#[cfg_attr(test, assert_instr(vpermilpd))]
22842pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22843    unsafe {
22844        let permute = _mm_permutevar_pd(a, b).as_f64x2();
22845        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
22846    }
22847}
22848
22849/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22850///
22851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
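///
/// A minimal illustrative sketch (not from Intel's documentation; assumes
/// `avx512f` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     // Indices may cross 128-bit lanes: gather the even elements first,
///     // then the odd elements.
///     let idx = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
///     let r = _mm512_permutexvar_epi32(idx, a);
///     // r == [0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15]
/// }
/// ```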
22852#[inline]
22853#[target_feature(enable = "avx512f")]
22854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22855#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22856pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
22857    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22858}
22859
22860/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22861///
22862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
22863#[inline]
22864#[target_feature(enable = "avx512f")]
22865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22866#[cfg_attr(test, assert_instr(vpermd))]
22867pub fn _mm512_mask_permutexvar_epi32(
22868    src: __m512i,
22869    k: __mmask16,
22870    idx: __m512i,
22871    a: __m512i,
22872) -> __m512i {
22873    unsafe {
22874        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
22875        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22876    }
22877}
22878
22879/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22880///
22881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
22882#[inline]
22883#[target_feature(enable = "avx512f")]
22884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22885#[cfg_attr(test, assert_instr(vpermd))]
22886pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
22887    unsafe {
22888        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
22889        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
22890    }
22891}
22892
22893/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22894///
22895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
22896#[inline]
22897#[target_feature(enable = "avx512f,avx512vl")]
22898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22899#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22900pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
22901    _mm256_permutevar8x32_epi32(a, idx) // llvm uses llvm.x86.avx2.permd
22902}
22903
22904/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22905///
22906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
22907#[inline]
22908#[target_feature(enable = "avx512f,avx512vl")]
22909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22910#[cfg_attr(test, assert_instr(vpermd))]
22911pub fn _mm256_mask_permutexvar_epi32(
22912    src: __m256i,
22913    k: __mmask8,
22914    idx: __m256i,
22915    a: __m256i,
22916) -> __m256i {
22917    unsafe {
22918        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
22919        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
22920    }
22921}
22922
22923/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22924///
22925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
22926#[inline]
22927#[target_feature(enable = "avx512f,avx512vl")]
22928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22929#[cfg_attr(test, assert_instr(vpermd))]
22930pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22931    unsafe {
22932        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
22933        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
22934    }
22935}
22936
22937/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22938///
22939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
22940#[inline]
22941#[target_feature(enable = "avx512f")]
22942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22943#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22944pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
22945    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
22946}
22947
22948/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22949///
22950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
22951#[inline]
22952#[target_feature(enable = "avx512f")]
22953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22954#[cfg_attr(test, assert_instr(vpermq))]
22955pub fn _mm512_mask_permutexvar_epi64(
22956    src: __m512i,
22957    k: __mmask8,
22958    idx: __m512i,
22959    a: __m512i,
22960) -> __m512i {
22961    unsafe {
22962        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
22963        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
22964    }
22965}
22966
22967/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22968///
22969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
22970#[inline]
22971#[target_feature(enable = "avx512f")]
22972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22973#[cfg_attr(test, assert_instr(vpermq))]
22974pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
22975    unsafe {
22976        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
22977        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
22978    }
22979}
22980
22981/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22982///
22983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
22984#[inline]
22985#[target_feature(enable = "avx512f,avx512vl")]
22986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22987#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22988pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
22989    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
22990}
22991
22992/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22993///
22994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
22995#[inline]
22996#[target_feature(enable = "avx512f,avx512vl")]
22997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22998#[cfg_attr(test, assert_instr(vpermq))]
22999pub fn _mm256_mask_permutexvar_epi64(
23000    src: __m256i,
23001    k: __mmask8,
23002    idx: __m256i,
23003    a: __m256i,
23004) -> __m256i {
23005    unsafe {
23006        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23007        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
23008    }
23009}
23010
23011/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23012///
23013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
23014#[inline]
23015#[target_feature(enable = "avx512f,avx512vl")]
23016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23017#[cfg_attr(test, assert_instr(vpermq))]
23018pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23019    unsafe {
23020        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
23021        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23022    }
23023}
23024
23025/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23026///
23027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
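///
/// A minimal illustrative sketch (not from Intel's documentation; assumes
/// `avx512f` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_ps(
///         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
///     );
///     // Broadcast element 7 of `a` to every position.
///     let idx = _mm512_set1_epi32(7);
///     let r = _mm512_permutexvar_ps(idx, a);
///     // r == [7.0; 16]
/// }
/// ```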
23028#[inline]
23029#[target_feature(enable = "avx512f")]
23030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23031#[cfg_attr(test, assert_instr(vpermps))]
23032pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
23033    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
23034}
23035
23036/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23037///
23038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
23039#[inline]
23040#[target_feature(enable = "avx512f")]
23041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23042#[cfg_attr(test, assert_instr(vpermps))]
23043pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23044    unsafe {
23045        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
23046        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
23047    }
23048}
23049
23050/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23051///
23052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
23053#[inline]
23054#[target_feature(enable = "avx512f")]
23055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23056#[cfg_attr(test, assert_instr(vpermps))]
23057pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23058    unsafe {
23059        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
23060        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23061    }
23062}
23063
23064/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23065///
23066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
23067#[inline]
23068#[target_feature(enable = "avx512f,avx512vl")]
23069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23070#[cfg_attr(test, assert_instr(vpermps))]
23071pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
23072    _mm256_permutevar8x32_ps(a, idx) // llvm uses llvm.x86.avx2.permps
23073}
23074
23075/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23076///
23077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23078#[inline]
23079#[target_feature(enable = "avx512f,avx512vl")]
23080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23081#[cfg_attr(test, assert_instr(vpermps))]
23082pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23083    unsafe {
23084        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23085        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23086    }
23087}
23088
23089/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23090///
23091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23092#[inline]
23093#[target_feature(enable = "avx512f,avx512vl")]
23094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23095#[cfg_attr(test, assert_instr(vpermps))]
23096pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23097    unsafe {
23098        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
23099        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23100    }
23101}
23102
23103/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23104///
23105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23106#[inline]
23107#[target_feature(enable = "avx512f")]
23108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23109#[cfg_attr(test, assert_instr(vpermpd))]
23110pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
23111    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23112}
23113
23114/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23115///
23116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23117#[inline]
23118#[target_feature(enable = "avx512f")]
23119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23120#[cfg_attr(test, assert_instr(vpermpd))]
23121pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23122    unsafe {
23123        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23124        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23125    }
23126}
23127
23128/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23129///
23130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23131#[inline]
23132#[target_feature(enable = "avx512f")]
23133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23134#[cfg_attr(test, assert_instr(vpermpd))]
23135pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23136    unsafe {
23137        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
23138        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23139    }
23140}
23141
23142/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23143///
23144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23145#[inline]
23146#[target_feature(enable = "avx512f,avx512vl")]
23147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23148#[cfg_attr(test, assert_instr(vpermpd))]
23149pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
23150    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23151}
23152
23153/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23154///
23155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23156#[inline]
23157#[target_feature(enable = "avx512f,avx512vl")]
23158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23159#[cfg_attr(test, assert_instr(vpermpd))]
23160pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23161    unsafe {
23162        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23163        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23164    }
23165}
23166
23167/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23168///
23169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23170#[inline]
23171#[target_feature(enable = "avx512f,avx512vl")]
23172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23173#[cfg_attr(test, assert_instr(vpermpd))]
23174pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23175    unsafe {
23176        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
23177        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23178    }
23179}
23180
23181/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23182///
23183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
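///
/// A minimal illustrative sketch of the two-source index encoding (not from
/// Intel's documentation; assumes `avx512f` support):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_set1_epi32(100);
///     // Bits 0..=3 of each index pick an element, bit 4 picks the source:
///     // indices 0..=15 read from `a`, 16..=31 read from `b`.
///     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
///     let r = _mm512_permutex2var_epi32(a, idx, b);
///     // r == [0, 100, 1, 100, 2, 100, 3, 100, 4, 100, 5, 100, 6, 100, 7, 100]
/// }
/// ```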
23184#[inline]
23185#[target_feature(enable = "avx512f")]
23186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23187#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23188pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23189    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23190}
23191
23192/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23193///
23194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23195#[inline]
23196#[target_feature(enable = "avx512f")]
23197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23198#[cfg_attr(test, assert_instr(vpermt2d))]
23199pub fn _mm512_mask_permutex2var_epi32(
23200    a: __m512i,
23201    k: __mmask16,
23202    idx: __m512i,
23203    b: __m512i,
23204) -> __m512i {
23205    unsafe {
23206        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23207        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23208    }
23209}
23210
23211/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23212///
23213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23214#[inline]
23215#[target_feature(enable = "avx512f")]
23216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23217#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23218pub fn _mm512_maskz_permutex2var_epi32(
23219    k: __mmask16,
23220    a: __m512i,
23221    idx: __m512i,
23222    b: __m512i,
23223) -> __m512i {
23224    unsafe {
23225        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23226        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23227    }
23228}
23229
23230/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23231///
23232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23233#[inline]
23234#[target_feature(enable = "avx512f")]
23235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23236#[cfg_attr(test, assert_instr(vpermi2d))]
23237pub fn _mm512_mask2_permutex2var_epi32(
23238    a: __m512i,
23239    idx: __m512i,
23240    k: __mmask16,
23241    b: __m512i,
23242) -> __m512i {
23243    unsafe {
23244        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
23245        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
23246    }
23247}
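
// Editorial sketch (not part of the upstream source): the `mask2` variant falls
// back to `idx` itself, not to `a`, for lanes whose mask bit is clear. The helper
// name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask2_permutex2var_epi32_sketch() -> __m512i {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    let idx = _mm512_set1_epi32(16);
    // With an all-zero mask, every result lane is the raw index value 16.
    _mm512_mask2_permutex2var_epi32(a, idx, 0, b)
}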
23248
23249/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23250///
23251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
23252#[inline]
23253#[target_feature(enable = "avx512f,avx512vl")]
23254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23255#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23256pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23257    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
23258}
23259
23260/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23261///
23262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
23263#[inline]
23264#[target_feature(enable = "avx512f,avx512vl")]
23265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23266#[cfg_attr(test, assert_instr(vpermt2d))]
23267pub fn _mm256_mask_permutex2var_epi32(
23268    a: __m256i,
23269    k: __mmask8,
23270    idx: __m256i,
23271    b: __m256i,
23272) -> __m256i {
23273    unsafe {
23274        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23275        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
23276    }
23277}
23278
23279/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23280///
23281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
23282#[inline]
23283#[target_feature(enable = "avx512f,avx512vl")]
23284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23285#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23286pub fn _mm256_maskz_permutex2var_epi32(
23287    k: __mmask8,
23288    a: __m256i,
23289    idx: __m256i,
23290    b: __m256i,
23291) -> __m256i {
23292    unsafe {
23293        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23294        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23295    }
23296}
23297
23298/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23299///
23300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
23301#[inline]
23302#[target_feature(enable = "avx512f,avx512vl")]
23303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23304#[cfg_attr(test, assert_instr(vpermi2d))]
23305pub fn _mm256_mask2_permutex2var_epi32(
23306    a: __m256i,
23307    idx: __m256i,
23308    k: __mmask8,
23309    b: __m256i,
23310) -> __m256i {
23311    unsafe {
23312        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
23313        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
23314    }
23315}
23316
23317/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23318///
23319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
23320#[inline]
23321#[target_feature(enable = "avx512f,avx512vl")]
23322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23323#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23324pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23325    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
23326}
23327
23328/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23329///
23330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
23331#[inline]
23332#[target_feature(enable = "avx512f,avx512vl")]
23333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23334#[cfg_attr(test, assert_instr(vpermt2d))]
23335pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23336    unsafe {
23337        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23338        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
23339    }
23340}
23341
23342/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23343///
23344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
23345#[inline]
23346#[target_feature(enable = "avx512f,avx512vl")]
23347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23348#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23349pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23350    unsafe {
23351        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23352        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
23353    }
23354}
23355
23356/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23357///
23358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
23359#[inline]
23360#[target_feature(enable = "avx512f,avx512vl")]
23361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23362#[cfg_attr(test, assert_instr(vpermi2d))]
23363pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23364    unsafe {
23365        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
23366        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
23367    }
23368}
23369
23370/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23371///
23372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
23373#[inline]
23374#[target_feature(enable = "avx512f")]
23375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23376#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23377pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
23378    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
23379}
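
// Editorial sketch (not part of the upstream source): the 64-bit variant has eight
// lanes, so the low 3 bits of each `idx` element pick a lane and bit 3 picks the
// source. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn permutex2var_epi64_sketch() -> __m512i {
    let a = _mm512_set1_epi64(10);
    let b = _mm512_set1_epi64(20);
    // Lanes 0..4 come from `a`; indices 8..12 address lanes 0..4 of `b`.
    let idx = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 10, 11);
    // The result is 10, 10, 10, 10, 20, 20, 20, 20.
    _mm512_permutex2var_epi64(a, idx, b)
}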
23380
23381/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23382///
23383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
23384#[inline]
23385#[target_feature(enable = "avx512f")]
23386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23387#[cfg_attr(test, assert_instr(vpermt2q))]
23388pub fn _mm512_mask_permutex2var_epi64(
23389    a: __m512i,
23390    k: __mmask8,
23391    idx: __m512i,
23392    b: __m512i,
23393) -> __m512i {
23394    unsafe {
23395        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23396        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
23397    }
23398}
23399
23400/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23401///
23402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
23403#[inline]
23404#[target_feature(enable = "avx512f")]
23405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23406#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23407pub fn _mm512_maskz_permutex2var_epi64(
23408    k: __mmask8,
23409    a: __m512i,
23410    idx: __m512i,
23411    b: __m512i,
23412) -> __m512i {
23413    unsafe {
23414        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23415        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23416    }
23417}
23418
23419/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23420///
23421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
23422#[inline]
23423#[target_feature(enable = "avx512f")]
23424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23425#[cfg_attr(test, assert_instr(vpermi2q))]
23426pub fn _mm512_mask2_permutex2var_epi64(
23427    a: __m512i,
23428    idx: __m512i,
23429    k: __mmask8,
23430    b: __m512i,
23431) -> __m512i {
23432    unsafe {
23433        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
23434        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
23435    }
23436}
23437
23438/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23439///
23440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
23441#[inline]
23442#[target_feature(enable = "avx512f,avx512vl")]
23443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23444#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23445pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
23446    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
23447}
23448
23449/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23450///
23451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
23452#[inline]
23453#[target_feature(enable = "avx512f,avx512vl")]
23454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23455#[cfg_attr(test, assert_instr(vpermt2q))]
23456pub fn _mm256_mask_permutex2var_epi64(
23457    a: __m256i,
23458    k: __mmask8,
23459    idx: __m256i,
23460    b: __m256i,
23461) -> __m256i {
23462    unsafe {
23463        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23464        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
23465    }
23466}
23467
23468/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23469///
23470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
23471#[inline]
23472#[target_feature(enable = "avx512f,avx512vl")]
23473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23474#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23475pub fn _mm256_maskz_permutex2var_epi64(
23476    k: __mmask8,
23477    a: __m256i,
23478    idx: __m256i,
23479    b: __m256i,
23480) -> __m256i {
23481    unsafe {
23482        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23483        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23484    }
23485}
23486
23487/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23488///
23489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
23490#[inline]
23491#[target_feature(enable = "avx512f,avx512vl")]
23492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23493#[cfg_attr(test, assert_instr(vpermi2q))]
23494pub fn _mm256_mask2_permutex2var_epi64(
23495    a: __m256i,
23496    idx: __m256i,
23497    k: __mmask8,
23498    b: __m256i,
23499) -> __m256i {
23500    unsafe {
23501        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
23502        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
23503    }
23504}
23505
23506/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23507///
23508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
23509#[inline]
23510#[target_feature(enable = "avx512f,avx512vl")]
23511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23512#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23513pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23514    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
23515}
23516
23517/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23518///
23519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
23520#[inline]
23521#[target_feature(enable = "avx512f,avx512vl")]
23522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23523#[cfg_attr(test, assert_instr(vpermt2q))]
23524pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23525    unsafe {
23526        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23527        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
23528    }
23529}
23530
23531/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23532///
23533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
23534#[inline]
23535#[target_feature(enable = "avx512f,avx512vl")]
23536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23537#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23538pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23539    unsafe {
23540        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23541        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
23542    }
23543}
23544
23545/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23546///
23547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
23548#[inline]
23549#[target_feature(enable = "avx512f,avx512vl")]
23550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23551#[cfg_attr(test, assert_instr(vpermi2q))]
23552pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23553    unsafe {
23554        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
23555        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
23556    }
23557}
23558
23559/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23560///
23561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
23562#[inline]
23563#[target_feature(enable = "avx512f")]
23564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23565#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23566pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
23567    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
23568}
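
// Editorial sketch (not part of the upstream source): for the single-precision
// variant the data are floats but `idx` is still an integer vector with the same
// lane/source encoding as the epi32 form. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn permutex2var_ps_sketch() -> __m512 {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    // Index 16 broadcasts lane 0 of `b` into every destination lane.
    let idx = _mm512_set1_epi32(16);
    _mm512_permutex2var_ps(a, idx, b)
}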
23569
23570/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23571///
23572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
23573#[inline]
23574#[target_feature(enable = "avx512f")]
23575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23576#[cfg_attr(test, assert_instr(vpermt2ps))]
23577pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
23578    unsafe {
23579        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23580        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
23581    }
23582}
23583
23584/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23585///
23586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
23587#[inline]
23588#[target_feature(enable = "avx512f")]
23589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23590#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23591pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
23592    unsafe {
23593        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23594        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23595    }
23596}
23597
23598/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23599///
23600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
23601#[inline]
23602#[target_feature(enable = "avx512f")]
23603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23604#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23605pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
23606    unsafe {
23607        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23608        let idx = _mm512_castsi512_ps(idx).as_f32x16();
23609        transmute(simd_select_bitmask(k, permute, idx))
23610    }
23611}
23612
23613/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23614///
23615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
23616#[inline]
23617#[target_feature(enable = "avx512f,avx512vl")]
23618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23619#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23620pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
23621    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
23622}
23623
23624/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23625///
23626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
23627#[inline]
23628#[target_feature(enable = "avx512f,avx512vl")]
23629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23630#[cfg_attr(test, assert_instr(vpermt2ps))]
23631pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
23632    unsafe {
23633        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23634        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
23635    }
23636}
23637
23638/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23639///
23640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
23641#[inline]
23642#[target_feature(enable = "avx512f,avx512vl")]
23643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23644#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23645pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
23646    unsafe {
23647        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23648        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23649    }
23650}
23651
23652/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23653///
23654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
23655#[inline]
23656#[target_feature(enable = "avx512f,avx512vl")]
23657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23658#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23659pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
23660    unsafe {
23661        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23662        let idx = _mm256_castsi256_ps(idx).as_f32x8();
23663        transmute(simd_select_bitmask(k, permute, idx))
23664    }
23665}
23666
23667/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23668///
23669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
23670#[inline]
23671#[target_feature(enable = "avx512f,avx512vl")]
23672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23673#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23674pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
23675    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
23676}
23677
23678/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23679///
23680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
23681#[inline]
23682#[target_feature(enable = "avx512f,avx512vl")]
23683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23684#[cfg_attr(test, assert_instr(vpermt2ps))]
23685pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
23686    unsafe {
23687        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23688        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
23689    }
23690}
23691
23692/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23693///
23694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
23695#[inline]
23696#[target_feature(enable = "avx512f,avx512vl")]
23697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23698#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23699pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
23700    unsafe {
23701        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23702        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23703    }
23704}
23705
23706/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23707///
23708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
23709#[inline]
23710#[target_feature(enable = "avx512f,avx512vl")]
23711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23712#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23713pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
23714    unsafe {
23715        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23716        let idx = _mm_castsi128_ps(idx).as_f32x4();
23717        transmute(simd_select_bitmask(k, permute, idx))
23718    }
23719}
23720
23721/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23722///
23723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
23724#[inline]
23725#[target_feature(enable = "avx512f")]
23726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23727#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23728pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23729    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
23730}
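
// Editorial sketch (not part of the upstream source): the double-precision variant
// takes 64-bit integer selectors in `idx`; bit 3 chooses between `a` and `b`.
// The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn permutex2var_pd_sketch() -> __m512d {
    let a = _mm512_set1_pd(1.0);
    let b = _mm512_set1_pd(2.0);
    // Reverse the order of `a`'s lanes, then fill the upper half with `b`'s lane 0.
    let idx = _mm512_setr_epi64(3, 2, 1, 0, 8, 8, 8, 8);
    _mm512_permutex2var_pd(a, idx, b)
}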
23731
23732/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23733///
23734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
23735#[inline]
23736#[target_feature(enable = "avx512f")]
23737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23738#[cfg_attr(test, assert_instr(vpermt2pd))]
23739pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
23740    unsafe {
23741        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23742        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
23743    }
23744}
23745
23746/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23747///
23748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
23749#[inline]
23750#[target_feature(enable = "avx512f")]
23751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23752#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23753pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23754    unsafe {
23755        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23756        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23757    }
23758}
23759
23760/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23761///
23762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
23763#[inline]
23764#[target_feature(enable = "avx512f")]
23765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23766#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23767pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
23768    unsafe {
23769        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23770        let idx = _mm512_castsi512_pd(idx).as_f64x8();
23771        transmute(simd_select_bitmask(k, permute, idx))
23772    }
23773}
23774
23775/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23776///
23777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
23778#[inline]
23779#[target_feature(enable = "avx512f,avx512vl")]
23780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23781#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23782pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23783    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
23784}
23785
23786/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23787///
23788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
23789#[inline]
23790#[target_feature(enable = "avx512f,avx512vl")]
23791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23792#[cfg_attr(test, assert_instr(vpermt2pd))]
23793pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
23794    unsafe {
23795        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23796        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
23797    }
23798}
23799
23800/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23801///
23802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
23803#[inline]
23804#[target_feature(enable = "avx512f,avx512vl")]
23805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23806#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23807pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23808    unsafe {
23809        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23810        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23811    }
23812}
23813
23814/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23815///
23816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
23817#[inline]
23818#[target_feature(enable = "avx512f,avx512vl")]
23819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23820#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23821pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
23822    unsafe {
23823        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23824        let idx = _mm256_castsi256_pd(idx).as_f64x4();
23825        transmute(simd_select_bitmask(k, permute, idx))
23826    }
23827}
23828
23829/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23830///
23831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
23832#[inline]
23833#[target_feature(enable = "avx512f,avx512vl")]
23834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23835#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23836pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23837    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
23838}
23839
23840/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23841///
23842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
23843#[inline]
23844#[target_feature(enable = "avx512f,avx512vl")]
23845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23846#[cfg_attr(test, assert_instr(vpermt2pd))]
23847pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
23848    unsafe {
23849        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23850        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
23851    }
23852}
23853
23854/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23855///
23856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
23857#[inline]
23858#[target_feature(enable = "avx512f,avx512vl")]
23859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23860#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23861pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23862    unsafe {
23863        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23864        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23865    }
23866}
23867
23868/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23869///
23870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
23871#[inline]
23872#[target_feature(enable = "avx512f,avx512vl")]
23873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23874#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23875pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
23876    unsafe {
23877        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23878        let idx = _mm_castsi128_pd(idx).as_f64x2();
23879        transmute(simd_select_bitmask(k, permute, idx))
23880    }
23881}
23882
23883/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
23884///
23885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
23886#[inline]
23887#[target_feature(enable = "avx512f")]
23888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23889#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
23890#[rustc_legacy_const_generics(1)]
23891pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
23892    unsafe {
23893        static_assert_uimm_bits!(MASK, 8);
23894        let r: i32x16 = simd_shuffle!(
23895            a.as_i32x16(),
23896            a.as_i32x16(),
23897            [
23898                MASK as u32 & 0b11,
23899                (MASK as u32 >> 2) & 0b11,
23900                (MASK as u32 >> 4) & 0b11,
23901                (MASK as u32 >> 6) & 0b11,
23902                (MASK as u32 & 0b11) + 4,
23903                ((MASK as u32 >> 2) & 0b11) + 4,
23904                ((MASK as u32 >> 4) & 0b11) + 4,
23905                ((MASK as u32 >> 6) & 0b11) + 4,
23906                (MASK as u32 & 0b11) + 8,
23907                ((MASK as u32 >> 2) & 0b11) + 8,
23908                ((MASK as u32 >> 4) & 0b11) + 8,
23909                ((MASK as u32 >> 6) & 0b11) + 8,
23910                (MASK as u32 & 0b11) + 12,
23911                ((MASK as u32 >> 2) & 0b11) + 12,
23912                ((MASK as u32 >> 4) & 0b11) + 12,
23913                ((MASK as u32 >> 6) & 0b11) + 12,
23914            ],
23915        );
23916        transmute(r)
23917    }
23918}
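
// Editorial sketch (not part of the upstream source): each 2-bit field of MASK
// selects one of the four 32-bit elements inside a 128-bit lane, and the same
// pattern is applied to all four lanes. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn shuffle_epi32_sketch(a: __m512i) -> __m512i {
    // MASK = 0b00_01_10_11 reverses the four elements within each 128-bit lane.
    _mm512_shuffle_epi32::<0b00_01_10_11>(a)
}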
23919
23920/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23921///
23922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
23923#[inline]
23924#[target_feature(enable = "avx512f")]
23925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23926#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23927#[rustc_legacy_const_generics(3)]
23928pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23929    src: __m512i,
23930    k: __mmask16,
23931    a: __m512i,
23932) -> __m512i {
23933    unsafe {
23934        static_assert_uimm_bits!(MASK, 8);
23935        let r = _mm512_shuffle_epi32::<MASK>(a);
23936        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23937    }
23938}
23939
23940/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23941///
23942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
23943#[inline]
23944#[target_feature(enable = "avx512f")]
23945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23946#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23947#[rustc_legacy_const_generics(2)]
23948pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
23949    unsafe {
23950        static_assert_uimm_bits!(MASK, 8);
23951        let r = _mm512_shuffle_epi32::<MASK>(a);
23952        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
23953    }
23954}
23955
23956/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23957///
23958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
23959#[inline]
23960#[target_feature(enable = "avx512f,avx512vl")]
23961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23962#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23963#[rustc_legacy_const_generics(3)]
23964pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23965    src: __m256i,
23966    k: __mmask8,
23967    a: __m256i,
23968) -> __m256i {
23969    unsafe {
23970        static_assert_uimm_bits!(MASK, 8);
23971        let r = _mm256_shuffle_epi32::<MASK>(a);
23972        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23973    }
23974}
23975
23976/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23977///
23978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
23979#[inline]
23980#[target_feature(enable = "avx512f,avx512vl")]
23981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23982#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23983#[rustc_legacy_const_generics(2)]
23984pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
23985    unsafe {
23986        static_assert_uimm_bits!(MASK, 8);
23987        let r = _mm256_shuffle_epi32::<MASK>(a);
23988        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
23989    }
23990}
23991
23992/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23993///
23994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
23995#[inline]
23996#[target_feature(enable = "avx512f,avx512vl")]
23997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23998#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23999#[rustc_legacy_const_generics(3)]
24000pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24001    src: __m128i,
24002    k: __mmask8,
24003    a: __m128i,
24004) -> __m128i {
24005    unsafe {
24006        static_assert_uimm_bits!(MASK, 8);
24007        let r = _mm_shuffle_epi32::<MASK>(a);
24008        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
24009    }
24010}
24011
24012/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24013///
24014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
24015#[inline]
24016#[target_feature(enable = "avx512f,avx512vl")]
24017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24018#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24019#[rustc_legacy_const_generics(2)]
24020pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
24021    unsafe {
24022        static_assert_uimm_bits!(MASK, 8);
24023        let r = _mm_shuffle_epi32::<MASK>(a);
24024        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
24025    }
24026}
24027
24028/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
24029///
24030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
24031#[inline]
24032#[target_feature(enable = "avx512f")]
24033#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24034#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24035#[rustc_legacy_const_generics(2)]
24036pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24037    unsafe {
24038        static_assert_uimm_bits!(MASK, 8);
24039        simd_shuffle!(
24040            a,
24041            b,
24042            [
24043                MASK as u32 & 0b11,
24044                (MASK as u32 >> 2) & 0b11,
24045                ((MASK as u32 >> 4) & 0b11) + 16,
24046                ((MASK as u32 >> 6) & 0b11) + 16,
24047                (MASK as u32 & 0b11) + 4,
24048                ((MASK as u32 >> 2) & 0b11) + 4,
24049                ((MASK as u32 >> 4) & 0b11) + 20,
24050                ((MASK as u32 >> 6) & 0b11) + 20,
24051                (MASK as u32 & 0b11) + 8,
24052                ((MASK as u32 >> 2) & 0b11) + 8,
24053                ((MASK as u32 >> 4) & 0b11) + 24,
24054                ((MASK as u32 >> 6) & 0b11) + 24,
24055                (MASK as u32 & 0b11) + 12,
24056                ((MASK as u32 >> 2) & 0b11) + 12,
24057                ((MASK as u32 >> 4) & 0b11) + 28,
24058                ((MASK as u32 >> 6) & 0b11) + 28,
24059            ],
24060        )
24061    }
24062}
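
// Editorial sketch (not part of the upstream source): within each 128-bit lane the
// two low result elements are taken from `a` and the two high ones from `b`, each
// chosen by a 2-bit field of MASK. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn shuffle_ps_sketch(a: __m512, b: __m512) -> __m512 {
    // MASK = 0b01_00_11_10 yields a[2], a[3], b[0], b[1] in every 128-bit lane.
    _mm512_shuffle_ps::<0b01_00_11_10>(a, b)
}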
24063
24064/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24065///
24066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
24067#[inline]
24068#[target_feature(enable = "avx512f")]
24069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24070#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24071#[rustc_legacy_const_generics(4)]
24072pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
24073    src: __m512,
24074    k: __mmask16,
24075    a: __m512,
24076    b: __m512,
24077) -> __m512 {
24078    unsafe {
24079        static_assert_uimm_bits!(MASK, 8);
24080        let r = _mm512_shuffle_ps::<MASK>(a, b);
24081        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24082    }
24083}
24084
24085/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24086///
24087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24088#[inline]
24089#[target_feature(enable = "avx512f")]
24090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24091#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24092#[rustc_legacy_const_generics(3)]
24093pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24094    unsafe {
24095        static_assert_uimm_bits!(MASK, 8);
24096        let r = _mm512_shuffle_ps::<MASK>(a, b);
24097        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24098    }
24099}
24100
24101/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24102///
24103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24104#[inline]
24105#[target_feature(enable = "avx512f,avx512vl")]
24106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24107#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24108#[rustc_legacy_const_generics(4)]
24109pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
24110    src: __m256,
24111    k: __mmask8,
24112    a: __m256,
24113    b: __m256,
24114) -> __m256 {
24115    unsafe {
24116        static_assert_uimm_bits!(MASK, 8);
24117        let r = _mm256_shuffle_ps::<MASK>(a, b);
24118        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24119    }
24120}
24121
24122/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24123///
24124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24125#[inline]
24126#[target_feature(enable = "avx512f,avx512vl")]
24127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24128#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24129#[rustc_legacy_const_generics(3)]
24130pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24131    unsafe {
24132        static_assert_uimm_bits!(MASK, 8);
24133        let r = _mm256_shuffle_ps::<MASK>(a, b);
24134        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24135    }
24136}
24137
24138/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24139///
24140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24141#[inline]
24142#[target_feature(enable = "avx512f,avx512vl")]
24143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24144#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24145#[rustc_legacy_const_generics(4)]
24146pub fn _mm_mask_shuffle_ps<const MASK: i32>(
24147    src: __m128,
24148    k: __mmask8,
24149    a: __m128,
24150    b: __m128,
24151) -> __m128 {
24152    unsafe {
24153        static_assert_uimm_bits!(MASK, 8);
24154        let r = _mm_shuffle_ps::<MASK>(a, b);
24155        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24156    }
24157}
24158
24159/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24160///
24161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24162#[inline]
24163#[target_feature(enable = "avx512f,avx512vl")]
24164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24165#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24166#[rustc_legacy_const_generics(3)]
24167pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24168    unsafe {
24169        static_assert_uimm_bits!(MASK, 8);
24170        let r = _mm_shuffle_ps::<MASK>(a, b);
24171        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24172    }
24173}
24174
24175/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
24176///
24177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
24178#[inline]
24179#[target_feature(enable = "avx512f")]
24180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24181#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24182#[rustc_legacy_const_generics(2)]
24183pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24184    unsafe {
24185        static_assert_uimm_bits!(MASK, 8);
24186        simd_shuffle!(
24187            a,
24188            b,
24189            [
24190                MASK as u32 & 0b1,
24191                ((MASK as u32 >> 1) & 0b1) + 8,
24192                ((MASK as u32 >> 2) & 0b1) + 2,
24193                ((MASK as u32 >> 3) & 0b1) + 10,
24194                ((MASK as u32 >> 4) & 0b1) + 4,
24195                ((MASK as u32 >> 5) & 0b1) + 12,
24196                ((MASK as u32 >> 6) & 0b1) + 6,
24197                ((MASK as u32 >> 7) & 0b1) + 14,
24198            ],
24199        )
24200    }
24201}
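
// Editorial sketch (not part of the upstream source): each bit of MASK picks the
// low or high element of `a` (even result lanes) or of `b` (odd result lanes)
// within each 128-bit lane. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn shuffle_pd_sketch(a: __m512d, b: __m512d) -> __m512d {
    // MASK = 0 makes every 128-bit lane of the result { a.low, b.low }.
    _mm512_shuffle_pd::<0>(a, b)
}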
24202
24203/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24204///
24205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24206#[inline]
24207#[target_feature(enable = "avx512f")]
24208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24209#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24210#[rustc_legacy_const_generics(4)]
24211pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
24212    src: __m512d,
24213    k: __mmask8,
24214    a: __m512d,
24215    b: __m512d,
24216) -> __m512d {
24217    unsafe {
24218        static_assert_uimm_bits!(MASK, 8);
24219        let r = _mm512_shuffle_pd::<MASK>(a, b);
24220        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24221    }
24222}
24223
24224/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24225///
24226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
24227#[inline]
24228#[target_feature(enable = "avx512f")]
24229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24230#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24231#[rustc_legacy_const_generics(3)]
24232pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24233    unsafe {
24234        static_assert_uimm_bits!(MASK, 8);
24235        let r = _mm512_shuffle_pd::<MASK>(a, b);
24236        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24237    }
24238}
24239
24240/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24241///
24242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
24243#[inline]
24244#[target_feature(enable = "avx512f,avx512vl")]
24245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24246#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24247#[rustc_legacy_const_generics(4)]
24248pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
24249    src: __m256d,
24250    k: __mmask8,
24251    a: __m256d,
24252    b: __m256d,
24253) -> __m256d {
24254    unsafe {
24255        static_assert_uimm_bits!(MASK, 8);
24256        let r = _mm256_shuffle_pd::<MASK>(a, b);
24257        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24258    }
24259}
24260
24261/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24262///
24263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
24264#[inline]
24265#[target_feature(enable = "avx512f,avx512vl")]
24266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24267#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24268#[rustc_legacy_const_generics(3)]
24269pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24270    unsafe {
24271        static_assert_uimm_bits!(MASK, 8);
24272        let r = _mm256_shuffle_pd::<MASK>(a, b);
24273        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24274    }
24275}
24276
24277/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24278///
24279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
24280#[inline]
24281#[target_feature(enable = "avx512f,avx512vl")]
24282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24283#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24284#[rustc_legacy_const_generics(4)]
24285pub fn _mm_mask_shuffle_pd<const MASK: i32>(
24286    src: __m128d,
24287    k: __mmask8,
24288    a: __m128d,
24289    b: __m128d,
24290) -> __m128d {
24291    unsafe {
24292        static_assert_uimm_bits!(MASK, 8);
24293        let r = _mm_shuffle_pd::<MASK>(a, b);
24294        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
24295    }
24296}
24297
24298/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24299///
24300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
24301#[inline]
24302#[target_feature(enable = "avx512f,avx512vl")]
24303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24304#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24305#[rustc_legacy_const_generics(3)]
24306pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24307    unsafe {
24308        static_assert_uimm_bits!(MASK, 8);
24309        let r = _mm_shuffle_pd::<MASK>(a, b);
24310        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
24311    }
24312}
24313
24314/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24315///
24316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
24317#[inline]
24318#[target_feature(enable = "avx512f")]
24319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24320#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
24321#[rustc_legacy_const_generics(2)]
24322pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24323    unsafe {
24324        static_assert_uimm_bits!(MASK, 8);
24325        let a = a.as_i32x16();
24326        let b = b.as_i32x16();
24327        let r: i32x16 = simd_shuffle!(
24328            a,
24329            b,
24330            [
24331                (MASK as u32 & 0b11) * 4 + 0,
24332                (MASK as u32 & 0b11) * 4 + 1,
24333                (MASK as u32 & 0b11) * 4 + 2,
24334                (MASK as u32 & 0b11) * 4 + 3,
24335                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24336                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24337                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24338                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24339                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24340                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24341                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24342                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24343                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24344                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24345                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24346                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24347            ],
24348        );
24349        transmute(r)
24350    }
24351}
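
// Scalar sketch of the lane selection above (illustrative only; `_shuffle_i32x4_model` is
// a hypothetical helper). Treating each source as four 128-bit lanes of four i32s: the two
// low 2-bit fields of MASK pick lanes of `a` for result lanes 0-1, and the two high fields
// pick lanes of `b` for result lanes 2-3. For example, MASK = 0b01_00_01_00 yields
// [a lane 0, a lane 1, b lane 0, b lane 1].
#[cfg(test)]
#[allow(dead_code)]
fn _shuffle_i32x4_model(mask: u8, a: [[i32; 4]; 4], b: [[i32; 4]; 4]) -> [[i32; 4]; 4] {
    [
        a[(mask & 0b11) as usize],
        a[((mask >> 2) & 0b11) as usize],
        b[((mask >> 4) & 0b11) as usize],
        b[((mask >> 6) & 0b11) as usize],
    ]
}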
24352
24353/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24354///
24355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
24356#[inline]
24357#[target_feature(enable = "avx512f")]
24358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24359#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24360#[rustc_legacy_const_generics(4)]
24361pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
24362    src: __m512i,
24363    k: __mmask16,
24364    a: __m512i,
24365    b: __m512i,
24366) -> __m512i {
24367    unsafe {
24368        static_assert_uimm_bits!(MASK, 8);
24369        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
24370        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24371    }
24372}
24373
24374/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24375///
24376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
24377#[inline]
24378#[target_feature(enable = "avx512f")]
24379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24380#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24381#[rustc_legacy_const_generics(3)]
24382pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
24383    k: __mmask16,
24384    a: __m512i,
24385    b: __m512i,
24386) -> __m512i {
24387    unsafe {
24388        static_assert_uimm_bits!(MASK, 8);
24389        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
24390        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24391    }
24392}
24393
24394/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24395///
24396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
24397#[inline]
24398#[target_feature(enable = "avx512f,avx512vl")]
24399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24400#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
24401#[rustc_legacy_const_generics(2)]
24402pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24403    unsafe {
24404        static_assert_uimm_bits!(MASK, 8);
24405        let a = a.as_i32x8();
24406        let b = b.as_i32x8();
24407        let r: i32x8 = simd_shuffle!(
24408            a,
24409            b,
24410            [
24411                (MASK as u32 & 0b1) * 4 + 0,
24412                (MASK as u32 & 0b1) * 4 + 1,
24413                (MASK as u32 & 0b1) * 4 + 2,
24414                (MASK as u32 & 0b1) * 4 + 3,
24415                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24416                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24417                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24418                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24419            ],
24420        );
24421        transmute(r)
24422    }
24423}
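
// Scalar sketch for the 256-bit form (illustrative only; `_shuffle_i32x4_256_model` is a
// hypothetical helper): only the two low bits of MASK are used. Bit 0 picks the 128-bit
// half of `a` for the low result half, and bit 1 picks the half of `b` for the high half.
#[cfg(test)]
#[allow(dead_code)]
fn _shuffle_i32x4_256_model(mask: u8, a: [[i32; 4]; 2], b: [[i32; 4]; 2]) -> [[i32; 4]; 2] {
    [a[(mask & 0b1) as usize], b[((mask >> 1) & 0b1) as usize]]
}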
24424
24425/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24426///
24427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
24428#[inline]
24429#[target_feature(enable = "avx512f,avx512vl")]
24430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24431#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24432#[rustc_legacy_const_generics(4)]
24433pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
24434    src: __m256i,
24435    k: __mmask8,
24436    a: __m256i,
24437    b: __m256i,
24438) -> __m256i {
24439    unsafe {
24440        static_assert_uimm_bits!(MASK, 8);
24441        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
24442        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24443    }
24444}
24445
24446/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24447///
24448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
24449#[inline]
24450#[target_feature(enable = "avx512f,avx512vl")]
24451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24452#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24453#[rustc_legacy_const_generics(3)]
24454pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24455    unsafe {
24456        static_assert_uimm_bits!(MASK, 8);
24457        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
24458        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
24459    }
24460}
24461
24462/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24463///
24464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
24465#[inline]
24466#[target_feature(enable = "avx512f")]
24467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24468#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24469#[rustc_legacy_const_generics(2)]
24470pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24471    unsafe {
24472        static_assert_uimm_bits!(MASK, 8);
24473        let a = a.as_i64x8();
24474        let b = b.as_i64x8();
24475        let r: i64x8 = simd_shuffle!(
24476            a,
24477            b,
24478            [
24479                (MASK as u32 & 0b11) * 2 + 0,
24480                (MASK as u32 & 0b11) * 2 + 1,
24481                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24482                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24483                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24484                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24485                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24486                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24487            ],
24488        );
24489        transmute(r)
24490    }
24491}
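
// Same lane-selection rule as the 32-bit variant, sketched for 64-bit elements
// (illustrative only; `_shuffle_i64x2_model` is a hypothetical helper): each 2-bit field
// of MASK now picks a 128-bit lane holding two i64s.
#[cfg(test)]
#[allow(dead_code)]
fn _shuffle_i64x2_model(mask: u8, a: [[i64; 2]; 4], b: [[i64; 2]; 4]) -> [[i64; 2]; 4] {
    [
        a[(mask & 0b11) as usize],
        a[((mask >> 2) & 0b11) as usize],
        b[((mask >> 4) & 0b11) as usize],
        b[((mask >> 6) & 0b11) as usize],
    ]
}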
24492
24493/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24494///
24495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
24496#[inline]
24497#[target_feature(enable = "avx512f")]
24498#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24499#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24500#[rustc_legacy_const_generics(4)]
24501pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
24502    src: __m512i,
24503    k: __mmask8,
24504    a: __m512i,
24505    b: __m512i,
24506) -> __m512i {
24507    unsafe {
24508        static_assert_uimm_bits!(MASK, 8);
24509        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
24510        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
24511    }
24512}
24513
24514/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24515///
24516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
24517#[inline]
24518#[target_feature(enable = "avx512f")]
24519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24520#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24521#[rustc_legacy_const_generics(3)]
24522pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
24523    unsafe {
24524        static_assert_uimm_bits!(MASK, 8);
24525        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
24526        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
24527    }
24528}
24529
24530/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24531///
24532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
24533#[inline]
24534#[target_feature(enable = "avx512f,avx512vl")]
24535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24536#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
24537#[rustc_legacy_const_generics(2)]
24538pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24539    unsafe {
24540        static_assert_uimm_bits!(MASK, 8);
24541        let a = a.as_i64x4();
24542        let b = b.as_i64x4();
24543        let r: i64x4 = simd_shuffle!(
24544            a,
24545            b,
24546            [
24547                (MASK as u32 & 0b1) * 2 + 0,
24548                (MASK as u32 & 0b1) * 2 + 1,
24549                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24550                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24551            ],
24552        );
24553        transmute(r)
24554    }
24555}
24556
24557/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24558///
24559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
24560#[inline]
24561#[target_feature(enable = "avx512f,avx512vl")]
24562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24563#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24564#[rustc_legacy_const_generics(4)]
24565pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
24566    src: __m256i,
24567    k: __mmask8,
24568    a: __m256i,
24569    b: __m256i,
24570) -> __m256i {
24571    unsafe {
24572        static_assert_uimm_bits!(MASK, 8);
24573        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
24574        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24575    }
24576}
24577
24578/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24579///
24580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
24581#[inline]
24582#[target_feature(enable = "avx512f,avx512vl")]
24583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24584#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24585#[rustc_legacy_const_generics(3)]
24586pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24587    unsafe {
24588        static_assert_uimm_bits!(MASK, 8);
24589        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
24590        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24591    }
24592}
24593
24594/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24595///
24596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
24597#[inline]
24598#[target_feature(enable = "avx512f")]
24599#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24600#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generates vshuff64x2
24601#[rustc_legacy_const_generics(2)]
24602pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24603    unsafe {
24604        static_assert_uimm_bits!(MASK, 8);
24605        let a = a.as_f32x16();
24606        let b = b.as_f32x16();
24607        let r: f32x16 = simd_shuffle!(
24608            a,
24609            b,
24610            [
24611                (MASK as u32 & 0b11) * 4 + 0,
24612                (MASK as u32 & 0b11) * 4 + 1,
24613                (MASK as u32 & 0b11) * 4 + 2,
24614                (MASK as u32 & 0b11) * 4 + 3,
24615                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24616                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24617                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24618                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24619                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24620                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24621                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24622                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24623                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24624                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24625                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24626                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24627            ],
24628        );
24629        transmute(r)
24630    }
24631}
24632
24633/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24634///
24635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
24636#[inline]
24637#[target_feature(enable = "avx512f")]
24638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24639#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24640#[rustc_legacy_const_generics(4)]
24641pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
24642    src: __m512,
24643    k: __mmask16,
24644    a: __m512,
24645    b: __m512,
24646) -> __m512 {
24647    unsafe {
24648        static_assert_uimm_bits!(MASK, 8);
24649        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
24650        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24651    }
24652}
24653
24654/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24655///
24656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
24657#[inline]
24658#[target_feature(enable = "avx512f")]
24659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24660#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24661#[rustc_legacy_const_generics(3)]
24662pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24663    unsafe {
24664        static_assert_uimm_bits!(MASK, 8);
24665        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
24666        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24667    }
24668}
24669
24670/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24671///
24672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
24673#[inline]
24674#[target_feature(enable = "avx512f,avx512vl")]
24675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24676#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
24677#[rustc_legacy_const_generics(2)]
24678pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
24679    unsafe {
24680        static_assert_uimm_bits!(MASK, 8);
24681        let a = a.as_f32x8();
24682        let b = b.as_f32x8();
24683        let r: f32x8 = simd_shuffle!(
24684            a,
24685            b,
24686            [
24687                (MASK as u32 & 0b1) * 4 + 0,
24688                (MASK as u32 & 0b1) * 4 + 1,
24689                (MASK as u32 & 0b1) * 4 + 2,
24690                (MASK as u32 & 0b1) * 4 + 3,
24691                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24692                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24693                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24694                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24695            ],
24696        );
24697        transmute(r)
24698    }
24699}
24700
24701/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24702///
24703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
24704#[inline]
24705#[target_feature(enable = "avx512f,avx512vl")]
24706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24707#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24708#[rustc_legacy_const_generics(4)]
24709pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
24710    src: __m256,
24711    k: __mmask8,
24712    a: __m256,
24713    b: __m256,
24714) -> __m256 {
24715    unsafe {
24716        static_assert_uimm_bits!(MASK, 8);
24717        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
24718        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24719    }
24720}
24721
24722/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24723///
24724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
24725#[inline]
24726#[target_feature(enable = "avx512f,avx512vl")]
24727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24728#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24729#[rustc_legacy_const_generics(3)]
24730pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24731    unsafe {
24732        static_assert_uimm_bits!(MASK, 8);
24733        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
24734        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24735    }
24736}
24737
24738/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24739///
24740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
24741#[inline]
24742#[target_feature(enable = "avx512f")]
24743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24744#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24745#[rustc_legacy_const_generics(2)]
24746pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24747    unsafe {
24748        static_assert_uimm_bits!(MASK, 8);
24749        let a = a.as_f64x8();
24750        let b = b.as_f64x8();
24751        let r: f64x8 = simd_shuffle!(
24752            a,
24753            b,
24754            [
24755                (MASK as u32 & 0b11) * 2 + 0,
24756                (MASK as u32 & 0b11) * 2 + 1,
24757                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24758                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24759                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24760                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24761                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24762                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24763            ],
24764        );
24765        transmute(r)
24766    }
24767}
24768
24769/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24770///
24771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
24772#[inline]
24773#[target_feature(enable = "avx512f")]
24774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24775#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24776#[rustc_legacy_const_generics(4)]
24777pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
24778    src: __m512d,
24779    k: __mmask8,
24780    a: __m512d,
24781    b: __m512d,
24782) -> __m512d {
24783    unsafe {
24784        static_assert_uimm_bits!(MASK, 8);
24785        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
24786        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24787    }
24788}
24789
24790/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24791///
24792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
24793#[inline]
24794#[target_feature(enable = "avx512f")]
24795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24796#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24797#[rustc_legacy_const_generics(3)]
24798pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24799    unsafe {
24800        static_assert_uimm_bits!(MASK, 8);
24801        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
24802        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24803    }
24804}
24805
24806/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24807///
24808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
24809#[inline]
24810#[target_feature(enable = "avx512f,avx512vl")]
24811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24812#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
24813#[rustc_legacy_const_generics(2)]
24814pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
24815    unsafe {
24816        static_assert_uimm_bits!(MASK, 8);
24817        let a = a.as_f64x4();
24818        let b = b.as_f64x4();
24819        let r: f64x4 = simd_shuffle!(
24820            a,
24821            b,
24822            [
24823                (MASK as u32 & 0b1) * 2 + 0,
24824                (MASK as u32 & 0b1) * 2 + 1,
24825                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24826                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24827            ],
24828        );
24829        transmute(r)
24830    }
24831}
24832
24833/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24834///
24835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
24836#[inline]
24837#[target_feature(enable = "avx512f,avx512vl")]
24838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24839#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24840#[rustc_legacy_const_generics(4)]
24841pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
24842    src: __m256d,
24843    k: __mmask8,
24844    a: __m256d,
24845    b: __m256d,
24846) -> __m256d {
24847    unsafe {
24848        static_assert_uimm_bits!(MASK, 8);
24849        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
24850        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24851    }
24852}
24853
24854/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24855///
24856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
24857#[inline]
24858#[target_feature(enable = "avx512f,avx512vl")]
24859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24860#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24861#[rustc_legacy_const_generics(3)]
24862pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24863    unsafe {
24864        static_assert_uimm_bits!(MASK, 8);
24865        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
24866        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24867    }
24868}
24869
24870/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24871///
24872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
24873#[inline]
24874#[target_feature(enable = "avx512f")]
24875#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24876#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24877#[rustc_legacy_const_generics(1)]
24878pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
24879    unsafe {
24880        static_assert_uimm_bits!(IMM8, 2);
24881        match IMM8 & 0x3 {
24882            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
24883            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
24884            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
24885            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
24886        }
24887    }
24888}
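
// Scalar sketch (illustrative only; `_extractf32x4_model` is a hypothetical helper): the
// two low bits of imm8 simply select which of the four 128-bit lanes of `a` becomes the
// 128-bit result.
#[cfg(test)]
#[allow(dead_code)]
fn _extractf32x4_model(imm8: u8, a: [[f32; 4]; 4]) -> [f32; 4] {
    a[(imm8 & 0b11) as usize]
}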
24889
24890/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24891///
24892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
24893#[inline]
24894#[target_feature(enable = "avx512f")]
24895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24896#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24897#[rustc_legacy_const_generics(3)]
24898pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
24899    unsafe {
24900        static_assert_uimm_bits!(IMM8, 2);
24901        let r = _mm512_extractf32x4_ps::<IMM8>(a);
24902        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24903    }
24904}
24905
24906/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24907///
24908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
24909#[inline]
24910#[target_feature(enable = "avx512f")]
24911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24912#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24913#[rustc_legacy_const_generics(2)]
24914pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
24915    unsafe {
24916        static_assert_uimm_bits!(IMM8, 2);
24917        let r = _mm512_extractf32x4_ps::<IMM8>(a);
24918        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24919    }
24920}
24921
24922/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24923///
24924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
24925#[inline]
24926#[target_feature(enable = "avx512f,avx512vl")]
24927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24928#[cfg_attr(
24929    test,
24930    assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
24931)]
24932#[rustc_legacy_const_generics(1)]
24933pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
24934    unsafe {
24935        static_assert_uimm_bits!(IMM8, 1);
24936        match IMM8 & 0x1 {
24937            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
24938            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
24939        }
24940    }
24941}
24942
24943/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24944///
24945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
24946#[inline]
24947#[target_feature(enable = "avx512f,avx512vl")]
24948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24949#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
24950#[rustc_legacy_const_generics(3)]
24951pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
24952    unsafe {
24953        static_assert_uimm_bits!(IMM8, 1);
24954        let r = _mm256_extractf32x4_ps::<IMM8>(a);
24955        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24956    }
24957}
24958
24959/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24960///
24961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
24962#[inline]
24963#[target_feature(enable = "avx512f,avx512vl")]
24964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24965#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
24966#[rustc_legacy_const_generics(2)]
24967pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
24968    unsafe {
24969        static_assert_uimm_bits!(IMM8, 1);
24970        let r = _mm256_extractf32x4_ps::<IMM8>(a);
24971        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24972    }
24973}
24974
24975/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
24976///
24977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
24978#[inline]
24979#[target_feature(enable = "avx512f")]
24980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24981#[cfg_attr(
24982    test,
24983    assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
24984)]
24985#[rustc_legacy_const_generics(1)]
24986pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
24987    unsafe {
24988        static_assert_uimm_bits!(IMM1, 1);
24989        match IMM1 {
24990            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
24991            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
24992        }
24993    }
24994}
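
// Scalar sketch (illustrative only; `_extracti64x4_model` is a hypothetical helper): IMM1
// is a single bit choosing the low or high 256-bit half of `a`.
#[cfg(test)]
#[allow(dead_code)]
fn _extracti64x4_model(imm1: u8, a: [[i64; 4]; 2]) -> [i64; 4] {
    a[(imm1 & 0b1) as usize]
}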
24995
24996/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24997///
24998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
24999#[inline]
25000#[target_feature(enable = "avx512f")]
25001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25002#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25003#[rustc_legacy_const_generics(3)]
25004pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
25005    src: __m256i,
25006    k: __mmask8,
25007    a: __m512i,
25008) -> __m256i {
25009    unsafe {
25010        static_assert_uimm_bits!(IMM1, 1);
25011        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
25012        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
25013    }
25014}
25015
25016/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25017///
25018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
25019#[inline]
25020#[target_feature(enable = "avx512f")]
25021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25022#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25023#[rustc_legacy_const_generics(2)]
25024pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
25025    unsafe {
25026        static_assert_uimm_bits!(IMM1, 1);
25027        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
25028        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
25029    }
25030}
25031
25032/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
25033///
25034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
25035#[inline]
25036#[target_feature(enable = "avx512f")]
25037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25038#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25039#[rustc_legacy_const_generics(1)]
25040pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
25041    unsafe {
25042        static_assert_uimm_bits!(IMM8, 1);
25043        match IMM8 & 0x1 {
25044            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
25045            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
25046        }
25047    }
25048}
25049
25050/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25051///
25052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
25053#[inline]
25054#[target_feature(enable = "avx512f")]
25055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25056#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25057#[rustc_legacy_const_generics(3)]
25058pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
25059    src: __m256d,
25060    k: __mmask8,
25061    a: __m512d,
25062) -> __m256d {
25063    unsafe {
25064        static_assert_uimm_bits!(IMM8, 1);
25065        let r = _mm512_extractf64x4_pd::<IMM8>(a);
25066        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25067    }
25068}
25069
25070/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25071///
25072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25073#[inline]
25074#[target_feature(enable = "avx512f")]
25075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25076#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25077#[rustc_legacy_const_generics(2)]
25078pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25079    unsafe {
25080        static_assert_uimm_bits!(IMM8, 1);
25081        let r = _mm512_extractf64x4_pd::<IMM8>(a);
25082        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25083    }
25084}
25085
25086/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25087///
25088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
25089#[inline]
25090#[target_feature(enable = "avx512f")]
25091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25092#[cfg_attr(
25093    test,
25094    assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
25095)]
25096#[rustc_legacy_const_generics(1)]
25097pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25098    unsafe {
25099        static_assert_uimm_bits!(IMM2, 2);
25100        let a = a.as_i32x16();
25101        let zero = i32x16::ZERO;
25102        let extract: i32x4 = match IMM2 {
25103            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25104            1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25105            2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25106            _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25107        };
25108        transmute(extract)
25109    }
25110}
25111
25112/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25113///
25114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25115#[inline]
25116#[target_feature(enable = "avx512f")]
25117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25118#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25119#[rustc_legacy_const_generics(3)]
25120pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25121    src: __m128i,
25122    k: __mmask8,
25123    a: __m512i,
25124) -> __m128i {
25125    unsafe {
25126        static_assert_uimm_bits!(IMM2, 2);
25127        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
25128        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25129    }
25130}
25131
25132/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25133///
25134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
25135#[inline]
25136#[target_feature(enable = "avx512f")]
25137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25138#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25139#[rustc_legacy_const_generics(2)]
25140pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
25141    unsafe {
25142        static_assert_uimm_bits!(IMM2, 2);
25143        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
25144        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25145    }
25146}
25147
25148/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
25149///
25150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
25151#[inline]
25152#[target_feature(enable = "avx512f,avx512vl")]
25153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25154#[cfg_attr(
25155    test,
25156    assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
25157)]
25158#[rustc_legacy_const_generics(1)]
25159pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
25160    unsafe {
25161        static_assert_uimm_bits!(IMM1, 1);
25162        let a = a.as_i32x8();
25163        let zero = i32x8::ZERO;
25164        let extract: i32x4 = match IMM1 {
25165            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25166            _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25167        };
25168        transmute(extract)
25169    }
25170}
25171
25172/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25173///
25174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
25175#[inline]
25176#[target_feature(enable = "avx512f,avx512vl")]
25177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25178#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
25179#[rustc_legacy_const_generics(3)]
25180pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
25181    src: __m128i,
25182    k: __mmask8,
25183    a: __m256i,
25184) -> __m128i {
25185    unsafe {
25186        static_assert_uimm_bits!(IMM1, 1);
25187        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
25188        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25189    }
25190}
25191
25192/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25193///
25194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
25195#[inline]
25196#[target_feature(enable = "avx512f,avx512vl")]
25197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25198#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
25199#[rustc_legacy_const_generics(2)]
25200pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
25201    unsafe {
25202        static_assert_uimm_bits!(IMM1, 1);
25203        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
25204        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25205    }
25206}
25207
25208/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25209///
25210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
25211#[inline]
25212#[target_feature(enable = "avx512f")]
25213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25214#[cfg_attr(test, assert_instr(vmovsldup))]
25215pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
25216    unsafe {
25217        let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25218        transmute(r)
25219    }
25220}
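
// Scalar sketch of the shuffle pattern above (illustrative only; `_moveldup_model` is a
// hypothetical helper): each even-indexed element is duplicated into the odd slot that
// follows it, i.e. out[i] = a[i & !1].
#[cfg(test)]
#[allow(dead_code)]
fn _moveldup_model(a: [f32; 16]) -> [f32; 16] {
    let mut out = [0.0f32; 16];
    for i in 0..16 {
        out[i] = a[i & !1];
    }
    out
}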
25221
25222/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25223///
25224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
25225#[inline]
25226#[target_feature(enable = "avx512f")]
25227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25228#[cfg_attr(test, assert_instr(vmovsldup))]
25229pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25230    unsafe {
25231        let mov: f32x16 =
25232            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25233        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25234    }
25235}
25236
25237/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25238///
25239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
25240#[inline]
25241#[target_feature(enable = "avx512f")]
25242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25243#[cfg_attr(test, assert_instr(vmovsldup))]
25244pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
25245    unsafe {
25246        let mov: f32x16 =
25247            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
25248        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25249    }
25250}
25251
25252/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25253///
25254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
25255#[inline]
25256#[target_feature(enable = "avx512f,avx512vl")]
25257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25258#[cfg_attr(test, assert_instr(vmovsldup))]
25259pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25260    unsafe {
25261        let mov = _mm256_moveldup_ps(a);
25262        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25263    }
25264}
25265
25266/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25267///
25268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
25269#[inline]
25270#[target_feature(enable = "avx512f,avx512vl")]
25271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25272#[cfg_attr(test, assert_instr(vmovsldup))]
25273pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
25274    unsafe {
25275        let mov = _mm256_moveldup_ps(a);
25276        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25277    }
25278}
25279
25280/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25281///
25282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
25283#[inline]
25284#[target_feature(enable = "avx512f,avx512vl")]
25285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25286#[cfg_attr(test, assert_instr(vmovsldup))]
25287pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25288    unsafe {
25289        let mov = _mm_moveldup_ps(a);
25290        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25291    }
25292}
25293
25294/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25295///
25296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
25297#[inline]
25298#[target_feature(enable = "avx512f,avx512vl")]
25299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25300#[cfg_attr(test, assert_instr(vmovsldup))]
25301pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
25302    unsafe {
25303        let mov = _mm_moveldup_ps(a);
25304        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25305    }
25306}
25307
25308/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25309///
25310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
25311#[inline]
25312#[target_feature(enable = "avx512f")]
25313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25314#[cfg_attr(test, assert_instr(vmovshdup))]
25315pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
25316    unsafe {
25317        let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25318        transmute(r)
25319    }
25320}
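
// Scalar counterpart of the vmovshdup pattern above (illustrative only; `_movehdup_model`
// is a hypothetical helper): each odd-indexed element is duplicated into the even slot
// before it, i.e. out[i] = a[i | 1].
#[cfg(test)]
#[allow(dead_code)]
fn _movehdup_model(a: [f32; 16]) -> [f32; 16] {
    let mut out = [0.0f32; 16];
    for i in 0..16 {
        out[i] = a[i | 1];
    }
    out
}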
25321
25322/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25323///
25324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
25325#[inline]
25326#[target_feature(enable = "avx512f")]
25327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25328#[cfg_attr(test, assert_instr(vmovshdup))]
25329pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25330    unsafe {
25331        let mov: f32x16 =
25332            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25333        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25334    }
25335}
25336
25337/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25338///
25339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
25340#[inline]
25341#[target_feature(enable = "avx512f")]
25342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25343#[cfg_attr(test, assert_instr(vmovshdup))]
25344pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
25345    unsafe {
25346        let mov: f32x16 =
25347            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
25348        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25349    }
25350}
25351
25352/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25353///
25354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
25355#[inline]
25356#[target_feature(enable = "avx512f,avx512vl")]
25357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25358#[cfg_attr(test, assert_instr(vmovshdup))]
25359pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25360    unsafe {
25361        let mov = _mm256_movehdup_ps(a);
25362        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25363    }
25364}
25365
25366/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25367///
25368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
25369#[inline]
25370#[target_feature(enable = "avx512f,avx512vl")]
25371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25372#[cfg_attr(test, assert_instr(vmovshdup))]
25373pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
25374    unsafe {
25375        let mov = _mm256_movehdup_ps(a);
25376        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25377    }
25378}
25379
25380/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25381///
25382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
25383#[inline]
25384#[target_feature(enable = "avx512f,avx512vl")]
25385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25386#[cfg_attr(test, assert_instr(vmovshdup))]
25387pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25388    unsafe {
25389        let mov = _mm_movehdup_ps(a);
25390        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25391    }
25392}
25393
25394/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25395///
25396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
25397#[inline]
25398#[target_feature(enable = "avx512f,avx512vl")]
25399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25400#[cfg_attr(test, assert_instr(vmovshdup))]
25401pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
25402    unsafe {
25403        let mov = _mm_movehdup_ps(a);
25404        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25405    }
25406}
25407
25408/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
25409///
25410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
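///
/// A minimal usage sketch, assuming AVX-512F is enabled in the calling context:
///
/// ```ignore
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// // Every odd-indexed lane is overwritten by the even-indexed lane below it.
/// let r = _mm512_movedup_pd(a);
/// // r = [0, 0, 2, 2, 4, 4, 6, 6]
/// ```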
25411#[inline]
25412#[target_feature(enable = "avx512f")]
25413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25414#[cfg_attr(test, assert_instr(vmovddup))]
25415pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
25416    unsafe {
25417        let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25418        transmute(r)
25419    }
25420}
25421
25422/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25423///
25424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
25425#[inline]
25426#[target_feature(enable = "avx512f")]
25427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25428#[cfg_attr(test, assert_instr(vmovddup))]
25429pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
25430    unsafe {
25431        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25432        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
25433    }
25434}
25435
25436/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25437///
25438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
25439#[inline]
25440#[target_feature(enable = "avx512f")]
25441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25442#[cfg_attr(test, assert_instr(vmovddup))]
25443pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
25444    unsafe {
25445        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
25446        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
25447    }
25448}
25449
25450/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25451///
25452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
25453#[inline]
25454#[target_feature(enable = "avx512f,avx512vl")]
25455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25456#[cfg_attr(test, assert_instr(vmovddup))]
25457pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
25458    unsafe {
25459        let mov = _mm256_movedup_pd(a);
25460        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
25461    }
25462}
25463
25464/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25465///
25466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
25467#[inline]
25468#[target_feature(enable = "avx512f,avx512vl")]
25469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25470#[cfg_attr(test, assert_instr(vmovddup))]
25471pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
25472    unsafe {
25473        let mov = _mm256_movedup_pd(a);
25474        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
25475    }
25476}
25477
25478/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25479///
25480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
25481#[inline]
25482#[target_feature(enable = "avx512f,avx512vl")]
25483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25484#[cfg_attr(test, assert_instr(vmovddup))]
25485pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
25486    unsafe {
25487        let mov = _mm_movedup_pd(a);
25488        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
25489    }
25490}
25491
25492/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25493///
25494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
25495#[inline]
25496#[target_feature(enable = "avx512f,avx512vl")]
25497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25498#[cfg_attr(test, assert_instr(vmovddup))]
25499pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
25500    unsafe {
25501        let mov = _mm_movedup_pd(a);
25502        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
25503    }
25504}
25505
25506/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25507///
25508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
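///
/// Sketch of how `IMM8` picks the 128-bit chunk that is replaced, assuming an
/// AVX-512F-enabled caller:
///
/// ```ignore
/// let a = _mm512_set1_epi32(0);
/// let b = _mm_setr_epi32(1, 2, 3, 4);
/// // IMM8 = 2 replaces the third 128-bit chunk (lanes 8..12) with `b`.
/// let r = _mm512_inserti32x4::<2>(a, b);
/// // r = [0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0]
/// ```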
25509#[inline]
25510#[target_feature(enable = "avx512f")]
25511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25512#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] // should be vinserti32x4
25513#[rustc_legacy_const_generics(2)]
25514pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
25515    unsafe {
25516        static_assert_uimm_bits!(IMM8, 2);
25517        let a = a.as_i32x16();
25518        let b = _mm512_castsi128_si512(b).as_i32x16();
25519        let ret: i32x16 = match IMM8 & 0b11 {
25520            0 => {
25521                simd_shuffle!(
25522                    a,
25523                    b,
25524                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25525                )
25526            }
25527            1 => {
25528                simd_shuffle!(
25529                    a,
25530                    b,
25531                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25532                )
25533            }
25534            2 => {
25535                simd_shuffle!(
25536                    a,
25537                    b,
25538                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25539                )
25540            }
25541            _ => {
25542                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25543            }
25544        };
25545        transmute(ret)
25546    }
25547}
25548
25549/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25550///
25551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
25552#[inline]
25553#[target_feature(enable = "avx512f")]
25554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25555#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25556#[rustc_legacy_const_generics(4)]
25557pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
25558    src: __m512i,
25559    k: __mmask16,
25560    a: __m512i,
25561    b: __m128i,
25562) -> __m512i {
25563    unsafe {
25564        static_assert_uimm_bits!(IMM8, 2);
25565        let r = _mm512_inserti32x4::<IMM8>(a, b);
25566        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25567    }
25568}
25569
25570/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25571///
25572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
25573#[inline]
25574#[target_feature(enable = "avx512f")]
25575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25576#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25577#[rustc_legacy_const_generics(3)]
25578pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
25579    unsafe {
25580        static_assert_uimm_bits!(IMM8, 2);
25581        let r = _mm512_inserti32x4::<IMM8>(a, b);
25582        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25583    }
25584}
25585
25586/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25587///
25588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
25589#[inline]
25590#[target_feature(enable = "avx512f,avx512vl")]
25591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25592#[cfg_attr(
25593    test,
25594    assert_instr(vinsert, IMM8 = 1) // should be vinserti32x4
25595)]
25596#[rustc_legacy_const_generics(2)]
25597pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
25598    unsafe {
25599        static_assert_uimm_bits!(IMM8, 1);
25600        let a = a.as_i32x8();
25601        let b = _mm256_castsi128_si256(b).as_i32x8();
25602        let ret: i32x8 = match IMM8 & 0b1 {
25603            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25604            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25605        };
25606        transmute(ret)
25607    }
25608}
25609
25610/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25611///
25612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
25613#[inline]
25614#[target_feature(enable = "avx512f,avx512vl")]
25615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25616#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
25617#[rustc_legacy_const_generics(4)]
25618pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
25619    src: __m256i,
25620    k: __mmask8,
25621    a: __m256i,
25622    b: __m128i,
25623) -> __m256i {
25624    unsafe {
25625        static_assert_uimm_bits!(IMM8, 1);
25626        let r = _mm256_inserti32x4::<IMM8>(a, b);
25627        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25628    }
25629}
25630
25631/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25632///
25633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
25634#[inline]
25635#[target_feature(enable = "avx512f,avx512vl")]
25636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25637#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
25638#[rustc_legacy_const_generics(3)]
25639pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
25640    unsafe {
25641        static_assert_uimm_bits!(IMM8, 1);
25642        let r = _mm256_inserti32x4::<IMM8>(a, b);
25643        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25644    }
25645}
25646
25647/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
25648///
25649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
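///
/// Sketch of how `IMM8` selects the 256-bit half that is replaced, assuming an
/// AVX-512F-enabled caller:
///
/// ```ignore
/// let a = _mm512_set1_epi64(0);
/// let b = _mm256_setr_epi64x(1, 2, 3, 4);
/// // IMM8 = 1 replaces the upper 256 bits (lanes 4..8) with `b`.
/// let r = _mm512_inserti64x4::<1>(a, b);
/// // r = [0, 0, 0, 0, 1, 2, 3, 4]
/// ```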
25650#[inline]
25651#[target_feature(enable = "avx512f")]
25652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25653#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] // should be vinserti64x4
25654#[rustc_legacy_const_generics(2)]
25655pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
25656    unsafe {
25657        static_assert_uimm_bits!(IMM8, 1);
25658        let b = _mm512_castsi256_si512(b);
25659        match IMM8 & 0b1 {
25660            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25661            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25662        }
25663    }
25664}
25665
25666/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25667///
25668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
25669#[inline]
25670#[target_feature(enable = "avx512f")]
25671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25672#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25673#[rustc_legacy_const_generics(4)]
25674pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
25675    src: __m512i,
25676    k: __mmask8,
25677    a: __m512i,
25678    b: __m256i,
25679) -> __m512i {
25680    unsafe {
25681        static_assert_uimm_bits!(IMM8, 1);
25682        let r = _mm512_inserti64x4::<IMM8>(a, b);
25683        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25684    }
25685}
25686
25687/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25688///
25689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
25690#[inline]
25691#[target_feature(enable = "avx512f")]
25692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25693#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25694#[rustc_legacy_const_generics(3)]
25695pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
25696    unsafe {
25697        static_assert_uimm_bits!(IMM8, 1);
25698        let r = _mm512_inserti64x4::<IMM8>(a, b);
25699        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25700    }
25701}
25702
25703/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25704///
25705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
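///
/// Illustrative sketch, assuming AVX-512F is enabled in the calling context:
///
/// ```ignore
/// let a = _mm512_set1_ps(0.);
/// let b = _mm_setr_ps(1., 2., 3., 4.);
/// // IMM8 = 3 replaces the highest 128-bit chunk (lanes 12..16) with `b`.
/// let r = _mm512_insertf32x4::<3>(a, b);
/// // r = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4]
/// ```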
25706#[inline]
25707#[target_feature(enable = "avx512f")]
25708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25709#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25710#[rustc_legacy_const_generics(2)]
25711pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
25712    unsafe {
25713        static_assert_uimm_bits!(IMM8, 2);
25714        let b = _mm512_castps128_ps512(b);
25715        match IMM8 & 0b11 {
25716            0 => {
25717                simd_shuffle!(
25718                    a,
25719                    b,
25720                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25721                )
25722            }
25723            1 => {
25724                simd_shuffle!(
25725                    a,
25726                    b,
25727                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25728                )
25729            }
25730            2 => {
25731                simd_shuffle!(
25732                    a,
25733                    b,
25734                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25735                )
25736            }
25737            _ => {
25738                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25739            }
25740        }
25741    }
25742}
25743
25744/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25745///
25746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
25747#[inline]
25748#[target_feature(enable = "avx512f")]
25749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25750#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25751#[rustc_legacy_const_generics(4)]
25752pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
25753    src: __m512,
25754    k: __mmask16,
25755    a: __m512,
25756    b: __m128,
25757) -> __m512 {
25758    unsafe {
25759        static_assert_uimm_bits!(IMM8, 2);
25760        let r = _mm512_insertf32x4::<IMM8>(a, b);
25761        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25762    }
25763}
25764
25765/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25766///
25767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
25768#[inline]
25769#[target_feature(enable = "avx512f")]
25770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25771#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25772#[rustc_legacy_const_generics(3)]
25773pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
25774    unsafe {
25775        static_assert_uimm_bits!(IMM8, 2);
25776        let r = _mm512_insertf32x4::<IMM8>(a, b);
25777        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25778    }
25779}
25780
25781/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25782///
25783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
25784#[inline]
25785#[target_feature(enable = "avx512f,avx512vl")]
25786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25787#[cfg_attr(
25788    test,
25789    assert_instr(vinsert, IMM8 = 1) // should be vinsertf32x4
25790)]
25791#[rustc_legacy_const_generics(2)]
25792pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
25793    unsafe {
25794        static_assert_uimm_bits!(IMM8, 1);
25795        let b = _mm256_castps128_ps256(b);
25796        match IMM8 & 0b1 {
25797            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25798            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25799        }
25800    }
25801}
25802
25803/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25804///
25805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
25806#[inline]
25807#[target_feature(enable = "avx512f,avx512vl")]
25808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25809#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
25810#[rustc_legacy_const_generics(4)]
25811pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
25812    src: __m256,
25813    k: __mmask8,
25814    a: __m256,
25815    b: __m128,
25816) -> __m256 {
25817    unsafe {
25818        static_assert_uimm_bits!(IMM8, 1);
25819        let r = _mm256_insertf32x4::<IMM8>(a, b);
25820        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25821    }
25822}
25823
25824/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25825///
25826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
25827#[inline]
25828#[target_feature(enable = "avx512f,avx512vl")]
25829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25830#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
25831#[rustc_legacy_const_generics(3)]
25832pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
25833    unsafe {
25834        static_assert_uimm_bits!(IMM8, 1);
25835        let r = _mm256_insertf32x4::<IMM8>(a, b);
25836        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
25837    }
25838}
25839
25840/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
25841///
25842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
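///
/// Illustrative sketch, assuming AVX-512F is enabled in the calling context:
///
/// ```ignore
/// let a = _mm512_set1_pd(0.);
/// let b = _mm256_setr_pd(1., 2., 3., 4.);
/// // IMM8 = 0 replaces the lower 256 bits (lanes 0..4) with `b`.
/// let r = _mm512_insertf64x4::<0>(a, b);
/// // r = [1, 2, 3, 4, 0, 0, 0, 0]
/// ```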
25843#[inline]
25844#[target_feature(enable = "avx512f")]
25845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25846#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25847#[rustc_legacy_const_generics(2)]
25848pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
25849    unsafe {
25850        static_assert_uimm_bits!(IMM8, 1);
25851        let b = _mm512_castpd256_pd512(b);
25852        match IMM8 & 0b1 {
25853            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25854            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25855        }
25856    }
25857}
25858
25859/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25860///
25861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
25862#[inline]
25863#[target_feature(enable = "avx512f")]
25864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25865#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25866#[rustc_legacy_const_generics(4)]
25867pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
25868    src: __m512d,
25869    k: __mmask8,
25870    a: __m512d,
25871    b: __m256d,
25872) -> __m512d {
25873    unsafe {
25874        static_assert_uimm_bits!(IMM8, 1);
25875        let r = _mm512_insertf64x4::<IMM8>(a, b);
25876        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25877    }
25878}
25879
25880/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25881///
25882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
25883#[inline]
25884#[target_feature(enable = "avx512f")]
25885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25886#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25887#[rustc_legacy_const_generics(3)]
25888pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
25889    unsafe {
25890        static_assert_uimm_bits!(IMM8, 1);
25891        let r = _mm512_insertf64x4::<IMM8>(a, b);
25892        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25893    }
25894}
25895
25896/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25897///
25898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
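///
/// Sketch of the per-lane interleaving, assuming an AVX-512F-enabled caller:
///
/// ```ignore
/// let a = _mm512_setr_epi32(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// );
/// let b = _mm512_setr_epi32(
///     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// // Within each 128-bit lane, the upper two elements of `a` and `b` interleave.
/// let r = _mm512_unpackhi_epi32(a, b);
/// // r = [2, 18, 3, 19, 6, 22, 7, 23, 10, 26, 11, 27, 14, 30, 15, 31]
/// ```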
25899#[inline]
25900#[target_feature(enable = "avx512f")]
25901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25902#[cfg_attr(test, assert_instr(vunpckhps))] // should be vpunpckhdq
25903pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
25904    unsafe {
25905        let a = a.as_i32x16();
25906        let b = b.as_i32x16();
25907        #[rustfmt::skip]
25908        let r: i32x16 = simd_shuffle!(
25909            a, b,
25910            [ 2, 18, 3, 19,
25911              2 + 4, 18 + 4, 3 + 4, 19 + 4,
25912              2 + 8, 18 + 8, 3 + 8, 19 + 8,
25913              2 + 12, 18 + 12, 3 + 12, 19 + 12],
25914        );
25915        transmute(r)
25916    }
25917}
25918
25919/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25920///
25921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
25922#[inline]
25923#[target_feature(enable = "avx512f")]
25924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25925#[cfg_attr(test, assert_instr(vpunpckhdq))]
25926pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25927    unsafe {
25928        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
25929        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
25930    }
25931}
25932
25933/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25934///
25935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
25936#[inline]
25937#[target_feature(enable = "avx512f")]
25938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25939#[cfg_attr(test, assert_instr(vpunpckhdq))]
25940pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25941    unsafe {
25942        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
25943        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
25944    }
25945}
25946
25947/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25948///
25949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
25950#[inline]
25951#[target_feature(enable = "avx512f,avx512vl")]
25952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25953#[cfg_attr(test, assert_instr(vpunpckhdq))]
25954pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25955    unsafe {
25956        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
25957        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
25958    }
25959}
25960
25961/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25962///
25963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
25964#[inline]
25965#[target_feature(enable = "avx512f,avx512vl")]
25966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25967#[cfg_attr(test, assert_instr(vpunpckhdq))]
25968pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25969    unsafe {
25970        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
25971        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
25972    }
25973}
25974
25975/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25976///
25977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
25978#[inline]
25979#[target_feature(enable = "avx512f,avx512vl")]
25980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25981#[cfg_attr(test, assert_instr(vpunpckhdq))]
25982pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25983    unsafe {
25984        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
25985        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
25986    }
25987}
25988
25989/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25990///
25991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
25992#[inline]
25993#[target_feature(enable = "avx512f,avx512vl")]
25994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25995#[cfg_attr(test, assert_instr(vpunpckhdq))]
25996pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25997    unsafe {
25998        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
25999        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
26000    }
26001}
26002
26003/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
26004///
26005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
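///
/// Sketch of the per-lane interleaving, assuming an AVX-512F-enabled caller:
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
/// // Within each 128-bit lane, the upper 64-bit element of `a` and `b` interleave.
/// let r = _mm512_unpackhi_epi64(a, b);
/// // r = [1, 9, 3, 11, 5, 13, 7, 15]
/// ```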
26006#[inline]
26007#[target_feature(enable = "avx512f")]
26008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26009#[cfg_attr(test, assert_instr(vunpckhpd))] // should be vpunpckhqdq
26010pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
26011    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26012}
26013
26014/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26015///
26016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
26017#[inline]
26018#[target_feature(enable = "avx512f")]
26019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26020#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26021pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26022    unsafe {
26023        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
26024        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
26025    }
26026}
26027
26028/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26029///
26030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26031#[inline]
26032#[target_feature(enable = "avx512f")]
26033#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26034#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26035pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26036    unsafe {
26037        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
26038        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
26039    }
26040}
26041
26042/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26043///
26044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
26045#[inline]
26046#[target_feature(enable = "avx512f,avx512vl")]
26047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26048#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26049pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26050    unsafe {
26051        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
26052        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
26053    }
26054}
26055
26056/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26057///
26058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
26059#[inline]
26060#[target_feature(enable = "avx512f,avx512vl")]
26061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26062#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26063pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26064    unsafe {
26065        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
26066        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
26067    }
26068}
26069
26070/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26071///
26072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
26073#[inline]
26074#[target_feature(enable = "avx512f,avx512vl")]
26075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26076#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26077pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26078    unsafe {
26079        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
26080        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
26081    }
26082}
26083
26084/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26085///
26086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
26087#[inline]
26088#[target_feature(enable = "avx512f,avx512vl")]
26089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26090#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26091pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26092    unsafe {
26093        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
26094        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
26095    }
26096}
26097
26098/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
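///
/// Illustrative sketch (same lane pattern as `_mm512_unpackhi_epi32`), assuming
/// an AVX-512F-enabled caller:
///
/// ```ignore
/// let a = _mm512_setr_ps(
///     0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
/// );
/// let b = _mm512_setr_ps(
///     16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31.,
/// );
/// let r = _mm512_unpackhi_ps(a, b);
/// // r = [2, 18, 3, 19, 6, 22, 7, 23, 10, 26, 11, 27, 14, 30, 15, 31]
/// ```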
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26104#[cfg_attr(test, assert_instr(vunpckhps))]
26105pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
26106    unsafe {
26107        #[rustfmt::skip]
26108        simd_shuffle!(
26109            a, b,
26110            [ 2, 18, 3, 19,
26111              2 + 4, 18 + 4, 3 + 4, 19 + 4,
26112              2 + 8, 18 + 8, 3 + 8, 19 + 8,
26113              2 + 12, 18 + 12, 3 + 12, 19 + 12],
26114        )
26115    }
26116}
26117
26118/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26119///
26120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
26121#[inline]
26122#[target_feature(enable = "avx512f")]
26123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26124#[cfg_attr(test, assert_instr(vunpckhps))]
26125pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26126    unsafe {
26127        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
26128        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
26129    }
26130}
26131
26132/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26133///
26134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
26135#[inline]
26136#[target_feature(enable = "avx512f")]
26137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26138#[cfg_attr(test, assert_instr(vunpckhps))]
26139pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26140    unsafe {
26141        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
26142        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
26143    }
26144}
26145
26146/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26147///
26148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
26149#[inline]
26150#[target_feature(enable = "avx512f,avx512vl")]
26151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26152#[cfg_attr(test, assert_instr(vunpckhps))]
26153pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26154    unsafe {
26155        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
26156        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
26157    }
26158}
26159
26160/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26161///
26162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
26163#[inline]
26164#[target_feature(enable = "avx512f,avx512vl")]
26165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26166#[cfg_attr(test, assert_instr(vunpckhps))]
26167pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26168    unsafe {
26169        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
26170        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
26171    }
26172}
26173
26174/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26175///
26176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
26177#[inline]
26178#[target_feature(enable = "avx512f,avx512vl")]
26179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26180#[cfg_attr(test, assert_instr(vunpckhps))]
26181pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26182    unsafe {
26183        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
26184        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
26185    }
26186}
26187
26188/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26189///
26190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
26191#[inline]
26192#[target_feature(enable = "avx512f,avx512vl")]
26193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26194#[cfg_attr(test, assert_instr(vunpckhps))]
26195pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26196    unsafe {
26197        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
26198        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
26199    }
26200}
26201
26202/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26203///
26204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
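///
/// Illustrative sketch, assuming an AVX-512F-enabled caller:
///
/// ```ignore
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// let b = _mm512_setr_pd(8., 9., 10., 11., 12., 13., 14., 15.);
/// // Within each 128-bit lane, the upper element of `a` and `b` interleave.
/// let r = _mm512_unpackhi_pd(a, b);
/// // r = [1, 9, 3, 11, 5, 13, 7, 15]
/// ```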
26205#[inline]
26206#[target_feature(enable = "avx512f")]
26207#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26208#[cfg_attr(test, assert_instr(vunpckhpd))]
26209pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
26210    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26211}
26212
26213/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26214///
26215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
26216#[inline]
26217#[target_feature(enable = "avx512f")]
26218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26219#[cfg_attr(test, assert_instr(vunpckhpd))]
26220pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26221    unsafe {
26222        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
26223        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
26224    }
26225}
26226
26227/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26228///
26229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
26230#[inline]
26231#[target_feature(enable = "avx512f")]
26232#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26233#[cfg_attr(test, assert_instr(vunpckhpd))]
26234pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26235    unsafe {
26236        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
26237        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
26238    }
26239}
26240
26241/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26242///
26243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
26244#[inline]
26245#[target_feature(enable = "avx512f,avx512vl")]
26246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26247#[cfg_attr(test, assert_instr(vunpckhpd))]
26248pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26249    unsafe {
26250        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
26251        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
26252    }
26253}
26254
26255/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26256///
26257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
26258#[inline]
26259#[target_feature(enable = "avx512f,avx512vl")]
26260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26261#[cfg_attr(test, assert_instr(vunpckhpd))]
26262pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26263    unsafe {
26264        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
26265        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
26266    }
26267}
26268
26269/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26270///
26271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
26272#[inline]
26273#[target_feature(enable = "avx512f,avx512vl")]
26274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26275#[cfg_attr(test, assert_instr(vunpckhpd))]
26276pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26277    unsafe {
26278        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
26279        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
26280    }
26281}
26282
26283/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26284///
26285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
26286#[inline]
26287#[target_feature(enable = "avx512f,avx512vl")]
26288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26289#[cfg_attr(test, assert_instr(vunpckhpd))]
26290pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26291    unsafe {
26292        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
26293        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
26294    }
26295}
26296
26297/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26298///
26299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
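///
/// Sketch of the per-lane interleaving, assuming an AVX-512F-enabled caller:
///
/// ```ignore
/// let a = _mm512_setr_epi32(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// );
/// let b = _mm512_setr_epi32(
///     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// // Within each 128-bit lane, the lower two elements of `a` and `b` interleave.
/// let r = _mm512_unpacklo_epi32(a, b);
/// // r = [0, 16, 1, 17, 4, 20, 5, 21, 8, 24, 9, 25, 12, 28, 13, 29]
/// ```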
26300#[inline]
26301#[target_feature(enable = "avx512f")]
26302#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26303#[cfg_attr(test, assert_instr(vunpcklps))] // should be vpunpckldq
26304pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
26305    unsafe {
26306        let a = a.as_i32x16();
26307        let b = b.as_i32x16();
26308        #[rustfmt::skip]
26309        let r: i32x16 = simd_shuffle!(
26310            a, b,
26311            [ 0, 16, 1, 17,
26312              0 + 4, 16 + 4, 1 + 4, 17 + 4,
26313              0 + 8, 16 + 8, 1 + 8, 17 + 8,
26314              0 + 12, 16 + 12, 1 + 12, 17 + 12],
26315        );
26316        transmute(r)
26317    }
26318}
26319
26320/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26321///
26322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
26323#[inline]
26324#[target_feature(enable = "avx512f")]
26325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26326#[cfg_attr(test, assert_instr(vpunpckldq))]
26327pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26328    unsafe {
26329        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
26330        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
26331    }
26332}
26333
26334/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26335///
26336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
26337#[inline]
26338#[target_feature(enable = "avx512f")]
26339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26340#[cfg_attr(test, assert_instr(vpunpckldq))]
26341pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26342    unsafe {
26343        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
26344        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
26345    }
26346}
26347
26348/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26349///
26350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
26351#[inline]
26352#[target_feature(enable = "avx512f,avx512vl")]
26353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26354#[cfg_attr(test, assert_instr(vpunpckldq))]
26355pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26356    unsafe {
26357        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
26358        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
26359    }
26360}
26361
26362/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26363///
26364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
26365#[inline]
26366#[target_feature(enable = "avx512f,avx512vl")]
26367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26368#[cfg_attr(test, assert_instr(vpunpckldq))]
26369pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26370    unsafe {
26371        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
26372        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
26373    }
26374}
26375
26376/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26377///
26378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
26379#[inline]
26380#[target_feature(enable = "avx512f,avx512vl")]
26381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26382#[cfg_attr(test, assert_instr(vpunpckldq))]
26383pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26384    unsafe {
26385        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
26386        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
26387    }
26388}
26389
26390/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26391///
26392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
26393#[inline]
26394#[target_feature(enable = "avx512f,avx512vl")]
26395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26396#[cfg_attr(test, assert_instr(vpunpckldq))]
26397pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26398    unsafe {
26399        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
26400        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
26401    }
26402}
26403
26404/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26405///
26406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
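///
/// Sketch of the per-lane interleaving, assuming an AVX-512F-enabled caller:
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
/// // Within each 128-bit lane, the lower 64-bit element of `a` and `b` interleave.
/// let r = _mm512_unpacklo_epi64(a, b);
/// // r = [0, 8, 2, 10, 4, 12, 6, 14]
/// ```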
26407#[inline]
26408#[target_feature(enable = "avx512f")]
26409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26410#[cfg_attr(test, assert_instr(vunpcklpd))] // should be vpunpcklqdq
26411pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
26412    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26413}
26414
26415/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26416///
26417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
26418#[inline]
26419#[target_feature(enable = "avx512f")]
26420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26421#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26422pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26423    unsafe {
26424        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
26425        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
26426    }
26427}
26428
26429/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26430///
26431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
26432#[inline]
26433#[target_feature(enable = "avx512f")]
26434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26435#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26436pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26437    unsafe {
26438        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
26439        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
26440    }
26441}
26442
26443/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26444///
26445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
26446#[inline]
26447#[target_feature(enable = "avx512f,avx512vl")]
26448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26449#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26450pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26451    unsafe {
26452        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
26453        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
26454    }
26455}
26456
26457/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26458///
26459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
26460#[inline]
26461#[target_feature(enable = "avx512f,avx512vl")]
26462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26463#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26464pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26465    unsafe {
26466        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
26467        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
26468    }
26469}
26470
26471/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26472///
26473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
26474#[inline]
26475#[target_feature(enable = "avx512f,avx512vl")]
26476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26477#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26478pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26479    unsafe {
26480        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
26481        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
26482    }
26483}
26484
26485/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26486///
26487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
26488#[inline]
26489#[target_feature(enable = "avx512f,avx512vl")]
26490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26491#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26492pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26493    unsafe {
26494        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
26495        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
26496    }
26497}
26498
26499/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26500///
26501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
26502#[inline]
26503#[target_feature(enable = "avx512f")]
26504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26505#[cfg_attr(test, assert_instr(vunpcklps))]
26506pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
26507    unsafe {
26508        #[rustfmt::skip]
26509        simd_shuffle!(a, b,
26510                       [ 0, 16, 1, 17,
26511                         0 + 4, 16 + 4, 1 + 4, 17 + 4,
26512                         0 + 8, 16 + 8, 1 + 8, 17 + 8,
26513                         0 + 12, 16 + 12, 1 + 12, 17 + 12],
26514        )
26515    }
26516}
26517
26518/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26519///
26520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
26521#[inline]
26522#[target_feature(enable = "avx512f")]
26523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26524#[cfg_attr(test, assert_instr(vunpcklps))]
26525pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26526    unsafe {
26527        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
26528        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
26529    }
26530}
26531
26532/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26533///
26534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
26535#[inline]
26536#[target_feature(enable = "avx512f")]
26537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26538#[cfg_attr(test, assert_instr(vunpcklps))]
26539pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26540    unsafe {
26541        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
26542        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
26543    }
26544}
26545
26546/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26547///
26548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
26549#[inline]
26550#[target_feature(enable = "avx512f,avx512vl")]
26551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26552#[cfg_attr(test, assert_instr(vunpcklps))]
26553pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26554    unsafe {
26555        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
26556        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
26557    }
26558}
26559
26560/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26561///
26562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
26563#[inline]
26564#[target_feature(enable = "avx512f,avx512vl")]
26565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26566#[cfg_attr(test, assert_instr(vunpcklps))]
26567pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26568    unsafe {
26569        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
26570        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
26571    }
26572}
26573
26574/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26575///
26576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
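///
/// A minimal usage sketch (illustrative, not part of Intel's documentation),
/// assuming the caller is compiled with the `avx512f` and `avx512vl` target
/// features:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f,avx512vl")]
/// fn demo() {
///     let src = _mm_set1_ps(9.0);
///     let a = _mm_set_ps(3.0, 2.0, 1.0, 0.0);
///     let b = _mm_set_ps(13.0, 12.0, 11.0, 10.0);
///     // Plain unpacklo gives [0.0, 10.0, 1.0, 11.0]; mask 0b1100 takes
///     // elements 2 and 3 from that result and elements 0 and 1 from `src`.
///     let r = _mm_mask_unpacklo_ps(src, 0b1100, a, b);
///     // r == _mm_set_ps(11.0, 1.0, 9.0, 9.0)
/// }
/// ```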
26577#[inline]
26578#[target_feature(enable = "avx512f,avx512vl")]
26579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26580#[cfg_attr(test, assert_instr(vunpcklps))]
26581pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26582    unsafe {
26583        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
26584        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
26585    }
26586}
26587
26588/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26589///
26590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
26591#[inline]
26592#[target_feature(enable = "avx512f,avx512vl")]
26593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26594#[cfg_attr(test, assert_instr(vunpcklps))]
26595pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26596    unsafe {
26597        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
26598        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
26599    }
26600}
26601
26602/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26603///
26604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
26605#[inline]
26606#[target_feature(enable = "avx512f")]
26607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26608#[cfg_attr(test, assert_instr(vunpcklpd))]
26609pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
26610    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26611}
26612
26613/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26614///
26615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
26616#[inline]
26617#[target_feature(enable = "avx512f")]
26618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26619#[cfg_attr(test, assert_instr(vunpcklpd))]
26620pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26621    unsafe {
26622        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
26623        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
26624    }
26625}
26626
26627/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26628///
26629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
26630#[inline]
26631#[target_feature(enable = "avx512f")]
26632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26633#[cfg_attr(test, assert_instr(vunpcklpd))]
26634pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26635    unsafe {
26636        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
26637        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
26638    }
26639}
26640
26641/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26642///
26643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
26644#[inline]
26645#[target_feature(enable = "avx512f,avx512vl")]
26646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26647#[cfg_attr(test, assert_instr(vunpcklpd))]
26648pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26649    unsafe {
26650        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
26651        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
26652    }
26653}
26654
26655/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26656///
26657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
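///
/// A minimal usage sketch (illustrative, not part of Intel's documentation),
/// assuming the caller is compiled with the `avx512f` and `avx512vl` target
/// features:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f,avx512vl")]
/// fn demo() {
///     let a = _mm256_set_pd(3.0, 2.0, 1.0, 0.0);
///     let b = _mm256_set_pd(13.0, 12.0, 11.0, 10.0);
///     // Per 128-bit lane the low doubles are interleaved: [0.0, 10.0, 2.0, 12.0];
///     // mask 0b0011 keeps elements 0 and 1 and zeroes elements 2 and 3.
///     let r = _mm256_maskz_unpacklo_pd(0b0011, a, b);
///     // r == _mm256_set_pd(0.0, 0.0, 10.0, 0.0)
/// }
/// ```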
26658#[inline]
26659#[target_feature(enable = "avx512f,avx512vl")]
26660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26661#[cfg_attr(test, assert_instr(vunpcklpd))]
26662pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26663    unsafe {
26664        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
26665        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
26666    }
26667}
26668
26669/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26670///
26671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
26672#[inline]
26673#[target_feature(enable = "avx512f,avx512vl")]
26674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26675#[cfg_attr(test, assert_instr(vunpcklpd))]
26676pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26677    unsafe {
26678        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
26679        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
26680    }
26681}
26682
26683/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26684///
26685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
26686#[inline]
26687#[target_feature(enable = "avx512f,avx512vl")]
26688#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26689#[cfg_attr(test, assert_instr(vunpcklpd))]
26690pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26691    unsafe {
26692        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
26693        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
26694    }
26695}
26696
26697/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26698///
26699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
26700#[inline]
26701#[target_feature(enable = "avx512f")]
26702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26703pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
26704    unsafe {
26705        simd_shuffle!(
26706            a,
26707            _mm_undefined_ps(),
26708            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26709        )
26710    }
26711}
26712
26713/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26714///
26715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
26716#[inline]
26717#[target_feature(enable = "avx512f")]
26718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26719pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
26720    unsafe {
26721        simd_shuffle!(
26722            a,
26723            _mm256_undefined_ps(),
26724            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26725        )
26726    }
26727}
26728
26729/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26730///
26731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
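///
/// A minimal usage sketch (illustrative, not part of Intel's documentation),
/// assuming the caller is compiled with the `avx512f` target feature:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
///     let r = _mm512_zextps128_ps512(a);
///     // The low four elements of r are 1.0, 2.0, 3.0, 4.0;
///     // the remaining twelve elements are 0.0.
/// }
/// ```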
26732#[inline]
26733#[target_feature(enable = "avx512f")]
26734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26735pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
26736    unsafe {
26737        simd_shuffle!(
26738            a,
26739            _mm_set1_ps(0.),
26740            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26741        )
26742    }
26743}
26744
26745/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26746///
26747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
26748#[inline]
26749#[target_feature(enable = "avx512f")]
26750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26751pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
26752    unsafe {
26753        simd_shuffle!(
26754            a,
26755            _mm256_set1_ps(0.),
26756            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26757        )
26758    }
26759}
26760
26761/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26762///
26763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
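///
/// A minimal usage sketch (illustrative, not part of Intel's documentation),
/// assuming the caller is compiled with the `avx512f` target feature:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
///     // Widening and then narrowing again preserves the low 128 bits.
///     let r = _mm512_castps512_ps128(_mm512_zextps128_ps512(a));
///     // r holds the same four elements as a
/// }
/// ```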
26764#[inline]
26765#[target_feature(enable = "avx512f")]
26766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26767pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
26768    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26769}
26770
26771/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26772///
26773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
26774#[inline]
26775#[target_feature(enable = "avx512f")]
26776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26777pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
26778    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
26779}
26780
26781/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26782///
26783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
26784#[inline]
26785#[target_feature(enable = "avx512f")]
26786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26787pub fn _mm512_castps_pd(a: __m512) -> __m512d {
26788    unsafe { transmute(a) }
26789}
26790
26791/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26797pub fn _mm512_castps_si512(a: __m512) -> __m512i {
26798    unsafe { transmute(a) }
26799}
26800
26801/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26802///
26803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
26804#[inline]
26805#[target_feature(enable = "avx512f")]
26806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26807pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
26808    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26809}
26810
26811/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26812///
26813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
26814#[inline]
26815#[target_feature(enable = "avx512f")]
26816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26817pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
26818    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26819}
26820
26821/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26822///
26823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
26824#[inline]
26825#[target_feature(enable = "avx512f")]
26826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26827pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
26828    unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
26829}
26830
26831/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26832///
26833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
26834#[inline]
26835#[target_feature(enable = "avx512f")]
26836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26837pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
26838    unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
26839}
26840
26841/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26842///
26843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
26844#[inline]
26845#[target_feature(enable = "avx512f")]
26846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26847pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
26848    unsafe { simd_shuffle!(a, a, [0, 1]) }
26849}
26850
26851/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26852///
26853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
26854#[inline]
26855#[target_feature(enable = "avx512f")]
26856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26857pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
26858    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26859}
26860
26861/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26862///
26863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
26864#[inline]
26865#[target_feature(enable = "avx512f")]
26866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26867pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
26868    unsafe { transmute(a) }
26869}
26870
26871/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26872///
26873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
26874#[inline]
26875#[target_feature(enable = "avx512f")]
26876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26877pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
26878    unsafe { transmute(a) }
26879}
26880
26881/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26882///
26883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
26884#[inline]
26885#[target_feature(enable = "avx512f")]
26886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26887pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
26888    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26889}
26890
26891/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26892///
26893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
26894#[inline]
26895#[target_feature(enable = "avx512f")]
26896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26897pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
26898    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26899}
26900
26901/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26902///
26903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
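///
/// A minimal usage sketch (illustrative, not part of Intel's documentation),
/// assuming the caller is compiled with the `avx512f` target feature:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm_set_epi64x(2, 1);
///     let r = _mm512_zextsi128_si512(a);
///     // The low two 64-bit elements of r are 1 and 2; the upper six are 0.
/// }
/// ```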
26904#[inline]
26905#[target_feature(enable = "avx512f")]
26906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26907pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
26908    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26909}
26910
26911/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26912///
26913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
26914#[inline]
26915#[target_feature(enable = "avx512f")]
26916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26917pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
26918    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26919}
26920
26921/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26922///
26923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
26924#[inline]
26925#[target_feature(enable = "avx512f")]
26926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26927pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
26928    unsafe { simd_shuffle!(a, a, [0, 1]) }
26929}
26930
26931/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26932///
26933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
26934#[inline]
26935#[target_feature(enable = "avx512f")]
26936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26937pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
26938    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26939}
26940
26941/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26942///
26943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
26944#[inline]
26945#[target_feature(enable = "avx512f")]
26946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26947pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
26948    unsafe { transmute(a) }
26949}
26950
26951/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26952///
26953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
26954#[inline]
26955#[target_feature(enable = "avx512f")]
26956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26957pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
26958    unsafe { transmute(a) }
26959}
26960
26961/// Copy the lower 32-bit integer in a to dst.
26962///
26963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
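///
/// A minimal usage sketch (illustrative, not part of Intel's documentation),
/// assuming the caller is compiled with the `avx512f` target feature:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_zextsi128_si512(_mm_set_epi32(4, 3, 2, 1));
///     let r = _mm512_cvtsi512_si32(a);
///     // r == 1, the lowest 32-bit element of a
/// }
/// ```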
26964#[inline]
26965#[target_feature(enable = "avx512f")]
26966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26967#[cfg_attr(test, assert_instr(vmovd))]
26968pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
26969    unsafe { simd_extract!(a.as_i32x16(), 0) }
26970}
26971
26972/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
26973///
26974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
26975#[inline]
26976#[target_feature(enable = "avx512f")]
26977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26978pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
26979    unsafe { simd_extract!(a, 0) }
26980}
26981
26982/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
26983///
26984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
26985#[inline]
26986#[target_feature(enable = "avx512f")]
26987#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26988pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
26989    unsafe { simd_extract!(a, 0) }
26990}
26991
26992/// Broadcast the low packed 32-bit integer from a to all elements of dst.
26993///
26994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
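///
/// A minimal usage sketch (illustrative, not part of Intel's documentation),
/// assuming the caller is compiled with the `avx512f` target feature:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm_set_epi32(4, 3, 2, 1); // lowest element is 1
///     let r = _mm512_broadcastd_epi32(a);
///     // All sixteen 32-bit elements of r are 1, i.e. r == _mm512_set1_epi32(1)
/// }
/// ```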
26995#[inline]
26996#[target_feature(enable = "avx512f")]
26997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26998#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
26999pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
27000    unsafe {
27001        let a = _mm512_castsi128_si512(a).as_i32x16();
27002        let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
27003        transmute(ret)
27004    }
27005}
27006
27007/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27008///
27009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
27010#[inline]
27011#[target_feature(enable = "avx512f")]
27012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27013#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27014pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27015    unsafe {
27016        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
27017        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27018    }
27019}
27020
27021/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27022///
27023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
27024#[inline]
27025#[target_feature(enable = "avx512f")]
27026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27027#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27028pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
27029    unsafe {
27030        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
27031        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27032    }
27033}
27034
27035/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27036///
27037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
27038#[inline]
27039#[target_feature(enable = "avx512f,avx512vl")]
27040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27041#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27042pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27043    unsafe {
27044        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
27045        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27046    }
27047}
27048
27049/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27050///
27051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
27052#[inline]
27053#[target_feature(enable = "avx512f,avx512vl")]
27054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27055#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27056pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
27057    unsafe {
27058        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
27059        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27060    }
27061}
27062
27063/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27064///
27065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
27066#[inline]
27067#[target_feature(enable = "avx512f,avx512vl")]
27068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27069#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27070pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27071    unsafe {
27072        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
27073        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
27074    }
27075}
27076
27077/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27078///
27079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
27080#[inline]
27081#[target_feature(enable = "avx512f,avx512vl")]
27082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27083#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27084pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
27085    unsafe {
27086        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
27087        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
27088    }
27089}
27090
27091/// Broadcast the low packed 64-bit integer from a to all elements of dst.
27092///
27093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
27094#[inline]
27095#[target_feature(enable = "avx512f")]
27096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27097#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
27098pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
27099    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27100}
27101
27102/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27103///
27104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
27105#[inline]
27106#[target_feature(enable = "avx512f")]
27107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27108#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27109pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
27110    unsafe {
27111        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
27112        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27113    }
27114}
27115
27116/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27117///
27118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
27119#[inline]
27120#[target_feature(enable = "avx512f")]
27121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27122#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27123pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
27124    unsafe {
27125        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
27126        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27127    }
27128}
27129
27130/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27131///
27132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
27133#[inline]
27134#[target_feature(enable = "avx512f,avx512vl")]
27135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27136#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27137pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27138    unsafe {
27139        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
27140        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
27141    }
27142}
27143
27144/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27145///
27146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
27147#[inline]
27148#[target_feature(enable = "avx512f,avx512vl")]
27149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27150#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27151pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
27152    unsafe {
27153        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
27154        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
27155    }
27156}
27157
27158/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27159///
27160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
27161#[inline]
27162#[target_feature(enable = "avx512f,avx512vl")]
27163#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27164#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27165pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27166    unsafe {
27167        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
27168        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
27169    }
27170}
27171
27172/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27173///
27174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
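///
/// A minimal usage sketch (illustrative, not part of Intel's documentation),
/// assuming the caller is compiled with the `avx512f` and `avx512vl` target
/// features:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f,avx512vl")]
/// fn demo() {
///     let a = _mm_set_epi64x(2, 1); // lowest element is 1
///     // Broadcasting gives [1, 1]; mask 0b01 zeroes element 1.
///     let r = _mm_maskz_broadcastq_epi64(0b01, a);
///     // r == _mm_set_epi64x(0, 1)
/// }
/// ```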
27175#[inline]
27176#[target_feature(enable = "avx512f,avx512vl")]
27177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27178#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27179pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
27180    unsafe {
27181        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
27182        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
27183    }
27184}
27185
27186/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
27187///
27188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
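///
/// A minimal usage sketch (illustrative, not part of Intel's documentation),
/// assuming the caller is compiled with the `avx512f` target feature:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lowest element is 1.0
///     let r = _mm512_broadcastss_ps(a);
///     // All sixteen elements of r are 1.0, i.e. r == _mm512_set1_ps(1.0)
/// }
/// ```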
27189#[inline]
27190#[target_feature(enable = "avx512f")]
27191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27192#[cfg_attr(test, assert_instr(vbroadcastss))]
27193pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
27194    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
27195}
27196
27197/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27198///
27199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
27200#[inline]
27201#[target_feature(enable = "avx512f")]
27202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27203#[cfg_attr(test, assert_instr(vbroadcastss))]
27204pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27205    unsafe {
27206        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
27207        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27208    }
27209}
27210
27211/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27212///
27213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
27214#[inline]
27215#[target_feature(enable = "avx512f")]
27216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27217#[cfg_attr(test, assert_instr(vbroadcastss))]
27218pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
27219    unsafe {
27220        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
27221        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27222    }
27223}
27224
27225/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27226///
27227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
27228#[inline]
27229#[target_feature(enable = "avx512f,avx512vl")]
27230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27231#[cfg_attr(test, assert_instr(vbroadcastss))]
27232pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27233    unsafe {
27234        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
27235        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27236    }
27237}
27238
27239/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27240///
27241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
27242#[inline]
27243#[target_feature(enable = "avx512f,avx512vl")]
27244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27245#[cfg_attr(test, assert_instr(vbroadcastss))]
27246pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
27247    unsafe {
27248        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
27249        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27250    }
27251}
27252
27253/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27254///
27255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
27256#[inline]
27257#[target_feature(enable = "avx512f,avx512vl")]
27258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27259#[cfg_attr(test, assert_instr(vbroadcastss))]
27260pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
27261    unsafe {
27262        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
27263        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
27264    }
27265}
27266
27267/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27268///
27269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
27270#[inline]
27271#[target_feature(enable = "avx512f,avx512vl")]
27272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27273#[cfg_attr(test, assert_instr(vbroadcastss))]
27274pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
27275    unsafe {
27276        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
27277        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
27278    }
27279}
27280
27281/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
27282///
27283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
27284#[inline]
27285#[target_feature(enable = "avx512f")]
27286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27287#[cfg_attr(test, assert_instr(vbroadcastsd))]
27288pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
27289    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27290}
27291
27292/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27293///
27294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
27295#[inline]
27296#[target_feature(enable = "avx512f")]
27297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27298#[cfg_attr(test, assert_instr(vbroadcastsd))]
27299pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
27300    unsafe {
27301        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
27302        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27303    }
27304}
27305
27306/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27307///
27308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
27309#[inline]
27310#[target_feature(enable = "avx512f")]
27311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27312#[cfg_attr(test, assert_instr(vbroadcastsd))]
27313pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
27314    unsafe {
27315        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
27316        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27317    }
27318}
27319
27320/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27321///
27322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
27323#[inline]
27324#[target_feature(enable = "avx512f,avx512vl")]
27325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27326#[cfg_attr(test, assert_instr(vbroadcastsd))]
27327pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
27328    unsafe {
27329        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
27330        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
27331    }
27332}
27333
27334/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27335///
27336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
27337#[inline]
27338#[target_feature(enable = "avx512f,avx512vl")]
27339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27340#[cfg_attr(test, assert_instr(vbroadcastsd))]
27341pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
27342    unsafe {
27343        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
27344        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
27345    }
27346}
27347
27348/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27349///
27350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
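///
/// A minimal usage sketch (illustrative, not part of Intel's documentation),
/// assuming the caller is compiled with the `avx512f` target feature:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm_set_epi32(4, 3, 2, 1);
///     let r = _mm512_broadcast_i32x4(a);
///     // The 128-bit block repeats in every lane:
///     // r == _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1)
/// }
/// ```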
27351#[inline]
27352#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27354pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
27355    unsafe {
27356        let a = a.as_i32x4();
27357        let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
27358        transmute(ret)
27359    }
27360}
27361
27362/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27363///
27364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
27365#[inline]
27366#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27368pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27369    unsafe {
27370        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
27371        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27372    }
27373}
27374
27375/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27376///
27377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
27378#[inline]
27379#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27381pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
27382    unsafe {
27383        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
27384        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27385    }
27386}
27387
27388/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27389///
27390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
27391#[inline]
27392#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27394pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
27395    unsafe {
27396        let a = a.as_i32x4();
27397        let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
27398        transmute(ret)
27399    }
27400}
27401
27402/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27403///
27404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
27405#[inline]
27406#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27407#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27408pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27409    unsafe {
27410        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
27411        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27412    }
27413}
27414
27415/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27416///
27417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
27418#[inline]
27419#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27421pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
27422    unsafe {
27423        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
27424        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27425    }
27426}
27427
27428/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
27429///
27430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
27431#[inline]
27432#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27434pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
27435    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27436}
27437
27438/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27439///
27440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
27441#[inline]
27442#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27444pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
27445    unsafe {
27446        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
27447        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27448    }
27449}
27450
27451/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27452///
27453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
27454#[inline]
27455#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27457pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
27458    unsafe {
27459        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
27460        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27461    }
27462}
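
// Sketch (ours, cfg(test)-only, never invoked): the i64x4 broadcast simply
// repeats the four 64-bit lanes of `a` across both halves of the result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn broadcast_i64x4_sketch() {
    let a = _mm256_setr_epi64x(10, 11, 12, 13);
    let r = _mm512_broadcast_i64x4(a);
    let e = _mm512_setr_epi64(10, 11, 12, 13, 10, 11, 12, 13);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xFF);
}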
27463
27464/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27465///
27466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
27467#[inline]
27468#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27470pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
27471    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
27472}
27473
27474/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27475///
27476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
27477#[inline]
27478#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27480pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27481    unsafe {
27482        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
27483        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27484    }
27485}
27486
27487/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27488///
27489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
27490#[inline]
27491#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27493pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
27494    unsafe {
27495        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
27496        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27497    }
27498}
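
// Sketch (ours, cfg(test)-only, never invoked): the zeromask variant writes
// the broadcast pattern where a mask bit is set and 0.0 elsewhere. Bit
// patterns are compared through a cast so only AVX-512F intrinsics are needed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn broadcast_f32x4_sketch() {
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let r = _mm512_maskz_broadcast_f32x4(0b1111_0000_0000_1111, a);
    let e = _mm512_setr_ps(
        1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0,
    );
    let eq = _mm512_cmpeq_epi32_mask(_mm512_castps_si512(r), _mm512_castps_si512(e));
    assert_eq!(eq, 0xFFFF);
}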
27499
27500/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27501///
27502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
27503#[inline]
27504#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27506pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
27507    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27508}
27509
27510/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27511///
27512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
27513#[inline]
27514#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27516pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27517    unsafe {
27518        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
27519        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27520    }
27521}
27522
27523/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27524///
27525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
27526#[inline]
27527#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27529pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
27530    unsafe {
27531        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
27532        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27533    }
27534}
27535
27536/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
27537///
27538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
27539#[inline]
27540#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27542pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
27543    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27544}
27545
27546/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27547///
27548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
27549#[inline]
27550#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27552pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
27553    unsafe {
27554        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27555        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27556    }
27557}
27558
27559/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27560///
27561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
27562#[inline]
27563#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27565pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
27566    unsafe {
27567        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
27568        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27569    }
27570}
27571
27572/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27573///
27574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
27575#[inline]
27576#[target_feature(enable = "avx512f")]
27577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27578#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27579pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27580    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
27581}
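
// Sketch (ours, cfg(test)-only, never invoked): in a blend, a set mask bit
// picks the lane from `b` and a clear bit picks it from `a`; no lane is
// zeroed or left unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn blend_epi32_sketch() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    let r = _mm512_mask_blend_epi32(0b1010_1010_1010_1010, a, b);
    let e = _mm512_setr_epi32(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xFFFF);
}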
27582
27583/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27584///
27585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
27586#[inline]
27587#[target_feature(enable = "avx512f,avx512vl")]
27588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27589#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27590pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27591    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
27592}
27593
27594/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27595///
27596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
27597#[inline]
27598#[target_feature(enable = "avx512f,avx512vl")]
27599#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27600#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27601pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27602    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
27603}
27604
27605/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27606///
27607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
27608#[inline]
27609#[target_feature(enable = "avx512f")]
27610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27611#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27612pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27613    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
27614}
27615
27616/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27617///
27618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
27619#[inline]
27620#[target_feature(enable = "avx512f,avx512vl")]
27621#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27622#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27623pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27624    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
27625}
27626
27627/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27628///
27629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
27630#[inline]
27631#[target_feature(enable = "avx512f,avx512vl")]
27632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27633#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27634pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27635    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
27636}
27637
27638/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27639///
27640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
27641#[inline]
27642#[target_feature(enable = "avx512f")]
27643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27644#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27645pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
27646    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
27647}
27648
27649/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27650///
27651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
27652#[inline]
27653#[target_feature(enable = "avx512f,avx512vl")]
27654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27655#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27656pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
27657    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
27658}
27659
27660/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27661///
27662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
27663#[inline]
27664#[target_feature(enable = "avx512f,avx512vl")]
27665#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27666#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27667pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
27668    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
27669}
27670
27671/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27672///
27673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
27674#[inline]
27675#[target_feature(enable = "avx512f")]
27676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27677#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27678pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
27679    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
27680}
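
// Sketch (ours, cfg(test)-only, never invoked): the double-precision blend
// follows the same rule as the integer one; equality is checked on the bit
// patterns via a cast so only AVX-512F intrinsics are required.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn blend_pd_sketch() {
    let a = _mm512_set1_pd(1.0);
    let b = _mm512_set1_pd(2.0);
    // Bits 0..=3 are set, so the low four lanes come from `b`.
    let r = _mm512_mask_blend_pd(0b0000_1111, a, b);
    let e = _mm512_setr_pd(2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0);
    let eq = _mm512_cmpeq_epi64_mask(_mm512_castpd_si512(r), _mm512_castpd_si512(e));
    assert_eq!(eq, 0xFF);
}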
27681
27682/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27683///
27684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
27685#[inline]
27686#[target_feature(enable = "avx512f,avx512vl")]
27687#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27688#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27689pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
27690    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
27691}
27692
27693/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27694///
27695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
27696#[inline]
27697#[target_feature(enable = "avx512f,avx512vl")]
27698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27699#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27700pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
27701    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
27702}
27703
27704/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
27705///
27706/// <div class="warning">Only the lowest <strong>4 bits</strong> of imm8 are used (shift at maximum by 60 bytes)!</div>
27707///
27708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
27709#[inline]
27710#[target_feature(enable = "avx512f")]
27711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27712#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27713#[rustc_legacy_const_generics(2)]
27714pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27715    unsafe {
27716        static_assert_uimm_bits!(IMM8, 8);
27717        let a = a.as_i32x16();
27718        let b = b.as_i32x16();
27719        let imm8: i32 = IMM8 % 16;
27720        let r: i32x16 = match imm8 {
27721            0 => simd_shuffle!(
27722                a,
27723                b,
27724                [
27725                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
27726                ],
27727            ),
27728            1 => simd_shuffle!(
27729                a,
27730                b,
27731                [
27732                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
27733                ],
27734            ),
27735            2 => simd_shuffle!(
27736                a,
27737                b,
27738                [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
27739            ),
27740            3 => simd_shuffle!(
27741                a,
27742                b,
27743                [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
27744            ),
27745            4 => simd_shuffle!(
27746                a,
27747                b,
27748                [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
27749            ),
27750            5 => simd_shuffle!(
27751                a,
27752                b,
27753                [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
27754            ),
27755            6 => simd_shuffle!(
27756                a,
27757                b,
27758                [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
27759            ),
27760            7 => simd_shuffle!(
27761                a,
27762                b,
27763                [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
27764            ),
27765            8 => simd_shuffle!(
27766                a,
27767                b,
27768                [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
27769            ),
27770            9 => simd_shuffle!(
27771                a,
27772                b,
27773                [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
27774            ),
27775            10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
27776            11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
27777            12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
27778            13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
27779            14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
27780            15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
27781            _ => unreachable_unchecked(),
27782        };
27783        transmute(r)
27784    }
27785}
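
// Sketch (ours, cfg(test)-only, never invoked): with IMM8 = 4 the
// concatenation [a:b] is shifted right by four 32-bit lanes, so the low
// twelve lanes come from `b` (starting at lane 4) and the top four lanes
// wrap around to the start of `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn alignr_epi32_sketch() {
    let a = _mm512_setr_epi32(
        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
    );
    let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm512_alignr_epi32::<4>(a, b);
    let e = _mm512_setr_epi32(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 101, 102, 103);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xFFFF);
}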
27786
27787/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27788///
27789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
27790#[inline]
27791#[target_feature(enable = "avx512f")]
27792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27793#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27794#[rustc_legacy_const_generics(4)]
27795pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
27796    src: __m512i,
27797    k: __mmask16,
27798    a: __m512i,
27799    b: __m512i,
27800) -> __m512i {
27801    unsafe {
27802        static_assert_uimm_bits!(IMM8, 8);
27803        let r = _mm512_alignr_epi32::<IMM8>(a, b);
27804        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
27805    }
27806}
27807
27808/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27809///
27810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
27811#[inline]
27812#[target_feature(enable = "avx512f")]
27813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27814#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27815#[rustc_legacy_const_generics(3)]
27816pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27817    unsafe {
27818        static_assert_uimm_bits!(IMM8, 8);
27819        let r = _mm512_alignr_epi32::<IMM8>(a, b);
27820        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
27821    }
27822}
27823
27824/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
27825///
27826/// <div class="warning">Only the lowest <strong>3 bits</strong> of imm8 are used (shift at maximum by 28 bytes)!</div>
27827///
27828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
27829#[inline]
27830#[target_feature(enable = "avx512f,avx512vl")]
27831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27832#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27833#[rustc_legacy_const_generics(2)]
27834pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
27835    unsafe {
27836        static_assert_uimm_bits!(IMM8, 8);
27837        let a = a.as_i32x8();
27838        let b = b.as_i32x8();
27839        let imm8: i32 = IMM8 % 8;
27840        let r: i32x8 = match imm8 {
27841            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27842            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27843            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27844            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27845            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27846            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27847            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27848            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27849            _ => unreachable_unchecked(),
27850        };
27851        transmute(r)
27852    }
27853}
27854
27855/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27856///
27857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
27858#[inline]
27859#[target_feature(enable = "avx512f,avx512vl")]
27860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27861#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27862#[rustc_legacy_const_generics(4)]
27863pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
27864    src: __m256i,
27865    k: __mmask8,
27866    a: __m256i,
27867    b: __m256i,
27868) -> __m256i {
27869    unsafe {
27870        static_assert_uimm_bits!(IMM8, 8);
27871        let r = _mm256_alignr_epi32::<IMM8>(a, b);
27872        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
27873    }
27874}
27875
27876/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27877///
27878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
27879#[inline]
27880#[target_feature(enable = "avx512f,avx512vl")]
27881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27882#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27883#[rustc_legacy_const_generics(3)]
27884pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27885    unsafe {
27886        static_assert_uimm_bits!(IMM8, 8);
27887        let r = _mm256_alignr_epi32::<IMM8>(a, b);
27888        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
27889    }
27890}
27891
27892/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
27893///
27894/// <div class="warning">Only the lowest <strong>2 bits</strong> of imm8 are used (shift at maximum by 12 bytes)!</div>
27895///
27896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
27897#[inline]
27898#[target_feature(enable = "avx512f,avx512vl")]
27899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27900#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
27901#[rustc_legacy_const_generics(2)]
27902pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
27903    unsafe {
27904        static_assert_uimm_bits!(IMM8, 8);
27905        let a = a.as_i32x4();
27906        let b = b.as_i32x4();
27907        let imm8: i32 = IMM8 % 4;
27908        let r: i32x4 = match imm8 {
27909            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
27910            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
27911            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
27912            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
27913            _ => unreachable_unchecked(),
27914        };
27915        transmute(r)
27916    }
27917}
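
// Sketch (ours, cfg(test)-only, never invoked): the 128-bit form only uses
// IMM8 modulo 4, so IMM8 = 1 pulls the top three lanes of `b` down and
// rotates the lowest lane of `a` in behind them.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn alignr_epi32_128_sketch() {
    let a = _mm_setr_epi32(100, 101, 102, 103);
    let b = _mm_setr_epi32(0, 1, 2, 3);
    let r = _mm_alignr_epi32::<1>(a, b);
    let e = _mm_setr_epi32(1, 2, 3, 100);
    assert_eq!(_mm_cmpeq_epi32_mask(r, e), 0b0000_1111);
}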
27918
27919/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27920///
27921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
27922#[inline]
27923#[target_feature(enable = "avx512f,avx512vl")]
27924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27925#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27926#[rustc_legacy_const_generics(4)]
27927pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
27928    src: __m128i,
27929    k: __mmask8,
27930    a: __m128i,
27931    b: __m128i,
27932) -> __m128i {
27933    unsafe {
27934        static_assert_uimm_bits!(IMM8, 8);
27935        let r = _mm_alignr_epi32::<IMM8>(a, b);
27936        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
27937    }
27938}
27939
27940/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27941///
27942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
27943#[inline]
27944#[target_feature(enable = "avx512f,avx512vl")]
27945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27946#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27947#[rustc_legacy_const_generics(3)]
27948pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27949    unsafe {
27950        static_assert_uimm_bits!(IMM8, 8);
27951        let r = _mm_alignr_epi32::<IMM8>(a, b);
27952        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
27953    }
27954}
27955
27956/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
27957///
27958/// <div class="warning">Only the lowest <strong>3 bits</strong> of imm8 are used (shift at maximum by 56 bytes)!</div>
27959///
27960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
27961#[inline]
27962#[target_feature(enable = "avx512f")]
27963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27964#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27965#[rustc_legacy_const_generics(2)]
27966pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27967    unsafe {
27968        static_assert_uimm_bits!(IMM8, 8);
27969        let imm8: i32 = IMM8 % 8;
27970        let r: i64x8 = match imm8 {
27971            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27972            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27973            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27974            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27975            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27976            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27977            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27978            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27979            _ => unreachable_unchecked(),
27980        };
27981        transmute(r)
27982    }
27983}
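
// Sketch (ours, cfg(test)-only, never invoked): same pattern as the 32-bit
// variant but on 64-bit lanes; IMM8 = 2 takes the top six lanes of `b`
// followed by the bottom two lanes of `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn alignr_epi64_sketch() {
    let a = _mm512_setr_epi64(100, 101, 102, 103, 104, 105, 106, 107);
    let b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let r = _mm512_alignr_epi64::<2>(a, b);
    let e = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 100, 101);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xFF);
}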
27984
27985/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27986///
27987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
27988#[inline]
27989#[target_feature(enable = "avx512f")]
27990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27991#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27992#[rustc_legacy_const_generics(4)]
27993pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
27994    src: __m512i,
27995    k: __mmask8,
27996    a: __m512i,
27997    b: __m512i,
27998) -> __m512i {
27999    unsafe {
28000        static_assert_uimm_bits!(IMM8, 8);
28001        let r = _mm512_alignr_epi64::<IMM8>(a, b);
28002        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
28003    }
28004}
28005
28006/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28007///
28008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
28009#[inline]
28010#[target_feature(enable = "avx512f")]
28011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28012#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28013#[rustc_legacy_const_generics(3)]
28014pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28015    unsafe {
28016        static_assert_uimm_bits!(IMM8, 8);
28017        let r = _mm512_alignr_epi64::<IMM8>(a, b);
28018        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
28019    }
28020}
28021
28022/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
28023///
28024/// <div class="warning">Only the lowest <strong>2 bits</strong> of imm8 are used (shift at maximum by 24 bytes)!</div>
28025///
28026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
28027#[inline]
28028#[target_feature(enable = "avx512f,avx512vl")]
28029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28030#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28031#[rustc_legacy_const_generics(2)]
28032pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
28033    unsafe {
28034        static_assert_uimm_bits!(IMM8, 8);
28035        let imm8: i32 = IMM8 % 4;
28036        let r: i64x4 = match imm8 {
28037            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
28038            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
28039            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
28040            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
28041            _ => unreachable_unchecked(),
28042        };
28043        transmute(r)
28044    }
28045}
28046
28047/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28048///
28049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
28050#[inline]
28051#[target_feature(enable = "avx512f,avx512vl")]
28052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28053#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28054#[rustc_legacy_const_generics(4)]
28055pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
28056    src: __m256i,
28057    k: __mmask8,
28058    a: __m256i,
28059    b: __m256i,
28060) -> __m256i {
28061    unsafe {
28062        static_assert_uimm_bits!(IMM8, 8);
28063        let r = _mm256_alignr_epi64::<IMM8>(a, b);
28064        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
28065    }
28066}
28067
28068/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28069///
28070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
28071#[inline]
28072#[target_feature(enable = "avx512f,avx512vl")]
28073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28074#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28075#[rustc_legacy_const_generics(3)]
28076pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28077    unsafe {
28078        static_assert_uimm_bits!(IMM8, 8);
28079        let r = _mm256_alignr_epi64::<IMM8>(a, b);
28080        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
28081    }
28082}
28083
28084/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
28085///
28086/// <div class="warning">Only the lowest <strong>bit</strong> of imm8 is used (shift at maximum by 8 bytes)!</div>
28087///
28088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
28089#[inline]
28090#[target_feature(enable = "avx512f,avx512vl")]
28091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28092#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
28093#[rustc_legacy_const_generics(2)]
28094pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
28095    unsafe {
28096        static_assert_uimm_bits!(IMM8, 8);
28097        let imm8: i32 = IMM8 % 2;
28098        let r: i64x2 = match imm8 {
28099            0 => simd_shuffle!(a, b, [2, 3]),
28100            1 => simd_shuffle!(a, b, [3, 0]),
28101            _ => unreachable_unchecked(),
28102        };
28103        transmute(r)
28104    }
28105}
28106
28107/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28108///
28109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
28110#[inline]
28111#[target_feature(enable = "avx512f,avx512vl")]
28112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28113#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28114#[rustc_legacy_const_generics(4)]
28115pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
28116    src: __m128i,
28117    k: __mmask8,
28118    a: __m128i,
28119    b: __m128i,
28120) -> __m128i {
28121    unsafe {
28122        static_assert_uimm_bits!(IMM8, 8);
28123        let r = _mm_alignr_epi64::<IMM8>(a, b);
28124        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
28125    }
28126}
28127
28128/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28129///
28130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
28131#[inline]
28132#[target_feature(enable = "avx512f,avx512vl")]
28133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28134#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28135#[rustc_legacy_const_generics(3)]
28136pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28137    unsafe {
28138        static_assert_uimm_bits!(IMM8, 8);
28139        let r = _mm_alignr_epi64::<IMM8>(a, b);
28140        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
28141    }
28142}
28143
28144/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
28145///
28146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
28147#[inline]
28148#[target_feature(enable = "avx512f")]
28149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28150#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generates vpandq
28151pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
28152    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28153}
28154
28155/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28156///
28157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
28158#[inline]
28159#[target_feature(enable = "avx512f")]
28160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28161#[cfg_attr(test, assert_instr(vpandd))]
28162pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28163    unsafe {
28164        let and = _mm512_and_epi32(a, b).as_i32x16();
28165        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
28166    }
28167}
28168
28169/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28170///
28171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
28172#[inline]
28173#[target_feature(enable = "avx512f")]
28174#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28175#[cfg_attr(test, assert_instr(vpandd))]
28176pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28177    unsafe {
28178        let and = _mm512_and_epi32(a, b).as_i32x16();
28179        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
28180    }
28181}
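
// Sketch (ours, cfg(test)-only, never invoked): the zeromask AND computes
// `a & b` per lane and then forces every lane with a clear mask bit to zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn maskz_and_epi32_sketch() {
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    // Lanes 0..=7 get 0b1100 & 0b1010 = 0b1000; lanes 8..=15 are zeroed.
    let r = _mm512_maskz_and_epi32(0b0000_0000_1111_1111, a, b);
    let e = _mm512_setr_epi32(8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xFFFF);
}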
28182
28183/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28184///
28185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
28186#[inline]
28187#[target_feature(enable = "avx512f,avx512vl")]
28188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28189#[cfg_attr(test, assert_instr(vpandd))]
28190pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28191    unsafe {
28192        let and = simd_and(a.as_i32x8(), b.as_i32x8());
28193        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
28194    }
28195}
28196
28197/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28198///
28199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
28200#[inline]
28201#[target_feature(enable = "avx512f,avx512vl")]
28202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28203#[cfg_attr(test, assert_instr(vpandd))]
28204pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28205    unsafe {
28206        let and = simd_and(a.as_i32x8(), b.as_i32x8());
28207        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
28208    }
28209}
28210
28211/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28212///
28213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
28214#[inline]
28215#[target_feature(enable = "avx512f,avx512vl")]
28216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28217#[cfg_attr(test, assert_instr(vpandd))]
28218pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28219    unsafe {
28220        let and = simd_and(a.as_i32x4(), b.as_i32x4());
28221        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
28222    }
28223}
28224
28225/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28226///
28227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
28228#[inline]
28229#[target_feature(enable = "avx512f,avx512vl")]
28230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28231#[cfg_attr(test, assert_instr(vpandd))]
28232pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28233    unsafe {
28234        let and = simd_and(a.as_i32x4(), b.as_i32x4());
28235        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
28236    }
28237}
28238
28239/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
28240///
28241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
28242#[inline]
28243#[target_feature(enable = "avx512f")]
28244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28245#[cfg_attr(test, assert_instr(vpandq))]
28246pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
28247    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
28248}
28249
28250/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28251///
28252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
28253#[inline]
28254#[target_feature(enable = "avx512f")]
28255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28256#[cfg_attr(test, assert_instr(vpandq))]
28257pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28258    unsafe {
28259        let and = _mm512_and_epi64(a, b).as_i64x8();
28260        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
28261    }
28262}
28263
28264/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28265///
28266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
28267#[inline]
28268#[target_feature(enable = "avx512f")]
28269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28270#[cfg_attr(test, assert_instr(vpandq))]
28271pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28272    unsafe {
28273        let and = _mm512_and_epi64(a, b).as_i64x8();
28274        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
28275    }
28276}
28277
28278/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28279///
28280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
28281#[inline]
28282#[target_feature(enable = "avx512f,avx512vl")]
28283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28284#[cfg_attr(test, assert_instr(vpandq))]
28285pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28286    unsafe {
28287        let and = simd_and(a.as_i64x4(), b.as_i64x4());
28288        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
28289    }
28290}
28291
28292/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28293///
28294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
28295#[inline]
28296#[target_feature(enable = "avx512f,avx512vl")]
28297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28298#[cfg_attr(test, assert_instr(vpandq))]
28299pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28300    unsafe {
28301        let and = simd_and(a.as_i64x4(), b.as_i64x4());
28302        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
28303    }
28304}
28305
28306/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28307///
28308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
28309#[inline]
28310#[target_feature(enable = "avx512f,avx512vl")]
28311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28312#[cfg_attr(test, assert_instr(vpandq))]
28313pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28314    unsafe {
28315        let and = simd_and(a.as_i64x2(), b.as_i64x2());
28316        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
28317    }
28318}
28319
28320/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28321///
28322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
28323#[inline]
28324#[target_feature(enable = "avx512f,avx512vl")]
28325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28326#[cfg_attr(test, assert_instr(vpandq))]
28327pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28328    unsafe {
28329        let and = simd_and(a.as_i64x2(), b.as_i64x2());
28330        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
28331    }
28332}
28333
28334/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
28335///
28336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
28337#[inline]
28338#[target_feature(enable = "avx512f")]
28339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28340#[cfg_attr(test, assert_instr(vpandq))]
28341pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
28342    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28343}
28344
28345/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28346///
28347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
28348#[inline]
28349#[target_feature(enable = "avx512f")]
28350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28351#[cfg_attr(test, assert_instr(vporq))]
28352pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
28353    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28354}
28355
28356/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28357///
28358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
28359#[inline]
28360#[target_feature(enable = "avx512f")]
28361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28362#[cfg_attr(test, assert_instr(vpord))]
28363pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28364    unsafe {
28365        let or = _mm512_or_epi32(a, b).as_i32x16();
28366        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
28367    }
28368}
28369
28370/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28371///
28372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
28373#[inline]
28374#[target_feature(enable = "avx512f")]
28375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28376#[cfg_attr(test, assert_instr(vpord))]
28377pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28378    unsafe {
28379        let or = _mm512_or_epi32(a, b).as_i32x16();
28380        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
28381    }
28382}
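
// Sketch (ours, cfg(test)-only, never invoked): the writemask OR computes
// `a | b` per lane and keeps the corresponding lane of `src` wherever the
// mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_or_epi32_sketch() {
    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_epi32(0b0101);
    let b = _mm512_set1_epi32(0b0011);
    // Lanes 0..=3 get 0b0101 | 0b0011 = 0b0111; lanes 4..=15 keep `src` (-1).
    let r = _mm512_mask_or_epi32(src, 0b0000_0000_0000_1111, a, b);
    let e = _mm512_setr_epi32(7, 7, 7, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xFFFF);
}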
28383
28384/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28385///
28386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
28387#[inline]
28388#[target_feature(enable = "avx512f,avx512vl")]
28389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28390#[cfg_attr(test, assert_instr(vor))] //should be vpord
28391pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
28392    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
28393}
28394
28395/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28396///
28397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
28398#[inline]
28399#[target_feature(enable = "avx512f,avx512vl")]
28400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28401#[cfg_attr(test, assert_instr(vpord))]
28402pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28403    unsafe {
28404        let or = _mm256_or_epi32(a, b).as_i32x8();
28405        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
28406    }
28407}
28408
28409/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28410///
28411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
28412#[inline]
28413#[target_feature(enable = "avx512f,avx512vl")]
28414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28415#[cfg_attr(test, assert_instr(vpord))]
28416pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28417    unsafe {
28418        let or = _mm256_or_epi32(a, b).as_i32x8();
28419        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
28420    }
28421}
28422
28423/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28424///
28425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
28426#[inline]
28427#[target_feature(enable = "avx512f,avx512vl")]
28428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28429#[cfg_attr(test, assert_instr(vor))] //should be vpord
28430pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
28431    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
28432}
28433
28434/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28435///
28436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
28437#[inline]
28438#[target_feature(enable = "avx512f,avx512vl")]
28439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28440#[cfg_attr(test, assert_instr(vpord))]
28441pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28442    unsafe {
28443        let or = _mm_or_epi32(a, b).as_i32x4();
28444        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
28445    }
28446}
28447
28448/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28449///
28450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
28451#[inline]
28452#[target_feature(enable = "avx512f,avx512vl")]
28453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28454#[cfg_attr(test, assert_instr(vpord))]
28455pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28456    unsafe {
28457        let or = _mm_or_epi32(a, b).as_i32x4();
28458        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
28459    }
28460}
28461
28462/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28463///
28464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
28465#[inline]
28466#[target_feature(enable = "avx512f")]
28467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28468#[cfg_attr(test, assert_instr(vporq))]
28469pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
28470    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
28471}
28472
28473/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28474///
28475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
28476#[inline]
28477#[target_feature(enable = "avx512f")]
28478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28479#[cfg_attr(test, assert_instr(vporq))]
28480pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28481    unsafe {
28482        let or = _mm512_or_epi64(a, b).as_i64x8();
28483        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
28484    }
28485}
28486
28487/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28488///
28489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
28490#[inline]
28491#[target_feature(enable = "avx512f")]
28492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28493#[cfg_attr(test, assert_instr(vporq))]
28494pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28495    unsafe {
28496        let or = _mm512_or_epi64(a, b).as_i64x8();
28497        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
28498    }
28499}
28500
28501/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28502///
28503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
28504#[inline]
28505#[target_feature(enable = "avx512f,avx512vl")]
28506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28507#[cfg_attr(test, assert_instr(vor))] //should be vporq
28508pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
28509    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
28510}
28511
28512/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28513///
28514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
28515#[inline]
28516#[target_feature(enable = "avx512f,avx512vl")]
28517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28518#[cfg_attr(test, assert_instr(vporq))]
28519pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28520    unsafe {
28521        let or = _mm256_or_epi64(a, b).as_i64x4();
28522        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
28523    }
28524}
28525
28526/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28527///
28528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
28529#[inline]
28530#[target_feature(enable = "avx512f,avx512vl")]
28531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28532#[cfg_attr(test, assert_instr(vporq))]
28533pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28534    unsafe {
28535        let or = _mm256_or_epi64(a, b).as_i64x4();
28536        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
28537    }
28538}
28539
28540/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28541///
28542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
28543#[inline]
28544#[target_feature(enable = "avx512f,avx512vl")]
28545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28546#[cfg_attr(test, assert_instr(vor))] //should be vporq
28547pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
28548    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
28549}
28550
28551/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28552///
28553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
28554#[inline]
28555#[target_feature(enable = "avx512f,avx512vl")]
28556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28557#[cfg_attr(test, assert_instr(vporq))]
28558pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28559    unsafe {
28560        let or = _mm_or_epi64(a, b).as_i64x2();
28561        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
28562    }
28563}
28564
28565/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28566///
28567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
28568#[inline]
28569#[target_feature(enable = "avx512f,avx512vl")]
28570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28571#[cfg_attr(test, assert_instr(vporq))]
28572pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28573    unsafe {
28574        let or = _mm_or_epi64(a, b).as_i64x2();
28575        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
28576    }
28577}
28578
28579/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
28580///
28581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
28582#[inline]
28583#[target_feature(enable = "avx512f")]
28584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28585#[cfg_attr(test, assert_instr(vporq))]
28586pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
28587    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28588}
28589
28590/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28591///
28592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
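///
/// A minimal usage sketch with made-up values (nightly `stdarch_x86_avx512` and an
/// AVX-512F CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(0b0101);
///     let b = _mm512_set1_epi32(0b0011);
///     // Every 32-bit lane becomes 0b0101 ^ 0b0011 == 0b0110.
///     let r = _mm512_xor_epi32(a, b);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0b0110)), 0xffff);
/// }
/// ```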
28593#[inline]
28594#[target_feature(enable = "avx512f")]
28595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28596#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
28597pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
28598    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28599}
28600
28601/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28602///
28603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
28604#[inline]
28605#[target_feature(enable = "avx512f")]
28606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28607#[cfg_attr(test, assert_instr(vpxord))]
28608pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28609    unsafe {
28610        let xor = _mm512_xor_epi32(a, b).as_i32x16();
28611        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
28612    }
28613}
28614
28615/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28616///
28617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
28618#[inline]
28619#[target_feature(enable = "avx512f")]
28620#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28621#[cfg_attr(test, assert_instr(vpxord))]
28622pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28623    unsafe {
28624        let xor = _mm512_xor_epi32(a, b).as_i32x16();
28625        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
28626    }
28627}
28628
28629/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28630///
28631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
28632#[inline]
28633#[target_feature(enable = "avx512f,avx512vl")]
28634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28635#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28636pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
28637    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
28638}
28639
28640/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28641///
28642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
28643#[inline]
28644#[target_feature(enable = "avx512f,avx512vl")]
28645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28646#[cfg_attr(test, assert_instr(vpxord))]
28647pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28648    unsafe {
28649        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28650        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
28651    }
28652}
28653
28654/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28655///
28656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
28657#[inline]
28658#[target_feature(enable = "avx512f,avx512vl")]
28659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28660#[cfg_attr(test, assert_instr(vpxord))]
28661pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28662    unsafe {
28663        let xor = _mm256_xor_epi32(a, b).as_i32x8();
28664        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
28665    }
28666}
28667
28668/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28669///
28670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
28671#[inline]
28672#[target_feature(enable = "avx512f,avx512vl")]
28673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28674#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28675pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
28676    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
28677}
28678
28679/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28680///
28681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
28682#[inline]
28683#[target_feature(enable = "avx512f,avx512vl")]
28684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28685#[cfg_attr(test, assert_instr(vpxord))]
28686pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28687    unsafe {
28688        let xor = _mm_xor_epi32(a, b).as_i32x4();
28689        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
28690    }
28691}
28692
28693/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28694///
28695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
28696#[inline]
28697#[target_feature(enable = "avx512f,avx512vl")]
28698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28699#[cfg_attr(test, assert_instr(vpxord))]
28700pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28701    unsafe {
28702        let xor = _mm_xor_epi32(a, b).as_i32x4();
28703        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
28704    }
28705}
28706
28707/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28708///
28709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
28710#[inline]
28711#[target_feature(enable = "avx512f")]
28712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28713#[cfg_attr(test, assert_instr(vpxorq))]
28714pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
28715    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
28716}
28717
28718/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28719///
28720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
28721#[inline]
28722#[target_feature(enable = "avx512f")]
28723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28724#[cfg_attr(test, assert_instr(vpxorq))]
28725pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28726    unsafe {
28727        let xor = _mm512_xor_epi64(a, b).as_i64x8();
28728        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
28729    }
28730}
28731
28732/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28733///
28734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
28735#[inline]
28736#[target_feature(enable = "avx512f")]
28737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28738#[cfg_attr(test, assert_instr(vpxorq))]
28739pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28740    unsafe {
28741        let xor = _mm512_xor_epi64(a, b).as_i64x8();
28742        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
28743    }
28744}
28745
28746/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28747///
28748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
28749#[inline]
28750#[target_feature(enable = "avx512f,avx512vl")]
28751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28752#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28753pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
28754    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
28755}
28756
28757/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28758///
28759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
28760#[inline]
28761#[target_feature(enable = "avx512f,avx512vl")]
28762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28763#[cfg_attr(test, assert_instr(vpxorq))]
28764pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28765    unsafe {
28766        let xor = _mm256_xor_epi64(a, b).as_i64x4();
28767        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
28768    }
28769}
28770
28771/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28772///
28773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
28774#[inline]
28775#[target_feature(enable = "avx512f,avx512vl")]
28776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28777#[cfg_attr(test, assert_instr(vpxorq))]
28778pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28779    unsafe {
28780        let xor = _mm256_xor_epi64(a, b).as_i64x4();
28781        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
28782    }
28783}
28784
28785/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28786///
28787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
28788#[inline]
28789#[target_feature(enable = "avx512f,avx512vl")]
28790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28791#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28792pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
28793    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
28794}
28795
28796/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28797///
28798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
28799#[inline]
28800#[target_feature(enable = "avx512f,avx512vl")]
28801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28802#[cfg_attr(test, assert_instr(vpxorq))]
28803pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28804    unsafe {
28805        let xor = _mm_xor_epi64(a, b).as_i64x2();
28806        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
28807    }
28808}
28809
28810/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28811///
28812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
28813#[inline]
28814#[target_feature(enable = "avx512f,avx512vl")]
28815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28816#[cfg_attr(test, assert_instr(vpxorq))]
28817pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28818    unsafe {
28819        let xor = _mm_xor_epi64(a, b).as_i64x2();
28820        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
28821    }
28822}
28823
28824/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
28825///
28826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
28827#[inline]
28828#[target_feature(enable = "avx512f")]
28829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28830#[cfg_attr(test, assert_instr(vpxorq))]
28831pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
28832    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28833}
28834
28835/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
28836///
28837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
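///
/// A minimal usage sketch with made-up values (nightly `stdarch_x86_avx512` and an
/// AVX-512F CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(0b1100);
///     let b = _mm512_set1_epi32(0b0110);
///     // Every 32-bit lane becomes (!0b1100) & 0b0110 == 0b0010.
///     let r = _mm512_andnot_epi32(a, b);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0b0010)), 0xffff);
/// }
/// ```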
28838#[inline]
28839#[target_feature(enable = "avx512f")]
28840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28841#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
28842pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
28843    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
28844}
28845
28846/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28847///
28848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
28849#[inline]
28850#[target_feature(enable = "avx512f")]
28851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28852#[cfg_attr(test, assert_instr(vpandnd))]
28853pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28854    unsafe {
28855        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
28856        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
28857    }
28858}
28859
28860/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28861///
28862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
28863#[inline]
28864#[target_feature(enable = "avx512f")]
28865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28866#[cfg_attr(test, assert_instr(vpandnd))]
28867pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28868    unsafe {
28869        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
28870        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
28871    }
28872}
28873
28874/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28875///
28876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
28877#[inline]
28878#[target_feature(enable = "avx512f,avx512vl")]
28879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28880#[cfg_attr(test, assert_instr(vpandnd))]
28881pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28882    unsafe {
28883        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
28884        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
28885        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
28886    }
28887}
28888
28889/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28890///
28891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
28892#[inline]
28893#[target_feature(enable = "avx512f,avx512vl")]
28894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28895#[cfg_attr(test, assert_instr(vpandnd))]
28896pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28897    unsafe {
28898        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
28899        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
28900        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
28901    }
28902}
28903
28904/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28905///
28906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
28907#[inline]
28908#[target_feature(enable = "avx512f,avx512vl")]
28909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28910#[cfg_attr(test, assert_instr(vpandnd))]
28911pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28912    unsafe {
28913        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
28914        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
28915        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
28916    }
28917}
28918
28919/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28920///
28921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
28922#[inline]
28923#[target_feature(enable = "avx512f,avx512vl")]
28924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28925#[cfg_attr(test, assert_instr(vpandnd))]
28926pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28927    unsafe {
28928        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
28929        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
28930        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
28931    }
28932}
28933
28934/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
28935///
28936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
28937#[inline]
28938#[target_feature(enable = "avx512f")]
28939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28940#[cfg_attr(test, assert_instr(vpandnq))]
28941pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
28942    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
28943}
28944
28945/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28946///
28947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
28948#[inline]
28949#[target_feature(enable = "avx512f")]
28950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28951#[cfg_attr(test, assert_instr(vpandnq))]
28952pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28953    unsafe {
28954        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
28955        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
28956    }
28957}
28958
28959/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28960///
28961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
28962#[inline]
28963#[target_feature(enable = "avx512f")]
28964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28965#[cfg_attr(test, assert_instr(vpandnq))]
28966pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28967    unsafe {
28968        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
28969        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
28970    }
28971}
28972
28973/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28974///
28975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
28976#[inline]
28977#[target_feature(enable = "avx512f,avx512vl")]
28978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28979#[cfg_attr(test, assert_instr(vpandnq))]
28980pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28981    unsafe {
28982        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
28983        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
28984        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
28985    }
28986}
28987
28988/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28989///
28990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
28991#[inline]
28992#[target_feature(enable = "avx512f,avx512vl")]
28993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28994#[cfg_attr(test, assert_instr(vpandnq))]
28995pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28996    unsafe {
28997        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
28998        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
28999        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
29000    }
29001}
29002
29003/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29004///
29005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
29006#[inline]
29007#[target_feature(enable = "avx512f,avx512vl")]
29008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29009#[cfg_attr(test, assert_instr(vpandnq))]
29010pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29011    unsafe {
29012        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
29013        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
29014        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
29015    }
29016}
29017
29018/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29019///
29020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
29021#[inline]
29022#[target_feature(enable = "avx512f,avx512vl")]
29023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29024#[cfg_attr(test, assert_instr(vpandnq))]
29025pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29026    unsafe {
29027        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
29028        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
29029        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
29030    }
29031}
29032
29033/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
29034///
29035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
29036#[inline]
29037#[target_feature(enable = "avx512f")]
29038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29039#[cfg_attr(test, assert_instr(vpandnq))]
29040pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
29041    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
29042}
29043
29044/// Convert 16-bit mask a into an integer value, and store the result in dst.
29045///
29046/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
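///
/// A minimal sketch with a made-up mask value, together with the sibling
/// `_cvtu32_mask16` defined below (nightly `stdarch_x86_avx512` and an AVX-512F
/// CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let k: __mmask16 = 0b1000_0000_0000_0001;
///     assert_eq!(_cvtmask16_u32(k), 0x8001);
///     assert_eq!(_cvtu32_mask16(0x8001), k);
/// }
/// ```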
29047#[inline]
29048#[target_feature(enable = "avx512f")]
29049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29050pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
29051    a as u32
29052}
29053
29054/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
29055///
29056/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
29057#[inline]
29058#[target_feature(enable = "avx512f")]
29059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29060pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
29061    a as __mmask16
29062}
29063
29064/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29065///
29066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
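///
/// A minimal sketch with made-up mask values (nightly `stdarch_x86_avx512` and an
/// AVX-512F CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a: __mmask16 = 0b1010_1010_1010_1010;
///     let b: __mmask16 = 0b1100_1100_1100_1100;
///     // Bitwise AND of the two 16-bit masks.
///     assert_eq!(_kand_mask16(a, b), 0b1000_1000_1000_1000);
/// }
/// ```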
29067#[inline]
29068#[target_feature(enable = "avx512f")]
29069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29070#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29071pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29072    a & b
29073}
29074
29075/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29076///
29077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
29078#[inline]
29079#[target_feature(enable = "avx512f")]
29080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29081#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29082pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
29083    a & b
29084}
29085
29086/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29087///
29088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
29089#[inline]
29090#[target_feature(enable = "avx512f")]
29091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29092#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29093pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29094    a | b
29095}
29096
29097/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29098///
29099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
29100#[inline]
29101#[target_feature(enable = "avx512f")]
29102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29103#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29104pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
29105    a | b
29106}
29107
29108/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29109///
29110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
29111#[inline]
29112#[target_feature(enable = "avx512f")]
29113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29114#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29115pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29116    a ^ b
29117}
29118
29119/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29120///
29121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
29122#[inline]
29123#[target_feature(enable = "avx512f")]
29124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29125#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29126pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
29127    a ^ b
29128}
29129
29130/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29131///
29132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
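///
/// A minimal sketch with a made-up mask value (nightly `stdarch_x86_avx512` and an
/// AVX-512F CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a: __mmask16 = 0b0000_0000_1111_1111;
///     // All 16 mask bits are flipped.
///     assert_eq!(_knot_mask16(a), 0b1111_1111_0000_0000);
/// }
/// ```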
29133#[inline]
29134#[target_feature(enable = "avx512f")]
29135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29136pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
29137    a ^ 0b11111111_11111111
29138}
29139
29140/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29141///
29142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
29143#[inline]
29144#[target_feature(enable = "avx512f")]
29145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29146pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
29147    a ^ 0b11111111_11111111
29148}
29149
29150/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29151///
29152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
29153#[inline]
29154#[target_feature(enable = "avx512f")]
29155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29156#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29157pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29158    _mm512_kand(_mm512_knot(a), b)
29159}
29160
29161/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29162///
29163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
29164#[inline]
29165#[target_feature(enable = "avx512f")]
29166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29167#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29168pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
29169    _mm512_kand(_mm512_knot(a), b)
29170}
29171
29172/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29173///
29174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
29175#[inline]
29176#[target_feature(enable = "avx512f")]
29177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29178#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29179pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29180    _mm512_knot(_mm512_kxor(a, b))
29181}
29182
29183/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29184///
29185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
29186#[inline]
29187#[target_feature(enable = "avx512f")]
29188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29189#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29190pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
29191    _mm512_knot(_mm512_kxor(a, b))
29192}
29193
29194/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29195/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
29196///
29197/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
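///
/// A minimal sketch with made-up mask values (nightly `stdarch_x86_avx512` and an
/// AVX-512F CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let mut all_ones = 0u8;
///     // 0x00ff | 0xff00 == 0xffff, so `all_ones` is set to 1 and the
///     // "all zeros" result is 0.
///     let zero = _kortest_mask16_u8(0x00ff, 0xff00, &mut all_ones);
///     assert_eq!((zero, all_ones), (0, 1));
/// }
/// ```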
29198#[inline]
29199#[target_feature(enable = "avx512f")]
29200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29201pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
29202    let tmp = _kor_mask16(a, b);
29203    *all_ones = (tmp == 0xffff) as u8;
29204    (tmp == 0) as u8
29205}
29206
29207/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
29208/// store 0 in dst.
29209///
29210/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
29211#[inline]
29212#[target_feature(enable = "avx512f")]
29213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29214pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29215    (_kor_mask16(a, b) == 0xffff) as u8
29216}
29217
29218/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29219/// store 0 in dst.
29220///
29221/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
29222#[inline]
29223#[target_feature(enable = "avx512f")]
29224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29225pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29226    (_kor_mask16(a, b) == 0) as u8
29227}
29228
29229/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
29230///
29231/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
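///
/// A minimal sketch with a made-up mask value; the shift count is passed as the
/// usual immediate-style second argument (nightly `stdarch_x86_avx512` and an
/// AVX-512F CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a: __mmask16 = 0b0000_0000_0000_0011;
///     // Shift the mask left by 2 bits, filling with zeros.
///     assert_eq!(_kshiftli_mask16(a, 2), 0b0000_0000_0000_1100);
/// }
/// ```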
29232#[inline]
29233#[target_feature(enable = "avx512f")]
29234#[rustc_legacy_const_generics(1)]
29235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29236pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29237    a << COUNT
29238}
29239
29240/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
29241///
29242/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
29243#[inline]
29244#[target_feature(enable = "avx512f")]
29245#[rustc_legacy_const_generics(1)]
29246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29247pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29248    a >> COUNT
29249}
29250
29251/// Load 16-bit mask from memory.
29252///
29253/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
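///
/// A minimal round-trip sketch using the sibling `_store_mask16` defined below
/// (nightly `stdarch_x86_avx512` and an AVX-512F CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let mut slot: __mmask16 = 0;
///     // Store a mask to memory, then load it back.
///     _store_mask16(&mut slot, 0b1111_0000_1111_0000);
///     assert_eq!(_load_mask16(&slot), 0b1111_0000_1111_0000);
/// }
/// ```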
29254#[inline]
29255#[target_feature(enable = "avx512f")]
29256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29257pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
29258    *mem_addr
29259}
29260
29261/// Store 16-bit mask to memory.
29262///
29263/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
29264#[inline]
29265#[target_feature(enable = "avx512f")]
29266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29267pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
29268    *mem_addr = a;
29269}
29270
29271/// Copy 16-bit mask a to k.
29272///
29273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
29274#[inline]
29275#[target_feature(enable = "avx512f")]
29276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29277#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29278pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
29279    a
29280}
29281
29282/// Converts integer mask into bitmask, storing the result in dst.
29283///
29284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
29285#[inline]
29286#[target_feature(enable = "avx512f")]
29287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29288pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
29289    mask as u16
29290}
29291
29292/// Converts bit mask k1 into an integer value, storing the results in dst.
29293///
29294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
29295#[inline]
29296#[target_feature(enable = "avx512f")]
29297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29298#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29299pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
29300    k1 as i32
29301}
29302
29303/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
29304///
29305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
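///
/// A minimal sketch with made-up mask values (nightly `stdarch_x86_avx512` and an
/// AVX-512F CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     // The low byte of `a` becomes the high byte of the result, and the
///     // low byte of `b` becomes the low byte.
///     assert_eq!(_mm512_kunpackb(0x1234, 0x56ab), 0x34ab);
/// }
/// ```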
29306#[inline]
29307#[target_feature(enable = "avx512f")]
29308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29309#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
29310pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
29311    ((a & 0xff) << 8) | (b & 0xff)
29312}
29313
29314/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
29315///
29316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
29317#[inline]
29318#[target_feature(enable = "avx512f")]
29319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29320#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
29321pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
29322    let r = (a | b) == 0b11111111_11111111;
29323    r as i32
29324}
29325
29326/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
29327///
29328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
29329#[inline]
29330#[target_feature(enable = "avx512f")]
29331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29332#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
29333pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
29334    let r = (a | b) == 0;
29335    r as i32
29336}
29337
29338/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29339///
29340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
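///
/// A minimal usage sketch with made-up values (nightly `stdarch_x86_avx512` and an
/// AVX-512F CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(0b0101);
///     // 0b0101 & 0b0010 == 0 in every lane, so no mask bits are set.
///     assert_eq!(_mm512_test_epi32_mask(a, _mm512_set1_epi32(0b0010)), 0);
///     // 0b0101 & 0b0110 == 0b0100 != 0, so all 16 mask bits are set.
///     assert_eq!(_mm512_test_epi32_mask(a, _mm512_set1_epi32(0b0110)), 0xffff);
/// }
/// ```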
29341#[inline]
29342#[target_feature(enable = "avx512f")]
29343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29344#[cfg_attr(test, assert_instr(vptestmd))]
29345pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29346    let and = _mm512_and_epi32(a, b);
29347    let zero = _mm512_setzero_si512();
29348    _mm512_cmpneq_epi32_mask(and, zero)
29349}
29350
29351/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29352///
29353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
29354#[inline]
29355#[target_feature(enable = "avx512f")]
29356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29357#[cfg_attr(test, assert_instr(vptestmd))]
29358pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29359    let and = _mm512_and_epi32(a, b);
29360    let zero = _mm512_setzero_si512();
29361    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
29362}
29363
29364/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29365///
29366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
29367#[inline]
29368#[target_feature(enable = "avx512f,avx512vl")]
29369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29370#[cfg_attr(test, assert_instr(vptestmd))]
29371pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29372    let and = _mm256_and_si256(a, b);
29373    let zero = _mm256_setzero_si256();
29374    _mm256_cmpneq_epi32_mask(and, zero)
29375}
29376
29377/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29378///
29379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
29380#[inline]
29381#[target_feature(enable = "avx512f,avx512vl")]
29382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29383#[cfg_attr(test, assert_instr(vptestmd))]
29384pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29385    let and = _mm256_and_si256(a, b);
29386    let zero = _mm256_setzero_si256();
29387    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
29388}
29389
29390/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29391///
29392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
29393#[inline]
29394#[target_feature(enable = "avx512f,avx512vl")]
29395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29396#[cfg_attr(test, assert_instr(vptestmd))]
29397pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29398    let and = _mm_and_si128(a, b);
29399    let zero = _mm_setzero_si128();
29400    _mm_cmpneq_epi32_mask(and, zero)
29401}
29402
29403/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29404///
29405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
29406#[inline]
29407#[target_feature(enable = "avx512f,avx512vl")]
29408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29409#[cfg_attr(test, assert_instr(vptestmd))]
29410pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29411    let and = _mm_and_si128(a, b);
29412    let zero = _mm_setzero_si128();
29413    _mm_mask_cmpneq_epi32_mask(k, and, zero)
29414}
29415
29416/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29417///
29418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
29419#[inline]
29420#[target_feature(enable = "avx512f")]
29421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29422#[cfg_attr(test, assert_instr(vptestmq))]
29423pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29424    let and = _mm512_and_epi64(a, b);
29425    let zero = _mm512_setzero_si512();
29426    _mm512_cmpneq_epi64_mask(and, zero)
29427}
29428
29429/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29430///
29431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
29432#[inline]
29433#[target_feature(enable = "avx512f")]
29434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29435#[cfg_attr(test, assert_instr(vptestmq))]
29436pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29437    let and = _mm512_and_epi64(a, b);
29438    let zero = _mm512_setzero_si512();
29439    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
29440}
29441
29442/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29443///
29444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
29445#[inline]
29446#[target_feature(enable = "avx512f,avx512vl")]
29447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29448#[cfg_attr(test, assert_instr(vptestmq))]
29449pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29450    let and = _mm256_and_si256(a, b);
29451    let zero = _mm256_setzero_si256();
29452    _mm256_cmpneq_epi64_mask(and, zero)
29453}
29454
29455/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29456///
29457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
29458#[inline]
29459#[target_feature(enable = "avx512f,avx512vl")]
29460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29461#[cfg_attr(test, assert_instr(vptestmq))]
29462pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29463    let and = _mm256_and_si256(a, b);
29464    let zero = _mm256_setzero_si256();
29465    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
29466}
29467
29468/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29469///
29470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
29471#[inline]
29472#[target_feature(enable = "avx512f,avx512vl")]
29473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29474#[cfg_attr(test, assert_instr(vptestmq))]
29475pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29476    let and = _mm_and_si128(a, b);
29477    let zero = _mm_setzero_si128();
29478    _mm_cmpneq_epi64_mask(and, zero)
29479}
29480
29481/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29482///
29483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
29484#[inline]
29485#[target_feature(enable = "avx512f,avx512vl")]
29486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29487#[cfg_attr(test, assert_instr(vptestmq))]
29488pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29489    let and = _mm_and_si128(a, b);
29490    let zero = _mm_setzero_si128();
29491    _mm_mask_cmpneq_epi64_mask(k, and, zero)
29492}
29493
29494/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29495///
29496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
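///
/// A minimal usage sketch with made-up values (nightly `stdarch_x86_avx512` and an
/// AVX-512F CPU are assumed, hence `ignore`):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(0b0101);
///     // 0b0101 & 0b0010 == 0 in every lane, so every mask bit is set.
///     assert_eq!(_mm512_testn_epi32_mask(a, _mm512_set1_epi32(0b0010)), 0xffff);
///     // 0b0101 & 0b0100 != 0 in every lane, so no mask bits are set.
///     assert_eq!(_mm512_testn_epi32_mask(a, _mm512_set1_epi32(0b0100)), 0);
/// }
/// ```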
29497#[inline]
29498#[target_feature(enable = "avx512f")]
29499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29500#[cfg_attr(test, assert_instr(vptestnmd))]
29501pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29502    let and = _mm512_and_epi32(a, b);
29503    let zero = _mm512_setzero_si512();
29504    _mm512_cmpeq_epi32_mask(and, zero)
29505}
29506
29507/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29508///
29509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
29510#[inline]
29511#[target_feature(enable = "avx512f")]
29512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29513#[cfg_attr(test, assert_instr(vptestnmd))]
29514pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29515    let and = _mm512_and_epi32(a, b);
29516    let zero = _mm512_setzero_si512();
29517    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
29518}
29519
29520/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29521///
29522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
29523#[inline]
29524#[target_feature(enable = "avx512f,avx512vl")]
29525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29526#[cfg_attr(test, assert_instr(vptestnmd))]
29527pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29528    let and = _mm256_and_si256(a, b);
29529    let zero = _mm256_setzero_si256();
29530    _mm256_cmpeq_epi32_mask(and, zero)
29531}
29532
29533/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29534///
29535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
29536#[inline]
29537#[target_feature(enable = "avx512f,avx512vl")]
29538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29539#[cfg_attr(test, assert_instr(vptestnmd))]
29540pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29541    let and = _mm256_and_si256(a, b);
29542    let zero = _mm256_setzero_si256();
29543    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
29544}
29545
29546/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29547///
29548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
29549#[inline]
29550#[target_feature(enable = "avx512f,avx512vl")]
29551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29552#[cfg_attr(test, assert_instr(vptestnmd))]
29553pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29554    let and = _mm_and_si128(a, b);
29555    let zero = _mm_setzero_si128();
29556    _mm_cmpeq_epi32_mask(and, zero)
29557}
29558
29559/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29560///
29561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
29562#[inline]
29563#[target_feature(enable = "avx512f,avx512vl")]
29564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29565#[cfg_attr(test, assert_instr(vptestnmd))]
29566pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29567    let and = _mm_and_si128(a, b);
29568    let zero = _mm_setzero_si128();
29569    _mm_mask_cmpeq_epi32_mask(k, and, zero)
29570}
29571
29572/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29573///
29574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
29575#[inline]
29576#[target_feature(enable = "avx512f")]
29577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29578#[cfg_attr(test, assert_instr(vptestnmq))]
29579pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29580    let and = _mm512_and_epi64(a, b);
29581    let zero = _mm512_setzero_si512();
29582    _mm512_cmpeq_epi64_mask(and, zero)
29583}
29584
29585/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29586///
29587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
29588#[inline]
29589#[target_feature(enable = "avx512f")]
29590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29591#[cfg_attr(test, assert_instr(vptestnmq))]
29592pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29593    let and = _mm512_and_epi64(a, b);
29594    let zero = _mm512_setzero_si512();
29595    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
29596}
29597
29598/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29599///
29600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
29601#[inline]
29602#[target_feature(enable = "avx512f,avx512vl")]
29603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29604#[cfg_attr(test, assert_instr(vptestnmq))]
29605pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29606    let and = _mm256_and_si256(a, b);
29607    let zero = _mm256_setzero_si256();
29608    _mm256_cmpeq_epi64_mask(and, zero)
29609}
29610
29611/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29612///
29613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
29614#[inline]
29615#[target_feature(enable = "avx512f,avx512vl")]
29616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29617#[cfg_attr(test, assert_instr(vptestnmq))]
29618pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29619    let and = _mm256_and_si256(a, b);
29620    let zero = _mm256_setzero_si256();
29621    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
29622}
29623
29624/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29625///
29626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
29627#[inline]
29628#[target_feature(enable = "avx512f,avx512vl")]
29629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29630#[cfg_attr(test, assert_instr(vptestnmq))]
29631pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29632    let and = _mm_and_si128(a, b);
29633    let zero = _mm_setzero_si128();
29634    _mm_cmpeq_epi64_mask(and, zero)
29635}
29636
29637/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29638///
29639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
29640#[inline]
29641#[target_feature(enable = "avx512f,avx512vl")]
29642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29643#[cfg_attr(test, assert_instr(vptestnmq))]
29644pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29645    let and = _mm_and_si128(a, b);
29646    let zero = _mm_setzero_si128();
29647    _mm_mask_cmpeq_epi64_mask(k, and, zero)
29648}
29649
29650/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29651///
29652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
29653///
29654/// # Safety of non-temporal stores
29655///
29656/// After using this intrinsic, but before any other access to the memory that this intrinsic
29657/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29658/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29659/// return.
29660///
29661/// See [`_mm_sfence`] for details.
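///
/// # Examples
///
/// Illustrative sketch only (marked `ignore`, not run as a doctest); assumes an
/// AVX-512F-capable CPU and the unstable `stdarch_x86_avx512` feature.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // The destination must be 64-byte aligned.
/// #[repr(align(64))]
/// struct Aligned([f32; 16]);
///
/// unsafe {
///     let mut buf = Aligned([0.0; 16]);
///     _mm512_stream_ps(buf.0.as_mut_ptr(), _mm512_set1_ps(1.0));
///     // Make the non-temporal store visible before any other access.
///     _mm_sfence();
///     assert_eq!(buf.0[0], 1.0);
/// }
/// ```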
29662#[inline]
29663#[target_feature(enable = "avx512f")]
29664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29665#[cfg_attr(test, assert_instr(vmovntps))]
29666#[allow(clippy::cast_ptr_alignment)]
29667pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
29668    crate::arch::asm!(
29669        vps!("vmovntps", ",{a}"),
29670        p = in(reg) mem_addr,
29671        a = in(zmm_reg) a,
29672        options(nostack, preserves_flags),
29673    );
29674}
29675
29676/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29677///
29678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
29679///
29680/// # Safety of non-temporal stores
29681///
29682/// After using this intrinsic, but before any other access to the memory that this intrinsic
29683/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29684/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29685/// return.
29686///
29687/// See [`_mm_sfence`] for details.
29688#[inline]
29689#[target_feature(enable = "avx512f")]
29690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29691#[cfg_attr(test, assert_instr(vmovntpd))]
29692#[allow(clippy::cast_ptr_alignment)]
29693pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
29694    crate::arch::asm!(
29695        vps!("vmovntpd", ",{a}"),
29696        p = in(reg) mem_addr,
29697        a = in(zmm_reg) a,
29698        options(nostack, preserves_flags),
29699    );
29700}
29701
29702/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29703///
29704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
29705///
29706/// # Safety of non-temporal stores
29707///
29708/// After using this intrinsic, but before any other access to the memory that this intrinsic
29709/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29710/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29711/// return.
29712///
29713/// See [`_mm_sfence`] for details.
29714#[inline]
29715#[target_feature(enable = "avx512f")]
29716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29717#[cfg_attr(test, assert_instr(vmovntdq))]
29718#[allow(clippy::cast_ptr_alignment)]
29719pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
29720    crate::arch::asm!(
29721        vps!("vmovntdq", ",{a}"),
29722        p = in(reg) mem_addr,
29723        a = in(zmm_reg) a,
29724        options(nostack, preserves_flags),
29725    );
29726}
29727
29728/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
29729/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
29730/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
29731///
29732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
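///
/// # Examples
///
/// Illustrative sketch only (marked `ignore`, not run as a doctest); assumes an
/// AVX-512F-capable CPU and the unstable `stdarch_x86_avx512` feature.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // The source must be 64-byte aligned.
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// unsafe {
///     let data = Aligned([7; 16]);
///     let v = _mm512_stream_load_si512(data.0.as_ptr() as *const __m512i);
///     assert_eq!(_mm512_cmpeq_epi32_mask(v, _mm512_set1_epi32(7)), 0xFFFF);
/// }
/// ```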
29733#[inline]
29734#[target_feature(enable = "avx512f")]
29735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29736pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
29737    let dst: __m512i;
29738    crate::arch::asm!(
29739        vpl!("vmovntdqa {a}"),
29740        a = out(zmm_reg) dst,
29741        p = in(reg) mem_addr,
29742        options(pure, readonly, nostack, preserves_flags),
29743    );
29744    dst
29745}
29746
29747/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
29748///
29749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
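///
/// # Examples
///
/// Illustrative sketch only (marked `ignore`, not run as a doctest); assumes an
/// AVX-512F-capable CPU and the unstable `stdarch_x86_avx512` feature.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let v = _mm512_set_ps(
///         15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0,
///         7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0,
///     );
///     // The last argument lands in the lowest lane.
///     assert_eq!(_mm_cvtss_f32(_mm512_castps512_ps128(v)), 0.0);
/// }
/// ```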
29750#[inline]
29751#[target_feature(enable = "avx512f")]
29752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29753pub fn _mm512_set_ps(
29754    e0: f32,
29755    e1: f32,
29756    e2: f32,
29757    e3: f32,
29758    e4: f32,
29759    e5: f32,
29760    e6: f32,
29761    e7: f32,
29762    e8: f32,
29763    e9: f32,
29764    e10: f32,
29765    e11: f32,
29766    e12: f32,
29767    e13: f32,
29768    e14: f32,
29769    e15: f32,
29770) -> __m512 {
29771    _mm512_setr_ps(
29772        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
29773    )
29774}
29775
29776/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values in
29777/// reverse order.
29778///
29779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
29780#[inline]
29781#[target_feature(enable = "avx512f")]
29782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29783pub fn _mm512_setr_ps(
29784    e0: f32,
29785    e1: f32,
29786    e2: f32,
29787    e3: f32,
29788    e4: f32,
29789    e5: f32,
29790    e6: f32,
29791    e7: f32,
29792    e8: f32,
29793    e9: f32,
29794    e10: f32,
29795    e11: f32,
29796    e12: f32,
29797    e13: f32,
29798    e14: f32,
29799    e15: f32,
29800) -> __m512 {
29801    unsafe {
29802        let r = f32x16::new(
29803            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29804        );
29805        transmute(r)
29806    }
29807}
29808
29809/// Broadcast 64-bit float `a` to all elements of `dst`.
29810///
29811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
29812#[inline]
29813#[target_feature(enable = "avx512f")]
29814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29815pub fn _mm512_set1_pd(a: f64) -> __m512d {
29816    unsafe { transmute(f64x8::splat(a)) }
29817}
29818
29819/// Broadcast 32-bit float `a` to all elements of `dst`.
29820///
29821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
29822#[inline]
29823#[target_feature(enable = "avx512f")]
29824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29825pub fn _mm512_set1_ps(a: f32) -> __m512 {
29826    unsafe { transmute(f32x16::splat(a)) }
29827}
29828
29829/// Sets packed 32-bit integers in `dst` with the supplied values.
29830///
29831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
29832#[inline]
29833#[target_feature(enable = "avx512f")]
29834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29835pub fn _mm512_set_epi32(
29836    e15: i32,
29837    e14: i32,
29838    e13: i32,
29839    e12: i32,
29840    e11: i32,
29841    e10: i32,
29842    e9: i32,
29843    e8: i32,
29844    e7: i32,
29845    e6: i32,
29846    e5: i32,
29847    e4: i32,
29848    e3: i32,
29849    e2: i32,
29850    e1: i32,
29851    e0: i32,
29852) -> __m512i {
29853    _mm512_setr_epi32(
29854        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29855    )
29856}
29857
29858/// Broadcast 8-bit integer a to all elements of dst.
29859///
29860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
29861#[inline]
29862#[target_feature(enable = "avx512f")]
29863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29864pub fn _mm512_set1_epi8(a: i8) -> __m512i {
29865    unsafe { transmute(i8x64::splat(a)) }
29866}
29867
29868/// Broadcast 16-bit integer `a` to all elements of `dst`.
29869///
29870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
29871#[inline]
29872#[target_feature(enable = "avx512f")]
29873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29874pub fn _mm512_set1_epi16(a: i16) -> __m512i {
29875    unsafe { transmute(i16x32::splat(a)) }
29876}
29877
29878/// Broadcast 32-bit integer `a` to all elements of `dst`.
29879///
29880/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
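///
/// # Examples
///
/// Illustrative sketch only (marked `ignore`, not run as a doctest); assumes an
/// AVX-512F-capable CPU and the unstable `stdarch_x86_avx512` feature.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // All sixteen 32-bit lanes hold the same value.
///     let v = _mm512_set1_epi32(42);
///     assert_eq!(_mm512_cmpeq_epi32_mask(v, _mm512_set1_epi32(42)), 0xFFFF);
/// }
/// ```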
29881#[inline]
29882#[target_feature(enable = "avx512f")]
29883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29884pub fn _mm512_set1_epi32(a: i32) -> __m512i {
29885    unsafe { transmute(i32x16::splat(a)) }
29886}
29887
29888/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29889///
29890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
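///
/// # Examples
///
/// Illustrative sketch only (marked `ignore`, not run as a doctest); assumes an
/// AVX-512F-capable CPU and the unstable `stdarch_x86_avx512` feature.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm512_set1_epi32(-1);
///     // Only the even lanes receive the broadcast value; odd lanes keep `src`.
///     let v = _mm512_mask_set1_epi32(src, 0b0101_0101_0101_0101, 7);
///     let k = _mm512_cmpeq_epi32_mask(v, _mm512_set1_epi32(7));
///     assert_eq!(k, 0b0101_0101_0101_0101);
/// }
/// ```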
29891#[inline]
29892#[target_feature(enable = "avx512f")]
29893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29894#[cfg_attr(test, assert_instr(vpbroadcastd))]
29895pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
29896    unsafe {
29897        let r = _mm512_set1_epi32(a).as_i32x16();
29898        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29899    }
29900}
29901
29902/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29903///
29904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
29905#[inline]
29906#[target_feature(enable = "avx512f")]
29907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29908#[cfg_attr(test, assert_instr(vpbroadcastd))]
29909pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
29910    unsafe {
29911        let r = _mm512_set1_epi32(a).as_i32x16();
29912        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
29913    }
29914}
29915
29916/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29917///
29918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
29919#[inline]
29920#[target_feature(enable = "avx512f,avx512vl")]
29921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29922#[cfg_attr(test, assert_instr(vpbroadcastd))]
29923pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
29924    unsafe {
29925        let r = _mm256_set1_epi32(a).as_i32x8();
29926        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
29927    }
29928}
29929
29930/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29931///
29932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
29933#[inline]
29934#[target_feature(enable = "avx512f,avx512vl")]
29935#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29936#[cfg_attr(test, assert_instr(vpbroadcastd))]
29937pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
29938    unsafe {
29939        let r = _mm256_set1_epi32(a).as_i32x8();
29940        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
29941    }
29942}
29943
29944/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29945///
29946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
29947#[inline]
29948#[target_feature(enable = "avx512f,avx512vl")]
29949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29950#[cfg_attr(test, assert_instr(vpbroadcastd))]
29951pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
29952    unsafe {
29953        let r = _mm_set1_epi32(a).as_i32x4();
29954        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
29955    }
29956}
29957
29958/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29959///
29960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
29961#[inline]
29962#[target_feature(enable = "avx512f,avx512vl")]
29963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29964#[cfg_attr(test, assert_instr(vpbroadcastd))]
29965pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
29966    unsafe {
29967        let r = _mm_set1_epi32(a).as_i32x4();
29968        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
29969    }
29970}
29971
29972/// Broadcast 64-bit integer `a` to all elements of `dst`.
29973///
29974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
29975#[inline]
29976#[target_feature(enable = "avx512f")]
29977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29978pub fn _mm512_set1_epi64(a: i64) -> __m512i {
29979    unsafe { transmute(i64x8::splat(a)) }
29980}
29981
29982/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29983///
29984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
29985#[inline]
29986#[target_feature(enable = "avx512f")]
29987#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29988#[cfg_attr(test, assert_instr(vpbroadcastq))]
29989pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
29990    unsafe {
29991        let r = _mm512_set1_epi64(a).as_i64x8();
29992        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
29993    }
29994}
29995
29996/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29997///
29998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
29999#[inline]
30000#[target_feature(enable = "avx512f")]
30001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30002#[cfg_attr(test, assert_instr(vpbroadcastq))]
30003pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
30004    unsafe {
30005        let r = _mm512_set1_epi64(a).as_i64x8();
30006        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
30007    }
30008}
30009
30010/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30011///
30012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
30013#[inline]
30014#[target_feature(enable = "avx512f,avx512vl")]
30015#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30016#[cfg_attr(test, assert_instr(vpbroadcastq))]
30017pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
30018    unsafe {
30019        let r = _mm256_set1_epi64x(a).as_i64x4();
30020        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
30021    }
30022}
30023
30024/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30025///
30026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
30027#[inline]
30028#[target_feature(enable = "avx512f,avx512vl")]
30029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30030#[cfg_attr(test, assert_instr(vpbroadcastq))]
30031pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
30032    unsafe {
30033        let r = _mm256_set1_epi64x(a).as_i64x4();
30034        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
30035    }
30036}
30037
30038/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30039///
30040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
30041#[inline]
30042#[target_feature(enable = "avx512f,avx512vl")]
30043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30044#[cfg_attr(test, assert_instr(vpbroadcastq))]
30045pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
30046    unsafe {
30047        let r = _mm_set1_epi64x(a).as_i64x2();
30048        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
30049    }
30050}
30051
30052/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30053///
30054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
30055#[inline]
30056#[target_feature(enable = "avx512f,avx512vl")]
30057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30058#[cfg_attr(test, assert_instr(vpbroadcastq))]
30059pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
30060    unsafe {
30061        let r = _mm_set1_epi64x(a).as_i64x2();
30062        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
30063    }
30064}
30065
30066/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
30067///
30068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
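///
/// # Examples
///
/// Illustrative sketch only (marked `ignore`, not run as a doctest); assumes an
/// AVX-512F-capable CPU and the unstable `stdarch_x86_avx512` feature.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // The four values repeat twice, lowest lane first: 1, 2, 3, 4, 1, 2, 3, 4.
///     let v = _mm512_set4_epi64(4, 3, 2, 1);
///     let expected = _mm512_setr_epi64(1, 2, 3, 4, 1, 2, 3, 4);
///     assert_eq!(_mm512_cmpeq_epi64_mask(v, expected), 0xFF);
/// }
/// ```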
30069#[inline]
30070#[target_feature(enable = "avx512f")]
30071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30072pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
30073    _mm512_set_epi64(d, c, b, a, d, c, b, a)
30074}
30075
30076/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
30077///
30078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
30079#[inline]
30080#[target_feature(enable = "avx512f")]
30081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30082pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
30083    _mm512_set_epi64(a, b, c, d, a, b, c, d)
30084}
30085
30086/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30087///
30088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
30089#[inline]
30090#[target_feature(enable = "avx512f")]
30091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30092#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30093pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30094    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
30095}
30096
30097/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30098///
30099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
30100#[inline]
30101#[target_feature(enable = "avx512f")]
30102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30103#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30104pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30105    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
30106}
30107
30108/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30109///
30110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
30111#[inline]
30112#[target_feature(enable = "avx512f")]
30113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30114#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30115pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30116    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
30117}
30118
30119/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30120///
30121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
30122#[inline]
30123#[target_feature(enable = "avx512f")]
30124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30125#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30126pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30127    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
30128}
30129
30130/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30131///
30132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
30133#[inline]
30134#[target_feature(enable = "avx512f")]
30135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30136#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30137pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30138    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
30139}
30140
30141/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30142///
30143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
30144#[inline]
30145#[target_feature(enable = "avx512f")]
30146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30147#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30148pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30149    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
30150}
30151
30152/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30153///
30154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
30155#[inline]
30156#[target_feature(enable = "avx512f")]
30157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30158#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30159pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30160    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
30161}
30162
30163/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30164///
30165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
30166#[inline]
30167#[target_feature(enable = "avx512f")]
30168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30169#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30170pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30171    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
30172}
30173
30174/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30175///
30176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
30177#[inline]
30178#[target_feature(enable = "avx512f")]
30179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30180#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30181pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30182    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
30183}
30184
30185/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30186///
30187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
30188#[inline]
30189#[target_feature(enable = "avx512f")]
30190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30191#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30192pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30193    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
30194}
30195
30196/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30197///
30198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
30199#[inline]
30200#[target_feature(enable = "avx512f")]
30201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30202#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30203pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30204    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
30205}
30206
30207/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30208///
30209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
30210#[inline]
30211#[target_feature(enable = "avx512f")]
30212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30213#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30214pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30215    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
30216}
30217
30218/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30219///
30220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
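///
/// # Examples
///
/// Illustrative sketch only (marked `ignore`, not run as a doctest); assumes an
/// AVX-512F-capable CPU and the unstable `stdarch_x86_avx512` feature.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // _CMP_LT_OS is an ordered less-than, so every lane compares true.
///     let k = _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b);
///     assert_eq!(k, 0xFFFF);
/// }
/// ```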
30221#[inline]
30222#[target_feature(enable = "avx512f")]
30223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30224#[rustc_legacy_const_generics(2)]
30225#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30226pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
30227    unsafe {
30228        static_assert_uimm_bits!(IMM8, 5);
30229        let neg_one = -1;
30230        let a = a.as_f32x16();
30231        let b = b.as_f32x16();
30232        let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30233        r.cast_unsigned()
30234    }
30235}
30236
30237/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30238///
30239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
30240#[inline]
30241#[target_feature(enable = "avx512f")]
30242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30243#[rustc_legacy_const_generics(3)]
30244#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30245pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30246    unsafe {
30247        static_assert_uimm_bits!(IMM8, 5);
30248        let a = a.as_f32x16();
30249        let b = b.as_f32x16();
30250        let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
30251        r.cast_unsigned()
30252    }
30253}
30254
30255/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30256///
30257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
30258#[inline]
30259#[target_feature(enable = "avx512f,avx512vl")]
30260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30261#[rustc_legacy_const_generics(2)]
30262#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30263pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
30264    unsafe {
30265        static_assert_uimm_bits!(IMM8, 5);
30266        let neg_one = -1;
30267        let a = a.as_f32x8();
30268        let b = b.as_f32x8();
30269        let r = vcmpps256(a, b, IMM8, neg_one);
30270        r.cast_unsigned()
30271    }
30272}
30273
30274/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30275///
30276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
30277#[inline]
30278#[target_feature(enable = "avx512f,avx512vl")]
30279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30280#[rustc_legacy_const_generics(3)]
30281#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30282pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
30283    unsafe {
30284        static_assert_uimm_bits!(IMM8, 5);
30285        let a = a.as_f32x8();
30286        let b = b.as_f32x8();
30287        let r = vcmpps256(a, b, IMM8, k1 as i8);
30288        r.cast_unsigned()
30289    }
30290}
30291
30292/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30293///
30294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
30295#[inline]
30296#[target_feature(enable = "avx512f,avx512vl")]
30297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30298#[rustc_legacy_const_generics(2)]
30299#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30300pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30301    unsafe {
30302        static_assert_uimm_bits!(IMM8, 5);
30303        let neg_one = -1;
30304        let a = a.as_f32x4();
30305        let b = b.as_f32x4();
30306        let r = vcmpps128(a, b, IMM8, neg_one);
30307        r.cast_unsigned()
30308    }
30309}
30310
30311/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30312///
30313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
30314#[inline]
30315#[target_feature(enable = "avx512f,avx512vl")]
30316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30317#[rustc_legacy_const_generics(3)]
30318#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30319pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30320    unsafe {
30321        static_assert_uimm_bits!(IMM8, 5);
30322        let a = a.as_f32x4();
30323        let b = b.as_f32x4();
30324        let r = vcmpps128(a, b, IMM8, k1 as i8);
30325        r.cast_unsigned()
30326    }
30327}
30328
30329/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30330/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30331///
30332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
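///
/// # Examples
///
/// Illustrative sketch only (marked `ignore`, not run as a doctest); assumes an
/// AVX-512F-capable CPU and the unstable `stdarch_x86_avx512` feature.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(f32::NAN);
///     let b = _mm512_set1_ps(1.0);
///     // _CMP_UNORD_Q is true when either operand is NaN; _MM_FROUND_NO_EXC
///     // suppresses floating-point exceptions.
///     let k = _mm512_cmp_round_ps_mask::<_CMP_UNORD_Q, _MM_FROUND_NO_EXC>(a, b);
///     assert_eq!(k, 0xFFFF);
/// }
/// ```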
30333#[inline]
30334#[target_feature(enable = "avx512f")]
30335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30336#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30337#[rustc_legacy_const_generics(2, 3)]
30338pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30339    a: __m512,
30340    b: __m512,
30341) -> __mmask16 {
30342    unsafe {
30343        static_assert_uimm_bits!(IMM5, 5);
30344        static_assert_mantissas_sae!(SAE);
30345        let neg_one = -1;
30346        let a = a.as_f32x16();
30347        let b = b.as_f32x16();
30348        let r = vcmpps(a, b, IMM5, neg_one, SAE);
30349        r.cast_unsigned()
30350    }
30351}
30352
30353/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30354/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30355///
30356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
30357#[inline]
30358#[target_feature(enable = "avx512f")]
30359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30360#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30361#[rustc_legacy_const_generics(3, 4)]
30362pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30363    m: __mmask16,
30364    a: __m512,
30365    b: __m512,
30366) -> __mmask16 {
30367    unsafe {
30368        static_assert_uimm_bits!(IMM5, 5);
30369        static_assert_mantissas_sae!(SAE);
30370        let a = a.as_f32x16();
30371        let b = b.as_f32x16();
30372        let r = vcmpps(a, b, IMM5, m as i16, SAE);
30373        r.cast_unsigned()
30374    }
30375}
30376
30377/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30378///
30379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
30380#[inline]
30381#[target_feature(enable = "avx512f")]
30382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30383#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30384pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30385    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
30386}
30387
30388/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30389///
30390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
30391#[inline]
30392#[target_feature(enable = "avx512f")]
30393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30394#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30395pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30396    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
30397}
30398
30399/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30400///
30401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
30402#[inline]
30403#[target_feature(enable = "avx512f")]
30404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30405#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30406pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30407    _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
30408}
30409
30410/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30411///
30412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
30413#[inline]
30414#[target_feature(enable = "avx512f")]
30415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30416#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30417pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30418    _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
30419}
30420
30421/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30422///
30423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
30424#[inline]
30425#[target_feature(enable = "avx512f")]
30426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30427#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30428pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30429    _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
30430}
30431
30432/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30433///
30434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
30435#[inline]
30436#[target_feature(enable = "avx512f")]
30437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30438#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30439pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30440    _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
30441}
30442
30443/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30444///
30445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
30446#[inline]
30447#[target_feature(enable = "avx512f")]
30448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30449#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30450pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30451    _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
30452}
30453
30454/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30455///
30456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
30457#[inline]
30458#[target_feature(enable = "avx512f")]
30459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30460#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30461pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30462    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
30463}
30464
30465/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30466///
30467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
30468#[inline]
30469#[target_feature(enable = "avx512f")]
30470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30471#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30472pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30473    _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
30474}
30475
30476/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30477///
30478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
30479#[inline]
30480#[target_feature(enable = "avx512f")]
30481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30482#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30483pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30484    _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
30485}
30486
30487/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30488///
30489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
30490#[inline]
30491#[target_feature(enable = "avx512f")]
30492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30493#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30494pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30495    _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
30496}
30497
30498/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30499///
30500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
30501#[inline]
30502#[target_feature(enable = "avx512f")]
30503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30504#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30505pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30506    _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
30507}
30508
30509/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30510///
30511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
30512#[inline]
30513#[target_feature(enable = "avx512f")]
30514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30515#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30516pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30517    _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
30518}
30519
30520/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30521///
30522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
30523#[inline]
30524#[target_feature(enable = "avx512f")]
30525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30526#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30527pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30528    _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
30529}
30530
30531/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30532///
30533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
30534#[inline]
30535#[target_feature(enable = "avx512f")]
30536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30537#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30538pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30539    _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
30540}
30541
30542/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30543///
30544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
30545#[inline]
30546#[target_feature(enable = "avx512f")]
30547#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30548#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30549pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30550    _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
30551}
30552
30553/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30554///
30555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
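///
/// # Examples
///
/// Illustrative sketch only (marked `ignore`, not run as a doctest); assumes an
/// AVX-512F-capable CPU and the unstable `stdarch_x86_avx512` feature.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(1.0);
///     // _CMP_EQ_OQ is an ordered, quiet equality; all eight lanes match.
///     let k = _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b);
///     assert_eq!(k, 0xFF);
/// }
/// ```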
30556#[inline]
30557#[target_feature(enable = "avx512f")]
30558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30559#[rustc_legacy_const_generics(2)]
30560#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30561pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
30562    unsafe {
30563        static_assert_uimm_bits!(IMM8, 5);
30564        let neg_one = -1;
30565        let a = a.as_f64x8();
30566        let b = b.as_f64x8();
30567        let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30568        r.cast_unsigned()
30569    }
30570}
30571
30572/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30573///
30574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
30575#[inline]
30576#[target_feature(enable = "avx512f")]
30577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30578#[rustc_legacy_const_generics(3)]
30579#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30580pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30581    unsafe {
30582        static_assert_uimm_bits!(IMM8, 5);
30583        let a = a.as_f64x8();
30584        let b = b.as_f64x8();
30585        let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30586        r.cast_unsigned()
30587    }
30588}
30589
30590/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30591///
30592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
30593#[inline]
30594#[target_feature(enable = "avx512f,avx512vl")]
30595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30596#[rustc_legacy_const_generics(2)]
30597#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30598pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
30599    unsafe {
30600        static_assert_uimm_bits!(IMM8, 5);
30601        let neg_one = -1;
30602        let a = a.as_f64x4();
30603        let b = b.as_f64x4();
30604        let r = vcmppd256(a, b, IMM8, neg_one);
30605        r.cast_unsigned()
30606    }
30607}
30608
30609/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30610///
30611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
30612#[inline]
30613#[target_feature(enable = "avx512f,avx512vl")]
30614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30615#[rustc_legacy_const_generics(3)]
30616#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30617pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
30618    unsafe {
30619        static_assert_uimm_bits!(IMM8, 5);
30620        let a = a.as_f64x4();
30621        let b = b.as_f64x4();
30622        let r = vcmppd256(a, b, IMM8, k1 as i8);
30623        r.cast_unsigned()
30624    }
30625}
30626
30627/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30628///
30629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
30630#[inline]
30631#[target_feature(enable = "avx512f,avx512vl")]
30632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30633#[rustc_legacy_const_generics(2)]
30634#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30635pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30636    unsafe {
30637        static_assert_uimm_bits!(IMM8, 5);
30638        let neg_one = -1;
30639        let a = a.as_f64x2();
30640        let b = b.as_f64x2();
30641        let r = vcmppd128(a, b, IMM8, neg_one);
30642        r.cast_unsigned()
30643    }
30644}
30645
30646/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30647///
30648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
30649#[inline]
30650#[target_feature(enable = "avx512f,avx512vl")]
30651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30652#[rustc_legacy_const_generics(3)]
30653#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30654pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30655    unsafe {
30656        static_assert_uimm_bits!(IMM8, 5);
30657        let a = a.as_f64x2();
30658        let b = b.as_f64x2();
30659        let r = vcmppd128(a, b, IMM8, k1 as i8);
30660        r.cast_unsigned()
30661    }
30662}
30663
30664/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30665/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30666///
30667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
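///
/// A minimal usage sketch (illustrative; assumes the unstable `stdarch_x86_avx512` feature and
/// that the calling code is compiled with the `avx512f` target feature). Passing
/// `_MM_FROUND_NO_EXC` keeps the comparison from raising floating-point exceptions:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm512_setr_pd(1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0);
/// // Even-numbered lanes compare equal, so every other mask bit is set.
/// let k = _mm512_cmp_round_pd_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
/// assert_eq!(k, 0b0101_0101);
/// ```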
30668#[inline]
30669#[target_feature(enable = "avx512f")]
30670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30671#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30672#[rustc_legacy_const_generics(2, 3)]
30673pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30674    a: __m512d,
30675    b: __m512d,
30676) -> __mmask8 {
30677    unsafe {
30678        static_assert_uimm_bits!(IMM5, 5);
30679        static_assert_mantissas_sae!(SAE);
30680        let neg_one = -1;
30681        let a = a.as_f64x8();
30682        let b = b.as_f64x8();
30683        let r = vcmppd(a, b, IMM5, neg_one, SAE);
30684        r.cast_unsigned()
30685    }
30686}
30687
30688/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30689/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30690///
30691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
30692#[inline]
30693#[target_feature(enable = "avx512f")]
30694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30695#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30696#[rustc_legacy_const_generics(3, 4)]
30697pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30698    k1: __mmask8,
30699    a: __m512d,
30700    b: __m512d,
30701) -> __mmask8 {
30702    unsafe {
30703        static_assert_uimm_bits!(IMM5, 5);
30704        static_assert_mantissas_sae!(SAE);
30705        let a = a.as_f64x8();
30706        let b = b.as_f64x8();
30707        let r = vcmppd(a, b, IMM5, k1 as i8, SAE);
30708        r.cast_unsigned()
30709    }
30710}
30711
30712/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30713///
30714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
30715#[inline]
30716#[target_feature(enable = "avx512f")]
30717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30718#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30719pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30720    _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
30721}
30722
30723/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30724///
30725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
30726#[inline]
30727#[target_feature(enable = "avx512f")]
30728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30729#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30730pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30731    _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
30732}
30733
30734/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30735///
30736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
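///
/// A minimal usage sketch (illustrative; assumes the unstable `stdarch_x86_avx512` feature and
/// that the calling code is compiled with the `avx512f` target feature). A lane is reported
/// when either input is NaN:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_pd(1.0, f64::NAN, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
/// let b = _mm512_set1_pd(0.0);
/// // Only lane 1 contains a NaN, so only bit 1 of the mask is set.
/// let k = _mm512_cmpunord_pd_mask(a, b);
/// assert_eq!(k, 0b0000_0010);
/// ```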
30737#[inline]
30738#[target_feature(enable = "avx512f")]
30739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30740#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30741pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30742    _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
30743}
30744
30745/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30746///
30747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
30748#[inline]
30749#[target_feature(enable = "avx512f")]
30750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30751#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30752pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30753    _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
30754}
30755
30756/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30757///
30758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
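///
/// A minimal usage sketch (illustrative; assumes the unstable `stdarch_x86_avx512` feature and
/// that the calling code is compiled with the `avx512f` target feature). Only bit 0 of the
/// returned mask is meaningful:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(2.0);
/// // The lower elements satisfy `a < b`, so mask bit 0 is set.
/// let k = _mm_cmp_ss_mask::<_CMP_LT_OQ>(a, b);
/// assert_eq!(k, 1);
/// ```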
30759#[inline]
30760#[target_feature(enable = "avx512f")]
30761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30762#[rustc_legacy_const_generics(2)]
30763#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30764pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30765    unsafe {
30766        static_assert_uimm_bits!(IMM8, 5);
30767        let neg_one = -1;
30768        let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30769        r.cast_unsigned()
30770    }
30771}
30772
30773/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30774///
30775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
30776#[inline]
30777#[target_feature(enable = "avx512f")]
30778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30779#[rustc_legacy_const_generics(3)]
30780#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30781pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30782    unsafe {
30783        static_assert_uimm_bits!(IMM8, 5);
30784        let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30785        r.cast_unsigned()
30786    }
30787}
30788
30789/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30790/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30791///
30792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
30793#[inline]
30794#[target_feature(enable = "avx512f")]
30795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30796#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30797#[rustc_legacy_const_generics(2, 3)]
30798pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
30799    unsafe {
30800        static_assert_uimm_bits!(IMM5, 5);
30801        static_assert_mantissas_sae!(SAE);
30802        let neg_one = -1;
30803        let r = vcmpss(a, b, IMM5, neg_one, SAE);
30804        r.cast_unsigned()
30805    }
30806}
30807
30808/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30809/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30810///
30811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
30812#[inline]
30813#[target_feature(enable = "avx512f")]
30814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30815#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30816#[rustc_legacy_const_generics(3, 4)]
30817pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
30818    k1: __mmask8,
30819    a: __m128,
30820    b: __m128,
30821) -> __mmask8 {
30822    unsafe {
30823        static_assert_uimm_bits!(IMM5, 5);
30824        static_assert_mantissas_sae!(SAE);
30825        let r = vcmpss(a, b, IMM5, k1 as i8, SAE);
30826        r.cast_unsigned()
30827    }
30828}
30829
30830/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30831///
30832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
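///
/// A minimal usage sketch (illustrative; assumes the unstable `stdarch_x86_avx512` feature and
/// that the calling code is compiled with the `avx512f` target feature):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_sd(2.0);
/// let b = _mm_set_sd(2.0);
/// // The lower elements are equal, so mask bit 0 is set; the upper element is ignored.
/// let k = _mm_cmp_sd_mask::<_CMP_EQ_OQ>(a, b);
/// assert_eq!(k, 1);
/// ```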
30833#[inline]
30834#[target_feature(enable = "avx512f")]
30835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30836#[rustc_legacy_const_generics(2)]
30837#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30838pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30839    unsafe {
30840        static_assert_uimm_bits!(IMM8, 5);
30841        let neg_one = -1;
30842        let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30843        r.cast_unsigned()
30844    }
30845}
30846
30847/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30848///
30849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
30850#[inline]
30851#[target_feature(enable = "avx512f")]
30852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30853#[rustc_legacy_const_generics(3)]
30854#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30855pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30856    unsafe {
30857        static_assert_uimm_bits!(IMM8, 5);
30858        let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30859        r.cast_unsigned()
30860    }
30861}
30862
30863/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30864/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30865///
30866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
30867#[inline]
30868#[target_feature(enable = "avx512f")]
30869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30870#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30871#[rustc_legacy_const_generics(2, 3)]
30872pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30873    unsafe {
30874        static_assert_uimm_bits!(IMM5, 5);
30875        static_assert_mantissas_sae!(SAE);
30876        let neg_one = -1;
30877        let r = vcmpsd(a, b, IMM5, neg_one, SAE);
30878        r.cast_unsigned()
30879    }
30880}
30881
30882/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30883/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30884///
30885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
30886#[inline]
30887#[target_feature(enable = "avx512f")]
30888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30889#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30890#[rustc_legacy_const_generics(3, 4)]
30891pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
30892    k1: __mmask8,
30893    a: __m128d,
30894    b: __m128d,
30895) -> __mmask8 {
30896    unsafe {
30897        static_assert_uimm_bits!(IMM5, 5);
30898        static_assert_mantissas_sae!(SAE);
30899        let r = vcmpsd(a, b, IMM5, k1 as i8, SAE);
30900        r.cast_unsigned()
30901    }
30902}
30903
30904/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30905///
30906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
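///
/// A minimal usage sketch (illustrative; assumes the unstable `stdarch_x86_avx512` feature and
/// that the calling code is compiled with the `avx512f` target feature). The lanes are
/// interpreted as unsigned values:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF in every lane when viewed as unsigned
/// let b = _mm512_set1_epi32(1);
/// // As unsigned integers, u32::MAX is not less than 1, so no mask bit is set.
/// let k = _mm512_cmplt_epu32_mask(a, b);
/// assert_eq!(k, 0);
/// ```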
30907#[inline]
30908#[target_feature(enable = "avx512f")]
30909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30910#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30911pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30912    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
30913}
30914
30915/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30916///
30917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
30918#[inline]
30919#[target_feature(enable = "avx512f")]
30920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30921#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30922pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30923    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30924}
30925
30926/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30927///
30928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
30929#[inline]
30930#[target_feature(enable = "avx512f,avx512vl")]
30931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30932#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30933pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30934    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
30935}
30936
30937/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30938///
30939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
30940#[inline]
30941#[target_feature(enable = "avx512f,avx512vl")]
30942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30943#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30944pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30945    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30946}
30947
30948/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30949///
30950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
30951#[inline]
30952#[target_feature(enable = "avx512f,avx512vl")]
30953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30954#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30955pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30956    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
30957}
30958
30959/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30960///
30961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
30962#[inline]
30963#[target_feature(enable = "avx512f,avx512vl")]
30964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30965#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30966pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30967    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30968}
30969
30970/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30971///
30972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
30973#[inline]
30974#[target_feature(enable = "avx512f")]
30975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30976#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30977pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30978    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
30979}
30980
30981/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30982///
30983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
30984#[inline]
30985#[target_feature(enable = "avx512f")]
30986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30987#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30988pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30989    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30990}
30991
30992/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30993///
30994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
30995#[inline]
30996#[target_feature(enable = "avx512f,avx512vl")]
30997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30998#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30999pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31000    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
31001}
31002
31003/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31004///
31005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
31006#[inline]
31007#[target_feature(enable = "avx512f,avx512vl")]
31008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31009#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31010pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31011    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31012}
31013
31014/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31015///
31016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
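///
/// A minimal usage sketch (illustrative; assumes the unstable `stdarch_x86_avx512` feature and
/// that the calling code is compiled with the `avx512f` and `avx512vl` target features). Only
/// the low four bits of the returned mask are meaningful:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setr_epi32(0, 5, 10, 15);
/// let b = _mm_set1_epi32(7);
/// // Lanes 2 and 3 are greater than 7 when compared as unsigned integers.
/// let k = _mm_cmpgt_epu32_mask(a, b);
/// assert_eq!(k, 0b1100);
/// ```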
31017#[inline]
31018#[target_feature(enable = "avx512f,avx512vl")]
31019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31020#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31021pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31022    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
31023}
31024
31025/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31026///
31027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
31028#[inline]
31029#[target_feature(enable = "avx512f,avx512vl")]
31030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31031#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31032pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31033    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31034}
31035
31036/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31037///
31038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
31039#[inline]
31040#[target_feature(enable = "avx512f")]
31041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31042#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31043pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31044    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
31045}
31046
31047/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31048///
31049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
31050#[inline]
31051#[target_feature(enable = "avx512f")]
31052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31053#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31054pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31055    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31056}
31057
31058/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31059///
31060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
31061#[inline]
31062#[target_feature(enable = "avx512f,avx512vl")]
31063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31064#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31065pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31066    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
31067}
31068
31069/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31070///
31071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
31072#[inline]
31073#[target_feature(enable = "avx512f,avx512vl")]
31074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31075#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31076pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31077    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31078}
31079
31080/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31081///
31082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
31083#[inline]
31084#[target_feature(enable = "avx512f,avx512vl")]
31085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31086#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31087pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31088    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
31089}
31090
31091/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31092///
31093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
31094#[inline]
31095#[target_feature(enable = "avx512f,avx512vl")]
31096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31097#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31098pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31099    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31100}
31101
31102/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31103///
31104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
31105#[inline]
31106#[target_feature(enable = "avx512f")]
31107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31108#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31109pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31110    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
31111}
31112
31113/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31114///
31115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
31116#[inline]
31117#[target_feature(enable = "avx512f")]
31118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31119#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31120pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31121    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31122}
31123
31124/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31125///
31126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
31127#[inline]
31128#[target_feature(enable = "avx512f,avx512vl")]
31129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31130#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31131pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31132    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
31133}
31134
31135/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31136///
31137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
31138#[inline]
31139#[target_feature(enable = "avx512f,avx512vl")]
31140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31141#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31142pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31143    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31144}
31145
31146/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31147///
31148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
31149#[inline]
31150#[target_feature(enable = "avx512f,avx512vl")]
31151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31152#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31153pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31154    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
31155}
31156
31157/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31158///
31159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
31160#[inline]
31161#[target_feature(enable = "avx512f,avx512vl")]
31162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31163#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31164pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31165    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31166}
31167
31168/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31169///
31170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
31171#[inline]
31172#[target_feature(enable = "avx512f")]
31173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31174#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31175pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31176    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
31177}
31178
31179/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31180///
31181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
31182#[inline]
31183#[target_feature(enable = "avx512f")]
31184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31185#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31186pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31187    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31188}
31189
31190/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31191///
31192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
31193#[inline]
31194#[target_feature(enable = "avx512f,avx512vl")]
31195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31196#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31197pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31198    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
31199}
31200
31201/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31202///
31203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
31204#[inline]
31205#[target_feature(enable = "avx512f,avx512vl")]
31206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31207#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31208pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31209    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31210}
31211
31212/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31213///
31214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
31215#[inline]
31216#[target_feature(enable = "avx512f,avx512vl")]
31217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31218#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31219pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31220    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
31221}
31222
31223/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31224///
31225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
31226#[inline]
31227#[target_feature(enable = "avx512f,avx512vl")]
31228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31229#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31230pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31231    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31232}
31233
31234/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31235///
31236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
31237#[inline]
31238#[target_feature(enable = "avx512f")]
31239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31240#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31241pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31242    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
31243}
31244
31245/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31246///
31247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
31248#[inline]
31249#[target_feature(enable = "avx512f")]
31250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31251#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31252pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31253    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31254}
31255
31256/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31257///
31258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
31259#[inline]
31260#[target_feature(enable = "avx512f,avx512vl")]
31261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31262#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31263pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31264    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
31265}
31266
31267/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31268///
31269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
31270#[inline]
31271#[target_feature(enable = "avx512f,avx512vl")]
31272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31273#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31274pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31275    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31276}
31277
31278/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31279///
31280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
31281#[inline]
31282#[target_feature(enable = "avx512f,avx512vl")]
31283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31284#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31285pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31286    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
31287}
31288
31289/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31290///
31291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
31292#[inline]
31293#[target_feature(enable = "avx512f,avx512vl")]
31294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31295#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31296pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31297    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31298}
31299
31300/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31301///
31302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
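///
/// A minimal usage sketch (illustrative; assumes the unstable `stdarch_x86_avx512` feature and
/// that the calling code is compiled with the `avx512f` target feature). The predicate is
/// selected with an `_MM_CMPINT_*` constant:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_set1_epi32(7);
/// // Lanes 0..=7 are less than or equal to 7, so the low eight mask bits are set.
/// let k = _mm512_cmp_epu32_mask::<_MM_CMPINT_LE>(a, b);
/// assert_eq!(k, 0x00FF);
/// ```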
31303#[inline]
31304#[target_feature(enable = "avx512f")]
31305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31306#[rustc_legacy_const_generics(2)]
31307#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31308pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31309    unsafe {
31310        static_assert_uimm_bits!(IMM3, 3);
31311        let a = a.as_u32x16();
31312        let b = b.as_u32x16();
31313        let r = match IMM3 {
31314            0 => simd_eq(a, b),
31315            1 => simd_lt(a, b),
31316            2 => simd_le(a, b),
31317            3 => i32x16::ZERO,
31318            4 => simd_ne(a, b),
31319            5 => simd_ge(a, b),
31320            6 => simd_gt(a, b),
31321            _ => i32x16::splat(-1),
31322        };
31323        simd_bitmask(r)
31324    }
31325}
31326
31327/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31328///
31329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
31330#[inline]
31331#[target_feature(enable = "avx512f")]
31332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31333#[rustc_legacy_const_generics(3)]
31334#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31335pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31336    k1: __mmask16,
31337    a: __m512i,
31338    b: __m512i,
31339) -> __mmask16 {
31340    unsafe {
31341        static_assert_uimm_bits!(IMM3, 3);
31342        let a = a.as_u32x16();
31343        let b = b.as_u32x16();
31344        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
31345        let r = match IMM3 {
31346            0 => simd_and(k1, simd_eq(a, b)),
31347            1 => simd_and(k1, simd_lt(a, b)),
31348            2 => simd_and(k1, simd_le(a, b)),
31349            3 => i32x16::ZERO,
31350            4 => simd_and(k1, simd_ne(a, b)),
31351            5 => simd_and(k1, simd_ge(a, b)),
31352            6 => simd_and(k1, simd_gt(a, b)),
31353            _ => k1,
31354        };
31355        simd_bitmask(r)
31356    }
31357}
31358
31359/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31360///
31361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
31362#[inline]
31363#[target_feature(enable = "avx512f,avx512vl")]
31364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31365#[rustc_legacy_const_generics(2)]
31366#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31367pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31368    unsafe {
31369        static_assert_uimm_bits!(IMM3, 3);
31370        let a = a.as_u32x8();
31371        let b = b.as_u32x8();
31372        let r = match IMM3 {
31373            0 => simd_eq(a, b),
31374            1 => simd_lt(a, b),
31375            2 => simd_le(a, b),
31376            3 => i32x8::ZERO,
31377            4 => simd_ne(a, b),
31378            5 => simd_ge(a, b),
31379            6 => simd_gt(a, b),
31380            _ => i32x8::splat(-1),
31381        };
31382        simd_bitmask(r)
31383    }
31384}
31385
31386/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31387///
31388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
31389#[inline]
31390#[target_feature(enable = "avx512f,avx512vl")]
31391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31392#[rustc_legacy_const_generics(3)]
31393#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31394pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31395    k1: __mmask8,
31396    a: __m256i,
31397    b: __m256i,
31398) -> __mmask8 {
31399    unsafe {
31400        static_assert_uimm_bits!(IMM3, 3);
31401        let a = a.as_u32x8();
31402        let b = b.as_u32x8();
31403        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
31404        let r = match IMM3 {
31405            0 => simd_and(k1, simd_eq(a, b)),
31406            1 => simd_and(k1, simd_lt(a, b)),
31407            2 => simd_and(k1, simd_le(a, b)),
31408            3 => i32x8::ZERO,
31409            4 => simd_and(k1, simd_ne(a, b)),
31410            5 => simd_and(k1, simd_ge(a, b)),
31411            6 => simd_and(k1, simd_gt(a, b)),
31412            _ => k1,
31413        };
31414        simd_bitmask(r)
31415    }
31416}
31417
31418/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31419///
31420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
31421#[inline]
31422#[target_feature(enable = "avx512f,avx512vl")]
31423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31424#[rustc_legacy_const_generics(2)]
31425#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31426pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31427    unsafe {
31428        static_assert_uimm_bits!(IMM3, 3);
31429        let a = a.as_u32x4();
31430        let b = b.as_u32x4();
31431        let r = match IMM3 {
31432            0 => simd_eq(a, b),
31433            1 => simd_lt(a, b),
31434            2 => simd_le(a, b),
31435            3 => i32x4::ZERO,
31436            4 => simd_ne(a, b),
31437            5 => simd_ge(a, b),
31438            6 => simd_gt(a, b),
31439            _ => i32x4::splat(-1),
31440        };
31441        simd_bitmask(r)
31442    }
31443}
31444
31445/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31446///
31447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
31448#[inline]
31449#[target_feature(enable = "avx512f,avx512vl")]
31450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31451#[rustc_legacy_const_generics(3)]
31452#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31453pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31454    k1: __mmask8,
31455    a: __m128i,
31456    b: __m128i,
31457) -> __mmask8 {
31458    unsafe {
31459        static_assert_uimm_bits!(IMM3, 3);
31460        let a = a.as_u32x4();
31461        let b = b.as_u32x4();
31462        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
31463        let r = match IMM3 {
31464            0 => simd_and(k1, simd_eq(a, b)),
31465            1 => simd_and(k1, simd_lt(a, b)),
31466            2 => simd_and(k1, simd_le(a, b)),
31467            3 => i32x4::ZERO,
31468            4 => simd_and(k1, simd_ne(a, b)),
31469            5 => simd_and(k1, simd_ge(a, b)),
31470            6 => simd_and(k1, simd_gt(a, b)),
31471            _ => k1,
31472        };
31473        simd_bitmask(r)
31474    }
31475}
31476
31477/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31478///
31479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
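///
/// A minimal usage sketch (illustrative; assumes the unstable `stdarch_x86_avx512` feature and
/// that the calling code is compiled with the `avx512f` target feature). Unlike the `epu32`
/// variant, the lanes are interpreted as signed values:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_set1_epi32(1);
/// // As signed integers, -1 is less than 1 in every lane, so all 16 mask bits are set.
/// let k = _mm512_cmplt_epi32_mask(a, b);
/// assert_eq!(k, 0xFFFF);
/// ```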
31480#[inline]
31481#[target_feature(enable = "avx512f")]
31482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31483#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31484pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31485    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
31486}
31487
31488/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31489///
31490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
31491#[inline]
31492#[target_feature(enable = "avx512f")]
31493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31494#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31495pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31496    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31497}
31498
31499/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31500///
31501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
31502#[inline]
31503#[target_feature(enable = "avx512f,avx512vl")]
31504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31505#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31506pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31507    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
31508}
31509
31510/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31511///
31512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
31513#[inline]
31514#[target_feature(enable = "avx512f,avx512vl")]
31515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31516#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31517pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31518    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31519}
31520
31521/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31522///
31523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
31524#[inline]
31525#[target_feature(enable = "avx512f,avx512vl")]
31526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31527#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31528pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31529    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
31530}
31531
31532/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31533///
31534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
31535#[inline]
31536#[target_feature(enable = "avx512f,avx512vl")]
31537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31538#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31539pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31540    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31541}
31542
31543/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31544///
31545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
31546#[inline]
31547#[target_feature(enable = "avx512f")]
31548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31549#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31550pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31551    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
31552}
31553
31554/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31555///
31556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
31557#[inline]
31558#[target_feature(enable = "avx512f")]
31559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31560#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31561pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31562    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31563}
31564
31565/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31566///
31567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
31568#[inline]
31569#[target_feature(enable = "avx512f,avx512vl")]
31570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31571#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31572pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31573    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
31574}
31575
31576/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31577///
31578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
31579#[inline]
31580#[target_feature(enable = "avx512f,avx512vl")]
31581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31582#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31583pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31584    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31585}
31586
31587/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31588///
31589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
31590#[inline]
31591#[target_feature(enable = "avx512f,avx512vl")]
31592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31593#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31594pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31595    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
31596}
31597
31598/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31599///
31600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
31601#[inline]
31602#[target_feature(enable = "avx512f,avx512vl")]
31603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31604#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31605pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31606    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31607}
31608
31609/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31610///
31611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
31612#[inline]
31613#[target_feature(enable = "avx512f")]
31614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31615#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31616pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31617    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
31618}
31619
31620/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31621///
31622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
31623#[inline]
31624#[target_feature(enable = "avx512f")]
31625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31626#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31627pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31628    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31629}
31630
31631/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31632///
31633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
31634#[inline]
31635#[target_feature(enable = "avx512f,avx512vl")]
31636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31637#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31638pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31639    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
31640}
31641
31642/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31643///
31644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
31645#[inline]
31646#[target_feature(enable = "avx512f,avx512vl")]
31647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31648#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31649pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31650    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31651}
31652
31653/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31654///
31655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
31656#[inline]
31657#[target_feature(enable = "avx512f,avx512vl")]
31658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31659#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31660pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31661    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
31662}
31663
31664/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31665///
31666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
31667#[inline]
31668#[target_feature(enable = "avx512f,avx512vl")]
31669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31670#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31671pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31672    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31673}
31674
31675/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31676///
31677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
31678#[inline]
31679#[target_feature(enable = "avx512f")]
31680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31681#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31682pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31683    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
31684}
31685
31686/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31687///
31688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
31689#[inline]
31690#[target_feature(enable = "avx512f")]
31691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31692#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31693pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31694    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31695}
31696
31697/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31698///
31699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
31700#[inline]
31701#[target_feature(enable = "avx512f,avx512vl")]
31702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31703#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31704pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31705    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
31706}
31707
31708/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31709///
31710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
31711#[inline]
31712#[target_feature(enable = "avx512f,avx512vl")]
31713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31714#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31715pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31716    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31717}
31718
31719/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31720///
31721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
31722#[inline]
31723#[target_feature(enable = "avx512f,avx512vl")]
31724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31725#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31726pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31727    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
31728}
31729
31730/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31731///
31732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
31733#[inline]
31734#[target_feature(enable = "avx512f,avx512vl")]
31735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31736#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31737pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31738    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31739}
31740
31741/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31742///
31743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
31744#[inline]
31745#[target_feature(enable = "avx512f")]
31746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31747#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31748pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31749    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
31750}
31751
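// A usage sketch (hypothetical, not part of this module's test suite): the
// result is an ordinary integer mask, so `count_ones` on it gives the number
// of lanes that compared equal.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_mm512_cmpeq_epi32_mask() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_set1_epi32(3);
    // Exactly one lane (lane 3) matches.
    assert_eq!(_mm512_cmpeq_epi32_mask(a, b).count_ones(), 1);
}
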
31752/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31753///
31754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
31755#[inline]
31756#[target_feature(enable = "avx512f")]
31757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31758#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31759pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31760    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31761}
31762
31763/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31764///
31765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
31766#[inline]
31767#[target_feature(enable = "avx512f,avx512vl")]
31768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31769#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31770pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31771    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
31772}
31773
31774/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31775///
31776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
31777#[inline]
31778#[target_feature(enable = "avx512f,avx512vl")]
31779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31780#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31781pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31782    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31783}
31784
31785/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31786///
31787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
31788#[inline]
31789#[target_feature(enable = "avx512f,avx512vl")]
31790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31791#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31792pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31793    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
31794}
31795
31796/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31797///
31798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
31799#[inline]
31800#[target_feature(enable = "avx512f,avx512vl")]
31801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31802#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31803pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31804    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31805}
31806
31807/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31808///
31809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
31810#[inline]
31811#[target_feature(enable = "avx512f")]
31812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31813#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31814pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31815    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
31816}
31817
31818/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31819///
31820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
31821#[inline]
31822#[target_feature(enable = "avx512f")]
31823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31824#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31825pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31826    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31827}
31828
31829/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31830///
31831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
31832#[inline]
31833#[target_feature(enable = "avx512f,avx512vl")]
31834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31835#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31836pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31837    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
31838}
31839
31840/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31841///
31842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
31843#[inline]
31844#[target_feature(enable = "avx512f,avx512vl")]
31845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31846#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31847pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31848    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31849}
31850
31851/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31852///
31853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
31854#[inline]
31855#[target_feature(enable = "avx512f,avx512vl")]
31856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31857#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31858pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31859    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
31860}
31861
31862/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31863///
31864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
31865#[inline]
31866#[target_feature(enable = "avx512f,avx512vl")]
31867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31868#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31869pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31870    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31871}
31872
31873/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31874///
31875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
31876#[inline]
31877#[target_feature(enable = "avx512f")]
31878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31879#[rustc_legacy_const_generics(2)]
31880#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31881pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31882    unsafe {
31883        static_assert_uimm_bits!(IMM3, 3);
31884        let a = a.as_i32x16();
31885        let b = b.as_i32x16();
31886        let r = match IMM3 {
31887            0 => simd_eq(a, b),
31888            1 => simd_lt(a, b),
31889            2 => simd_le(a, b),
31890            3 => i32x16::ZERO,
31891            4 => simd_ne(a, b),
31892            5 => simd_ge(a, b),
31893            6 => simd_gt(a, b),
31894            _ => i32x16::splat(-1),
31895        };
31896        simd_bitmask(r)
31897    }
31898}
31899
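// A brief sketch (hypothetical, not part of this module's test suite) of the
// const-generic form: an `_MM_CMPINT_*` constant selects the predicate, so
// `_MM_CMPINT_LE` reproduces `_mm512_cmple_epi32_mask` above.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_mm512_cmp_epi32_mask() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_set1_epi32(7);
    // Lanes 0..=7 are <= 7, so the low eight bits are set.
    assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_LE>(a, b), 0x00ff);
    assert_eq!(
        _mm512_cmp_epi32_mask::<_MM_CMPINT_LE>(a, b),
        _mm512_cmple_epi32_mask(a, b)
    );
}
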
31900/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31901///
31902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
31903#[inline]
31904#[target_feature(enable = "avx512f")]
31905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31906#[rustc_legacy_const_generics(3)]
31907#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31908pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31909    k1: __mmask16,
31910    a: __m512i,
31911    b: __m512i,
31912) -> __mmask16 {
31913    unsafe {
31914        static_assert_uimm_bits!(IMM3, 3);
31915        let a = a.as_i32x16();
31916        let b = b.as_i32x16();
31917        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
31918        let r = match IMM3 {
31919            0 => simd_and(k1, simd_eq(a, b)),
31920            1 => simd_and(k1, simd_lt(a, b)),
31921            2 => simd_and(k1, simd_le(a, b)),
31922            3 => i32x16::ZERO,
31923            4 => simd_and(k1, simd_ne(a, b)),
31924            5 => simd_and(k1, simd_ge(a, b)),
31925            6 => simd_and(k1, simd_gt(a, b)),
31926            _ => k1,
31927        };
31928        simd_bitmask(r)
31929    }
31930}
31931
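// A small identity sketch (hypothetical, not part of this module's test suite):
// for every predicate value, the masked form equals the unmasked mask ANDed
// with `k1`, which is exactly how the match above constructs it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_mm512_mask_cmp_epi32_mask() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_set1_epi32(7);
    let k1: __mmask16 = 0x5555;
    assert_eq!(
        _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b),
        k1 & _mm512_cmp_epi32_mask::<_MM_CMPINT_NE>(a, b)
    );
}
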
31932/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31933///
31934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
31935#[inline]
31936#[target_feature(enable = "avx512f,avx512vl")]
31937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31938#[rustc_legacy_const_generics(2)]
31939#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31940pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31941    unsafe {
31942        static_assert_uimm_bits!(IMM3, 3);
31943        let a = a.as_i32x8();
31944        let b = b.as_i32x8();
31945        let r = match IMM3 {
31946            0 => simd_eq(a, b),
31947            1 => simd_lt(a, b),
31948            2 => simd_le(a, b),
31949            3 => i32x8::ZERO,
31950            4 => simd_ne(a, b),
31951            5 => simd_ge(a, b),
31952            6 => simd_gt(a, b),
31953            _ => i32x8::splat(-1),
31954        };
31955        simd_bitmask(r)
31956    }
31957}
31958
31959/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31960///
31961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
31962#[inline]
31963#[target_feature(enable = "avx512f,avx512vl")]
31964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31965#[rustc_legacy_const_generics(3)]
31966#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31967pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31968    k1: __mmask8,
31969    a: __m256i,
31970    b: __m256i,
31971) -> __mmask8 {
31972    unsafe {
31973        static_assert_uimm_bits!(IMM3, 3);
31974        let a = a.as_i32x8();
31975        let b = b.as_i32x8();
31976        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
31977        let r = match IMM3 {
31978            0 => simd_and(k1, simd_eq(a, b)),
31979            1 => simd_and(k1, simd_lt(a, b)),
31980            2 => simd_and(k1, simd_le(a, b)),
31981            3 => i32x8::ZERO,
31982            4 => simd_and(k1, simd_ne(a, b)),
31983            5 => simd_and(k1, simd_ge(a, b)),
31984            6 => simd_and(k1, simd_gt(a, b)),
31985            _ => k1,
31986        };
31987        simd_bitmask(r)
31988    }
31989}
31990
31991/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31992///
31993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
31994#[inline]
31995#[target_feature(enable = "avx512f,avx512vl")]
31996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31997#[rustc_legacy_const_generics(2)]
31998#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31999pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32000    unsafe {
32001        static_assert_uimm_bits!(IMM3, 3);
32002        let a = a.as_i32x4();
32003        let b = b.as_i32x4();
32004        let r = match IMM3 {
32005            0 => simd_eq(a, b),
32006            1 => simd_lt(a, b),
32007            2 => simd_le(a, b),
32008            3 => i32x4::ZERO,
32009            4 => simd_ne(a, b),
32010            5 => simd_ge(a, b),
32011            6 => simd_gt(a, b),
32012            _ => i32x4::splat(-1),
32013        };
32014        simd_bitmask(r)
32015    }
32016}
32017
32018/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32019///
32020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
32021#[inline]
32022#[target_feature(enable = "avx512f,avx512vl")]
32023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32024#[rustc_legacy_const_generics(3)]
32025#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32026pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
32027    k1: __mmask8,
32028    a: __m128i,
32029    b: __m128i,
32030) -> __mmask8 {
32031    unsafe {
32032        static_assert_uimm_bits!(IMM3, 3);
32033        let a = a.as_i32x4();
32034        let b = b.as_i32x4();
32035        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
32036        let r = match IMM3 {
32037            0 => simd_and(k1, simd_eq(a, b)),
32038            1 => simd_and(k1, simd_lt(a, b)),
32039            2 => simd_and(k1, simd_le(a, b)),
32040            3 => i32x4::ZERO,
32041            4 => simd_and(k1, simd_ne(a, b)),
32042            5 => simd_and(k1, simd_ge(a, b)),
32043            6 => simd_and(k1, simd_gt(a, b)),
32044            _ => k1,
32045        };
32046        simd_bitmask(r)
32047    }
32048}
32049
32050/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32051///
32052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
32053#[inline]
32054#[target_feature(enable = "avx512f")]
32055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32056#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32057pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32058    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
32059}
32060
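// A sketch (hypothetical, not part of this module's test suite) of why the
// unsigned forms matter: a lane of all-one bits is u64::MAX under an unsigned
// comparison, but -1 under the signed `_mm512_cmplt_epi64_mask`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_mm512_cmplt_epu64_mask() {
    let a = _mm512_set1_epi64(-1); // all bits set in every lane
    let b = _mm512_set1_epi64(0);
    // Unsigned: u64::MAX is not less than 0 in any lane.
    assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0x00);
    // Signed: -1 < 0 in every lane, so all eight mask bits are set.
    assert_eq!(_mm512_cmplt_epi64_mask(a, b), 0xff);
}
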
32061/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32062///
32063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
32064#[inline]
32065#[target_feature(enable = "avx512f")]
32066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32067#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32068pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32069    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32070}
32071
32072/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32073///
32074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
32075#[inline]
32076#[target_feature(enable = "avx512f,avx512vl")]
32077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32078#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32079pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32080    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
32081}
32082
32083/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32084///
32085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
32086#[inline]
32087#[target_feature(enable = "avx512f,avx512vl")]
32088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32089#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32090pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32091    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32092}
32093
32094/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32095///
32096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
32097#[inline]
32098#[target_feature(enable = "avx512f,avx512vl")]
32099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32100#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32101pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32102    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
32103}
32104
32105/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32106///
32107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
32108#[inline]
32109#[target_feature(enable = "avx512f,avx512vl")]
32110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32111#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32112pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32113    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32114}
32115
32116/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32117///
32118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
32119#[inline]
32120#[target_feature(enable = "avx512f")]
32121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32122#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32123pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32124    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
32125}
32126
32127/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32128///
32129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
32130#[inline]
32131#[target_feature(enable = "avx512f")]
32132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32133#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32134pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32135    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32136}
32137
32138/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32139///
32140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
32141#[inline]
32142#[target_feature(enable = "avx512f,avx512vl")]
32143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32144#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32145pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32146    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
32147}
32148
32149/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32150///
32151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
32152#[inline]
32153#[target_feature(enable = "avx512f,avx512vl")]
32154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32155#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32156pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32157    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32158}
32159
32160/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32161///
32162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
32163#[inline]
32164#[target_feature(enable = "avx512f,avx512vl")]
32165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32166#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32167pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32168    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
32169}
32170
32171/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32172///
32173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
32174#[inline]
32175#[target_feature(enable = "avx512f,avx512vl")]
32176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32177#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32178pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32179    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32180}
32181
32182/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32183///
32184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
32185#[inline]
32186#[target_feature(enable = "avx512f")]
32187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32188#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32189pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32190    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
32191}
32192
32193/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32194///
32195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
32196#[inline]
32197#[target_feature(enable = "avx512f")]
32198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32199#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32200pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32201    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32202}
32203
32204/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32205///
32206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
32207#[inline]
32208#[target_feature(enable = "avx512f,avx512vl")]
32209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32210#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32211pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32212    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
32213}
32214
32215/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32216///
32217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
32218#[inline]
32219#[target_feature(enable = "avx512f,avx512vl")]
32220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32221#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32222pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32223    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32224}
32225
32226/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32227///
32228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
32229#[inline]
32230#[target_feature(enable = "avx512f,avx512vl")]
32231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32232#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32233pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32234    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
32235}
32236
32237/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32238///
32239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
32240#[inline]
32241#[target_feature(enable = "avx512f,avx512vl")]
32242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32243#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32244pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32245    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32246}
32247
32248/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32249///
32250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
32251#[inline]
32252#[target_feature(enable = "avx512f")]
32253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32254#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32255pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32256    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
32257}
32258
32259/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32260///
32261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
32262#[inline]
32263#[target_feature(enable = "avx512f")]
32264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32265#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32266pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32267    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32268}
32269
32270/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32271///
32272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
32273#[inline]
32274#[target_feature(enable = "avx512f,avx512vl")]
32275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32276#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32277pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32278    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
32279}
32280
32281/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32282///
32283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
32284#[inline]
32285#[target_feature(enable = "avx512f,avx512vl")]
32286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32287#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32288pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32289    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32290}
32291
32292/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32293///
32294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
32295#[inline]
32296#[target_feature(enable = "avx512f,avx512vl")]
32297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32298#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32299pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32300    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
32301}
32302
32303/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32304///
32305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
32306#[inline]
32307#[target_feature(enable = "avx512f,avx512vl")]
32308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32309#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32310pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32311    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32312}
32313
32314/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32315///
32316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
32317#[inline]
32318#[target_feature(enable = "avx512f")]
32319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32320#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32321pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32322    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
32323}
32324
32325/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32326///
32327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
32328#[inline]
32329#[target_feature(enable = "avx512f")]
32330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32331#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32332pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32333    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32334}
32335
32336/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32337///
32338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
32339#[inline]
32340#[target_feature(enable = "avx512f,avx512vl")]
32341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32342#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32343pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32344    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
32345}
32346
32347/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32348///
32349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
32350#[inline]
32351#[target_feature(enable = "avx512f,avx512vl")]
32352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32353#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32354pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32355    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32356}
32357
32358/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32359///
32360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
32361#[inline]
32362#[target_feature(enable = "avx512f,avx512vl")]
32363#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32364#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32365pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32366    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
32367}
32368
32369/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32370///
32371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
32372#[inline]
32373#[target_feature(enable = "avx512f,avx512vl")]
32374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32375#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32376pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32377    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32378}
32379
32380/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32381///
32382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
32383#[inline]
32384#[target_feature(enable = "avx512f")]
32385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32386#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32387pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32388    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
32389}
32390
32391/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32392///
32393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
32394#[inline]
32395#[target_feature(enable = "avx512f")]
32396#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32397#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32398pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32399    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32400}
32401
32402/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32403///
32404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
32405#[inline]
32406#[target_feature(enable = "avx512f,avx512vl")]
32407#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32408#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32409pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32410    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
32411}
32412
32413/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32414///
32415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
32416#[inline]
32417#[target_feature(enable = "avx512f,avx512vl")]
32418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32419#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32420pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32421    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32422}
32423
32424/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32425///
32426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
32427#[inline]
32428#[target_feature(enable = "avx512f,avx512vl")]
32429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32430#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32431pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32432    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
32433}
32434
32435/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32436///
32437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
32438#[inline]
32439#[target_feature(enable = "avx512f,avx512vl")]
32440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32441#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32442pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32443    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32444}
32445
32446/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32447///
32448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
32449#[inline]
32450#[target_feature(enable = "avx512f")]
32451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32452#[rustc_legacy_const_generics(2)]
32453#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32454pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32455    unsafe {
32456        static_assert_uimm_bits!(IMM3, 3);
32457        let a = a.as_u64x8();
32458        let b = b.as_u64x8();
32459        let r = match IMM3 {
32460            0 => simd_eq(a, b),
32461            1 => simd_lt(a, b),
32462            2 => simd_le(a, b),
32463            3 => i64x8::ZERO,
32464            4 => simd_ne(a, b),
32465            5 => simd_ge(a, b),
32466            6 => simd_gt(a, b),
32467            _ => i64x8::splat(-1),
32468        };
32469        simd_bitmask(r)
32470    }
32471}
32472
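// A short sketch (hypothetical, not part of this module's test suite) of the
// two degenerate predicates handled by the match above: `_MM_CMPINT_FALSE` (3)
// always yields an empty mask and the catch-all arm (`_MM_CMPINT_TRUE`, 7) a
// full one, regardless of the operands.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _sketch_mm512_cmp_epu64_mask() {
    let a = _mm512_set1_epi64(1);
    let b = _mm512_set1_epi64(2);
    assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_FALSE>(a, b), 0x00);
    assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_TRUE>(a, b), 0xff);
}
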
32473/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32474///
32475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
32476#[inline]
32477#[target_feature(enable = "avx512f")]
32478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32479#[rustc_legacy_const_generics(3)]
32480#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32481pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32482    k1: __mmask8,
32483    a: __m512i,
32484    b: __m512i,
32485) -> __mmask8 {
32486    unsafe {
32487        static_assert_uimm_bits!(IMM3, 3);
32488        let a = a.as_u64x8();
32489        let b = b.as_u64x8();
32490        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
32491        let r = match IMM3 {
32492            0 => simd_and(k1, simd_eq(a, b)),
32493            1 => simd_and(k1, simd_lt(a, b)),
32494            2 => simd_and(k1, simd_le(a, b)),
32495            3 => i64x8::ZERO,
32496            4 => simd_and(k1, simd_ne(a, b)),
32497            5 => simd_and(k1, simd_ge(a, b)),
32498            6 => simd_and(k1, simd_gt(a, b)),
32499            _ => k1,
32500        };
32501        simd_bitmask(r)
32502    }
32503}
32504
32505/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32506///
32507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
32508#[inline]
32509#[target_feature(enable = "avx512f,avx512vl")]
32510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32511#[rustc_legacy_const_generics(2)]
32512#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32513pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
32514    unsafe {
32515        static_assert_uimm_bits!(IMM3, 3);
32516        let a = a.as_u64x4();
32517        let b = b.as_u64x4();
32518        let r = match IMM3 {
32519            0 => simd_eq(a, b),
32520            1 => simd_lt(a, b),
32521            2 => simd_le(a, b),
32522            3 => i64x4::ZERO,
32523            4 => simd_ne(a, b),
32524            5 => simd_ge(a, b),
32525            6 => simd_gt(a, b),
32526            _ => i64x4::splat(-1),
32527        };
32528        simd_bitmask(r)
32529    }
32530}
32531
32532/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32533///
32534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
32535#[inline]
32536#[target_feature(enable = "avx512f,avx512vl")]
32537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32538#[rustc_legacy_const_generics(3)]
32539#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32540pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32541    k1: __mmask8,
32542    a: __m256i,
32543    b: __m256i,
32544) -> __mmask8 {
32545    unsafe {
32546        static_assert_uimm_bits!(IMM3, 3);
32547        let a = a.as_u64x4();
32548        let b = b.as_u64x4();
32549        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
32550        let r = match IMM3 {
32551            0 => simd_and(k1, simd_eq(a, b)),
32552            1 => simd_and(k1, simd_lt(a, b)),
32553            2 => simd_and(k1, simd_le(a, b)),
32554            3 => i64x4::ZERO,
32555            4 => simd_and(k1, simd_ne(a, b)),
32556            5 => simd_and(k1, simd_ge(a, b)),
32557            6 => simd_and(k1, simd_gt(a, b)),
32558            _ => k1,
32559        };
32560        simd_bitmask(r)
32561    }
32562}
32563
32564/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32565///
32566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
32567#[inline]
32568#[target_feature(enable = "avx512f,avx512vl")]
32569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32570#[rustc_legacy_const_generics(2)]
32571#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32572pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32573    unsafe {
32574        static_assert_uimm_bits!(IMM3, 3);
32575        let a = a.as_u64x2();
32576        let b = b.as_u64x2();
32577        let r = match IMM3 {
32578            0 => simd_eq(a, b),
32579            1 => simd_lt(a, b),
32580            2 => simd_le(a, b),
32581            3 => i64x2::ZERO,
32582            4 => simd_ne(a, b),
32583            5 => simd_ge(a, b),
32584            6 => simd_gt(a, b),
32585            _ => i64x2::splat(-1),
32586        };
32587        simd_bitmask(r)
32588    }
32589}
32590
32591/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32592///
32593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
32594#[inline]
32595#[target_feature(enable = "avx512f,avx512vl")]
32596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32597#[rustc_legacy_const_generics(3)]
32598#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32599pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32600    k1: __mmask8,
32601    a: __m128i,
32602    b: __m128i,
32603) -> __mmask8 {
32604    unsafe {
32605        static_assert_uimm_bits!(IMM3, 3);
32606        let a = a.as_u64x2();
32607        let b = b.as_u64x2();
32608        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
32609        let r = match IMM3 {
32610            0 => simd_and(k1, simd_eq(a, b)),
32611            1 => simd_and(k1, simd_lt(a, b)),
32612            2 => simd_and(k1, simd_le(a, b)),
32613            3 => i64x2::ZERO,
32614            4 => simd_and(k1, simd_ne(a, b)),
32615            5 => simd_and(k1, simd_ge(a, b)),
32616            6 => simd_and(k1, simd_gt(a, b)),
32617            _ => k1,
32618        };
32619        simd_bitmask(r)
32620    }
32621}
32622
32623/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32624///
32625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
32626#[inline]
32627#[target_feature(enable = "avx512f")]
32628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32629#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32630pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32631    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
32632}
32633
32634/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32635///
32636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
32637#[inline]
32638#[target_feature(enable = "avx512f")]
32639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32640#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32641pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32642    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32643}
32644
32645/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32646///
32647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
32648#[inline]
32649#[target_feature(enable = "avx512f,avx512vl")]
32650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32651#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32652pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32653    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
32654}
32655
32656/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32657///
32658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
32659#[inline]
32660#[target_feature(enable = "avx512f,avx512vl")]
32661#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32662#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32663pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32664    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32665}
32666
32667/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32668///
32669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
32670#[inline]
32671#[target_feature(enable = "avx512f,avx512vl")]
32672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32673#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32674pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32675    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
32676}
32677
32678/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32679///
32680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
32681#[inline]
32682#[target_feature(enable = "avx512f,avx512vl")]
32683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32684#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32685pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32686    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32687}
32688
32689/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32690///
32691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
32692#[inline]
32693#[target_feature(enable = "avx512f")]
32694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32695#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32696pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32697    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
32698}
32699
32700/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32701///
32702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
32703#[inline]
32704#[target_feature(enable = "avx512f")]
32705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32706#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32707pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32708    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32709}
32710
32711/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32712///
32713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
32714#[inline]
32715#[target_feature(enable = "avx512f,avx512vl")]
32716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32717#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32718pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32719    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
32720}
32721
32722/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32723///
32724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
32725#[inline]
32726#[target_feature(enable = "avx512f,avx512vl")]
32727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32728#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32729pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32730    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32731}
32732
32733/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32734///
32735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
32736#[inline]
32737#[target_feature(enable = "avx512f,avx512vl")]
32738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32739#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32740pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32741    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
32742}
32743
32744/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32745///
32746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
32747#[inline]
32748#[target_feature(enable = "avx512f,avx512vl")]
32749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32750#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32751pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32752    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32753}
32754
32755/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32756///
32757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
32758#[inline]
32759#[target_feature(enable = "avx512f")]
32760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32761#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32762pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32763    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
32764}
32765
32766/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32767///
32768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
32769#[inline]
32770#[target_feature(enable = "avx512f")]
32771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32772#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32773pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32774    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32775}
32776
32777/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32778///
32779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
32780#[inline]
32781#[target_feature(enable = "avx512f,avx512vl")]
32782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32783#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32784pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32785    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
32786}
32787
32788/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32789///
32790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
32791#[inline]
32792#[target_feature(enable = "avx512f,avx512vl")]
32793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32794#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32795pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32796    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32797}
32798
32799/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32800///
32801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
32802#[inline]
32803#[target_feature(enable = "avx512f,avx512vl")]
32804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32805#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32806pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32807    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
32808}
32809
32810/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32811///
32812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
32813#[inline]
32814#[target_feature(enable = "avx512f,avx512vl")]
32815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32816#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32817pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32818    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32819}
32820
32821/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32822///
32823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
32824#[inline]
32825#[target_feature(enable = "avx512f")]
32826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32827#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32828pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32829    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
32830}
32831
32832/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32833///
32834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
32835#[inline]
32836#[target_feature(enable = "avx512f")]
32837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32838#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32839pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32840    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32841}
32842
32843/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32844///
32845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
32846#[inline]
32847#[target_feature(enable = "avx512f,avx512vl")]
32848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32849#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32850pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32851    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
32852}
32853
32854/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32855///
32856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
32857#[inline]
32858#[target_feature(enable = "avx512f,avx512vl")]
32859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32860#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32861pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32862    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32863}
32864
32865/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32866///
32867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
32868#[inline]
32869#[target_feature(enable = "avx512f,avx512vl")]
32870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32871#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32872pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32873    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
32874}
32875
32876/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32877///
32878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
32879#[inline]
32880#[target_feature(enable = "avx512f,avx512vl")]
32881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32882#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32883pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32884    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32885}
32886
32887/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32888///
32889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
32890#[inline]
32891#[target_feature(enable = "avx512f")]
32892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32893#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32894pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32895    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
32896}
32897
32898/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32899///
32900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
32901#[inline]
32902#[target_feature(enable = "avx512f")]
32903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32904#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32905pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32906    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32907}
32908
32909/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32910///
32911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
32912#[inline]
32913#[target_feature(enable = "avx512f,avx512vl")]
32914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32915#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32916pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32917    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
32918}
32919
32920/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32921///
32922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
32923#[inline]
32924#[target_feature(enable = "avx512f,avx512vl")]
32925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32926#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32927pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32928    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32929}
32930
32931/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32932///
32933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
32934#[inline]
32935#[target_feature(enable = "avx512f,avx512vl")]
32936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32937#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32938pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
32939    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
32940}
32941
32942/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32943///
32944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
32945#[inline]
32946#[target_feature(enable = "avx512f,avx512vl")]
32947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32948#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32949pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32950    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32951}
32952
32953/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32954///
32955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
32956#[inline]
32957#[target_feature(enable = "avx512f")]
32958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32959#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32960pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
32961    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
32962}
32963
32964/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32965///
32966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
32967#[inline]
32968#[target_feature(enable = "avx512f")]
32969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32970#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32971pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32972    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32973}
32974
32975/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32976///
32977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
32978#[inline]
32979#[target_feature(enable = "avx512f,avx512vl")]
32980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32981#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32982pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
32983    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
32984}
32985
32986/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32987///
32988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
32989#[inline]
32990#[target_feature(enable = "avx512f,avx512vl")]
32991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32992#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32993pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32994    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32995}
32996
32997/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32998///
32999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
33000#[inline]
33001#[target_feature(enable = "avx512f,avx512vl")]
33002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33003#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
33004pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33005    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
33006}
33007
33008/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33009///
33010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
33011#[inline]
33012#[target_feature(enable = "avx512f,avx512vl")]
33013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33014#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
33015pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33016    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
33017}
33018
33019/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33020///
33021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
33022#[inline]
33023#[target_feature(enable = "avx512f")]
33024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33025#[rustc_legacy_const_generics(2)]
33026#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33027pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
33028    unsafe {
33029        static_assert_uimm_bits!(IMM3, 3);
33030        let a = a.as_i64x8();
33031        let b = b.as_i64x8();
33032        let r = match IMM3 {
33033            0 => simd_eq(a, b),
33034            1 => simd_lt(a, b),
33035            2 => simd_le(a, b),
33036            3 => i64x8::ZERO,
33037            4 => simd_ne(a, b),
33038            5 => simd_ge(a, b),
33039            6 => simd_gt(a, b),
33040            _ => i64x8::splat(-1),
33041        };
33042        simd_bitmask(r)
33043    }
33044}
33045
33046/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33047///
33048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
33049#[inline]
33050#[target_feature(enable = "avx512f")]
33051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33052#[rustc_legacy_const_generics(3)]
33053#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33054pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33055    k1: __mmask8,
33056    a: __m512i,
33057    b: __m512i,
33058) -> __mmask8 {
33059    unsafe {
33060        static_assert_uimm_bits!(IMM3, 3);
33061        let a = a.as_i64x8();
33062        let b = b.as_i64x8();
33063        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
33064        let r = match IMM3 {
33065            0 => simd_and(k1, simd_eq(a, b)),
33066            1 => simd_and(k1, simd_lt(a, b)),
33067            2 => simd_and(k1, simd_le(a, b)),
33068            3 => i64x8::ZERO,
33069            4 => simd_and(k1, simd_ne(a, b)),
33070            5 => simd_and(k1, simd_ge(a, b)),
33071            6 => simd_and(k1, simd_gt(a, b)),
33072            _ => k1,
33073        };
33074        simd_bitmask(r)
33075    }
33076}
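
// Illustrative sketch (hypothetical, test-only helper, not part of the public API):
// the fixed-predicate wrappers such as `_mm512_cmplt_epi64_mask` are shorthand for
// this generic form, and the masked variant simply ANDs `k1` into the result.
// Assumes an AVX-512F-capable CPU at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_cmp_epi64() {
    let a = _mm512_set1_epi64(-1);
    let b = _mm512_setzero_si512();
    // Signed less-than: -1 < 0 in every lane, so all eight mask bits are set.
    assert_eq!(_mm512_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b), 0xff);
    assert_eq!(_mm512_cmplt_epi64_mask(a, b), 0xff);
    // Only the lanes selected by `k1` can appear in the masked result.
    assert_eq!(_mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(0b0000_1111, a, b), 0b0000_1111);
}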
33077
33078/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33079///
33080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
33081#[inline]
33082#[target_feature(enable = "avx512f,avx512vl")]
33083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33084#[rustc_legacy_const_generics(2)]
33085#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33086pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
33087    unsafe {
33088        static_assert_uimm_bits!(IMM3, 3);
33089        let a = a.as_i64x4();
33090        let b = b.as_i64x4();
33091        let r = match IMM3 {
33092            0 => simd_eq(a, b),
33093            1 => simd_lt(a, b),
33094            2 => simd_le(a, b),
33095            3 => i64x4::ZERO,
33096            4 => simd_ne(a, b),
33097            5 => simd_ge(a, b),
33098            6 => simd_gt(a, b),
33099            _ => i64x4::splat(-1),
33100        };
33101        simd_bitmask(r)
33102    }
33103}
33104
33105/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33106///
33107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
33108#[inline]
33109#[target_feature(enable = "avx512f,avx512vl")]
33110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33111#[rustc_legacy_const_generics(3)]
33112#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33113pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33114    k1: __mmask8,
33115    a: __m256i,
33116    b: __m256i,
33117) -> __mmask8 {
33118    unsafe {
33119        static_assert_uimm_bits!(IMM3, 3);
33120        let a = a.as_i64x4();
33121        let b = b.as_i64x4();
33122        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
33123        let r = match IMM3 {
33124            0 => simd_and(k1, simd_eq(a, b)),
33125            1 => simd_and(k1, simd_lt(a, b)),
33126            2 => simd_and(k1, simd_le(a, b)),
33127            3 => i64x4::ZERO,
33128            4 => simd_and(k1, simd_ne(a, b)),
33129            5 => simd_and(k1, simd_ge(a, b)),
33130            6 => simd_and(k1, simd_gt(a, b)),
33131            _ => k1,
33132        };
33133        simd_bitmask(r)
33134    }
33135}
33136
33137/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33138///
33139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
33140#[inline]
33141#[target_feature(enable = "avx512f,avx512vl")]
33142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33143#[rustc_legacy_const_generics(2)]
33144#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33145pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
33146    unsafe {
33147        static_assert_uimm_bits!(IMM3, 3);
33148        let a = a.as_i64x2();
33149        let b = b.as_i64x2();
33150        let r = match IMM3 {
33151            0 => simd_eq(a, b),
33152            1 => simd_lt(a, b),
33153            2 => simd_le(a, b),
33154            3 => i64x2::ZERO,
33155            4 => simd_ne(a, b),
33156            5 => simd_ge(a, b),
33157            6 => simd_gt(a, b),
33158            _ => i64x2::splat(-1),
33159        };
33160        simd_bitmask(r)
33161    }
33162}
33163
33164/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33165///
33166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
33167#[inline]
33168#[target_feature(enable = "avx512f,avx512vl")]
33169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33170#[rustc_legacy_const_generics(3)]
33171#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33172pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33173    k1: __mmask8,
33174    a: __m128i,
33175    b: __m128i,
33176) -> __mmask8 {
33177    unsafe {
33178        static_assert_uimm_bits!(IMM3, 3);
33179        let a = a.as_i64x2();
33180        let b = b.as_i64x2();
33181        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
33182        let r = match IMM3 {
33183            0 => simd_and(k1, simd_eq(a, b)),
33184            1 => simd_and(k1, simd_lt(a, b)),
33185            2 => simd_and(k1, simd_le(a, b)),
33186            3 => i64x2::ZERO,
33187            4 => simd_and(k1, simd_ne(a, b)),
33188            5 => simd_and(k1, simd_ge(a, b)),
33189            6 => simd_and(k1, simd_gt(a, b)),
33190            _ => k1,
33191        };
33192        simd_bitmask(r)
33193    }
33194}
33195
33196/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
33197///
33198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
33199#[inline]
33200#[target_feature(enable = "avx512f")]
33201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33202pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33203    unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33204}
33205
33206/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33207///
33208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
33209#[inline]
33210#[target_feature(enable = "avx512f")]
33211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33212pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
33213    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33214}
33215
33216/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
33217///
33218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
33219#[inline]
33220#[target_feature(enable = "avx512f")]
33221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33222pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33223    unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33224}
33225
33226/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33227///
33228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
33229#[inline]
33230#[target_feature(enable = "avx512f")]
33231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33232pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
33233    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33234}
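
// Illustrative sketch (hypothetical, test-only helper, not part of the public API):
// in the masked add reductions, inactive lanes are replaced by the additive
// identity 0 before the horizontal sum. Assumes an AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mask_reduce_add_epi64() {
    let a = _mm512_set1_epi64(3);
    // All eight lanes active: 8 * 3 = 24.
    assert_eq!(_mm512_reduce_add_epi64(a), 24);
    // Only the low four lanes active: 4 * 3 = 12.
    assert_eq!(_mm512_mask_reduce_add_epi64(0b0000_1111, a), 12);
}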
33235
33236/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33237///
33238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
33239#[inline]
33240#[target_feature(enable = "avx512f")]
33241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33242pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
33243    unsafe {
33244        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33245        let a = _mm256_add_ps(
33246            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33247            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33248        );
33249        let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33250        let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33251        simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
33252    }
33253}
33254
33255/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33256///
33257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
33258#[inline]
33259#[target_feature(enable = "avx512f")]
33260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33261pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
33262    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
33263}
33264
33265/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33266///
33267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
33268#[inline]
33269#[target_feature(enable = "avx512f")]
33270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33271pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
33272    unsafe {
33273        let a = _mm256_add_pd(
33274            _mm512_extractf64x4_pd::<0>(a),
33275            _mm512_extractf64x4_pd::<1>(a),
33276        );
33277        let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33278        simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
33279    }
33280}
33281
33282/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33283///
33284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
33285#[inline]
33286#[target_feature(enable = "avx512f")]
33287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33288pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
33289    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
33290}
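
// Illustrative sketch (hypothetical, test-only helper, not part of the public API):
// the floating-point add reductions above sum pairwise through 256-bit and
// 128-bit halves; with a mask, inactive lanes contribute 0.0. Assumes an
// AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_reduce_add_pd() {
    let a = _mm512_set1_pd(0.5);
    // Eight lanes of 0.5 sum to 4.0.
    assert_eq!(_mm512_reduce_add_pd(a), 4.0);
    // Only the two low lanes active: 2 * 0.5 = 1.0.
    assert_eq!(_mm512_mask_reduce_add_pd(0b0000_0011, a), 1.0);
}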
33291
33292/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
33293///
33294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
33295#[inline]
33296#[target_feature(enable = "avx512f")]
33297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33298pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33299    unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33300}
33301
33302/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33303///
33304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
33305#[inline]
33306#[target_feature(enable = "avx512f")]
33307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33308pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
33309    unsafe {
33310        simd_reduce_mul_unordered(simd_select_bitmask(
33311            k,
33312            a.as_i32x16(),
33313            _mm512_set1_epi32(1).as_i32x16(),
33314        ))
33315    }
33316}
33317
33318/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
33319///
33320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
33321#[inline]
33322#[target_feature(enable = "avx512f")]
33323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33324pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33325    unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33326}
33327
33328/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33329///
33330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
33331#[inline]
33332#[target_feature(enable = "avx512f")]
33333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33334pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
33335    unsafe {
33336        simd_reduce_mul_unordered(simd_select_bitmask(
33337            k,
33338            a.as_i64x8(),
33339            _mm512_set1_epi64(1).as_i64x8(),
33340        ))
33341    }
33342}
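
// Illustrative sketch (hypothetical, test-only helper, not part of the public API):
// the masked multiply reductions substitute the multiplicative identity 1 for
// inactive lanes, so masking lanes out never zeroes the product. Assumes an
// AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mask_reduce_mul_epi64() {
    let a = _mm512_set1_epi64(2);
    // 2^8 = 256 with every lane active.
    assert_eq!(_mm512_reduce_mul_epi64(a), 256);
    // 2^3 = 8 with three active lanes; the other five contribute 1.
    assert_eq!(_mm512_mask_reduce_mul_epi64(0b0000_0111, a), 8);
}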
33343
33344/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33345///
33346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
33347#[inline]
33348#[target_feature(enable = "avx512f")]
33349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33350pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
33351    unsafe {
33352        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33353        let a = _mm256_mul_ps(
33354            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33355            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33356        );
33357        let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33358        let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33359        simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
33360    }
33361}
33362
33363/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33364///
33365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
33366#[inline]
33367#[target_feature(enable = "avx512f")]
33368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33369pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
33370    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
33371}
33372
33373/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33374///
33375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
33376#[inline]
33377#[target_feature(enable = "avx512f")]
33378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33379pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
33380    unsafe {
33381        let a = _mm256_mul_pd(
33382            _mm512_extractf64x4_pd::<0>(a),
33383            _mm512_extractf64x4_pd::<1>(a),
33384        );
33385        let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33386        simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
33387    }
33388}
33389
33390/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33391///
33392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
33393#[inline]
33394#[target_feature(enable = "avx512f")]
33395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33396pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
33397    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
33398}
33399
33400/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33401///
33402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
33403#[inline]
33404#[target_feature(enable = "avx512f")]
33405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33406pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
33407    unsafe { simd_reduce_max(a.as_i32x16()) }
33408}
33409
33410/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33411///
33412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
33413#[inline]
33414#[target_feature(enable = "avx512f")]
33415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33416pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
33417    unsafe {
33418        simd_reduce_max(simd_select_bitmask(
33419            k,
33420            a.as_i32x16(),
33421            i32x16::splat(i32::MIN),
33422        ))
33423    }
33424}
33425
33426/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33427///
33428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
33429#[inline]
33430#[target_feature(enable = "avx512f")]
33431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33432pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
33433    unsafe { simd_reduce_max(a.as_i64x8()) }
33434}
33435
33436/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33437///
33438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
33439#[inline]
33440#[target_feature(enable = "avx512f")]
33441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33442pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
33443    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
33444}
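
// Illustrative sketch (hypothetical, test-only helper, not part of the public API):
// the masked signed max reductions substitute the type's minimum value for
// inactive lanes, so those lanes can never win the comparison. Assumes an
// AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mask_reduce_max_epi64() {
    let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
    assert_eq!(_mm512_reduce_max_epi64(a), 8);
    // Mask off the upper half: the maximum of the low four lanes is 4.
    assert_eq!(_mm512_mask_reduce_max_epi64(0b0000_1111, a), 4);
}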
33445
33446/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33447///
33448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
33449#[inline]
33450#[target_feature(enable = "avx512f")]
33451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33452pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
33453    unsafe { simd_reduce_max(a.as_u32x16()) }
33454}
33455
33456/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33457///
33458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
33459#[inline]
33460#[target_feature(enable = "avx512f")]
33461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33462pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
33463    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
33464}
33465
33466/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33467///
33468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
33469#[inline]
33470#[target_feature(enable = "avx512f")]
33471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33472pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
33473    unsafe { simd_reduce_max(a.as_u64x8()) }
33474}
33475
33476/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33477///
33478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
33479#[inline]
33480#[target_feature(enable = "avx512f")]
33481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33482pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
33483    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
33484}
33485
33486/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33487///
33488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
33489#[inline]
33490#[target_feature(enable = "avx512f")]
33491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33492pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
33493    unsafe {
33494        let a = _mm256_max_ps(
33495            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33496            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33497        );
33498        let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33499        let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33500        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
33501    }
33502}
33503
33504/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33505///
33506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
33507#[inline]
33508#[target_feature(enable = "avx512f")]
33509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33510pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
33511    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
33512}
33513
33514/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33515///
33516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
33517#[inline]
33518#[target_feature(enable = "avx512f")]
33519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33520pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
33521    unsafe {
33522        let a = _mm256_max_pd(
33523            _mm512_extractf64x4_pd::<0>(a),
33524            _mm512_extractf64x4_pd::<1>(a),
33525        );
33526        let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33527        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
33528    }
33529}
33530
33531/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33532///
33533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
33534#[inline]
33535#[target_feature(enable = "avx512f")]
33536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33537pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
33538    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
33539}
33540
33541/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33542///
33543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
33544#[inline]
33545#[target_feature(enable = "avx512f")]
33546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33547pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
33548    unsafe { simd_reduce_min(a.as_i32x16()) }
33549}
33550
33551/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33552///
33553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
33554#[inline]
33555#[target_feature(enable = "avx512f")]
33556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33557pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
33558    unsafe {
33559        simd_reduce_min(simd_select_bitmask(
33560            k,
33561            a.as_i32x16(),
33562            i32x16::splat(i32::MAX),
33563        ))
33564    }
33565}
33566
33567/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33568///
33569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
33570#[inline]
33571#[target_feature(enable = "avx512f")]
33572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33573pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
33574    unsafe { simd_reduce_min(a.as_i64x8()) }
33575}
33576
33577/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33578///
33579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
33580#[inline]
33581#[target_feature(enable = "avx512f")]
33582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33583pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
33584    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
33585}
33586
33587/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33588///
33589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
33590#[inline]
33591#[target_feature(enable = "avx512f")]
33592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33593pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
33594    unsafe { simd_reduce_min(a.as_u32x16()) }
33595}
33596
33597/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33598///
33599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
33600#[inline]
33601#[target_feature(enable = "avx512f")]
33602#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33603pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
33604    unsafe {
33605        simd_reduce_min(simd_select_bitmask(
33606            k,
33607            a.as_u32x16(),
33608            u32x16::splat(u32::MAX),
33609        ))
33610    }
33611}
33612
33613/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33614///
33615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
33616#[inline]
33617#[target_feature(enable = "avx512f")]
33618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33619pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
33620    unsafe { simd_reduce_min(a.as_u64x8()) }
33621}
33622
33623/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33624///
33625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4593)
33626#[inline]
33627#[target_feature(enable = "avx512f")]
33628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33629pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
33630    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
33631}
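
// Illustrative sketch (hypothetical, test-only helper, not part of the public API):
// the masked min reductions mirror the max ones, substituting the largest
// representable value for inactive lanes. Assumes an AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _example_mask_reduce_min_epu64() {
    let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
    assert_eq!(_mm512_reduce_min_epu64(a), 1);
    // Mask off the low half: the minimum of the remaining lanes is 5.
    assert_eq!(_mm512_mask_reduce_min_epu64(0b1111_0000, a), 5);
}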
33632
33633/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33634///
33635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
33636#[inline]
33637#[target_feature(enable = "avx512f")]
33638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33639pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
33640    unsafe {
33641        let a = _mm256_min_ps(
33642            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
33643            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
33644        );
33645        let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
33646        let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
33647        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
33648    }
33649}
33650
33651/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33652///
33653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
33654#[inline]
33655#[target_feature(enable = "avx512f")]
33656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33657pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
33658    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
33659}
33660
33661/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33662///
33663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
33664#[inline]
33665#[target_feature(enable = "avx512f")]
33666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33667pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
33668    unsafe {
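        // Same halving strategy as `_mm512_reduce_min_ps`: 512 -> 256 -> 128 bits,
        // then one final shuffle+min to collapse the last two lanes.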
33669        let a = _mm256_min_pd(
33670            _mm512_extractf64x4_pd::<0>(a),
33671            _mm512_extractf64x4_pd::<1>(a),
33672        );
33673        let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
33674        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
33675    }
33676}
33677
/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33679///
33680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
33681#[inline]
33682#[target_feature(enable = "avx512f")]
33683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33684pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
33685    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
33686}
33687
33688/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33689///
33690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
33691#[inline]
33692#[target_feature(enable = "avx512f")]
33693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33694pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
33695    unsafe { simd_reduce_and(a.as_i32x16()) }
33696}
33697
33698/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
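///
/// # Examples
///
/// A minimal usage sketch (illustrative, not a doctest; assumes the caller
/// enables the `avx512f` target feature):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b1100);
/// // Inactive lanes contribute all-ones (-1), so they cannot clear any bit.
/// let r = _mm512_mask_reduce_and_epi32(0b0000_0000_1111_1111, a);
/// assert_eq!(r, 0b1100);
/// ```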
33701#[inline]
33702#[target_feature(enable = "avx512f")]
33703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33704pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
33705    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
33706}
33707
33708/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33709///
33710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
33711#[inline]
33712#[target_feature(enable = "avx512f")]
33713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33714pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
33715    unsafe { simd_reduce_and(a.as_i64x8()) }
33716}
33717
/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33719///
33720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4557)
33721#[inline]
33722#[target_feature(enable = "avx512f")]
33723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33724pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
33725    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
33726}
33727
33728/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33729///
33730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
33731#[inline]
33732#[target_feature(enable = "avx512f")]
33733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33734pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
33735    unsafe { simd_reduce_or(a.as_i32x16()) }
33736}
33737
33738/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33739///
33740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
33741#[inline]
33742#[target_feature(enable = "avx512f")]
33743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33744pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
33745    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33746}
33747
33748/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33749///
33750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
33751#[inline]
33752#[target_feature(enable = "avx512f")]
33753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33754pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
33755    unsafe { simd_reduce_or(a.as_i64x8()) }
33756}
33757
33758/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33759///
33760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
33761#[inline]
33762#[target_feature(enable = "avx512f")]
33763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33764pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
33765    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33766}
33767
33768/// Returns vector of type `__m512d` with indeterminate elements.
33769/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33770/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33771/// In practice, this is typically equivalent to [`mem::zeroed`].
33772///
33773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
33774#[inline]
33775#[target_feature(enable = "avx512f")]
33776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33777// This intrinsic has no corresponding instruction.
33778pub fn _mm512_undefined_pd() -> __m512d {
33779    unsafe { const { mem::zeroed() } }
33780}
33781
33782/// Returns vector of type `__m512` with indeterminate elements.
33783/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33784/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33785/// In practice, this is typically equivalent to [`mem::zeroed`].
33786///
33787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
33788#[inline]
33789#[target_feature(enable = "avx512f")]
33790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33791// This intrinsic has no corresponding instruction.
33792pub fn _mm512_undefined_ps() -> __m512 {
33793    unsafe { const { mem::zeroed() } }
33794}
33795
/// Returns vector of type `__m512i` with indeterminate elements.
33797/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33798/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33799/// In practice, this is typically equivalent to [`mem::zeroed`].
33800///
33801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
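///
/// # Examples
///
/// A minimal usage sketch (illustrative, not a doctest; `some_vector` stands
/// for any `__m512i` value and the caller is assumed to enable `avx512f`).
/// The result is typically used as a "don't care" operand whose lanes are all
/// overwritten before being read:
///
/// ```ignore
/// let placeholder = _mm512_undefined_epi32();
/// // With an all-ones mask every lane comes from `some_vector`.
/// let r = _mm512_mask_mov_epi32(placeholder, 0xFFFF, some_vector);
/// ```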
33802#[inline]
33803#[target_feature(enable = "avx512f")]
33804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33805// This intrinsic has no corresponding instruction.
33806pub fn _mm512_undefined_epi32() -> __m512i {
33807    unsafe { const { mem::zeroed() } }
33808}
33809
/// Returns vector of type `__m512` with indeterminate elements.
33811/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33812/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33813/// In practice, this is typically equivalent to [`mem::zeroed`].
33814///
33815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
33816#[inline]
33817#[target_feature(enable = "avx512f")]
33818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33819// This intrinsic has no corresponding instruction.
33820pub fn _mm512_undefined() -> __m512 {
33821    unsafe { const { mem::zeroed() } }
33822}
33823
33824/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33825///
33826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
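///
/// # Examples
///
/// A minimal usage sketch (illustrative, not a doctest; the pointer must be
/// valid for reading 64 bytes but needs no particular alignment):
///
/// ```ignore
/// let data = [7i32; 16];
/// let v = unsafe { _mm512_loadu_epi32(data.as_ptr()) };
/// ```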
33827#[inline]
33828#[target_feature(enable = "avx512f")]
33829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33830#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33831pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
33832    ptr::read_unaligned(mem_addr as *const __m512i)
33833}
33834
33835/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33836///
33837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
33838#[inline]
33839#[target_feature(enable = "avx512f,avx512vl")]
33840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33841#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33842pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
33843    ptr::read_unaligned(mem_addr as *const __m256i)
33844}
33845
33846/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33847///
33848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
33849#[inline]
33850#[target_feature(enable = "avx512f,avx512vl")]
33851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33852#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33853pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
33854    ptr::read_unaligned(mem_addr as *const __m128i)
33855}
33856
33857/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33858///
33859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
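///
/// # Examples
///
/// A minimal usage sketch (illustrative, not a doctest): only elements whose
/// mask bit is set are truncated and written; the remaining destination
/// elements are left untouched.
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x1_0001); // truncates to 0x0001 in each lane
/// let mut out = [0i16; 16];
/// unsafe { _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr(), 0x00FF, a) };
/// // out[0..8] are now 1; out[8..16] are still 0.
/// ```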
33860#[inline]
33861#[target_feature(enable = "avx512f")]
33862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33863#[cfg_attr(test, assert_instr(vpmovdw))]
33864pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
33865    vpmovdwmem(mem_addr.cast(), a.as_i32x16(), k);
33866}
33867
33868/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33869///
33870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
33871#[inline]
33872#[target_feature(enable = "avx512f,avx512vl")]
33873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33874#[cfg_attr(test, assert_instr(vpmovdw))]
33875pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
33876    vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33877}
33878
33879/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33880///
33881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
33882#[inline]
33883#[target_feature(enable = "avx512f,avx512vl")]
33884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33885#[cfg_attr(test, assert_instr(vpmovdw))]
33886pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
33887    vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33888}
33889
33890/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33891///
33892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
33893#[inline]
33894#[target_feature(enable = "avx512f")]
33895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33896#[cfg_attr(test, assert_instr(vpmovsdw))]
33897pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
33898    vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), k);
33899}
33900
33901/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33902///
33903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
33904#[inline]
33905#[target_feature(enable = "avx512f,avx512vl")]
33906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33907#[cfg_attr(test, assert_instr(vpmovsdw))]
33908pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
33909    vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33910}
33911
33912/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33913///
33914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
33915#[inline]
33916#[target_feature(enable = "avx512f,avx512vl")]
33917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33918#[cfg_attr(test, assert_instr(vpmovsdw))]
33919pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
33920    vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33921}
33922
33923/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33924///
33925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
33926#[inline]
33927#[target_feature(enable = "avx512f")]
33928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33929#[cfg_attr(test, assert_instr(vpmovusdw))]
33930pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
33931    vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), k);
33932}
33933
33934/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33935///
33936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
33937#[inline]
33938#[target_feature(enable = "avx512f,avx512vl")]
33939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33940#[cfg_attr(test, assert_instr(vpmovusdw))]
33941pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
33942    vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33943}
33944
33945/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33946///
33947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
33948#[inline]
33949#[target_feature(enable = "avx512f,avx512vl")]
33950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33951#[cfg_attr(test, assert_instr(vpmovusdw))]
33952pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
33953    vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33954}
33955
33956/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33957///
33958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
33959#[inline]
33960#[target_feature(enable = "avx512f")]
33961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33962#[cfg_attr(test, assert_instr(vpmovdb))]
33963pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33964    vpmovdbmem(mem_addr, a.as_i32x16(), k);
33965}
33966
33967/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33968///
33969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
33970#[inline]
33971#[target_feature(enable = "avx512f,avx512vl")]
33972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33973#[cfg_attr(test, assert_instr(vpmovdb))]
33974pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
33975    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
33976}
33977
33978/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33979///
33980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
33981#[inline]
33982#[target_feature(enable = "avx512f,avx512vl")]
33983#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33984#[cfg_attr(test, assert_instr(vpmovdb))]
33985pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
33986    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
33987}
33988
33989/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33990///
33991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
33992#[inline]
33993#[target_feature(enable = "avx512f")]
33994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33995#[cfg_attr(test, assert_instr(vpmovsdb))]
33996pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
33997    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
33998}
33999
34000/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34001///
34002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
34003#[inline]
34004#[target_feature(enable = "avx512f,avx512vl")]
34005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34006#[cfg_attr(test, assert_instr(vpmovsdb))]
34007pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34008    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
34009}
34010
34011/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34012///
34013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
34014#[inline]
34015#[target_feature(enable = "avx512f,avx512vl")]
34016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34017#[cfg_attr(test, assert_instr(vpmovsdb))]
34018pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34019    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
34020}
34021
34022/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34023///
34024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
34025#[inline]
34026#[target_feature(enable = "avx512f")]
34027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34028#[cfg_attr(test, assert_instr(vpmovusdb))]
34029pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
34030    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
34031}
34032
34033/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34034///
34035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
34036#[inline]
34037#[target_feature(enable = "avx512f,avx512vl")]
34038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34039#[cfg_attr(test, assert_instr(vpmovusdb))]
34040pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34041    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
34042}
34043
34044/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34045///
34046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
34047#[inline]
34048#[target_feature(enable = "avx512f,avx512vl")]
34049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34050#[cfg_attr(test, assert_instr(vpmovusdb))]
34051pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34052    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
34053}
34054
34055/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34056///
34057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
34058#[inline]
34059#[target_feature(enable = "avx512f")]
34060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34061#[cfg_attr(test, assert_instr(vpmovqw))]
34062pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34063    vpmovqwmem(mem_addr.cast(), a.as_i64x8(), k);
34064}
34065
34066/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34067///
34068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
34069#[inline]
34070#[target_feature(enable = "avx512f,avx512vl")]
34071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34072#[cfg_attr(test, assert_instr(vpmovqw))]
34073pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34074    vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34075}
34076
34077/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34078///
34079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
34080#[inline]
34081#[target_feature(enable = "avx512f,avx512vl")]
34082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34083#[cfg_attr(test, assert_instr(vpmovqw))]
34084pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34085    vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34086}
34087
34088/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34089///
34090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
34091#[inline]
34092#[target_feature(enable = "avx512f")]
34093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34094#[cfg_attr(test, assert_instr(vpmovsqw))]
34095pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34096    vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), k);
34097}
34098
34099/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34100///
34101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
34102#[inline]
34103#[target_feature(enable = "avx512f,avx512vl")]
34104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34105#[cfg_attr(test, assert_instr(vpmovsqw))]
34106pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34107    vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34108}
34109
34110/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34111///
34112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
34113#[inline]
34114#[target_feature(enable = "avx512f,avx512vl")]
34115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34116#[cfg_attr(test, assert_instr(vpmovsqw))]
34117pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34118    vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34119}
34120
34121/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34122///
34123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
34124#[inline]
34125#[target_feature(enable = "avx512f")]
34126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34127#[cfg_attr(test, assert_instr(vpmovusqw))]
34128pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34129    vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), k);
34130}
34131
34132/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34133///
34134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
34135#[inline]
34136#[target_feature(enable = "avx512f,avx512vl")]
34137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34138#[cfg_attr(test, assert_instr(vpmovusqw))]
34139pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34140    vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34141}
34142
34143/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34144///
34145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
34146#[inline]
34147#[target_feature(enable = "avx512f,avx512vl")]
34148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34149#[cfg_attr(test, assert_instr(vpmovusqw))]
34150pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34151    vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34152}
34153
34154/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34155///
34156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
34157#[inline]
34158#[target_feature(enable = "avx512f")]
34159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34160#[cfg_attr(test, assert_instr(vpmovqb))]
34161pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34162    vpmovqbmem(mem_addr, a.as_i64x8(), k);
34163}
34164
34165/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34166///
34167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
34168#[inline]
34169#[target_feature(enable = "avx512f,avx512vl")]
34170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34171#[cfg_attr(test, assert_instr(vpmovqb))]
34172pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34173    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
34174}
34175
34176/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34177///
34178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
34179#[inline]
34180#[target_feature(enable = "avx512f,avx512vl")]
34181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34182#[cfg_attr(test, assert_instr(vpmovqb))]
34183pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34184    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
34185}
34186
34187/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34188///
34189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
34190#[inline]
34191#[target_feature(enable = "avx512f")]
34192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34193#[cfg_attr(test, assert_instr(vpmovsqb))]
34194pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34195    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
34196}
34197
34198/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34199///
34200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
34201#[inline]
34202#[target_feature(enable = "avx512f,avx512vl")]
34203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34204#[cfg_attr(test, assert_instr(vpmovsqb))]
34205pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34206    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
34207}
34208
34209/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34210///
34211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
34212#[inline]
34213#[target_feature(enable = "avx512f,avx512vl")]
34214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34215#[cfg_attr(test, assert_instr(vpmovsqb))]
34216pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34217    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
34218}
34219
34220/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34221///
34222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
34223#[inline]
34224#[target_feature(enable = "avx512f")]
34225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34226#[cfg_attr(test, assert_instr(vpmovusqb))]
34227pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
34228    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
34229}
34230
34231/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34232///
34233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
34234#[inline]
34235#[target_feature(enable = "avx512f,avx512vl")]
34236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34237#[cfg_attr(test, assert_instr(vpmovusqb))]
34238pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
34239    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
34240}
34241
34242/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34243///
34244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
34245#[inline]
34246#[target_feature(enable = "avx512f,avx512vl")]
34247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34248#[cfg_attr(test, assert_instr(vpmovusqb))]
34249pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
34250    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
34251}
34252
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34254///
34255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
34256#[inline]
34257#[target_feature(enable = "avx512f")]
34258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34259#[cfg_attr(test, assert_instr(vpmovqd))]
34260pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
34261    vpmovqdmem(mem_addr.cast(), a.as_i64x8(), k);
34262}
34263
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34265///
34266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
34267#[inline]
34268#[target_feature(enable = "avx512f,avx512vl")]
34269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34270#[cfg_attr(test, assert_instr(vpmovqd))]
34271pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
34272    vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), k);
34273}
34274
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34276///
34277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
34278#[inline]
34279#[target_feature(enable = "avx512f,avx512vl")]
34280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34281#[cfg_attr(test, assert_instr(vpmovqd))]
34282pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
34283    vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), k);
34284}
34285
34286/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34287///
34288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
34289#[inline]
34290#[target_feature(enable = "avx512f")]
34291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34292#[cfg_attr(test, assert_instr(vpmovsqd))]
34293pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
34294    vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), k);
34295}
34296
34297/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34298///
34299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
34300#[inline]
34301#[target_feature(enable = "avx512f,avx512vl")]
34302#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34303#[cfg_attr(test, assert_instr(vpmovsqd))]
34304pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
34305    vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), k);
34306}
34307
34308/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34309///
34310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
34311#[inline]
34312#[target_feature(enable = "avx512f,avx512vl")]
34313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34314#[cfg_attr(test, assert_instr(vpmovsqd))]
34315pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
34316    vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), k);
34317}
34318
34319/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34320///
34321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
34322#[inline]
34323#[target_feature(enable = "avx512f")]
34324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34325#[cfg_attr(test, assert_instr(vpmovusqd))]
34326pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
34327    vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), k);
34328}
34329
34330/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34331///
34332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
34333#[inline]
34334#[target_feature(enable = "avx512f,avx512vl")]
34335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34336#[cfg_attr(test, assert_instr(vpmovusqd))]
34337pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
34338    vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), k);
34339}
34340
34341/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34342///
34343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
34344#[inline]
34345#[target_feature(enable = "avx512f,avx512vl")]
34346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34347#[cfg_attr(test, assert_instr(vpmovusqd))]
34348pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
34349    vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), k);
34350}
34351
34352/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34353///
34354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
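///
/// # Examples
///
/// A minimal usage sketch (illustrative, not a doctest; the pointer must be
/// valid for writing 64 bytes but needs no particular alignment):
///
/// ```ignore
/// let mut out = [0i32; 16];
/// unsafe { _mm512_storeu_epi32(out.as_mut_ptr(), _mm512_set1_epi32(7)) };
/// assert!(out.iter().all(|&x| x == 7));
/// ```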
34355#[inline]
34356#[target_feature(enable = "avx512f")]
34357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34358#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34359pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
34360    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34361}
34362
34363/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34364///
34365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
34366#[inline]
34367#[target_feature(enable = "avx512f,avx512vl")]
34368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34369#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34370pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
34371    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34372}
34373
34374/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34375///
34376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
34377#[inline]
34378#[target_feature(enable = "avx512f,avx512vl")]
34379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34380#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34381pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
34382    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34383}
34384
34385/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34386///
34387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
34388#[inline]
34389#[target_feature(enable = "avx512f")]
34390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34391#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34392pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
34393    ptr::read_unaligned(mem_addr as *const __m512i)
34394}
34395
34396/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34397///
34398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
34399#[inline]
34400#[target_feature(enable = "avx512f,avx512vl")]
34401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34402#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34403pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
34404    ptr::read_unaligned(mem_addr as *const __m256i)
34405}
34406
34407/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34408///
34409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
34410#[inline]
34411#[target_feature(enable = "avx512f,avx512vl")]
34412#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34413#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34414pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
34415    ptr::read_unaligned(mem_addr as *const __m128i)
34416}
34417
34418/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34419///
34420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
34421#[inline]
34422#[target_feature(enable = "avx512f")]
34423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34424#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34425pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
34426    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34427}
34428
34429/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34430///
34431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
34432#[inline]
34433#[target_feature(enable = "avx512f,avx512vl")]
34434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34435#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34436pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
34437    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34438}
34439
34440/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34441///
34442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
34443#[inline]
34444#[target_feature(enable = "avx512f,avx512vl")]
34445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34446#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34447pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
34448    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34449}
34450
34451/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34452///
34453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
34454#[inline]
34455#[target_feature(enable = "avx512f")]
34456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34457#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34458pub unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
34459    ptr::read_unaligned(mem_addr)
34460}
34461
34462/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
34463///
34464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
34465#[inline]
34466#[target_feature(enable = "avx512f")]
34467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34468#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34469pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
34470    ptr::write_unaligned(mem_addr, a);
34471}
34472
34473/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
34474/// floating-point elements) from memory into result.
34475/// `mem_addr` does not need to be aligned on any particular boundary.
34476///
34477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
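///
/// # Examples
///
/// A minimal usage sketch (illustrative, not a doctest):
///
/// ```ignore
/// let data = [1.5f64; 8];
/// let v = unsafe { _mm512_loadu_pd(data.as_ptr()) };
/// ```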
34478#[inline]
34479#[target_feature(enable = "avx512f")]
34480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34481#[cfg_attr(test, assert_instr(vmovups))]
34482pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
34483    ptr::read_unaligned(mem_addr as *const __m512d)
34484}
34485
34486/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
34487/// floating-point elements) from `a` into memory.
34488/// `mem_addr` does not need to be aligned on any particular boundary.
34489///
34490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
34491#[inline]
34492#[target_feature(enable = "avx512f")]
34493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34494#[cfg_attr(test, assert_instr(vmovups))]
34495pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
34496    ptr::write_unaligned(mem_addr as *mut __m512d, a);
34497}
34498
34499/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
34500/// floating-point elements) from memory into result.
34501/// `mem_addr` does not need to be aligned on any particular boundary.
34502///
34503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
34504#[inline]
34505#[target_feature(enable = "avx512f")]
34506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34507#[cfg_attr(test, assert_instr(vmovups))]
34508pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
34509    ptr::read_unaligned(mem_addr as *const __m512)
34510}
34511
34512/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
34513/// floating-point elements) from `a` into memory.
34514/// `mem_addr` does not need to be aligned on any particular boundary.
34515///
34516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
34517#[inline]
34518#[target_feature(enable = "avx512f")]
34519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34520#[cfg_attr(test, assert_instr(vmovups))]
34521pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
34522    ptr::write_unaligned(mem_addr as *mut __m512, a);
34523}
34524
34525/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34526///
34527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
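///
/// # Examples
///
/// A minimal usage sketch (illustrative, not a doctest; `Align64` is a
/// hypothetical `#[repr(align(64))]` wrapper used here only to guarantee the
/// required 64-byte alignment):
///
/// ```ignore
/// #[repr(align(64))]
/// struct Align64([i32; 16]);
///
/// let buf = Align64([7; 16]);
/// let v = unsafe { _mm512_load_si512(buf.0.as_ptr().cast()) };
/// ```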
34528#[inline]
34529#[target_feature(enable = "avx512f")]
34530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34531#[cfg_attr(
34532    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34533    assert_instr(vmovaps)
34534)] //should be vmovdqa32
34535pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
34536    ptr::read(mem_addr)
34537}
34538
34539/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34540///
34541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
34542#[inline]
34543#[target_feature(enable = "avx512f")]
34544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34545#[cfg_attr(
34546    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34547    assert_instr(vmovaps)
34548)] //should be vmovdqa32
34549pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
34550    ptr::write(mem_addr, a);
34551}
34552
34553/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34554///
34555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
34556#[inline]
34557#[target_feature(enable = "avx512f")]
34558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34559#[cfg_attr(
34560    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34561    assert_instr(vmovaps)
34562)] //should be vmovdqa32
34563pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
34564    ptr::read(mem_addr as *const __m512i)
34565}
34566
34567/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34568///
34569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
34570#[inline]
34571#[target_feature(enable = "avx512f,avx512vl")]
34572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34573#[cfg_attr(
34574    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34575    assert_instr(vmovaps)
34576)] //should be vmovdqa32
34577pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
34578    ptr::read(mem_addr as *const __m256i)
34579}
34580
34581/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34582///
34583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
34584#[inline]
34585#[target_feature(enable = "avx512f,avx512vl")]
34586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34587#[cfg_attr(
34588    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34589    assert_instr(vmovaps)
34590)] //should be vmovdqa32
34591pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
34592    ptr::read(mem_addr as *const __m128i)
34593}
34594
34595/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34596///
34597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
34598#[inline]
34599#[target_feature(enable = "avx512f")]
34600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34601#[cfg_attr(
34602    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34603    assert_instr(vmovaps)
34604)] //should be vmovdqa32
34605pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
34606    ptr::write(mem_addr as *mut __m512i, a);
34607}
34608
34609/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34610///
34611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
34612#[inline]
34613#[target_feature(enable = "avx512f,avx512vl")]
34614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34615#[cfg_attr(
34616    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34617    assert_instr(vmovaps)
34618)] //should be vmovdqa32
34619pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
34620    ptr::write(mem_addr as *mut __m256i, a);
34621}
34622
34623/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34624///
34625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
34626#[inline]
34627#[target_feature(enable = "avx512f,avx512vl")]
34628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34629#[cfg_attr(
34630    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34631    assert_instr(vmovaps)
34632)] //should be vmovdqa32
34633pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
34634    ptr::write(mem_addr as *mut __m128i, a);
34635}
34636
34637/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34638///
34639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
34640#[inline]
34641#[target_feature(enable = "avx512f")]
34642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34643#[cfg_attr(
34644    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34645    assert_instr(vmovaps)
34646)] //should be vmovdqa64
34647pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
34648    ptr::read(mem_addr as *const __m512i)
34649}
34650
34651/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34652///
34653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
34654#[inline]
34655#[target_feature(enable = "avx512f,avx512vl")]
34656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34657#[cfg_attr(
34658    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34659    assert_instr(vmovaps)
34660)] //should be vmovdqa64
34661pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
34662    ptr::read(mem_addr as *const __m256i)
34663}
34664
34665/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34666///
34667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
34668#[inline]
34669#[target_feature(enable = "avx512f,avx512vl")]
34670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34671#[cfg_attr(
34672    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34673    assert_instr(vmovaps)
34674)] //should be vmovdqa64
34675pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
34676    ptr::read(mem_addr as *const __m128i)
34677}
34678
34679/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34680///
34681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
34682#[inline]
34683#[target_feature(enable = "avx512f")]
34684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34685#[cfg_attr(
34686    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34687    assert_instr(vmovaps)
34688)] //should be vmovdqa64
34689pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
34690    ptr::write(mem_addr as *mut __m512i, a);
34691}
34692
34693/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34694///
34695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
34696#[inline]
34697#[target_feature(enable = "avx512f,avx512vl")]
34698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34699#[cfg_attr(
34700    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34701    assert_instr(vmovaps)
34702)] //should be vmovdqa64
34703pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
34704    ptr::write(mem_addr as *mut __m256i, a);
34705}
34706
34707/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34708///
34709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
34710#[inline]
34711#[target_feature(enable = "avx512f,avx512vl")]
34712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34713#[cfg_attr(
34714    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34715    assert_instr(vmovaps)
34716)] //should be vmovdqa64
34717pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
34718    ptr::write(mem_addr as *mut __m128i, a);
34719}
34720
34721/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34722///
34723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
34724#[inline]
34725#[target_feature(enable = "avx512f")]
34726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34727#[cfg_attr(
34728    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34729    assert_instr(vmovaps)
34730)]
34731pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
34732    ptr::read(mem_addr as *const __m512)
34733}
34734
34735/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34736///
34737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
34738#[inline]
34739#[target_feature(enable = "avx512f")]
34740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34741#[cfg_attr(
34742    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34743    assert_instr(vmovaps)
34744)]
34745pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
34746    ptr::write(mem_addr as *mut __m512, a);
34747}
34748
34749/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34750///
34751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
34752#[inline]
34753#[target_feature(enable = "avx512f")]
34754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34755#[cfg_attr(
34756    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34757    assert_instr(vmovaps)
34758)] //should be vmovapd
34759pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
34760    ptr::read(mem_addr as *const __m512d)
34761}
34762
34763/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34764///
34765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
34766#[inline]
34767#[target_feature(enable = "avx512f")]
34768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34769#[cfg_attr(
34770    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
34771    assert_instr(vmovaps)
34772)] //should be vmovapd
34773pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
34774    ptr::write(mem_addr as *mut __m512d, a);
34775}
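
// Illustrative sketch (hypothetical names): with a 64-byte-aligned `[f32; 16]`, the
// aligned `ps` load/store pair combines naturally with other AVX-512 arithmetic, here
// doubling each element in place via `_mm512_add_ps`.
#[repr(align(64))]
struct Align64F32([f32; 16]);

#[target_feature(enable = "avx512f")]
unsafe fn example_double_in_place(buf: &mut Align64F32) {
    let v = _mm512_load_ps(buf.0.as_ptr());
    let doubled = _mm512_add_ps(v, v);
    _mm512_store_ps(buf.0.as_mut_ptr(), doubled);
}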
34776
34777/// Load packed 32-bit integers from memory into dst using writemask k
34778/// (elements are copied from src when the corresponding mask bit is not set).
34779/// mem_addr does not need to be aligned on any particular boundary.
34780///
34781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
34782#[inline]
34783#[target_feature(enable = "avx512f")]
34784#[cfg_attr(test, assert_instr(vmovdqu32))]
34785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34786pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
34787    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
34788}
34789
34790/// Load packed 32-bit integers from memory into dst using zeromask k
34791/// (elements are zeroed out when the corresponding mask bit is not set).
34792/// mem_addr does not need to be aligned on any particular boundary.
34793///
34794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
34795#[inline]
34796#[target_feature(enable = "avx512f")]
34797#[cfg_attr(test, assert_instr(vmovdqu32))]
34798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34799pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
34800    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
34801}
34802
34803/// Load packed 64-bit integers from memory into dst using writemask k
34804/// (elements are copied from src when the corresponding mask bit is not set).
34805/// mem_addr does not need to be aligned on any particular boundary.
34806///
34807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
34808#[inline]
34809#[target_feature(enable = "avx512f")]
34810#[cfg_attr(test, assert_instr(vmovdqu64))]
34811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34812pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
34813    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
34814}
34815
34816/// Load packed 64-bit integers from memory into dst using zeromask k
34817/// (elements are zeroed out when the corresponding mask bit is not set).
34818/// mem_addr does not need to be aligned on any particular boundary.
34819///
34820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
34821#[inline]
34822#[target_feature(enable = "avx512f")]
34823#[cfg_attr(test, assert_instr(vmovdqu64))]
34824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34825pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
34826    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
34827}
34828
34829/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34830/// (elements are copied from src when the corresponding mask bit is not set).
34831/// mem_addr does not need to be aligned on any particular boundary.
34832///
34833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
34834#[inline]
34835#[target_feature(enable = "avx512f")]
34836#[cfg_attr(test, assert_instr(vmovups))]
34837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34838pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
34839    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
34840}
34841
34842/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34843/// (elements are zeroed out when the corresponding mask bit is not set).
34844/// mem_addr does not need to be aligned on any particular boundary.
34845///
34846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
34847#[inline]
34848#[target_feature(enable = "avx512f")]
34849#[cfg_attr(test, assert_instr(vmovups))]
34850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34851pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
34852    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
34853}
34854
34855/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34856/// (elements are copied from src when the corresponding mask bit is not set).
34857/// mem_addr does not need to be aligned on any particular boundary.
34858///
34859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
34860#[inline]
34861#[target_feature(enable = "avx512f")]
34862#[cfg_attr(test, assert_instr(vmovupd))]
34863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34864pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
34865    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
34866}
34867
34868/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34869/// (elements are zeroed out when the corresponding mask bit is not set).
34870/// mem_addr does not need to be aligned on any particular boundary.
34871///
34872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
34873#[inline]
34874#[target_feature(enable = "avx512f")]
34875#[cfg_attr(test, assert_instr(vmovupd))]
34876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34877pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
34878    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
34879}
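
// Sketch of a common use of the unaligned masked loads (hypothetical helper): reading a
// short tail of a slice without touching memory past its end. Lanes whose mask bit is
// clear are not accessed, so no out-of-bounds read occurs. Assumes `tail.len() < 16`.
#[target_feature(enable = "avx512f")]
unsafe fn example_load_tail_epi32(tail: &[i32]) -> __m512i {
    debug_assert!(tail.len() < 16);
    // Set one mask bit per valid element; masked-off lanes are zeroed in the result.
    let k: __mmask16 = (1u16 << tail.len()) - 1;
    _mm512_maskz_loadu_epi32(k, tail.as_ptr())
}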
34880
34881/// Load packed 32-bit integers from memory into dst using writemask k
34882/// (elements are copied from src when the corresponding mask bit is not set).
34883/// mem_addr does not need to be aligned on any particular boundary.
34884///
34885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
34886#[inline]
34887#[target_feature(enable = "avx512f,avx512vl")]
34888#[cfg_attr(test, assert_instr(vmovdqu32))]
34889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34890pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
34891    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
34892}
34893
34894/// Load packed 32-bit integers from memory into dst using zeromask k
34895/// (elements are zeroed out when the corresponding mask bit is not set).
34896/// mem_addr does not need to be aligned on any particular boundary.
34897///
34898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
34899#[inline]
34900#[target_feature(enable = "avx512f,avx512vl")]
34901#[cfg_attr(test, assert_instr(vmovdqu32))]
34902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34903pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
34904    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
34905}
34906
34907/// Load packed 64-bit integers from memory into dst using writemask k
34908/// (elements are copied from src when the corresponding mask bit is not set).
34909/// mem_addr does not need to be aligned on any particular boundary.
34910///
34911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
34912#[inline]
34913#[target_feature(enable = "avx512f,avx512vl")]
34914#[cfg_attr(test, assert_instr(vmovdqu64))]
34915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34916pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
34917    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
34918}
34919
34920/// Load packed 64-bit integers from memory into dst using zeromask k
34921/// (elements are zeroed out when the corresponding mask bit is not set).
34922/// mem_addr does not need to be aligned on any particular boundary.
34923///
34924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
34925#[inline]
34926#[target_feature(enable = "avx512f,avx512vl")]
34927#[cfg_attr(test, assert_instr(vmovdqu64))]
34928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34929pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
34930    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
34931}
34932
34933/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34934/// (elements are copied from src when the corresponding mask bit is not set).
34935/// mem_addr does not need to be aligned on any particular boundary.
34936///
34937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
34938#[inline]
34939#[target_feature(enable = "avx512f,avx512vl")]
34940#[cfg_attr(test, assert_instr(vmovups))]
34941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34942pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
34943    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
34944}
34945
34946/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34947/// (elements are zeroed out when the corresponding mask bit is not set).
34948/// mem_addr does not need to be aligned on any particular boundary.
34949///
34950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
34951#[inline]
34952#[target_feature(enable = "avx512f,avx512vl")]
34953#[cfg_attr(test, assert_instr(vmovups))]
34954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34955pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
34956    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
34957}
34958
34959/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34960/// (elements are copied from src when the corresponding mask bit is not set).
34961/// mem_addr does not need to be aligned on any particular boundary.
34962///
34963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
34964#[inline]
34965#[target_feature(enable = "avx512f,avx512vl")]
34966#[cfg_attr(test, assert_instr(vmovupd))]
34967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34968pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
34969    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
34970}
34971
34972/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34973/// (elements are zeroed out when the corresponding mask bit is not set).
34974/// mem_addr does not need to be aligned on any particular boundary.
34975///
34976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
34977#[inline]
34978#[target_feature(enable = "avx512f,avx512vl")]
34979#[cfg_attr(test, assert_instr(vmovupd))]
34980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34981pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
34982    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
34983}
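
// The 256-bit (and 128-bit) masked forms additionally require AVX512VL, so an
// illustrative wrapper (hypothetical) has to enable both features; otherwise the call
// looks the same as the 512-bit variants, just with an 8-bit mask.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_maskz_loadu_256(k: __mmask8, p: *const i64) -> __m256i {
    _mm256_maskz_loadu_epi64(k, p)
}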
34984
34985/// Load packed 32-bit integers from memory into dst using writemask k
34986/// (elements are copied from src when the corresponding mask bit is not set).
34987/// mem_addr does not need to be aligned on any particular boundary.
34988///
34989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
34990#[inline]
34991#[target_feature(enable = "avx512f,avx512vl")]
34992#[cfg_attr(test, assert_instr(vmovdqu32))]
34993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34994pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
34995    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
34996}
34997
34998/// Load packed 32-bit integers from memory into dst using zeromask k
34999/// (elements are zeroed out when the corresponding mask bit is not set).
35000/// mem_addr does not need to be aligned on any particular boundary.
35001///
35002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
35003#[inline]
35004#[target_feature(enable = "avx512f,avx512vl")]
35005#[cfg_attr(test, assert_instr(vmovdqu32))]
35006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35007pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35008    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
35009}
35010
35011/// Load packed 64-bit integers from memory into dst using writemask k
35012/// (elements are copied from src when the corresponding mask bit is not set).
35013/// mem_addr does not need to be aligned on any particular boundary.
35014///
35015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
35016#[inline]
35017#[target_feature(enable = "avx512f,avx512vl")]
35018#[cfg_attr(test, assert_instr(vmovdqu64))]
35019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35020pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
35021    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
35022}
35023
35024/// Load packed 64-bit integers from memory into dst using zeromask k
35025/// (elements are zeroed out when the corresponding mask bit is not set).
35026/// mem_addr does not need to be aligned on any particular boundary.
35027///
35028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
35029#[inline]
35030#[target_feature(enable = "avx512f,avx512vl")]
35031#[cfg_attr(test, assert_instr(vmovdqu64))]
35032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35033pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35034    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
35035}
35036
35037/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35038/// (elements are copied from src when the corresponding mask bit is not set).
35039/// mem_addr does not need to be aligned on any particular boundary.
35040///
35041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
35042#[inline]
35043#[target_feature(enable = "avx512f,avx512vl")]
35044#[cfg_attr(test, assert_instr(vmovups))]
35045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35046pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35047    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
35048}
35049
35050/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35051/// (elements are zeroed out when the corresponding mask bit is not set).
35052/// mem_addr does not need to be aligned on any particular boundary.
35053///
35054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
35055#[inline]
35056#[target_feature(enable = "avx512f,avx512vl")]
35057#[cfg_attr(test, assert_instr(vmovups))]
35058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35059pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35060    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
35061}
35062
35063/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35064/// (elements are copied from src when the corresponding mask bit is not set).
35065/// mem_addr does not need to be aligned on any particular boundary.
35066///
35067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
35068#[inline]
35069#[target_feature(enable = "avx512f,avx512vl")]
35070#[cfg_attr(test, assert_instr(vmovupd))]
35071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35072pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35073    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
35074}
35075
35076/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35077/// (elements are zeroed out when the corresponding mask bit is not set).
35078/// mem_addr does not need to be aligned on any particular boundary.
35079///
35080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
35081#[inline]
35082#[target_feature(enable = "avx512f,avx512vl")]
35083#[cfg_attr(test, assert_instr(vmovupd))]
35084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35085pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35086    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
35087}
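
// Writemask ("merge") semantics in one line, as a hedged sketch with illustrative names:
// with mask 0b0101 only lanes 0 and 2 are read from memory, while lanes 1 and 3 keep
// their values from `fallback`.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_merge_loadu_ps(fallback: __m128, p: *const f32) -> __m128 {
    _mm_mask_loadu_ps(fallback, 0b0101, p)
}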
35088
35089/// Load packed 32-bit integers from memory into dst using writemask k
35090/// (elements are copied from src when the corresponding mask bit is not set).
35091/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35092///
35093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
35094#[inline]
35095#[target_feature(enable = "avx512f")]
35096#[cfg_attr(test, assert_instr(vmovdqa32))]
35097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35098pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
35099    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
35100}
35101
35102/// Load packed 32-bit integers from memory into dst using zeromask k
35103/// (elements are zeroed out when the corresponding mask bit is not set).
35104/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35105///
35106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
35107#[inline]
35108#[target_feature(enable = "avx512f")]
35109#[cfg_attr(test, assert_instr(vmovdqa32))]
35110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35111pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
35112    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
35113}
35114
35115/// Load packed 64-bit integers from memory into dst using writemask k
35116/// (elements are copied from src when the corresponding mask bit is not set).
35117/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35118///
35119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
35120#[inline]
35121#[target_feature(enable = "avx512f")]
35122#[cfg_attr(test, assert_instr(vmovdqa64))]
35123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35124pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
35125    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
35126}
35127
35128/// Load packed 64-bit integers from memory into dst using zeromask k
35129/// (elements are zeroed out when the corresponding mask bit is not set).
35130/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35131///
35132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
35133#[inline]
35134#[target_feature(enable = "avx512f")]
35135#[cfg_attr(test, assert_instr(vmovdqa64))]
35136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35137pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
35138    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
35139}
35140
35141/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35142/// (elements are copied from src when the corresponding mask bit is not set).
35143/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35144///
35145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
35146#[inline]
35147#[target_feature(enable = "avx512f")]
35148#[cfg_attr(test, assert_instr(vmovaps))]
35149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35150pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
35151    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
35152}
35153
35154/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35155/// (elements are zeroed out when the corresponding mask bit is not set).
35156/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35157///
35158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
35159#[inline]
35160#[target_feature(enable = "avx512f")]
35161#[cfg_attr(test, assert_instr(vmovaps))]
35162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35163pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
35164    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
35165}
35166
35167/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35168/// (elements are copied from src when the corresponding mask bit is not set).
35169/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35170///
35171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
35172#[inline]
35173#[target_feature(enable = "avx512f")]
35174#[cfg_attr(test, assert_instr(vmovapd))]
35175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35176pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
35177    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
35178}
35179
35180/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35181/// (elements are zeroed out when the corresponding mask bit is not set).
35182/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35183///
35184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
35185#[inline]
35186#[target_feature(enable = "avx512f")]
35187#[cfg_attr(test, assert_instr(vmovapd))]
35188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35189pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
35190    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
35191}
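
// Unlike the `loadu` variants earlier, these masked loads keep the documented 64-byte
// alignment requirement on the base address. A hedged sketch, reusing the hypothetical
// `Align64I32` wrapper from above:
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_load_epi32(k: __mmask16, buf: &Align64I32) -> __m512i {
    _mm512_maskz_load_epi32(k, buf.0.as_ptr())
}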
35192
35193/// Load packed 32-bit integers from memory into dst using writemask k
35194/// (elements are copied from src when the corresponding mask bit is not set).
35195/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35196///
35197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
35198#[inline]
35199#[target_feature(enable = "avx512f,avx512vl")]
35200#[cfg_attr(test, assert_instr(vmovdqa32))]
35201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35202pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
35203    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
35204}
35205
35206/// Load packed 32-bit integers from memory into dst using zeromask k
35207/// (elements are zeroed out when the corresponding mask bit is not set).
35208/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35209///
35210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
35211#[inline]
35212#[target_feature(enable = "avx512f,avx512vl")]
35213#[cfg_attr(test, assert_instr(vmovdqa32))]
35214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35215pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
35216    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
35217}
35218
35219/// Load packed 64-bit integers from memory into dst using writemask k
35220/// (elements are copied from src when the corresponding mask bit is not set).
35221/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35222///
35223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
35224#[inline]
35225#[target_feature(enable = "avx512f,avx512vl")]
35226#[cfg_attr(test, assert_instr(vmovdqa64))]
35227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35228pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
35229    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
35230}
35231
35232/// Load packed 64-bit integers from memory into dst using zeromask k
35233/// (elements are zeroed out when the corresponding mask bit is not set).
35234/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35235///
35236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
35237#[inline]
35238#[target_feature(enable = "avx512f,avx512vl")]
35239#[cfg_attr(test, assert_instr(vmovdqa64))]
35240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35241pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
35242    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
35243}
35244
35245/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35246/// (elements are copied from src when the corresponding mask bit is not set).
35247/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35248///
35249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
35250#[inline]
35251#[target_feature(enable = "avx512f,avx512vl")]
35252#[cfg_attr(test, assert_instr(vmovaps))]
35253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35254pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
35255    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
35256}
35257
35258/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35259/// (elements are zeroed out when the corresponding mask bit is not set).
35260/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35261///
35262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
35263#[inline]
35264#[target_feature(enable = "avx512f,avx512vl")]
35265#[cfg_attr(test, assert_instr(vmovaps))]
35266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35267pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
35268    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
35269}
35270
35271/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35272/// (elements are copied from src when the corresponding mask bit is not set).
35273/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35274///
35275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
35276#[inline]
35277#[target_feature(enable = "avx512f,avx512vl")]
35278#[cfg_attr(test, assert_instr(vmovapd))]
35279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35280pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
35281    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
35282}
35283
35284/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35285/// (elements are zeroed out when the corresponding mask bit is not set).
35286/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35287///
35288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
35289#[inline]
35290#[target_feature(enable = "avx512f,avx512vl")]
35291#[cfg_attr(test, assert_instr(vmovapd))]
35292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35293pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
35294    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
35295}
35296
35297/// Load packed 32-bit integers from memory into dst using writemask k
35298/// (elements are copied from src when the corresponding mask bit is not set).
35299/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35300///
35301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
35302#[inline]
35303#[target_feature(enable = "avx512f,avx512vl")]
35304#[cfg_attr(test, assert_instr(vmovdqa32))]
35305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35306pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
35307    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
35308}
35309
35310/// Load packed 32-bit integers from memory into dst using zeromask k
35311/// (elements are zeroed out when the corresponding mask bit is not set).
35312/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35313///
35314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
35315#[inline]
35316#[target_feature(enable = "avx512f,avx512vl")]
35317#[cfg_attr(test, assert_instr(vmovdqa32))]
35318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35319pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35320    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
35321}
35322
35323/// Load packed 64-bit integers from memory into dst using writemask k
35324/// (elements are copied from src when the corresponding mask bit is not set).
35325/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35326///
35327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
35328#[inline]
35329#[target_feature(enable = "avx512f,avx512vl")]
35330#[cfg_attr(test, assert_instr(vmovdqa64))]
35331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35332pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
35333    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
35334}
35335
35336/// Load packed 64-bit integers from memory into dst using zeromask k
35337/// (elements are zeroed out when the corresponding mask bit is not set).
35338/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35339///
35340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
35341#[inline]
35342#[target_feature(enable = "avx512f,avx512vl")]
35343#[cfg_attr(test, assert_instr(vmovdqa64))]
35344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35345pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35346    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
35347}
35348
35349/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35350/// (elements are copied from src when the corresponding mask bit is not set).
35351/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35352///
35353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
35354#[inline]
35355#[target_feature(enable = "avx512f,avx512vl")]
35356#[cfg_attr(test, assert_instr(vmovaps))]
35357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35358pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35359    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
35360}
35361
35362/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35363/// (elements are zeroed out when the corresponding mask bit is not set).
35364/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35365///
35366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
35367#[inline]
35368#[target_feature(enable = "avx512f,avx512vl")]
35369#[cfg_attr(test, assert_instr(vmovaps))]
35370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35371pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
35372    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
35373}
35374
35375/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35376/// (elements are copied from src when the corresponding mask bit is not set).
35377/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35378///
35379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
35380#[inline]
35381#[target_feature(enable = "avx512f,avx512vl")]
35382#[cfg_attr(test, assert_instr(vmovapd))]
35383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35384pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35385    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
35386}
35387
35388/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35389/// (elements are zeroed out when the corresponding mask bit is not set).
35390/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35391///
35392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
35393#[inline]
35394#[target_feature(enable = "avx512f,avx512vl")]
35395#[cfg_attr(test, assert_instr(vmovapd))]
35396#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35397pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35398    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
35399}
35400
35401/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35402/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35403/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35404/// exception may be generated.
35405///
35406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
35407#[inline]
35408#[cfg_attr(test, assert_instr(vmovss))]
35409#[target_feature(enable = "avx512f")]
35410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35411pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35412    let mut dst: __m128 = src;
35413    asm!(
35414        vpl!("vmovss {dst}{{{k}}}"),
35415        p = in(reg) mem_addr,
35416        k = in(kreg) k,
35417        dst = inout(xmm_reg) dst,
35418        options(pure, readonly, nostack, preserves_flags),
35419    );
35420    dst
35421}
35422
35423/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35424/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
35425/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35426/// exception may be generated.
35427///
35428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
35429#[inline]
35430#[cfg_attr(test, assert_instr(vmovss))]
35431#[target_feature(enable = "avx512f")]
35432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35433pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
35434    let mut dst: __m128;
35435    asm!(
35436        vpl!("vmovss {dst}{{{k}}} {{z}}"),
35437        p = in(reg) mem_addr,
35438        k = in(kreg) k,
35439        dst = out(xmm_reg) dst,
35440        options(pure, readonly, nostack, preserves_flags),
35441    );
35442    dst
35443}
35444
35445/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35446/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35447/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35448/// exception may be generated.
35449///
35450/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
35451#[inline]
35452#[cfg_attr(test, assert_instr(vmovsd))]
35453#[target_feature(enable = "avx512f")]
35454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35455pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35456    let mut dst: __m128d = src;
35457    asm!(
35458        vpl!("vmovsd {dst}{{{k}}}"),
35459        p = in(reg) mem_addr,
35460        k = in(kreg) k,
35461        dst = inout(xmm_reg) dst,
35462        options(pure, readonly, nostack, preserves_flags),
35463    );
35464    dst
35465}
35466
35467/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35468/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
35469/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
35470/// may be generated.
35471///
35472/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
35473#[inline]
35474#[cfg_attr(test, assert_instr(vmovsd))]
35475#[target_feature(enable = "avx512f")]
35476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35477pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35478    let mut dst: __m128d;
35479    asm!(
35480        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
35481        p = in(reg) mem_addr,
35482        k = in(kreg) k,
35483        dst = out(xmm_reg) dst,
35484        options(pure, readonly, nostack, preserves_flags),
35485    );
35486    dst
35487}
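
// A hedged sketch of the scalar forms (hypothetical helper): bit 0 of the mask decides
// whether the lower lane comes from memory or from `src`; the upper lanes are zeroed
// either way. Per the documentation above, `p` is assumed to be 16-byte aligned.
#[target_feature(enable = "avx512f")]
unsafe fn example_load_ss_if(cond: bool, src: __m128, p: *const f32) -> __m128 {
    _mm_mask_load_ss(src, cond as __mmask8, p)
}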
35488
35489/// Store packed 32-bit integers from a into memory using writemask k.
35490/// mem_addr does not need to be aligned on any particular boundary.
35491///
35492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
35493#[inline]
35494#[target_feature(enable = "avx512f")]
35495#[cfg_attr(test, assert_instr(vmovdqu32))]
35496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35497pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35498    storedqu32_512(mem_addr, a.as_i32x16(), mask)
35499}
35500
35501/// Store packed 64-bit integers from a into memory using writemask k.
35502/// mem_addr does not need to be aligned on any particular boundary.
35503///
35504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
35505#[inline]
35506#[target_feature(enable = "avx512f")]
35507#[cfg_attr(test, assert_instr(vmovdqu64))]
35508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35509pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35510    storedqu64_512(mem_addr, a.as_i64x8(), mask)
35511}
35512
35513/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35514/// mem_addr does not need to be aligned on any particular boundary.
35515///
35516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
35517#[inline]
35518#[target_feature(enable = "avx512f")]
35519#[cfg_attr(test, assert_instr(vmovups))]
35520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35521pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35522    storeups_512(mem_addr, a.as_f32x16(), mask)
35523}
35524
35525/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35526/// mem_addr does not need to be aligned on any particular boundary.
35527///
35528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
35529#[inline]
35530#[target_feature(enable = "avx512f")]
35531#[cfg_attr(test, assert_instr(vmovupd))]
35532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35533pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35534    storeupd_512(mem_addr, a.as_f64x8(), mask)
35535}
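
// The store counterpart of the tail-load sketch earlier (hypothetical helper): only the
// first `out.len()` lanes are written, so nothing is stored past the end of the output
// slice. Assumes `out.len() < 16`.
#[target_feature(enable = "avx512f")]
unsafe fn example_store_tail_epi32(out: &mut [i32], v: __m512i) {
    debug_assert!(out.len() < 16);
    let k: __mmask16 = (1u16 << out.len()) - 1;
    _mm512_mask_storeu_epi32(out.as_mut_ptr(), k, v);
}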
35536
35537/// Store packed 32-bit integers from a into memory using writemask k.
35538/// mem_addr does not need to be aligned on any particular boundary.
35539///
35540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
35541#[inline]
35542#[target_feature(enable = "avx512f,avx512vl")]
35543#[cfg_attr(test, assert_instr(vmovdqu32))]
35544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35545pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35546    storedqu32_256(mem_addr, a.as_i32x8(), mask)
35547}
35548
35549/// Store packed 64-bit integers from a into memory using writemask k.
35550/// mem_addr does not need to be aligned on any particular boundary.
35551///
35552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
35553#[inline]
35554#[target_feature(enable = "avx512f,avx512vl")]
35555#[cfg_attr(test, assert_instr(vmovdqu64))]
35556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35557pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35558    storedqu64_256(mem_addr, a.as_i64x4(), mask)
35559}
35560
35561/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35562/// mem_addr does not need to be aligned on any particular boundary.
35563///
35564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
35565#[inline]
35566#[target_feature(enable = "avx512f,avx512vl")]
35567#[cfg_attr(test, assert_instr(vmovups))]
35568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35569pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35570    storeups_256(mem_addr, a.as_f32x8(), mask)
35571}
35572
35573/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35574/// mem_addr does not need to be aligned on any particular boundary.
35575///
35576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
35577#[inline]
35578#[target_feature(enable = "avx512f,avx512vl")]
35579#[cfg_attr(test, assert_instr(vmovupd))]
35580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35581pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35582    storeupd_256(mem_addr, a.as_f64x4(), mask)
35583}
35584
35585/// Store packed 32-bit integers from a into memory using writemask k.
35586/// mem_addr does not need to be aligned on any particular boundary.
35587///
35588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
35589#[inline]
35590#[target_feature(enable = "avx512f,avx512vl")]
35591#[cfg_attr(test, assert_instr(vmovdqu32))]
35592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35593pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35594    storedqu32_128(mem_addr, a.as_i32x4(), mask)
35595}
35596
35597/// Store packed 64-bit integers from a into memory using writemask k.
35598/// mem_addr does not need to be aligned on any particular boundary.
35599///
35600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
35601#[inline]
35602#[target_feature(enable = "avx512f,avx512vl")]
35603#[cfg_attr(test, assert_instr(vmovdqu64))]
35604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35605pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35606    storedqu64_128(mem_addr, a.as_i64x2(), mask)
35607}
35608
35609/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35610/// mem_addr does not need to be aligned on any particular boundary.
35611///
35612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
35613#[inline]
35614#[target_feature(enable = "avx512f,avx512vl")]
35615#[cfg_attr(test, assert_instr(vmovups))]
35616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35617pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35618    storeups_128(mem_addr, a.as_f32x4(), mask)
35619}
35620
35621/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35622/// mem_addr does not need to be aligned on any particular boundary.
35623///
35624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
35625#[inline]
35626#[target_feature(enable = "avx512f,avx512vl")]
35627#[cfg_attr(test, assert_instr(vmovupd))]
35628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35629pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35630    storeupd_128(mem_addr, a.as_f64x2(), mask)
35631}
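
// Illustrative sketch (not part of the crate): a masked unaligned store can write
// just the first `n` lanes of a vector to an arbitrarily aligned buffer, leaving
// the remaining bytes untouched. The helper name `store_prefix` is an assumption,
// and these intrinsics are nightly-only (`stdarch_x86_avx512`).
//
//     use core::arch::x86_64::*;
//
//     /// Writes the low `n` (0..=8) lanes of `v` to `dst`; the rest of `dst` is untouched.
//     #[target_feature(enable = "avx512f,avx512vl")]
//     unsafe fn store_prefix(dst: *mut f32, v: __m256, n: u32) {
//         // A mask with the low `n` bits set selects which lanes are written.
//         let mask = (1u16 << n).wrapping_sub(1) as __mmask8;
//         _mm256_mask_storeu_ps(dst, mask, v);
//     }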
35632
35633/// Store packed 32-bit integers from a into memory using writemask k.
35634/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35635///
35636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
35637#[inline]
35638#[target_feature(enable = "avx512f")]
35639#[cfg_attr(test, assert_instr(vmovdqa32))]
35640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35641pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35642    storedqa32_512(mem_addr, a.as_i32x16(), mask)
35643}
35644
35645/// Store packed 64-bit integers from a into memory using writemask k.
35646/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35647///
35648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
35649#[inline]
35650#[target_feature(enable = "avx512f")]
35651#[cfg_attr(test, assert_instr(vmovdqa64))]
35652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35653pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35654    storedqa64_512(mem_addr, a.as_i64x8(), mask)
35655}
35656
35657/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35658/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35659///
35660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
35661#[inline]
35662#[target_feature(enable = "avx512f")]
35663#[cfg_attr(test, assert_instr(vmovaps))]
35664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35665pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35666    storeaps_512(mem_addr, a.as_f32x16(), mask)
35667}
35668
35669/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35670/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35671///
35672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
35673#[inline]
35674#[target_feature(enable = "avx512f")]
35675#[cfg_attr(test, assert_instr(vmovapd))]
35676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35677pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35678    storeapd_512(mem_addr, a.as_f64x8(), mask)
35679}
35680
35681/// Store packed 32-bit integers from a into memory using writemask k.
35682/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35683///
35684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
35685#[inline]
35686#[target_feature(enable = "avx512f,avx512vl")]
35687#[cfg_attr(test, assert_instr(vmovdqa32))]
35688#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35689pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35690    storedqa32_256(mem_addr, a.as_i32x8(), mask)
35691}
35692
35693/// Store packed 64-bit integers from a into memory using writemask k.
35694/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35695///
35696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
35697#[inline]
35698#[target_feature(enable = "avx512f,avx512vl")]
35699#[cfg_attr(test, assert_instr(vmovdqa64))]
35700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35701pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35702    storedqa64_256(mem_addr, a.as_i64x4(), mask)
35703}
35704
35705/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35706/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35707///
35708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
35709#[inline]
35710#[target_feature(enable = "avx512f,avx512vl")]
35711#[cfg_attr(test, assert_instr(vmovaps))]
35712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35713pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35714    storeaps_256(mem_addr, a.as_f32x8(), mask)
35715}
35716
35717/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35718/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35719///
35720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
35721#[inline]
35722#[target_feature(enable = "avx512f,avx512vl")]
35723#[cfg_attr(test, assert_instr(vmovapd))]
35724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35725pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35726    storeapd_256(mem_addr, a.as_f64x4(), mask)
35727}
35728
35729/// Store packed 32-bit integers from a into memory using writemask k.
35730/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35731///
35732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
35733#[inline]
35734#[target_feature(enable = "avx512f,avx512vl")]
35735#[cfg_attr(test, assert_instr(vmovdqa32))]
35736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35737pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35738    storedqa32_128(mem_addr, a.as_i32x4(), mask)
35739}
35740
35741/// Store packed 64-bit integers from a into memory using writemask k.
35742/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35743///
35744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
35745#[inline]
35746#[target_feature(enable = "avx512f,avx512vl")]
35747#[cfg_attr(test, assert_instr(vmovdqa64))]
35748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35749pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35750    storedqa64_128(mem_addr, a.as_i64x2(), mask)
35751}
35752
35753/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35754/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35755///
35756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
35757#[inline]
35758#[target_feature(enable = "avx512f,avx512vl")]
35759#[cfg_attr(test, assert_instr(vmovaps))]
35760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35761pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35762    storeaps_128(mem_addr, a.as_f32x4(), mask)
35763}
35764
35765/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35766/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35767///
35768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
35769#[inline]
35770#[target_feature(enable = "avx512f,avx512vl")]
35771#[cfg_attr(test, assert_instr(vmovapd))]
35772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35773pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35774    storeapd_128(mem_addr, a.as_f64x2(), mask)
35775}
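
// Illustrative sketch (not part of the crate): unlike the `storeu` forms above, the
// aligned variants require the destination to meet the full vector alignment
// (64 bytes for the 512-bit forms). The over-aligned wrapper `Aligned64` below is
// an assumption used only to satisfy that requirement.
//
//     use core::arch::x86_64::*;
//
//     #[repr(C, align(64))]
//     struct Aligned64([i32; 16]);
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn store_even_lanes(dst: &mut Aligned64, v: __m512i) {
//         // Only the even lanes (mask bits 0, 2, 4, ...) are written to memory.
//         _mm512_mask_store_epi32(dst.0.as_mut_ptr(), 0b0101_0101_0101_0101, v);
//     }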
35776
35777/// Store the lower single-precision (32-bit) floating-point element from a into memory using writemask k (the element is written only when mask bit 0 of k is set). mem_addr
35778/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35779///
35780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ss)
35781#[inline]
35782#[cfg_attr(test, assert_instr(vmovss))]
35783#[target_feature(enable = "avx512f")]
35784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35785pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
35786    asm!(
35787        vps!("vmovss", "{{{k}}}, {a}"),
35788        p = in(reg) mem_addr,
35789        k = in(kreg) k,
35790        a = in(xmm_reg) a,
35791        options(nostack, preserves_flags),
35792    );
35793}
35794
35795/// Store the lower double-precision (64-bit) floating-point element from a into memory using writemask k (the element is written only when mask bit 0 of k is set). mem_addr
35796/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35797///
35798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_sd)
35799#[inline]
35800#[cfg_attr(test, assert_instr(vmovsd))]
35801#[target_feature(enable = "avx512f")]
35802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35803pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
35804    asm!(
35805        vps!("vmovsd", "{{{k}}}, {a}"),
35806        p = in(reg) mem_addr,
35807        k = in(kreg) k,
35808        a = in(xmm_reg) a,
35809        options(nostack, preserves_flags),
35810    );
35811}
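
// Illustrative sketch (not part of the crate): because only bit 0 of the mask is
// consulted, `_mm_mask_store_ss` can express a branch-free conditional scalar
// store. The helper below is hypothetical; per the documentation above, `dst`
// must be 16-byte aligned.
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn maybe_store(dst: *mut f32, v: __m128, write: bool) {
//         // Nothing at all is written to memory when `write` is false.
//         _mm_mask_store_ss(dst, write as __mmask8, v);
//     }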
35812
35813/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35814///
35815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
35816#[inline]
35817#[target_feature(enable = "avx512f")]
35818#[cfg_attr(test, assert_instr(vpexpandd))]
35819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35820pub unsafe fn _mm512_mask_expandloadu_epi32(
35821    src: __m512i,
35822    k: __mmask16,
35823    mem_addr: *const i32,
35824) -> __m512i {
35825    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
35826}
35827
35828/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35829///
35830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
35831#[inline]
35832#[target_feature(enable = "avx512f")]
35833#[cfg_attr(test, assert_instr(vpexpandd))]
35834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35835pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
35836    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
35837}
35838
35839/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35840///
35841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
35842#[inline]
35843#[target_feature(enable = "avx512f,avx512vl")]
35844#[cfg_attr(test, assert_instr(vpexpandd))]
35845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35846pub unsafe fn _mm256_mask_expandloadu_epi32(
35847    src: __m256i,
35848    k: __mmask8,
35849    mem_addr: *const i32,
35850) -> __m256i {
35851    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
35852}
35853
35854/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35855///
35856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
35857#[inline]
35858#[target_feature(enable = "avx512f,avx512vl")]
35859#[cfg_attr(test, assert_instr(vpexpandd))]
35860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35861pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
35862    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
35863}
35864
35865/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35866///
35867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
35868#[inline]
35869#[target_feature(enable = "avx512f,avx512vl")]
35870#[cfg_attr(test, assert_instr(vpexpandd))]
35871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35872pub unsafe fn _mm_mask_expandloadu_epi32(
35873    src: __m128i,
35874    k: __mmask8,
35875    mem_addr: *const i32,
35876) -> __m128i {
35877    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
35878}
35879
35880/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35881///
35882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
35883#[inline]
35884#[target_feature(enable = "avx512f,avx512vl")]
35885#[cfg_attr(test, assert_instr(vpexpandd))]
35886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35887pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
35888    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
35889}
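
// Illustrative sketch (not part of the crate): an expand-load reads only
// `k.count_ones()` contiguous elements starting at `mem_addr` and distributes
// them, in ascending order, to the lanes whose mask bit is set; inactive lanes
// take `src` (writemask) or zero (zeromask). A 128-bit example with made-up data:
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f,avx512vl")]
//     unsafe fn demo() -> __m128i {
//         let packed = [10i32, 20, 30]; // only three values stored contiguously
//         // Mask 0b1101 has three set bits, so exactly three i32s are read:
//         // lane 0 <- 10, lane 2 <- 20, lane 3 <- 30; lane 1 is zeroed.
//         _mm_maskz_expandloadu_epi32(0b1101, packed.as_ptr())
//     }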
35890
35891/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35892///
35893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
35894#[inline]
35895#[target_feature(enable = "avx512f")]
35896#[cfg_attr(test, assert_instr(vpexpandq))]
35897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35898pub unsafe fn _mm512_mask_expandloadu_epi64(
35899    src: __m512i,
35900    k: __mmask8,
35901    mem_addr: *const i64,
35902) -> __m512i {
35903    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
35904}
35905
35906/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35907///
35908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
35909#[inline]
35910#[target_feature(enable = "avx512f")]
35911#[cfg_attr(test, assert_instr(vpexpandq))]
35912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35913pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
35914    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
35915}
35916
35917/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35918///
35919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
35920#[inline]
35921#[target_feature(enable = "avx512f,avx512vl")]
35922#[cfg_attr(test, assert_instr(vpexpandq))]
35923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35924pub unsafe fn _mm256_mask_expandloadu_epi64(
35925    src: __m256i,
35926    k: __mmask8,
35927    mem_addr: *const i64,
35928) -> __m256i {
35929    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
35930}
35931
35932/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35933///
35934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
35935#[inline]
35936#[target_feature(enable = "avx512f,avx512vl")]
35937#[cfg_attr(test, assert_instr(vpexpandq))]
35938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35939pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
35940    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
35941}
35942
35943/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35944///
35945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
35946#[inline]
35947#[target_feature(enable = "avx512f,avx512vl")]
35948#[cfg_attr(test, assert_instr(vpexpandq))]
35949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35950pub unsafe fn _mm_mask_expandloadu_epi64(
35951    src: __m128i,
35952    k: __mmask8,
35953    mem_addr: *const i64,
35954) -> __m128i {
35955    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
35956}
35957
35958/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35959///
35960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
35961#[inline]
35962#[target_feature(enable = "avx512f,avx512vl")]
35963#[cfg_attr(test, assert_instr(vpexpandq))]
35964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35965pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
35966    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
35967}
35968
35969/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35970///
35971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
35972#[inline]
35973#[target_feature(enable = "avx512f")]
35974#[cfg_attr(test, assert_instr(vexpandps))]
35975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35976pub unsafe fn _mm512_mask_expandloadu_ps(
35977    src: __m512,
35978    k: __mmask16,
35979    mem_addr: *const f32,
35980) -> __m512 {
35981    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
35982}
35983
35984/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35985///
35986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
35987#[inline]
35988#[target_feature(enable = "avx512f")]
35989#[cfg_attr(test, assert_instr(vexpandps))]
35990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35991pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
35992    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
35993}
35994
35995/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35996///
35997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
35998#[inline]
35999#[target_feature(enable = "avx512f,avx512vl")]
36000#[cfg_attr(test, assert_instr(vexpandps))]
36001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36002pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
36003    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
36004}
36005
36006/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36007///
36008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
36009#[inline]
36010#[target_feature(enable = "avx512f,avx512vl")]
36011#[cfg_attr(test, assert_instr(vexpandps))]
36012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36013pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
36014    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
36015}
36016
36017/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36018///
36019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
36020#[inline]
36021#[target_feature(enable = "avx512f,avx512vl")]
36022#[cfg_attr(test, assert_instr(vexpandps))]
36023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36024pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
36025    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
36026}
36027
36028/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36029///
36030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
36031#[inline]
36032#[target_feature(enable = "avx512f,avx512vl")]
36033#[cfg_attr(test, assert_instr(vexpandps))]
36034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36035pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
36036    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
36037}
36038
36039/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36040///
36041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
36042#[inline]
36043#[target_feature(enable = "avx512f")]
36044#[cfg_attr(test, assert_instr(vexpandpd))]
36045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36046pub unsafe fn _mm512_mask_expandloadu_pd(
36047    src: __m512d,
36048    k: __mmask8,
36049    mem_addr: *const f64,
36050) -> __m512d {
36051    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
36052}
36053
36054/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36055///
36056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
36057#[inline]
36058#[target_feature(enable = "avx512f")]
36059#[cfg_attr(test, assert_instr(vexpandpd))]
36060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36061pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
36062    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
36063}
36064
36065/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36066///
36067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
36068#[inline]
36069#[target_feature(enable = "avx512f,avx512vl")]
36070#[cfg_attr(test, assert_instr(vexpandpd))]
36071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36072pub unsafe fn _mm256_mask_expandloadu_pd(
36073    src: __m256d,
36074    k: __mmask8,
36075    mem_addr: *const f64,
36076) -> __m256d {
36077    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
36078}
36079
36080/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36081///
36082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
36083#[inline]
36084#[target_feature(enable = "avx512f,avx512vl")]
36085#[cfg_attr(test, assert_instr(vexpandpd))]
36086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36087pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
36088    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
36089}
36090
36091/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36092///
36093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
36094#[inline]
36095#[target_feature(enable = "avx512f,avx512vl")]
36096#[cfg_attr(test, assert_instr(vexpandpd))]
36097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36098pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
36099    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
36100}
36101
36102/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36103///
36104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
36105#[inline]
36106#[target_feature(enable = "avx512f,avx512vl")]
36107#[cfg_attr(test, assert_instr(vexpandpd))]
36108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36109pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
36110    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
36111}
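
// Illustrative sketch (not part of the crate): the writemask forms merge a short,
// densely packed run of values into selected lanes of an existing vector, keeping
// the `src` lanes whose mask bit is clear. Names below are assumptions.
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f,avx512vl")]
//     unsafe fn merge_lower_pair(src: __m256d, packed: &[f64; 2]) -> __m256d {
//         // Two set bits -> two doubles are read into lanes 0 and 1;
//         // lanes 2 and 3 are copied from `src`.
//         _mm256_mask_expandloadu_pd(src, 0b0011, packed.as_ptr())
//     }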
36112
36113/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
36114///
36115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
36116#[inline]
36117#[target_feature(enable = "avx512f")]
36118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36119pub fn _mm512_setr_pd(
36120    e0: f64,
36121    e1: f64,
36122    e2: f64,
36123    e3: f64,
36124    e4: f64,
36125    e5: f64,
36126    e6: f64,
36127    e7: f64,
36128) -> __m512d {
36129    unsafe {
36130        let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
36131        transmute(r)
36132    }
36133}
36134
36135/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
36136///
36137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
36138#[inline]
36139#[target_feature(enable = "avx512f")]
36140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36141pub fn _mm512_set_pd(
36142    e0: f64,
36143    e1: f64,
36144    e2: f64,
36145    e3: f64,
36146    e4: f64,
36147    e5: f64,
36148    e6: f64,
36149    e7: f64,
36150) -> __m512d {
36151    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
36152}
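
// Illustrative sketch (not part of the crate): `_mm512_set_pd` takes its arguments
// from the highest element down to element 0, while `_mm512_setr_pd` takes them in
// ascending (memory) order, so the two calls below construct the same vector.
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn same_vector() -> (__m512d, __m512d) {
//         let a = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
//         let b = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//         (a, b) // element i of both vectors is `i as f64`
//     }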
36153
36154/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36155///
36156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
36157#[inline]
36158#[target_feature(enable = "avx512f")]
36159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36160#[cfg_attr(test, assert_instr(vmovss))]
36161pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36162    unsafe {
36163        let extractsrc: f32 = simd_extract!(src, 0);
36164        let mut mov: f32 = extractsrc;
36165        if (k & 0b00000001) != 0 {
36166            mov = simd_extract!(b, 0);
36167        }
36168        simd_insert!(a, 0, mov)
36169    }
36170}
36171
36172/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36173///
36174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
36175#[inline]
36176#[target_feature(enable = "avx512f")]
36177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36178#[cfg_attr(test, assert_instr(vmovss))]
36179pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36180    unsafe {
36181        let mut mov: f32 = 0.;
36182        if (k & 0b00000001) != 0 {
36183            mov = simd_extract!(b, 0);
36184        }
36185        simd_insert!(a, 0, mov)
36186    }
36187}
36188
36189/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36190///
36191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
36192#[inline]
36193#[target_feature(enable = "avx512f")]
36194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36195#[cfg_attr(test, assert_instr(vmovsd))]
36196pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36197    unsafe {
36198        let extractsrc: f64 = simd_extract!(src, 0);
36199        let mut mov: f64 = extractsrc;
36200        if (k & 0b00000001) != 0 {
36201            mov = simd_extract!(b, 0);
36202        }
36203        simd_insert!(a, 0, mov)
36204    }
36205}
36206
36207/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36208///
36209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
36210#[inline]
36211#[target_feature(enable = "avx512f")]
36212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36213#[cfg_attr(test, assert_instr(vmovsd))]
36214pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36215    unsafe {
36216        let mut mov: f64 = 0.;
36217        if (k & 0b00000001) != 0 {
36218            mov = simd_extract!(b, 0);
36219        }
36220        simd_insert!(a, 0, mov)
36221    }
36222}
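
// Illustrative sketch (not part of the crate): only bit 0 of the mask matters for
// the scalar moves, and the upper lanes always come from `a`. With hypothetical
// inputs:
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo(src: __m128, a: __m128, b: __m128) -> (__m128, __m128) {
//         let taken = _mm_mask_move_ss(src, 0b0000_0001, a, b); // lane 0 = b[0]
//         let kept = _mm_mask_move_ss(src, 0b0000_0000, a, b);  // lane 0 = src[0]
//         (taken, kept) // lanes 1..=3 of both results are copied from `a`
//     }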
36223
36224/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36225///
36226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
36227#[inline]
36228#[target_feature(enable = "avx512f")]
36229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36230#[cfg_attr(test, assert_instr(vaddss))]
36231pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36232    unsafe {
36233        let extractsrc: f32 = simd_extract!(src, 0);
36234        let mut add: f32 = extractsrc;
36235        if (k & 0b00000001) != 0 {
36236            let extracta: f32 = simd_extract!(a, 0);
36237            let extractb: f32 = simd_extract!(b, 0);
36238            add = extracta + extractb;
36239        }
36240        simd_insert!(a, 0, add)
36241    }
36242}
36243
36244/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36245///
36246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
36247#[inline]
36248#[target_feature(enable = "avx512f")]
36249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36250#[cfg_attr(test, assert_instr(vaddss))]
36251pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36252    unsafe {
36253        let mut add: f32 = 0.;
36254        if (k & 0b00000001) != 0 {
36255            let extracta: f32 = simd_extract!(a, 0);
36256            let extractb: f32 = simd_extract!(b, 0);
36257            add = extracta + extractb;
36258        }
36259        simd_insert!(a, 0, add)
36260    }
36261}
36262
36263/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36264///
36265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
36266#[inline]
36267#[target_feature(enable = "avx512f")]
36268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36269#[cfg_attr(test, assert_instr(vaddsd))]
36270pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36271    unsafe {
36272        let extractsrc: f64 = simd_extract!(src, 0);
36273        let mut add: f64 = extractsrc;
36274        if (k & 0b00000001) != 0 {
36275            let extracta: f64 = simd_extract!(a, 0);
36276            let extractb: f64 = simd_extract!(b, 0);
36277            add = extracta + extractb;
36278        }
36279        simd_insert!(a, 0, add)
36280    }
36281}
36282
36283/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36284///
36285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
36286#[inline]
36287#[target_feature(enable = "avx512f")]
36288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36289#[cfg_attr(test, assert_instr(vaddsd))]
36290pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36291    unsafe {
36292        let mut add: f64 = 0.;
36293        if (k & 0b00000001) != 0 {
36294            let extracta: f64 = simd_extract!(a, 0);
36295            let extractb: f64 = simd_extract!(b, 0);
36296            add = extracta + extractb;
36297        }
36298        simd_insert!(a, 0, add)
36299    }
36300}
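
// Illustrative sketch (not part of the crate): the masked scalar arithmetic
// intrinsics all follow the same shape: the low lane holds `a0 <op> b0` when mask
// bit 0 is set, otherwise the corresponding `src` lane (writemask) or zero
// (zeromask); the upper lanes are always taken from `a`. With made-up values:
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() -> __m128 {
//         let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lane 0 = 1.0
//         let b = _mm_set1_ps(10.0);
//         let src = _mm_set1_ps(-1.0);
//         // Mask bit 0 set: lane 0 = 1.0 + 10.0 = 11.0; lanes 1..=3 = 2.0, 3.0, 4.0.
//         _mm_mask_add_ss(src, 1, a, b)
//     }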
36301
36302/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36303///
36304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
36305#[inline]
36306#[target_feature(enable = "avx512f")]
36307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36308#[cfg_attr(test, assert_instr(vsubss))]
36309pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36310    unsafe {
36311        let extractsrc: f32 = simd_extract!(src, 0);
36312        let mut add: f32 = extractsrc;
36313        if (k & 0b00000001) != 0 {
36314            let extracta: f32 = simd_extract!(a, 0);
36315            let extractb: f32 = simd_extract!(b, 0);
36316            add = extracta - extractb;
36317        }
36318        simd_insert!(a, 0, add)
36319    }
36320}
36321
36322/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36323///
36324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
36325#[inline]
36326#[target_feature(enable = "avx512f")]
36327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36328#[cfg_attr(test, assert_instr(vsubss))]
36329pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36330    unsafe {
36331        let mut add: f32 = 0.;
36332        if (k & 0b00000001) != 0 {
36333            let extracta: f32 = simd_extract!(a, 0);
36334            let extractb: f32 = simd_extract!(b, 0);
36335            add = extracta - extractb;
36336        }
36337        simd_insert!(a, 0, add)
36338    }
36339}
36340
36341/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36342///
36343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
36344#[inline]
36345#[target_feature(enable = "avx512f")]
36346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36347#[cfg_attr(test, assert_instr(vsubsd))]
36348pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36349    unsafe {
36350        let extractsrc: f64 = simd_extract!(src, 0);
36351        let mut add: f64 = extractsrc;
36352        if (k & 0b00000001) != 0 {
36353            let extracta: f64 = simd_extract!(a, 0);
36354            let extractb: f64 = simd_extract!(b, 0);
36355            add = extracta - extractb;
36356        }
36357        simd_insert!(a, 0, add)
36358    }
36359}
36360
36361/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36362///
36363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
36364#[inline]
36365#[target_feature(enable = "avx512f")]
36366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36367#[cfg_attr(test, assert_instr(vsubsd))]
36368pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36369    unsafe {
36370        let mut add: f64 = 0.;
36371        if (k & 0b00000001) != 0 {
36372            let extracta: f64 = simd_extract!(a, 0);
36373            let extractb: f64 = simd_extract!(b, 0);
36374            add = extracta - extractb;
36375        }
36376        simd_insert!(a, 0, add)
36377    }
36378}
36379
36380/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36381///
36382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
36383#[inline]
36384#[target_feature(enable = "avx512f")]
36385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36386#[cfg_attr(test, assert_instr(vmulss))]
36387pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36388    unsafe {
36389        let extractsrc: f32 = simd_extract!(src, 0);
36390        let mut add: f32 = extractsrc;
36391        if (k & 0b00000001) != 0 {
36392            let extracta: f32 = simd_extract!(a, 0);
36393            let extractb: f32 = simd_extract!(b, 0);
36394            add = extracta * extractb;
36395        }
36396        simd_insert!(a, 0, add)
36397    }
36398}
36399
36400/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36401///
36402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
36403#[inline]
36404#[target_feature(enable = "avx512f")]
36405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36406#[cfg_attr(test, assert_instr(vmulss))]
36407pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36408    unsafe {
36409        let mut add: f32 = 0.;
36410        if (k & 0b00000001) != 0 {
36411            let extracta: f32 = simd_extract!(a, 0);
36412            let extractb: f32 = simd_extract!(b, 0);
36413            add = extracta * extractb;
36414        }
36415        simd_insert!(a, 0, add)
36416    }
36417}
36418
36419/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36420///
36421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
36422#[inline]
36423#[target_feature(enable = "avx512f")]
36424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36425#[cfg_attr(test, assert_instr(vmulsd))]
36426pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36427    unsafe {
36428        let extractsrc: f64 = simd_extract!(src, 0);
36429        let mut add: f64 = extractsrc;
36430        if (k & 0b00000001) != 0 {
36431            let extracta: f64 = simd_extract!(a, 0);
36432            let extractb: f64 = simd_extract!(b, 0);
36433            add = extracta * extractb;
36434        }
36435        simd_insert!(a, 0, add)
36436    }
36437}
36438
36439/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36440///
36441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
36442#[inline]
36443#[target_feature(enable = "avx512f")]
36444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36445#[cfg_attr(test, assert_instr(vmulsd))]
36446pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36447    unsafe {
36448        let mut add: f64 = 0.;
36449        if (k & 0b00000001) != 0 {
36450            let extracta: f64 = simd_extract!(a, 0);
36451            let extractb: f64 = simd_extract!(b, 0);
36452            add = extracta * extractb;
36453        }
36454        simd_insert!(a, 0, add)
36455    }
36456}
36457
36458/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36459///
36460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
36461#[inline]
36462#[target_feature(enable = "avx512f")]
36463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36464#[cfg_attr(test, assert_instr(vdivss))]
36465pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36466    unsafe {
36467        let extractsrc: f32 = simd_extract!(src, 0);
36468        let mut add: f32 = extractsrc;
36469        if (k & 0b00000001) != 0 {
36470            let extracta: f32 = simd_extract!(a, 0);
36471            let extractb: f32 = simd_extract!(b, 0);
36472            add = extracta / extractb;
36473        }
36474        simd_insert!(a, 0, add)
36475    }
36476}
36477
36478/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36479///
36480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
36481#[inline]
36482#[target_feature(enable = "avx512f")]
36483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36484#[cfg_attr(test, assert_instr(vdivss))]
36485pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36486    unsafe {
36487        let mut add: f32 = 0.;
36488        if (k & 0b00000001) != 0 {
36489            let extracta: f32 = simd_extract!(a, 0);
36490            let extractb: f32 = simd_extract!(b, 0);
36491            add = extracta / extractb;
36492        }
36493        simd_insert!(a, 0, add)
36494    }
36495}
36496
36497/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36498///
36499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
36500#[inline]
36501#[target_feature(enable = "avx512f")]
36502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36503#[cfg_attr(test, assert_instr(vdivsd))]
36504pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36505    unsafe {
36506        let extractsrc: f64 = simd_extract!(src, 0);
36507        let mut add: f64 = extractsrc;
36508        if (k & 0b00000001) != 0 {
36509            let extracta: f64 = simd_extract!(a, 0);
36510            let extractb: f64 = simd_extract!(b, 0);
36511            add = extracta / extractb;
36512        }
36513        simd_insert!(a, 0, add)
36514    }
36515}
36516
36517/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36518///
36519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
36520#[inline]
36521#[target_feature(enable = "avx512f")]
36522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36523#[cfg_attr(test, assert_instr(vdivsd))]
36524pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36525    unsafe {
36526        let mut add: f64 = 0.;
36527        if (k & 0b00000001) != 0 {
36528            let extracta: f64 = simd_extract!(a, 0);
36529            let extractb: f64 = simd_extract!(b, 0);
36530            add = extracta / extractb;
36531        }
36532        simd_insert!(a, 0, add)
36533    }
36534}
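
// Illustrative sketch (not part of the crate): with the zeromask forms a clear
// mask bit 0 forces the low lane to 0.0 instead of merging from a `src` operand,
// and the operation for that lane is skipped entirely (as in the fallback above),
// so the values in `b` do not matter when the mask is clear.
//
//     use core::arch::x86_64::*;
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo(a: __m128, b: __m128) -> __m128 {
//         // Lane 0 = 0.0 regardless of `a` and `b`; lanes 1..=3 are copied from `a`.
//         _mm_maskz_div_ss(0, a, b)
//     }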
36535
36536/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36537///
36538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
36539#[inline]
36540#[target_feature(enable = "avx512f")]
36541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36542#[cfg_attr(test, assert_instr(vmaxss))]
36543pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36544    unsafe {
36545        transmute(vmaxss(
36546            a.as_f32x4(),
36547            b.as_f32x4(),
36548            src.as_f32x4(),
36549            k,
36550            _MM_FROUND_CUR_DIRECTION,
36551        ))
36552    }
36553}
36554
36555/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36556///
36557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
36558#[inline]
36559#[target_feature(enable = "avx512f")]
36560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36561#[cfg_attr(test, assert_instr(vmaxss))]
36562pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36563    unsafe {
36564        transmute(vmaxss(
36565            a.as_f32x4(),
36566            b.as_f32x4(),
36567            f32x4::ZERO,
36568            k,
36569            _MM_FROUND_CUR_DIRECTION,
36570        ))
36571    }
36572}
36573
36574/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36575///
36576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
36577#[inline]
36578#[target_feature(enable = "avx512f")]
36579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36580#[cfg_attr(test, assert_instr(vmaxsd))]
36581pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36582    unsafe {
36583        transmute(vmaxsd(
36584            a.as_f64x2(),
36585            b.as_f64x2(),
36586            src.as_f64x2(),
36587            k,
36588            _MM_FROUND_CUR_DIRECTION,
36589        ))
36590    }
36591}
36592
36593/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36594///
36595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
36596#[inline]
36597#[target_feature(enable = "avx512f")]
36598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36599#[cfg_attr(test, assert_instr(vmaxsd))]
36600pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36601    unsafe {
36602        transmute(vmaxsd(
36603            a.as_f64x2(),
36604            b.as_f64x2(),
36605            f64x2::ZERO,
36606            k,
36607            _MM_FROUND_CUR_DIRECTION,
36608        ))
36609    }
36610}
36611
36612/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36613///
36614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
36615#[inline]
36616#[target_feature(enable = "avx512f")]
36617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36618#[cfg_attr(test, assert_instr(vminss))]
36619pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36620    unsafe {
36621        transmute(vminss(
36622            a.as_f32x4(),
36623            b.as_f32x4(),
36624            src.as_f32x4(),
36625            k,
36626            _MM_FROUND_CUR_DIRECTION,
36627        ))
36628    }
36629}
36630
36631/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36632///
36633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
36634#[inline]
36635#[target_feature(enable = "avx512f")]
36636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36637#[cfg_attr(test, assert_instr(vminss))]
36638pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36639    unsafe {
36640        transmute(vminss(
36641            a.as_f32x4(),
36642            b.as_f32x4(),
36643            f32x4::ZERO,
36644            k,
36645            _MM_FROUND_CUR_DIRECTION,
36646        ))
36647    }
36648}
36649
36650/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36651///
36652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
36653#[inline]
36654#[target_feature(enable = "avx512f")]
36655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36656#[cfg_attr(test, assert_instr(vminsd))]
36657pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36658    unsafe {
36659        transmute(vminsd(
36660            a.as_f64x2(),
36661            b.as_f64x2(),
36662            src.as_f64x2(),
36663            k,
36664            _MM_FROUND_CUR_DIRECTION,
36665        ))
36666    }
36667}
36668
36669/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36670///
36671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
36672#[inline]
36673#[target_feature(enable = "avx512f")]
36674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36675#[cfg_attr(test, assert_instr(vminsd))]
36676pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36677    unsafe {
36678        transmute(vminsd(
36679            a.as_f64x2(),
36680            b.as_f64x2(),
36681            f64x2::ZERO,
36682            k,
36683            _MM_FROUND_CUR_DIRECTION,
36684        ))
36685    }
36686}
36687
36688/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36689///
36690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
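///
/// A minimal sketch of the writemask behaviour (illustrative; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and an AVX-512F CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let src = _mm_set1_ps(7.0);
///     let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
///     let b = _mm_set_ss(16.0);
///     // Mask bit 0 set: lower lane = sqrt(16.0) = 4.0; upper lanes copied from `a`.
///     let r = _mm_mask_sqrt_ss(src, 0b1, a, b);
///     // Mask bit 0 clear: lower lane copied from `src` (7.0) instead.
///     let s = _mm_mask_sqrt_ss(src, 0b0, a, b);
/// }
/// ```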
36691#[inline]
36692#[target_feature(enable = "avx512f")]
36693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36694#[cfg_attr(test, assert_instr(vsqrtss))]
36695pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36696    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36697}
36698
36699/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36700///
36701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
36702#[inline]
36703#[target_feature(enable = "avx512f")]
36704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36705#[cfg_attr(test, assert_instr(vsqrtss))]
36706pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36707    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
36708}
36709
36710/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36711///
36712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
36713#[inline]
36714#[target_feature(enable = "avx512f")]
36715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36716#[cfg_attr(test, assert_instr(vsqrtsd))]
36717pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36718    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36719}
36720
36721/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36722///
36723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
36724#[inline]
36725#[target_feature(enable = "avx512f")]
36726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36727#[cfg_attr(test, assert_instr(vsqrtsd))]
36728pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36729    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
36730}
36731
36732/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36733///
36734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
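///
/// A small sketch checking the approximation against the stated 2^-14 bound
/// (illustrative; assumes a nightly toolchain with `stdarch_x86_avx512` and an
/// AVX-512F CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
///     let b = _mm_set_ss(4.0);
///     let r = _mm_rsqrt14_ss(a, b);
///     // 1 / sqrt(4.0) = 0.5, to within a relative error of 2^-14.
///     assert!((_mm_cvtss_f32(r) - 0.5).abs() <= 0.5 / 16384.0);
/// }
/// ```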
36735#[inline]
36736#[target_feature(enable = "avx512f")]
36737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36738#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36739pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
36740    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36741}
36742
36743/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36744///
36745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
36746#[inline]
36747#[target_feature(enable = "avx512f")]
36748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36749#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36750pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36751    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36752}
36753
36754/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36755///
36756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
36757#[inline]
36758#[target_feature(enable = "avx512f")]
36759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36760#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36761pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36762    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36763}
36764
36765/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36766///
36767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
36768#[inline]
36769#[target_feature(enable = "avx512f")]
36770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36771#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36772pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
36773    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36774}
36775
36776/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36777///
36778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
36779#[inline]
36780#[target_feature(enable = "avx512f")]
36781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36782#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36783pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36784    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36785}
36786
36787/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36788///
36789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
36790#[inline]
36791#[target_feature(enable = "avx512f")]
36792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36793#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36794pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36795    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36796}
36797
36798/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36799///
36800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
36801#[inline]
36802#[target_feature(enable = "avx512f")]
36803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36804#[cfg_attr(test, assert_instr(vrcp14ss))]
36805pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
36806    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36807}
36808
36809/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36810///
36811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
36812#[inline]
36813#[target_feature(enable = "avx512f")]
36814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36815#[cfg_attr(test, assert_instr(vrcp14ss))]
36816pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36817    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36818}
36819
36820/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36821///
36822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
36823#[inline]
36824#[target_feature(enable = "avx512f")]
36825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36826#[cfg_attr(test, assert_instr(vrcp14ss))]
36827pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36828    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36829}
36830
36831/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36832///
36833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
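///
/// A small sketch of the reciprocal approximation (illustrative; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and an AVX-512F CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_setr_pd(0.0, 99.0);
///     let b = _mm_set_sd(8.0);
///     let r = _mm_rcp14_sd(a, b);
///     // Lower lane is approximately 1/8 = 0.125, to within a relative error of
///     // 2^-14; the upper lane is copied from `a` (99.0).
///     assert!((_mm_cvtsd_f64(r) - 0.125).abs() <= 0.125 / 16384.0);
/// }
/// ```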
36834#[inline]
36835#[target_feature(enable = "avx512f")]
36836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36837#[cfg_attr(test, assert_instr(vrcp14sd))]
36838pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
36839    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36840}
36841
36842/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36843///
36844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
36845#[inline]
36846#[target_feature(enable = "avx512f")]
36847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36848#[cfg_attr(test, assert_instr(vrcp14sd))]
36849pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36850    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36851}
36852
36853/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36854///
36855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
36856#[inline]
36857#[target_feature(enable = "avx512f")]
36858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36859#[cfg_attr(test, assert_instr(vrcp14sd))]
36860pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36861    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36862}
36863
36864/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36865///
36866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
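///
/// A minimal sketch of the floor(log2(x)) behaviour (illustrative; assumes a
/// nightly toolchain with `stdarch_x86_avx512` and an AVX-512F CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set1_ps(0.0);
///     let b = _mm_set_ss(10.0);
///     let r = _mm_getexp_ss(a, b);
///     // floor(log2(10.0)) = 3, returned as a single-precision 3.0.
///     assert_eq!(_mm_cvtss_f32(r), 3.0);
/// }
/// ```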
36867#[inline]
36868#[target_feature(enable = "avx512f")]
36869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36870#[cfg_attr(test, assert_instr(vgetexpss))]
36871pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
36872    unsafe {
36873        transmute(vgetexpss(
36874            a.as_f32x4(),
36875            b.as_f32x4(),
36876            f32x4::ZERO,
36877            0b1,
36878            _MM_FROUND_NO_EXC,
36879        ))
36880    }
36881}
36882
36883/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36884///
36885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
36886#[inline]
36887#[target_feature(enable = "avx512f")]
36888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36889#[cfg_attr(test, assert_instr(vgetexpss))]
36890pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36891    unsafe {
36892        transmute(vgetexpss(
36893            a.as_f32x4(),
36894            b.as_f32x4(),
36895            src.as_f32x4(),
36896            k,
36897            _MM_FROUND_NO_EXC,
36898        ))
36899    }
36900}
36901
36902/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36903///
36904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
36905#[inline]
36906#[target_feature(enable = "avx512f")]
36907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36908#[cfg_attr(test, assert_instr(vgetexpss))]
36909pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36910    unsafe {
36911        transmute(vgetexpss(
36912            a.as_f32x4(),
36913            b.as_f32x4(),
36914            f32x4::ZERO,
36915            k,
36916            _MM_FROUND_NO_EXC,
36917        ))
36918    }
36919}
36920
36921/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36922///
36923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
36924#[inline]
36925#[target_feature(enable = "avx512f")]
36926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36927#[cfg_attr(test, assert_instr(vgetexpsd))]
36928pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
36929    unsafe {
36930        transmute(vgetexpsd(
36931            a.as_f64x2(),
36932            b.as_f64x2(),
36933            f64x2::ZERO,
36934            0b1,
36935            _MM_FROUND_NO_EXC,
36936        ))
36937    }
36938}
36939
36940/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36941///
36942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
36943#[inline]
36944#[target_feature(enable = "avx512f")]
36945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36946#[cfg_attr(test, assert_instr(vgetexpsd))]
36947pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36948    unsafe {
36949        transmute(vgetexpsd(
36950            a.as_f64x2(),
36951            b.as_f64x2(),
36952            src.as_f64x2(),
36953            k,
36954            _MM_FROUND_NO_EXC,
36955        ))
36956    }
36957}
36958
36959/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36960///
36961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
36962#[inline]
36963#[target_feature(enable = "avx512f")]
36964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36965#[cfg_attr(test, assert_instr(vgetexpsd))]
36966pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36967    unsafe {
36968        transmute(vgetexpsd(
36969            a.as_f64x2(),
36970            b.as_f64x2(),
36971            f64x2::ZERO,
36972            k,
36973            _MM_FROUND_NO_EXC,
36974        ))
36975    }
36976}
36977
36978/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36979/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36980///    _MM_MANT_NORM_1_2     // interval [1, 2)\
36981///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
36982///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
36983///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36984/// The sign is determined by sc which can take the following values:\
36985///    _MM_MANT_SIGN_src     // sign = sign(src)\
36986///    _MM_MANT_SIGN_zero    // sign = 0\
36987///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
36988/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36989///
36990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
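///
/// A minimal sketch (illustrative; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU; the const arguments `0, 0` are the
/// encodings of `_MM_MANT_NORM_1_2` and `_MM_MANT_SIGN_src` from the table above):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set1_ps(0.0);
///     let b = _mm_set_ss(10.0);
///     // 10.0 = 1.25 * 2^3, so the mantissa normalized to [1, 2) is 1.25.
///     let r = _mm_getmant_ss::<0, 0>(a, b);
///     assert_eq!(_mm_cvtss_f32(r), 1.25);
/// }
/// ```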
36991#[inline]
36992#[target_feature(enable = "avx512f")]
36993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36994#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36995#[rustc_legacy_const_generics(2, 3)]
36996pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
36997    a: __m128,
36998    b: __m128,
36999) -> __m128 {
37000    unsafe {
37001        static_assert_uimm_bits!(NORM, 4);
37002        static_assert_uimm_bits!(SIGN, 2);
37003        let a = a.as_f32x4();
37004        let b = b.as_f32x4();
37005        let r = vgetmantss(
37006            a,
37007            b,
37008            SIGN << 2 | NORM,
37009            f32x4::ZERO,
37010            0b1,
37011            _MM_FROUND_CUR_DIRECTION,
37012        );
37013        transmute(r)
37014    }
37015}
37016
37017/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37018/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37019///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37020///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37021///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37022///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37023/// The sign is determined by sc which can take the following values:\
37024///    _MM_MANT_SIGN_src     // sign = sign(src)\
37025///    _MM_MANT_SIGN_zero    // sign = 0\
37026///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37027/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37028///
37029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
37030#[inline]
37031#[target_feature(enable = "avx512f")]
37032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37033#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
37034#[rustc_legacy_const_generics(4, 5)]
37035pub fn _mm_mask_getmant_ss<
37036    const NORM: _MM_MANTISSA_NORM_ENUM,
37037    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37038>(
37039    src: __m128,
37040    k: __mmask8,
37041    a: __m128,
37042    b: __m128,
37043) -> __m128 {
37044    unsafe {
37045        static_assert_uimm_bits!(NORM, 4);
37046        static_assert_uimm_bits!(SIGN, 2);
37047        let a = a.as_f32x4();
37048        let b = b.as_f32x4();
37049        let src = src.as_f32x4();
37050        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
37051        transmute(r)
37052    }
37053}
37054
37055/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37056/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37057///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37058///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37059///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37060///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37061/// The sign is determined by sc which can take the following values:\
37062///    _MM_MANT_SIGN_src     // sign = sign(src)\
37063///    _MM_MANT_SIGN_zero    // sign = 0\
37064///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37065/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37066///
37067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
37068#[inline]
37069#[target_feature(enable = "avx512f")]
37070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37071#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
37072#[rustc_legacy_const_generics(3, 4)]
37073pub fn _mm_maskz_getmant_ss<
37074    const NORM: _MM_MANTISSA_NORM_ENUM,
37075    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37076>(
37077    k: __mmask8,
37078    a: __m128,
37079    b: __m128,
37080) -> __m128 {
37081    unsafe {
37082        static_assert_uimm_bits!(NORM, 4);
37083        static_assert_uimm_bits!(SIGN, 2);
37084        let a = a.as_f32x4();
37085        let b = b.as_f32x4();
37086        let r = vgetmantss(
37087            a,
37088            b,
37089            SIGN << 2 | NORM,
37090            f32x4::ZERO,
37091            k,
37092            _MM_FROUND_CUR_DIRECTION,
37093        );
37094        transmute(r)
37095    }
37096}
37097
37098/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37099/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37100///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37101///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37102///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37103///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37104/// The sign is determined by sc which can take the following values:\
37105///    _MM_MANT_SIGN_src     // sign = sign(src)\
37106///    _MM_MANT_SIGN_zero    // sign = 0\
37107///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37108/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37109///
37110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
37111#[inline]
37112#[target_feature(enable = "avx512f")]
37113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37114#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37115#[rustc_legacy_const_generics(2, 3)]
37116pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
37117    a: __m128d,
37118    b: __m128d,
37119) -> __m128d {
37120    unsafe {
37121        static_assert_uimm_bits!(NORM, 4);
37122        static_assert_uimm_bits!(SIGN, 2);
37123        let a = a.as_f64x2();
37124        let b = b.as_f64x2();
37125        let r = vgetmantsd(
37126            a,
37127            b,
37128            SIGN << 2 | NORM,
37129            f64x2::ZERO,
37130            0b1,
37131            _MM_FROUND_CUR_DIRECTION,
37132        );
37133        transmute(r)
37134    }
37135}
37136
37137/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37138/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37139///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37140///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37141///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37142///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37143/// The sign is determined by sc which can take the following values:\
37144///    _MM_MANT_SIGN_src     // sign = sign(src)\
37145///    _MM_MANT_SIGN_zero    // sign = 0\
37146///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37147/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37148///
37149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
37150#[inline]
37151#[target_feature(enable = "avx512f")]
37152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37153#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37154#[rustc_legacy_const_generics(4, 5)]
37155pub fn _mm_mask_getmant_sd<
37156    const NORM: _MM_MANTISSA_NORM_ENUM,
37157    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37158>(
37159    src: __m128d,
37160    k: __mmask8,
37161    a: __m128d,
37162    b: __m128d,
37163) -> __m128d {
37164    unsafe {
37165        static_assert_uimm_bits!(NORM, 4);
37166        static_assert_uimm_bits!(SIGN, 2);
37167        let a = a.as_f64x2();
37168        let b = b.as_f64x2();
37169        let src = src.as_f64x2();
37170        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
37171        transmute(r)
37172    }
37173}
37174
37175/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37177///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37178///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37179///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37180///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37181/// The sign is determined by sc which can take the following values:\
37182///    _MM_MANT_SIGN_src     // sign = sign(src)\
37183///    _MM_MANT_SIGN_zero    // sign = 0\
37184///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37185/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37186///
37187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
37188#[inline]
37189#[target_feature(enable = "avx512f")]
37190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37191#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37192#[rustc_legacy_const_generics(3, 4)]
37193pub fn _mm_maskz_getmant_sd<
37194    const NORM: _MM_MANTISSA_NORM_ENUM,
37195    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37196>(
37197    k: __mmask8,
37198    a: __m128d,
37199    b: __m128d,
37200) -> __m128d {
37201    unsafe {
37202        static_assert_uimm_bits!(NORM, 4);
37203        static_assert_uimm_bits!(SIGN, 2);
37204        let a = a.as_f64x2();
37205        let b = b.as_f64x2();
37206        let r = vgetmantsd(
37207            a,
37208            b,
37209            SIGN << 2 | NORM,
37210            f64x2::ZERO,
37211            k,
37212            _MM_FROUND_CUR_DIRECTION,
37213        );
37214        transmute(r)
37215    }
37216}
37217
37218/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37219/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37220/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37221/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37222/// * [`_MM_FROUND_TO_POS_INF`] : round up
37223/// * [`_MM_FROUND_TO_ZERO`] : truncate
37224/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37225///
37226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
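///
/// A minimal sketch (illustrative; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU; imm8\[7:4\] is assumed to hold the
/// number of fraction bits to keep, as in the underlying `vrndscaless` encoding):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set1_ps(0.0);
///     let b = _mm_set_ss(2.7);
///     // IMM8 = 0: zero fraction bits, round to the nearest integer.
///     let r = _mm_roundscale_ss::<0>(a, b);
///     assert_eq!(_mm_cvtss_f32(r), 3.0);
///     // IMM8 = 0b0001_0000: one fraction bit, i.e. round to the nearest 0.5.
///     let h = _mm_roundscale_ss::<0b0001_0000>(a, b);
///     assert_eq!(_mm_cvtss_f32(h), 2.5);
/// }
/// ```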
37227#[inline]
37228#[target_feature(enable = "avx512f")]
37229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37230#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
37231#[rustc_legacy_const_generics(2)]
37232pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
37233    unsafe {
37234        static_assert_uimm_bits!(IMM8, 8);
37235        let a = a.as_f32x4();
37236        let b = b.as_f32x4();
37237        let r = vrndscaless(
37238            a,
37239            b,
37240            f32x4::ZERO,
37241            0b11111111,
37242            IMM8,
37243            _MM_FROUND_CUR_DIRECTION,
37244        );
37245        transmute(r)
37246    }
37247}
37248
37249/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37250/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37251/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37252/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37253/// * [`_MM_FROUND_TO_POS_INF`] : round up
37254/// * [`_MM_FROUND_TO_ZERO`] : truncate
37255/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37256///
37257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
37258#[inline]
37259#[target_feature(enable = "avx512f")]
37260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37261#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37262#[rustc_legacy_const_generics(4)]
37263pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
37264    src: __m128,
37265    k: __mmask8,
37266    a: __m128,
37267    b: __m128,
37268) -> __m128 {
37269    unsafe {
37270        static_assert_uimm_bits!(IMM8, 8);
37271        let a = a.as_f32x4();
37272        let b = b.as_f32x4();
37273        let src = src.as_f32x4();
37274        let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37275        transmute(r)
37276    }
37277}
37278
37279/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37280/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37281/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37282/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37283/// * [`_MM_FROUND_TO_POS_INF`] : round up
37284/// * [`_MM_FROUND_TO_ZERO`] : truncate
37285/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37286///
37287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
37288#[inline]
37289#[target_feature(enable = "avx512f")]
37290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37291#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37292#[rustc_legacy_const_generics(3)]
37293pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37294    unsafe {
37295        static_assert_uimm_bits!(IMM8, 8);
37296        let a = a.as_f32x4();
37297        let b = b.as_f32x4();
37298        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37299        transmute(r)
37300    }
37301}
37302
37303/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37304/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37305/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37306/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37307/// * [`_MM_FROUND_TO_POS_INF`] : round up
37308/// * [`_MM_FROUND_TO_ZERO`] : truncate
37309/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37310///
37311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
37312#[inline]
37313#[target_feature(enable = "avx512f")]
37314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37315#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
37316#[rustc_legacy_const_generics(2)]
37317pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
37318    unsafe {
37319        static_assert_uimm_bits!(IMM8, 8);
37320        let a = a.as_f64x2();
37321        let b = b.as_f64x2();
37322        let r = vrndscalesd(
37323            a,
37324            b,
37325            f64x2::ZERO,
37326            0b11111111,
37327            IMM8,
37328            _MM_FROUND_CUR_DIRECTION,
37329        );
37330        transmute(r)
37331    }
37332}
37333
37334/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37335/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37336/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37337/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37338/// * [`_MM_FROUND_TO_POS_INF`] : round up
37339/// * [`_MM_FROUND_TO_ZERO`] : truncate
37340/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37341///
37342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
37343#[inline]
37344#[target_feature(enable = "avx512f")]
37345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37346#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37347#[rustc_legacy_const_generics(4)]
37348pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
37349    src: __m128d,
37350    k: __mmask8,
37351    a: __m128d,
37352    b: __m128d,
37353) -> __m128d {
37354    unsafe {
37355        static_assert_uimm_bits!(IMM8, 8);
37356        let a = a.as_f64x2();
37357        let b = b.as_f64x2();
37358        let src = src.as_f64x2();
37359        let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37360        transmute(r)
37361    }
37362}
37363
37364/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37365/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37366/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37367/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37368/// * [`_MM_FROUND_TO_POS_INF`] : round up
37369/// * [`_MM_FROUND_TO_ZERO`] : truncate
37370/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37371///
37372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
37373#[inline]
37374#[target_feature(enable = "avx512f")]
37375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37376#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37377#[rustc_legacy_const_generics(3)]
37378pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37379    unsafe {
37380        static_assert_uimm_bits!(IMM8, 8);
37381        let a = a.as_f64x2();
37382        let b = b.as_f64x2();
37383        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
37384        transmute(r)
37385    }
37386}
37387
37388/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
37389///
37390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
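///
/// A minimal sketch (illustrative; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU). The lower lane of the result is
/// `a * 2^floor(b)`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_setr_ps(3.0, 7.0, 8.0, 9.0);
///     let b = _mm_set_ss(2.0);
///     let r = _mm_scalef_ss(a, b);
///     // 3.0 * 2^2 = 12.0 in the lower lane; the upper lanes are copied from `a`.
///     assert_eq!(_mm_cvtss_f32(r), 12.0);
/// }
/// ```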
37391#[inline]
37392#[target_feature(enable = "avx512f")]
37393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37394#[cfg_attr(test, assert_instr(vscalefss))]
37395pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
37396    unsafe {
37397        let a = a.as_f32x4();
37398        let b = b.as_f32x4();
37399        transmute(vscalefss(
37400            a,
37401            b,
37402            f32x4::ZERO,
37403            0b11111111,
37404            _MM_FROUND_CUR_DIRECTION,
37405        ))
37406    }
37407}
37408
37409/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37410///
37411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
37412#[inline]
37413#[target_feature(enable = "avx512f")]
37414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37415#[cfg_attr(test, assert_instr(vscalefss))]
37416pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37417    unsafe {
37418        let a = a.as_f32x4();
37419        let b = b.as_f32x4();
37420        let src = src.as_f32x4();
37421        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
37422    }
37423}
37424
37425/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37426///
37427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
37428#[inline]
37429#[target_feature(enable = "avx512f")]
37430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37431#[cfg_attr(test, assert_instr(vscalefss))]
37432pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37433    unsafe {
37434        transmute(vscalefss(
37435            a.as_f32x4(),
37436            b.as_f32x4(),
37437            f32x4::ZERO,
37438            k,
37439            _MM_FROUND_CUR_DIRECTION,
37440        ))
37441    }
37442}
37443
37444/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
37445///
37446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
37447#[inline]
37448#[target_feature(enable = "avx512f")]
37449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37450#[cfg_attr(test, assert_instr(vscalefsd))]
37451pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
37452    unsafe {
37453        transmute(vscalefsd(
37454            a.as_f64x2(),
37455            b.as_f64x2(),
37456            f64x2::ZERO,
37457            0b11111111,
37458            _MM_FROUND_CUR_DIRECTION,
37459        ))
37460    }
37461}
37462
37463/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37464///
37465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
37466#[inline]
37467#[target_feature(enable = "avx512f")]
37468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37469#[cfg_attr(test, assert_instr(vscalefsd))]
37470pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37471    unsafe {
37472        transmute(vscalefsd(
37473            a.as_f64x2(),
37474            b.as_f64x2(),
37475            src.as_f64x2(),
37476            k,
37477            _MM_FROUND_CUR_DIRECTION,
37478        ))
37479    }
37480}
37481
37482/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37483///
37484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
37485#[inline]
37486#[target_feature(enable = "avx512f")]
37487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37488#[cfg_attr(test, assert_instr(vscalefsd))]
37489pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37490    unsafe {
37491        transmute(vscalefsd(
37492            a.as_f64x2(),
37493            b.as_f64x2(),
37494            f64x2::ZERO,
37495            k,
37496            _MM_FROUND_CUR_DIRECTION,
37497        ))
37498    }
37499}
37500
37501/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37502///
37503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
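///
/// A minimal sketch of the masked fused multiply-add (illustrative; assumes a
/// nightly toolchain with `stdarch_x86_avx512` and an AVX-512F CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_setr_ps(2.0, 10.0, 11.0, 12.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // Mask bit 0 set: lower lane = 2.0 * 3.0 + 4.0 = 10.0.
///     let r = _mm_mask_fmadd_ss(a, 0b1, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 10.0);
///     // Mask bit 0 clear: lower lane is copied from `a` (2.0).
///     let s = _mm_mask_fmadd_ss(a, 0b0, b, c);
///     assert_eq!(_mm_cvtss_f32(s), 2.0);
/// }
/// ```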
37504#[inline]
37505#[target_feature(enable = "avx512f")]
37506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37507#[cfg_attr(test, assert_instr(vfmadd))]
37508pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37509    unsafe {
37510        let mut fmadd: f32 = simd_extract!(a, 0);
37511        if (k & 0b00000001) != 0 {
37512            let extractb: f32 = simd_extract!(b, 0);
37513            let extractc: f32 = simd_extract!(c, 0);
37514            fmadd = fmaf32(fmadd, extractb, extractc);
37515        }
37516        simd_insert!(a, 0, fmadd)
37517    }
37518}
37519
37520/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37521///
37522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
37523#[inline]
37524#[target_feature(enable = "avx512f")]
37525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37526#[cfg_attr(test, assert_instr(vfmadd))]
37527pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37528    unsafe {
37529        let mut fmadd: f32 = 0.;
37530        if (k & 0b00000001) != 0 {
37531            let extracta: f32 = simd_extract!(a, 0);
37532            let extractb: f32 = simd_extract!(b, 0);
37533            let extractc: f32 = simd_extract!(c, 0);
37534            fmadd = fmaf32(extracta, extractb, extractc);
37535        }
37536        simd_insert!(a, 0, fmadd)
37537    }
37538}
37539
37540/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37541///
37542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
37543#[inline]
37544#[target_feature(enable = "avx512f")]
37545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37546#[cfg_attr(test, assert_instr(vfmadd))]
37547pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37548    unsafe {
37549        let mut fmadd: f32 = simd_extract!(c, 0);
37550        if (k & 0b00000001) != 0 {
37551            let extracta: f32 = simd_extract!(a, 0);
37552            let extractb: f32 = simd_extract!(b, 0);
37553            fmadd = fmaf32(extracta, extractb, fmadd);
37554        }
37555        simd_insert!(c, 0, fmadd)
37556    }
37557}
37558
37559/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37560///
37561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
37562#[inline]
37563#[target_feature(enable = "avx512f")]
37564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37565#[cfg_attr(test, assert_instr(vfmadd))]
37566pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37567    unsafe {
37568        let mut fmadd: f64 = simd_extract!(a, 0);
37569        if (k & 0b00000001) != 0 {
37570            let extractb: f64 = simd_extract!(b, 0);
37571            let extractc: f64 = simd_extract!(c, 0);
37572            fmadd = fmaf64(fmadd, extractb, extractc);
37573        }
37574        simd_insert!(a, 0, fmadd)
37575    }
37576}
37577
37578/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37579///
37580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
37581#[inline]
37582#[target_feature(enable = "avx512f")]
37583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37584#[cfg_attr(test, assert_instr(vfmadd))]
37585pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37586    unsafe {
37587        let mut fmadd: f64 = 0.;
37588        if (k & 0b00000001) != 0 {
37589            let extracta: f64 = simd_extract!(a, 0);
37590            let extractb: f64 = simd_extract!(b, 0);
37591            let extractc: f64 = simd_extract!(c, 0);
37592            fmadd = fmaf64(extracta, extractb, extractc);
37593        }
37594        simd_insert!(a, 0, fmadd)
37595    }
37596}
37597
37598/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37599///
37600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
37601#[inline]
37602#[target_feature(enable = "avx512f")]
37603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37604#[cfg_attr(test, assert_instr(vfmadd))]
37605pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37606    unsafe {
37607        let mut fmadd: f64 = simd_extract!(c, 0);
37608        if (k & 0b00000001) != 0 {
37609            let extracta: f64 = simd_extract!(a, 0);
37610            let extractb: f64 = simd_extract!(b, 0);
37611            fmadd = fmaf64(extracta, extractb, fmadd);
37612        }
37613        simd_insert!(c, 0, fmadd)
37614    }
37615}
37616
37617/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37618///
37619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
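///
/// A minimal sketch (illustrative; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_setr_ps(2.0, 5.0, 6.0, 7.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(4.0);
///     // Mask bit 0 set: lower lane = 2.0 * 3.0 - 4.0 = 2.0; upper lanes from `a`.
///     let r = _mm_mask_fmsub_ss(a, 0b1, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 2.0);
/// }
/// ```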
37620#[inline]
37621#[target_feature(enable = "avx512f")]
37622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37623#[cfg_attr(test, assert_instr(vfmsub))]
37624pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37625    unsafe {
37626        let mut fmsub: f32 = simd_extract!(a, 0);
37627        if (k & 0b00000001) != 0 {
37628            let extractb: f32 = simd_extract!(b, 0);
37629            let extractc: f32 = simd_extract!(c, 0);
37630            let extractc = -extractc;
37631            fmsub = fmaf32(fmsub, extractb, extractc);
37632        }
37633        simd_insert!(a, 0, fmsub)
37634    }
37635}
37636
37637/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37638///
37639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
37640#[inline]
37641#[target_feature(enable = "avx512f")]
37642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37643#[cfg_attr(test, assert_instr(vfmsub))]
37644pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37645    unsafe {
37646        let mut fmsub: f32 = 0.;
37647        if (k & 0b00000001) != 0 {
37648            let extracta: f32 = simd_extract!(a, 0);
37649            let extractb: f32 = simd_extract!(b, 0);
37650            let extractc: f32 = simd_extract!(c, 0);
37651            let extractc = -extractc;
37652            fmsub = fmaf32(extracta, extractb, extractc);
37653        }
37654        simd_insert!(a, 0, fmsub)
37655    }
37656}
37657
37658/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37659///
37660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
37661#[inline]
37662#[target_feature(enable = "avx512f")]
37663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37664#[cfg_attr(test, assert_instr(vfmsub))]
37665pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37666    unsafe {
37667        let mut fmsub: f32 = simd_extract!(c, 0);
37668        if (k & 0b00000001) != 0 {
37669            let extracta: f32 = simd_extract!(a, 0);
37670            let extractb: f32 = simd_extract!(b, 0);
37671            let extractc = -fmsub;
37672            fmsub = fmaf32(extracta, extractb, extractc);
37673        }
37674        simd_insert!(c, 0, fmsub)
37675    }
37676}
37677
37678/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37679///
37680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
37681#[inline]
37682#[target_feature(enable = "avx512f")]
37683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37684#[cfg_attr(test, assert_instr(vfmsub))]
37685pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37686    unsafe {
37687        let mut fmsub: f64 = simd_extract!(a, 0);
37688        if (k & 0b00000001) != 0 {
37689            let extractb: f64 = simd_extract!(b, 0);
37690            let extractc: f64 = simd_extract!(c, 0);
37691            let extractc = -extractc;
37692            fmsub = fmaf64(fmsub, extractb, extractc);
37693        }
37694        simd_insert!(a, 0, fmsub)
37695    }
37696}
37697
37698/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37699///
37700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
37701#[inline]
37702#[target_feature(enable = "avx512f")]
37703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37704#[cfg_attr(test, assert_instr(vfmsub))]
37705pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37706    unsafe {
37707        let mut fmsub: f64 = 0.;
37708        if (k & 0b00000001) != 0 {
37709            let extracta: f64 = simd_extract!(a, 0);
37710            let extractb: f64 = simd_extract!(b, 0);
37711            let extractc: f64 = simd_extract!(c, 0);
37712            let extractc = -extractc;
37713            fmsub = fmaf64(extracta, extractb, extractc);
37714        }
37715        simd_insert!(a, 0, fmsub)
37716    }
37717}
37718
37719/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37720///
37721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
37722#[inline]
37723#[target_feature(enable = "avx512f")]
37724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37725#[cfg_attr(test, assert_instr(vfmsub))]
37726pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37727    unsafe {
37728        let mut fmsub: f64 = simd_extract!(c, 0);
37729        if (k & 0b00000001) != 0 {
37730            let extracta: f64 = simd_extract!(a, 0);
37731            let extractb: f64 = simd_extract!(b, 0);
37732            let extractc = -fmsub;
37733            fmsub = fmaf64(extracta, extractb, extractc);
37734        }
37735        simd_insert!(c, 0, fmsub)
37736    }
37737}
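
// A minimal illustrative sketch (hypothetical helper, not part of the upstream
// API; `avx512f` is assumed to be enabled in the caller): the scalar fmsub forms
// compute `a[0] * b[0] - c[0]`, i.e. an fma with `c` negated, which is exactly how
// the fallback bodies above are written. With mask bit 0 clear, the `_mask`,
// `_maskz` and `_mask3` forms fall back to `a[0]`, `0.0` and `c[0]` respectively.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    // With mask bit 0 set, the write-masked form behaves like an unmasked fmsub.
    _mm_mask_fmsub_sd(a, 0b1, b, c)
}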
37738
37739/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37740///
37741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
37742#[inline]
37743#[target_feature(enable = "avx512f")]
37744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37745#[cfg_attr(test, assert_instr(vfnmadd))]
37746pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37747    unsafe {
37748        let mut fnmadd: f32 = simd_extract!(a, 0);
37749        if (k & 0b00000001) != 0 {
37750            let extracta = -fnmadd;
37751            let extractb: f32 = simd_extract!(b, 0);
37752            let extractc: f32 = simd_extract!(c, 0);
37753            fnmadd = fmaf32(extracta, extractb, extractc);
37754        }
37755        simd_insert!(a, 0, fnmadd)
37756    }
37757}
37758
37759/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37760///
37761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
37762#[inline]
37763#[target_feature(enable = "avx512f")]
37764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37765#[cfg_attr(test, assert_instr(vfnmadd))]
37766pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37767    unsafe {
37768        let mut fnmadd: f32 = 0.;
37769        if (k & 0b00000001) != 0 {
37770            let extracta: f32 = simd_extract!(a, 0);
37771            let extracta = -extracta;
37772            let extractb: f32 = simd_extract!(b, 0);
37773            let extractc: f32 = simd_extract!(c, 0);
37774            fnmadd = fmaf32(extracta, extractb, extractc);
37775        }
37776        simd_insert!(a, 0, fnmadd)
37777    }
37778}
37779
37780/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37781///
37782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
37783#[inline]
37784#[target_feature(enable = "avx512f")]
37785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37786#[cfg_attr(test, assert_instr(vfnmadd))]
37787pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37788    unsafe {
37789        let mut fnmadd: f32 = simd_extract!(c, 0);
37790        if (k & 0b00000001) != 0 {
37791            let extracta: f32 = simd_extract!(a, 0);
37792            let extracta = -extracta;
37793            let extractb: f32 = simd_extract!(b, 0);
37794            fnmadd = fmaf32(extracta, extractb, fnmadd);
37795        }
37796        simd_insert!(c, 0, fnmadd)
37797    }
37798}
37799
37800/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37801///
37802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
37803#[inline]
37804#[target_feature(enable = "avx512f")]
37805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37806#[cfg_attr(test, assert_instr(vfnmadd))]
37807pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37808    unsafe {
37809        let mut fnmadd: f64 = simd_extract!(a, 0);
37810        if (k & 0b00000001) != 0 {
37811            let extracta = -fnmadd;
37812            let extractb: f64 = simd_extract!(b, 0);
37813            let extractc: f64 = simd_extract!(c, 0);
37814            fnmadd = fmaf64(extracta, extractb, extractc);
37815        }
37816        simd_insert!(a, 0, fnmadd)
37817    }
37818}
37819
37820/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37821///
37822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
37823#[inline]
37824#[target_feature(enable = "avx512f")]
37825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37826#[cfg_attr(test, assert_instr(vfnmadd))]
37827pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37828    unsafe {
37829        let mut fnmadd: f64 = 0.;
37830        if (k & 0b00000001) != 0 {
37831            let extracta: f64 = simd_extract!(a, 0);
37832            let extracta = -extracta;
37833            let extractb: f64 = simd_extract!(b, 0);
37834            let extractc: f64 = simd_extract!(c, 0);
37835            fnmadd = fmaf64(extracta, extractb, extractc);
37836        }
37837        simd_insert!(a, 0, fnmadd)
37838    }
37839}
37840
37841/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37842///
37843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
37844#[inline]
37845#[target_feature(enable = "avx512f")]
37846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37847#[cfg_attr(test, assert_instr(vfnmadd))]
37848pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37849    unsafe {
37850        let mut fnmadd: f64 = simd_extract!(c, 0);
37851        if (k & 0b00000001) != 0 {
37852            let extracta: f64 = simd_extract!(a, 0);
37853            let extracta = -extracta;
37854            let extractb: f64 = simd_extract!(b, 0);
37855            fnmadd = fmaf64(extracta, extractb, fnmadd);
37856        }
37857        simd_insert!(c, 0, fnmadd)
37858    }
37859}
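
// A minimal illustrative sketch (hypothetical helper, not part of the upstream
// API; `avx512f` is assumed to be enabled in the caller): fnmadd negates the
// product before adding, so the low lane is `-(a[0] * b[0]) + c[0]`. The `_mask3`
// form both takes its fallback value from `c` and copies `c`'s upper lanes.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    _mm_mask3_fnmadd_ss(a, b, c, k)
}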
37860
37861/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37862///
37863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
37864#[inline]
37865#[target_feature(enable = "avx512f")]
37866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37867#[cfg_attr(test, assert_instr(vfnmsub))]
37868pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37869    unsafe {
37870        let mut fnmsub: f32 = simd_extract!(a, 0);
37871        if (k & 0b00000001) != 0 {
37872            let extracta = -fnmsub;
37873            let extractb: f32 = simd_extract!(b, 0);
37874            let extractc: f32 = simd_extract!(c, 0);
37875            let extractc = -extractc;
37876            fnmsub = fmaf32(extracta, extractb, extractc);
37877        }
37878        simd_insert!(a, 0, fnmsub)
37879    }
37880}
37881
37882/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37883///
37884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
37885#[inline]
37886#[target_feature(enable = "avx512f")]
37887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37888#[cfg_attr(test, assert_instr(vfnmsub))]
37889pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37890    unsafe {
37891        let mut fnmsub: f32 = 0.;
37892        if (k & 0b00000001) != 0 {
37893            let extracta: f32 = simd_extract!(a, 0);
37894            let extracta = -extracta;
37895            let extractb: f32 = simd_extract!(b, 0);
37896            let extractc: f32 = simd_extract!(c, 0);
37897            let extractc = -extractc;
37898            fnmsub = fmaf32(extracta, extractb, extractc);
37899        }
37900        simd_insert!(a, 0, fnmsub)
37901    }
37902}
37903
37904/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37905///
37906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
37907#[inline]
37908#[target_feature(enable = "avx512f")]
37909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37910#[cfg_attr(test, assert_instr(vfnmsub))]
37911pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37912    unsafe {
37913        let mut fnmsub: f32 = simd_extract!(c, 0);
37914        if (k & 0b00000001) != 0 {
37915            let extracta: f32 = simd_extract!(a, 0);
37916            let extracta = -extracta;
37917            let extractb: f32 = simd_extract!(b, 0);
37918            let extractc = -fnmsub;
37919            fnmsub = fmaf32(extracta, extractb, extractc);
37920        }
37921        simd_insert!(c, 0, fnmsub)
37922    }
37923}
37924
37925/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37926///
37927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
37928#[inline]
37929#[target_feature(enable = "avx512f")]
37930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37931#[cfg_attr(test, assert_instr(vfnmsub))]
37932pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37933    unsafe {
37934        let mut fnmsub: f64 = simd_extract!(a, 0);
37935        if (k & 0b00000001) != 0 {
37936            let extracta = -fnmsub;
37937            let extractb: f64 = simd_extract!(b, 0);
37938            let extractc: f64 = simd_extract!(c, 0);
37939            let extractc = -extractc;
37940            fnmsub = fmaf64(extracta, extractb, extractc);
37941        }
37942        simd_insert!(a, 0, fnmsub)
37943    }
37944}
37945
37946/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37947///
37948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
37949#[inline]
37950#[target_feature(enable = "avx512f")]
37951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37952#[cfg_attr(test, assert_instr(vfnmsub))]
37953pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37954    unsafe {
37955        let mut fnmsub: f64 = 0.;
37956        if (k & 0b00000001) != 0 {
37957            let extracta: f64 = simd_extract!(a, 0);
37958            let extracta = -extracta;
37959            let extractb: f64 = simd_extract!(b, 0);
37960            let extractc: f64 = simd_extract!(c, 0);
37961            let extractc = -extractc;
37962            fnmsub = fmaf64(extracta, extractb, extractc);
37963        }
37964        simd_insert!(a, 0, fnmsub)
37965    }
37966}
37967
37968/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37969///
37970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
37971#[inline]
37972#[target_feature(enable = "avx512f")]
37973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37974#[cfg_attr(test, assert_instr(vfnmsub))]
37975pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37976    unsafe {
37977        let mut fnmsub: f64 = simd_extract!(c, 0);
37978        if (k & 0b00000001) != 0 {
37979            let extracta: f64 = simd_extract!(a, 0);
37980            let extracta = -extracta;
37981            let extractb: f64 = simd_extract!(b, 0);
37982            let extractc = -fnmsub;
37983            fnmsub = fmaf64(extracta, extractb, extractc);
37984        }
37985        simd_insert!(c, 0, fnmsub)
37986    }
37987}
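
// A minimal illustrative sketch (hypothetical helper, not part of the upstream
// API; `avx512f` is assumed to be enabled in the caller): fnmsub negates both the
// product and `c`, so with 2.0, 3.0 and 1.0 in the low lanes the result's low lane
// is -(2.0 * 3.0) - 1.0 = -7.0 when mask bit 0 is set.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_fnmsub_sd_low_lane() -> f64 {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    _mm_cvtsd_f64(_mm_maskz_fnmsub_sd(0b1, a, b, c)) // -7.0
}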
37988
37989/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37990///
37991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37997///
37998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
37999#[inline]
38000#[target_feature(enable = "avx512f")]
38001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38002#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38003#[rustc_legacy_const_generics(2)]
38004pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38005    unsafe {
38006        static_assert_rounding!(ROUNDING);
38007        let a = a.as_f32x4();
38008        let b = b.as_f32x4();
38009        let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38010        transmute(r)
38011    }
38012}
38013
38014/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38015///
38016/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38017/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38018/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38019/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38020/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38021/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38022///
38023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
38024#[inline]
38025#[target_feature(enable = "avx512f")]
38026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38027#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38028#[rustc_legacy_const_generics(4)]
38029pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
38030    src: __m128,
38031    k: __mmask8,
38032    a: __m128,
38033    b: __m128,
38034) -> __m128 {
38035    unsafe {
38036        static_assert_rounding!(ROUNDING);
38037        let a = a.as_f32x4();
38038        let b = b.as_f32x4();
38039        let src = src.as_f32x4();
38040        let r = vaddss(a, b, src, k, ROUNDING);
38041        transmute(r)
38042    }
38043}
38044
38045/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38046///
38047/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38048/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38049/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38050/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38051/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38052/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38053///
38054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
38055#[inline]
38056#[target_feature(enable = "avx512f")]
38057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38058#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38059#[rustc_legacy_const_generics(3)]
38060pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38061    unsafe {
38062        static_assert_rounding!(ROUNDING);
38063        let a = a.as_f32x4();
38064        let b = b.as_f32x4();
38065        let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
38066        transmute(r)
38067    }
38068}
38069
38070/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38071///
38072/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38073/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38074/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38075/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38076/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38077/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38078///
38079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
38080#[inline]
38081#[target_feature(enable = "avx512f")]
38082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38083#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38084#[rustc_legacy_const_generics(2)]
38085pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38086    unsafe {
38087        static_assert_rounding!(ROUNDING);
38088        let a = a.as_f64x2();
38089        let b = b.as_f64x2();
38090        let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38091        transmute(r)
38092    }
38093}
38094
38095/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38096///
38097/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38098/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38099/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38100/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38101/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38102/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38103///
38104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
38105#[inline]
38106#[target_feature(enable = "avx512f")]
38107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38108#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38109#[rustc_legacy_const_generics(4)]
38110pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
38111    src: __m128d,
38112    k: __mmask8,
38113    a: __m128d,
38114    b: __m128d,
38115) -> __m128d {
38116    unsafe {
38117        static_assert_rounding!(ROUNDING);
38118        let a = a.as_f64x2();
38119        let b = b.as_f64x2();
38120        let src = src.as_f64x2();
38121        let r = vaddsd(a, b, src, k, ROUNDING);
38122        transmute(r)
38123    }
38124}
38125
38126/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38127///
38128/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38129/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38130/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38131/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38132/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38133/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38134///
38135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
38136#[inline]
38137#[target_feature(enable = "avx512f")]
38138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38139#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38140#[rustc_legacy_const_generics(3)]
38141pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38142    unsafe {
38143        static_assert_rounding!(ROUNDING);
38144        let a = a.as_f64x2();
38145        let b = b.as_f64x2();
38146        let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
38147        transmute(r)
38148    }
38149}
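
// A minimal illustrative sketch (hypothetical helper, not part of the upstream
// API; `avx512f` is assumed to be enabled in the caller): the `_round` variants
// take the rounding mode as a const generic. As the option lists above show, a
// directed mode is combined with `_MM_FROUND_NO_EXC`, while
// `_MM_FROUND_CUR_DIRECTION` defers to MXCSR.RC.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_add_round_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> (__m128, __m128) {
    (
        // Round the low-lane sum toward negative infinity, exceptions suppressed.
        _mm_add_round_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b),
        // The same addition, write-masked and using the current MXCSR rounding mode.
        _mm_mask_add_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(src, k, a, b),
    )
}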
38150
38151/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38152///
38153/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38154/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38155/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38156/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38157/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38158/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38159///
38160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
38161#[inline]
38162#[target_feature(enable = "avx512f")]
38163#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38164#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38165#[rustc_legacy_const_generics(2)]
38166pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38167    unsafe {
38168        static_assert_rounding!(ROUNDING);
38169        let a = a.as_f32x4();
38170        let b = b.as_f32x4();
38171        let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38172        transmute(r)
38173    }
38174}
38175
38176/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38177///
38178/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38179/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38180/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38181/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38182/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38183/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38184///
38185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
38186#[inline]
38187#[target_feature(enable = "avx512f")]
38188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38189#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38190#[rustc_legacy_const_generics(4)]
38191pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
38192    src: __m128,
38193    k: __mmask8,
38194    a: __m128,
38195    b: __m128,
38196) -> __m128 {
38197    unsafe {
38198        static_assert_rounding!(ROUNDING);
38199        let a = a.as_f32x4();
38200        let b = b.as_f32x4();
38201        let src = src.as_f32x4();
38202        let r = vsubss(a, b, src, k, ROUNDING);
38203        transmute(r)
38204    }
38205}
38206
38207/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38208///
38209/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38210/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38211/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38212/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38213/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38214/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38215///
38216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
38217#[inline]
38218#[target_feature(enable = "avx512f")]
38219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38220#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38221#[rustc_legacy_const_generics(3)]
38222pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38223    unsafe {
38224        static_assert_rounding!(ROUNDING);
38225        let a = a.as_f32x4();
38226        let b = b.as_f32x4();
38227        let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
38228        transmute(r)
38229    }
38230}
38231
38232/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38233///
38234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38240///
38241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
38242#[inline]
38243#[target_feature(enable = "avx512f")]
38244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38245#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38246#[rustc_legacy_const_generics(2)]
38247pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38248    unsafe {
38249        static_assert_rounding!(ROUNDING);
38250        let a = a.as_f64x2();
38251        let b = b.as_f64x2();
38252        let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38253        transmute(r)
38254    }
38255}
38256
38257/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38258///
38259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38265///
38266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
38267#[inline]
38268#[target_feature(enable = "avx512f")]
38269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38270#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38271#[rustc_legacy_const_generics(4)]
38272pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
38273    src: __m128d,
38274    k: __mmask8,
38275    a: __m128d,
38276    b: __m128d,
38277) -> __m128d {
38278    unsafe {
38279        static_assert_rounding!(ROUNDING);
38280        let a = a.as_f64x2();
38281        let b = b.as_f64x2();
38282        let src = src.as_f64x2();
38283        let r = vsubsd(a, b, src, k, ROUNDING);
38284        transmute(r)
38285    }
38286}
38287
38288/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38289///
38290/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38291/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38292/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38293/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38294/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38295/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38296///
38297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
38298#[inline]
38299#[target_feature(enable = "avx512f")]
38300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38301#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38302#[rustc_legacy_const_generics(3)]
38303pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38304    unsafe {
38305        static_assert_rounding!(ROUNDING);
38306        let a = a.as_f64x2();
38307        let b = b.as_f64x2();
38308        let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
38309        transmute(r)
38310    }
38311}
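
// A minimal illustrative sketch (hypothetical helper, not part of the upstream
// API; `avx512f` is assumed to be enabled in the caller): truncate the low-lane
// difference toward zero regardless of the current MXCSR rounding mode.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_sub_round_sd(a: __m128d, b: __m128d) -> __m128d {
    _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b)
}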
38312
38313/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38314///
38315/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38316/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38317/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38318/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38319/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38320/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38321///
38322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
38323#[inline]
38324#[target_feature(enable = "avx512f")]
38325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38326#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38327#[rustc_legacy_const_generics(2)]
38328pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38329    unsafe {
38330        static_assert_rounding!(ROUNDING);
38331        let a = a.as_f32x4();
38332        let b = b.as_f32x4();
38333        let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38334        transmute(r)
38335    }
38336}
38337
38338/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38339///
38340/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38341/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38342/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38343/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38344/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38345/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38346///
38347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
38348#[inline]
38349#[target_feature(enable = "avx512f")]
38350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38351#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38352#[rustc_legacy_const_generics(4)]
38353pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
38354    src: __m128,
38355    k: __mmask8,
38356    a: __m128,
38357    b: __m128,
38358) -> __m128 {
38359    unsafe {
38360        static_assert_rounding!(ROUNDING);
38361        let a = a.as_f32x4();
38362        let b = b.as_f32x4();
38363        let src = src.as_f32x4();
38364        let r = vmulss(a, b, src, k, ROUNDING);
38365        transmute(r)
38366    }
38367}
38368
38369/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38370///
38371/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38372/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38373/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38374/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38375/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38376/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38377///
38378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
38379#[inline]
38380#[target_feature(enable = "avx512f")]
38381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38382#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38383#[rustc_legacy_const_generics(3)]
38384pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38385    unsafe {
38386        static_assert_rounding!(ROUNDING);
38387        let a = a.as_f32x4();
38388        let b = b.as_f32x4();
38389        let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
38390        transmute(r)
38391    }
38392}
38393
38394/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38395///
38396/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38397/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38398/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38399/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38400/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38401/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38402///
38403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
38404#[inline]
38405#[target_feature(enable = "avx512f")]
38406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38407#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38408#[rustc_legacy_const_generics(2)]
38409pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38410    unsafe {
38411        static_assert_rounding!(ROUNDING);
38412        let a = a.as_f64x2();
38413        let b = b.as_f64x2();
38414        let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38415        transmute(r)
38416    }
38417}
38418
38419/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38420///
38421/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38422/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38423/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38424/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38425/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38426/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38427///
38428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
38429#[inline]
38430#[target_feature(enable = "avx512f")]
38431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38432#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38433#[rustc_legacy_const_generics(4)]
38434pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
38435    src: __m128d,
38436    k: __mmask8,
38437    a: __m128d,
38438    b: __m128d,
38439) -> __m128d {
38440    unsafe {
38441        static_assert_rounding!(ROUNDING);
38442        let a = a.as_f64x2();
38443        let b = b.as_f64x2();
38444        let src = src.as_f64x2();
38445        let r = vmulsd(a, b, src, k, ROUNDING);
38446        transmute(r)
38447    }
38448}
38449
38450/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38451///
38452/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38453/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38454/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38455/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38456/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38457/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38458///
38459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
38460#[inline]
38461#[target_feature(enable = "avx512f")]
38462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38463#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38464#[rustc_legacy_const_generics(3)]
38465pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38466    unsafe {
38467        static_assert_rounding!(ROUNDING);
38468        let a = a.as_f64x2();
38469        let b = b.as_f64x2();
38470        let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
38471        transmute(r)
38472    }
38473}
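
// A minimal illustrative sketch (hypothetical helper, not part of the upstream
// API; `avx512f` is assumed to be enabled in the caller): a zero-masked multiply
// with round-to-nearest; when bit 0 of `k` is clear the low lane of the result is
// 0.0, and the upper three lanes still come from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(k, a, b)
}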
38474
38475/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38476///
38477/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38478/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38479/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38480/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38481/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38482/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38483///
38484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
38485#[inline]
38486#[target_feature(enable = "avx512f")]
38487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38488#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38489#[rustc_legacy_const_generics(2)]
38490pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38491    unsafe {
38492        static_assert_rounding!(ROUNDING);
38493        let a = a.as_f32x4();
38494        let b = b.as_f32x4();
38495        let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
38496        transmute(r)
38497    }
38498}
38499
38500/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38501///
38502/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38503/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38504/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38505/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38506/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38507/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38508///
38509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
38510#[inline]
38511#[target_feature(enable = "avx512f")]
38512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38513#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38514#[rustc_legacy_const_generics(4)]
38515pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
38516    src: __m128,
38517    k: __mmask8,
38518    a: __m128,
38519    b: __m128,
38520) -> __m128 {
38521    unsafe {
38522        static_assert_rounding!(ROUNDING);
38523        let a = a.as_f32x4();
38524        let b = b.as_f32x4();
38525        let src = src.as_f32x4();
38526        let r = vdivss(a, b, src, k, ROUNDING);
38527        transmute(r)
38528    }
38529}
38530
38531/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38532///
38533/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38534/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38535/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38536/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38537/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38538/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38539///
38540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
38541#[inline]
38542#[target_feature(enable = "avx512f")]
38543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38544#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38545#[rustc_legacy_const_generics(3)]
38546pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38547    unsafe {
38548        static_assert_rounding!(ROUNDING);
38549        let a = a.as_f32x4();
38550        let b = b.as_f32x4();
38551        let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
38552        transmute(r)
38553    }
38554}
38555
38556/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38557///
38558/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38559/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38560/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38561/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38562/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38563/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38564///
38565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
38566#[inline]
38567#[target_feature(enable = "avx512f")]
38568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38569#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38570#[rustc_legacy_const_generics(2)]
38571pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38572    unsafe {
38573        static_assert_rounding!(ROUNDING);
38574        let a = a.as_f64x2();
38575        let b = b.as_f64x2();
38576        let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
38577        transmute(r)
38578    }
38579}
38580
38581/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38582///
38583/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38584/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38585/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38586/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38587/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38588/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38589///
38590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
38591#[inline]
38592#[target_feature(enable = "avx512f")]
38593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38594#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38595#[rustc_legacy_const_generics(4)]
38596pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
38597    src: __m128d,
38598    k: __mmask8,
38599    a: __m128d,
38600    b: __m128d,
38601) -> __m128d {
38602    unsafe {
38603        static_assert_rounding!(ROUNDING);
38604        let a = a.as_f64x2();
38605        let b = b.as_f64x2();
38606        let src = src.as_f64x2();
38607        let r = vdivsd(a, b, src, k, ROUNDING);
38608        transmute(r)
38609    }
38610}
38611
38612/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38613///
38614/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38615/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38616/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38617/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38618/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38619/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38620///
38621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
38622#[inline]
38623#[target_feature(enable = "avx512f")]
38624#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38625#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38626#[rustc_legacy_const_generics(3)]
38627pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38628    unsafe {
38629        static_assert_rounding!(ROUNDING);
38630        let a = a.as_f64x2();
38631        let b = b.as_f64x2();
38632        let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
38633        transmute(r)
38634    }
38635}
38636
38637/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38638/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38639///
38640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
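///
/// Illustrative usage sketch (not part of the original documentation); assumes a nightly
/// toolchain with `feature(stdarch_x86_avx512)` and an `avx512f`-enabled caller. The `demo`
/// wrapper is hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m128 {
///     let a = _mm_set_ps(3.0, 2.0, 1.0, -4.0);
///     let b = _mm_set_ss(7.0);
///     // Lower lane: max(-4.0, 7.0) = 7.0; upper three lanes copied from `a`.
///     _mm_max_round_ss::<_MM_FROUND_NO_EXC>(a, b)
/// }
/// ```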
38641#[inline]
38642#[target_feature(enable = "avx512f")]
38643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38644#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38645#[rustc_legacy_const_generics(2)]
38646pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38647    unsafe {
38648        static_assert_sae!(SAE);
38649        let a = a.as_f32x4();
38650        let b = b.as_f32x4();
38651        let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
38652        transmute(r)
38653    }
38654}
38655
38656/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38657/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38658///
38659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
38660#[inline]
38661#[target_feature(enable = "avx512f")]
38662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38663#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38664#[rustc_legacy_const_generics(4)]
38665pub fn _mm_mask_max_round_ss<const SAE: i32>(
38666    src: __m128,
38667    k: __mmask8,
38668    a: __m128,
38669    b: __m128,
38670) -> __m128 {
38671    unsafe {
38672        static_assert_sae!(SAE);
38673        let a = a.as_f32x4();
38674        let b = b.as_f32x4();
38675        let src = src.as_f32x4();
38676        let r = vmaxss(a, b, src, k, SAE);
38677        transmute(r)
38678    }
38679}
38680
38681/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38682/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38683///
38684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
38685#[inline]
38686#[target_feature(enable = "avx512f")]
38687#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38688#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38689#[rustc_legacy_const_generics(3)]
38690pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38691    unsafe {
38692        static_assert_sae!(SAE);
38693        let a = a.as_f32x4();
38694        let b = b.as_f32x4();
38695        let r = vmaxss(a, b, f32x4::ZERO, k, SAE);
38696        transmute(r)
38697    }
38698}
38699
38700/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38701/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38702///
38703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
38704#[inline]
38705#[target_feature(enable = "avx512f")]
38706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38707#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38708#[rustc_legacy_const_generics(2)]
38709pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38710    unsafe {
38711        static_assert_sae!(SAE);
38712        let a = a.as_f64x2();
38713        let b = b.as_f64x2();
38714        let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
38715        transmute(r)
38716    }
38717}
38718
38719/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38720/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38721///
38722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
38723#[inline]
38724#[target_feature(enable = "avx512f")]
38725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38726#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38727#[rustc_legacy_const_generics(4)]
38728pub fn _mm_mask_max_round_sd<const SAE: i32>(
38729    src: __m128d,
38730    k: __mmask8,
38731    a: __m128d,
38732    b: __m128d,
38733) -> __m128d {
38734    unsafe {
38735        static_assert_sae!(SAE);
38736        let a = a.as_f64x2();
38737        let b = b.as_f64x2();
38738        let src = src.as_f64x2();
38739        let r = vmaxsd(a, b, src, k, SAE);
38740        transmute(r)
38741    }
38742}
38743
38744/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38745/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38746///
38747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
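///
/// Illustrative usage sketch of the zeromask form (not part of the original documentation);
/// assumes nightly with `feature(stdarch_x86_avx512)` and an `avx512f`-enabled caller, with a
/// hypothetical `demo` wrapper.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m128d {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(5.0);
///     // Mask bit 0 is clear, so the lower lane is zeroed out; the upper lane still comes from `a`.
///     _mm_maskz_max_round_sd::<_MM_FROUND_NO_EXC>(0b0, a, b)
/// }
/// ```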
38748#[inline]
38749#[target_feature(enable = "avx512f")]
38750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38751#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38752#[rustc_legacy_const_generics(3)]
38753pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38754    unsafe {
38755        static_assert_sae!(SAE);
38756        let a = a.as_f64x2();
38757        let b = b.as_f64x2();
38758        let r = vmaxsd(a, b, f64x2::ZERO, k, SAE);
38759        transmute(r)
38760    }
38761}
38762
38763/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38764/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38765///
38766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
38767#[inline]
38768#[target_feature(enable = "avx512f")]
38769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38770#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38771#[rustc_legacy_const_generics(2)]
38772pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38773    unsafe {
38774        static_assert_sae!(SAE);
38775        let a = a.as_f32x4();
38776        let b = b.as_f32x4();
38777        let r = vminss(a, b, f32x4::ZERO, 0b1, SAE);
38778        transmute(r)
38779    }
38780}
38781
38782/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38783/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38784///
38785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
38786#[inline]
38787#[target_feature(enable = "avx512f")]
38788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38789#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38790#[rustc_legacy_const_generics(4)]
38791pub fn _mm_mask_min_round_ss<const SAE: i32>(
38792    src: __m128,
38793    k: __mmask8,
38794    a: __m128,
38795    b: __m128,
38796) -> __m128 {
38797    unsafe {
38798        static_assert_sae!(SAE);
38799        let a = a.as_f32x4();
38800        let b = b.as_f32x4();
38801        let src = src.as_f32x4();
38802        let r = vminss(a, b, src, k, SAE);
38803        transmute(r)
38804    }
38805}
38806
38807/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38808/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38809///
38810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
38811#[inline]
38812#[target_feature(enable = "avx512f")]
38813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38814#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38815#[rustc_legacy_const_generics(3)]
38816pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38817    unsafe {
38818        static_assert_sae!(SAE);
38819        let a = a.as_f32x4();
38820        let b = b.as_f32x4();
38821        let r = vminss(a, b, f32x4::ZERO, k, SAE);
38822        transmute(r)
38823    }
38824}
38825
38826/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38827/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38828///
38829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
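///
/// Illustrative usage sketch (not part of the original documentation); assumes nightly with
/// `feature(stdarch_x86_avx512)` and an `avx512f`-enabled caller, with a hypothetical `demo`
/// wrapper.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m128d {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(-5.0);
///     // Lower lane: min(2.0, -5.0) = -5.0; upper lane copied from `a`.
///     _mm_min_round_sd::<_MM_FROUND_NO_EXC>(a, b)
/// }
/// ```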
38830#[inline]
38831#[target_feature(enable = "avx512f")]
38832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38833#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38834#[rustc_legacy_const_generics(2)]
38835pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38836    unsafe {
38837        static_assert_sae!(SAE);
38838        let a = a.as_f64x2();
38839        let b = b.as_f64x2();
38840        let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
38841        transmute(r)
38842    }
38843}
38844
38845/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38846/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38847///
38848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
38849#[inline]
38850#[target_feature(enable = "avx512f")]
38851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38852#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38853#[rustc_legacy_const_generics(4)]
38854pub fn _mm_mask_min_round_sd<const SAE: i32>(
38855    src: __m128d,
38856    k: __mmask8,
38857    a: __m128d,
38858    b: __m128d,
38859) -> __m128d {
38860    unsafe {
38861        static_assert_sae!(SAE);
38862        let a = a.as_f64x2();
38863        let b = b.as_f64x2();
38864        let src = src.as_f64x2();
38865        let r = vminsd(a, b, src, k, SAE);
38866        transmute(r)
38867    }
38868}
38869
38870/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38871/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38872///
38873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
38874#[inline]
38875#[target_feature(enable = "avx512f")]
38876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38877#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38878#[rustc_legacy_const_generics(3)]
38879pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38880    unsafe {
38881        static_assert_sae!(SAE);
38882        let a = a.as_f64x2();
38883        let b = b.as_f64x2();
38884        let r = vminsd(a, b, f64x2::ZERO, k, SAE);
38885        transmute(r)
38886    }
38887}
38888
38889/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38890///
38891/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38892/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38893/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38894/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38895/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38896/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38897///
38898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
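///
/// Illustrative usage sketch (not part of the original documentation); assumes nightly with
/// `feature(stdarch_x86_avx512)` and an `avx512f`-enabled caller, with a hypothetical `demo`
/// wrapper.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m128 {
///     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
///     let b = _mm_set_ss(9.0);
///     // Lower lane: sqrt(9.0) = 3.0 with round-to-nearest; upper three lanes copied from `a`.
///     _mm_sqrt_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```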
38899#[inline]
38900#[target_feature(enable = "avx512f")]
38901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38902#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38903#[rustc_legacy_const_generics(2)]
38904pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38905    unsafe {
38906        static_assert_rounding!(ROUNDING);
38907        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
38908    }
38909}
38910
38911/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38912///
38913/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38914/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38915/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38916/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38917/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38918/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38919///
38920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
38921#[inline]
38922#[target_feature(enable = "avx512f")]
38923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38924#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38925#[rustc_legacy_const_generics(4)]
38926pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
38927    src: __m128,
38928    k: __mmask8,
38929    a: __m128,
38930    b: __m128,
38931) -> __m128 {
38932    unsafe {
38933        static_assert_rounding!(ROUNDING);
38934        vsqrtss(a, b, src, k, ROUNDING)
38935    }
38936}
38937
38938/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38939///
38940/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38941/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38942/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38943/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38944/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38945/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38946///
38947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
38948#[inline]
38949#[target_feature(enable = "avx512f")]
38950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38951#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38952#[rustc_legacy_const_generics(3)]
38953pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38954    unsafe {
38955        static_assert_rounding!(ROUNDING);
38956        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
38957    }
38958}
38959
38960/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38961///
38962/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38963/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38964/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38965/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38966/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38967/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38968///
38969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
38970#[inline]
38971#[target_feature(enable = "avx512f")]
38972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38973#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38974#[rustc_legacy_const_generics(2)]
38975pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38976    unsafe {
38977        static_assert_rounding!(ROUNDING);
38978        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
38979    }
38980}
38981
38982/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38983///
38984/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38985/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38986/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38987/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38988/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38989/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38990///
38991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
38992#[inline]
38993#[target_feature(enable = "avx512f")]
38994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38995#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38996#[rustc_legacy_const_generics(4)]
38997pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
38998    src: __m128d,
38999    k: __mmask8,
39000    a: __m128d,
39001    b: __m128d,
39002) -> __m128d {
39003    unsafe {
39004        static_assert_rounding!(ROUNDING);
39005        vsqrtsd(a, b, src, k, ROUNDING)
39006    }
39007}
39008
39009/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39010///
39011/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39012/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39013/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39014/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39015/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39016/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39017///
39018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
39019#[inline]
39020#[target_feature(enable = "avx512f")]
39021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39022#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
39023#[rustc_legacy_const_generics(3)]
39024pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
39025    k: __mmask8,
39026    a: __m128d,
39027    b: __m128d,
39028) -> __m128d {
39029    unsafe {
39030        static_assert_rounding!(ROUNDING);
39031        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
39032    }
39033}
39034
39035/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39036/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39037///
39038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
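///
/// Illustrative usage sketch (not part of the original documentation); assumes nightly with
/// `feature(stdarch_x86_avx512)` and an `avx512f`-enabled caller, with a hypothetical `demo`
/// wrapper.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m128 {
///     let a = _mm_setzero_ps();
///     let b = _mm_set_ss(10.0);
///     // Lower lane: floor(log2(10.0)) = 3.0; upper three lanes copied from `a` (all zero here).
///     _mm_getexp_round_ss::<_MM_FROUND_NO_EXC>(a, b)
/// }
/// ```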
39039#[inline]
39040#[target_feature(enable = "avx512f")]
39041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39042#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39043#[rustc_legacy_const_generics(2)]
39044pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39045    unsafe {
39046        static_assert_sae!(SAE);
39047        let a = a.as_f32x4();
39048        let b = b.as_f32x4();
39049        let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
39050        transmute(r)
39051    }
39052}
39053
39054/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39055/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39056///
39057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
39058#[inline]
39059#[target_feature(enable = "avx512f")]
39060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39061#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39062#[rustc_legacy_const_generics(4)]
39063pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
39064    src: __m128,
39065    k: __mmask8,
39066    a: __m128,
39067    b: __m128,
39068) -> __m128 {
39069    unsafe {
39070        static_assert_sae!(SAE);
39071        let a = a.as_f32x4();
39072        let b = b.as_f32x4();
39073        let src = src.as_f32x4();
39074        let r = vgetexpss(a, b, src, k, SAE);
39075        transmute(r)
39076    }
39077}
39078
39079/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39080/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39081///
39082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
39083#[inline]
39084#[target_feature(enable = "avx512f")]
39085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39086#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39087#[rustc_legacy_const_generics(3)]
39088pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39089    unsafe {
39090        static_assert_sae!(SAE);
39091        let a = a.as_f32x4();
39092        let b = b.as_f32x4();
39093        let r = vgetexpss(a, b, f32x4::ZERO, k, SAE);
39094        transmute(r)
39095    }
39096}
39097
39098/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39099/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39100///
39101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
39102#[inline]
39103#[target_feature(enable = "avx512f")]
39104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39105#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39106#[rustc_legacy_const_generics(2)]
39107pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39108    unsafe {
39109        static_assert_sae!(SAE);
39110        let a = a.as_f64x2();
39111        let b = b.as_f64x2();
39112        let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
39113        transmute(r)
39114    }
39115}
39116
39117/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39118/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39119///
39120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
39121#[inline]
39122#[target_feature(enable = "avx512f")]
39123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39124#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39125#[rustc_legacy_const_generics(4)]
39126pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
39127    src: __m128d,
39128    k: __mmask8,
39129    a: __m128d,
39130    b: __m128d,
39131) -> __m128d {
39132    unsafe {
39133        static_assert_sae!(SAE);
39134        let a = a.as_f64x2();
39135        let b = b.as_f64x2();
39136        let src = src.as_f64x2();
39137        let r = vgetexpsd(a, b, src, k, SAE);
39138        transmute(r)
39139    }
39140}
39141
39142/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39143/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39144///
39145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
39146#[inline]
39147#[target_feature(enable = "avx512f")]
39148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39149#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39150#[rustc_legacy_const_generics(3)]
39151pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39152    unsafe {
39153        static_assert_sae!(SAE);
39154        let a = a.as_f64x2();
39155        let b = b.as_f64x2();
39156        let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
39157        transmute(r)
39158    }
39159}
39160
39161/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39162/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39163///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39164///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39165///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39166///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39167/// The sign is determined by sc which can take the following values:\
39168///    _MM_MANT_SIGN_src     // sign = sign(src)\
39169///    _MM_MANT_SIGN_zero    // sign = 0\
39170///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39171/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39172///
39173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
39174#[inline]
39175#[target_feature(enable = "avx512f")]
39176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39177#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39178#[rustc_legacy_const_generics(2, 3, 4)]
39179pub fn _mm_getmant_round_ss<
39180    const NORM: _MM_MANTISSA_NORM_ENUM,
39181    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39182    const SAE: i32,
39183>(
39184    a: __m128,
39185    b: __m128,
39186) -> __m128 {
39187    unsafe {
39188        static_assert_uimm_bits!(NORM, 4);
39189        static_assert_uimm_bits!(SIGN, 2);
39190        static_assert_mantissas_sae!(SAE);
39191        let a = a.as_f32x4();
39192        let b = b.as_f32x4();
39193        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
39194        transmute(r)
39195    }
39196}
39197
39198/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39199/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39200///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39201///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39202///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39203///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39204/// The sign is determined by sc which can take the following values:\
39205///    _MM_MANT_SIGN_src     // sign = sign(src)\
39206///    _MM_MANT_SIGN_zero    // sign = 0\
39207///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39209///
39210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
39211#[inline]
39212#[target_feature(enable = "avx512f")]
39213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39214#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39215#[rustc_legacy_const_generics(4, 5, 6)]
39216pub fn _mm_mask_getmant_round_ss<
39217    const NORM: _MM_MANTISSA_NORM_ENUM,
39218    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39219    const SAE: i32,
39220>(
39221    src: __m128,
39222    k: __mmask8,
39223    a: __m128,
39224    b: __m128,
39225) -> __m128 {
39226    unsafe {
39227        static_assert_uimm_bits!(NORM, 4);
39228        static_assert_uimm_bits!(SIGN, 2);
39229        static_assert_mantissas_sae!(SAE);
39230        let a = a.as_f32x4();
39231        let b = b.as_f32x4();
39232        let src = src.as_f32x4();
39233        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
39234        transmute(r)
39235    }
39236}
39237
39238/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39239/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39240///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39241///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39242///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39243///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39244/// The sign is determined by sc which can take the following values:\
39245///    _MM_MANT_SIGN_src     // sign = sign(src)\
39246///    _MM_MANT_SIGN_zero    // sign = 0\
39247///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39248/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39249///
39250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
39251#[inline]
39252#[target_feature(enable = "avx512f")]
39253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39254#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39255#[rustc_legacy_const_generics(3, 4, 5)]
39256pub fn _mm_maskz_getmant_round_ss<
39257    const NORM: _MM_MANTISSA_NORM_ENUM,
39258    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39259    const SAE: i32,
39260>(
39261    k: __mmask8,
39262    a: __m128,
39263    b: __m128,
39264) -> __m128 {
39265    unsafe {
39266        static_assert_uimm_bits!(NORM, 4);
39267        static_assert_uimm_bits!(SIGN, 2);
39268        static_assert_mantissas_sae!(SAE);
39269        let a = a.as_f32x4();
39270        let b = b.as_f32x4();
39271        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
39272        transmute(r)
39273    }
39274}
39275
39276/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39277/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39278///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39279///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39280///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39281///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39282/// The sign is determined by sc which can take the following values:\
39283///    _MM_MANT_SIGN_src     // sign = sign(src)\
39284///    _MM_MANT_SIGN_zero    // sign = 0\
39285///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39286/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39287///
39288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
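///
/// Illustrative usage sketch (not part of the original documentation); it uses the interval and
/// sign constants named in the list above, assumes nightly with `feature(stdarch_x86_avx512)`
/// and an `avx512f`-enabled caller, and the `demo` wrapper is hypothetical.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m128d {
///     let a = _mm_set_sd(0.0);
///     let b = _mm_set_sd(12.0);
///     // Lower lane: 12.0 = 1.5 * 2^3, so the mantissa normalized to [1, 2) is 1.5;
///     // upper lane copied from `a`.
///     _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC>(a, b)
/// }
/// ```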
39289#[inline]
39290#[target_feature(enable = "avx512f")]
39291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39292#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39293#[rustc_legacy_const_generics(2, 3, 4)]
39294pub fn _mm_getmant_round_sd<
39295    const NORM: _MM_MANTISSA_NORM_ENUM,
39296    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39297    const SAE: i32,
39298>(
39299    a: __m128d,
39300    b: __m128d,
39301) -> __m128d {
39302    unsafe {
39303        static_assert_uimm_bits!(NORM, 4);
39304        static_assert_uimm_bits!(SIGN, 2);
39305        static_assert_mantissas_sae!(SAE);
39306        let a = a.as_f64x2();
39307        let b = b.as_f64x2();
39308        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
39309        transmute(r)
39310    }
39311}
39312
39313/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39314/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39315///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39316///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39317///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39318///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39319/// The sign is determined by sc which can take the following values:\
39320///    _MM_MANT_SIGN_src     // sign = sign(src)\
39321///    _MM_MANT_SIGN_zero    // sign = 0\
39322///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39323/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39324///
39325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
39326#[inline]
39327#[target_feature(enable = "avx512f")]
39328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39329#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39330#[rustc_legacy_const_generics(4, 5, 6)]
39331pub fn _mm_mask_getmant_round_sd<
39332    const NORM: _MM_MANTISSA_NORM_ENUM,
39333    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39334    const SAE: i32,
39335>(
39336    src: __m128d,
39337    k: __mmask8,
39338    a: __m128d,
39339    b: __m128d,
39340) -> __m128d {
39341    unsafe {
39342        static_assert_uimm_bits!(NORM, 4);
39343        static_assert_uimm_bits!(SIGN, 2);
39344        static_assert_mantissas_sae!(SAE);
39345        let a = a.as_f64x2();
39346        let b = b.as_f64x2();
39347        let src = src.as_f64x2();
39348        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
39349        transmute(r)
39350    }
39351}
39352
39353/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39354/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39355///    _MM_MANT_NORM_1_2     // interval [1, 2)\
39356///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
39357///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
39358///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39359/// The sign is determined by sc which can take the following values:\
39360///    _MM_MANT_SIGN_src     // sign = sign(src)\
39361///    _MM_MANT_SIGN_zero    // sign = 0\
39362///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
39363/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39364///
39365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
39366#[inline]
39367#[target_feature(enable = "avx512f")]
39368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39369#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39370#[rustc_legacy_const_generics(3, 4, 5)]
39371pub fn _mm_maskz_getmant_round_sd<
39372    const NORM: _MM_MANTISSA_NORM_ENUM,
39373    const SIGN: _MM_MANTISSA_SIGN_ENUM,
39374    const SAE: i32,
39375>(
39376    k: __mmask8,
39377    a: __m128d,
39378    b: __m128d,
39379) -> __m128d {
39380    unsafe {
39381        static_assert_uimm_bits!(NORM, 4);
39382        static_assert_uimm_bits!(SIGN, 2);
39383        static_assert_mantissas_sae!(SAE);
39384        let a = a.as_f64x2();
39385        let b = b.as_f64x2();
39386        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
39387        transmute(r)
39388    }
39389}
39390
39391/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39392/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39393/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39394/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39395/// * [`_MM_FROUND_TO_POS_INF`] : round up
39396/// * [`_MM_FROUND_TO_ZERO`] : truncate
39397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39398///
39399/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
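///
/// Illustrative usage sketch (not part of the original documentation); assumes nightly with
/// `feature(stdarch_x86_avx512)` and an `avx512f`-enabled caller, with a hypothetical `demo`
/// wrapper.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m128 {
///     let a = _mm_setzero_ps();
///     let b = _mm_set_ss(2.3);
///     // IMM8 = 0 keeps 0 fraction bits and uses round-to-nearest, so the lower lane
///     // becomes 2.0; the upper three lanes are copied from `a`.
///     _mm_roundscale_round_ss::<0, _MM_FROUND_NO_EXC>(a, b)
/// }
/// ```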
39401#[inline]
39402#[target_feature(enable = "avx512f")]
39403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39404#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39405#[rustc_legacy_const_generics(2, 3)]
39406pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39407    unsafe {
39408        static_assert_uimm_bits!(IMM8, 8);
39409        static_assert_mantissas_sae!(SAE);
39410        let a = a.as_f32x4();
39411        let b = b.as_f32x4();
39412        let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
39413        transmute(r)
39414    }
39415}
39416
39417/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39418/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39419/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39420/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39421/// * [`_MM_FROUND_TO_POS_INF`] : round up
39422/// * [`_MM_FROUND_TO_ZERO`] : truncate
39423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39424///
39425/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
39427#[inline]
39428#[target_feature(enable = "avx512f")]
39429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39430#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39431#[rustc_legacy_const_generics(4, 5)]
39432pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39433    src: __m128,
39434    k: __mmask8,
39435    a: __m128,
39436    b: __m128,
39437) -> __m128 {
39438    unsafe {
39439        static_assert_uimm_bits!(IMM8, 8);
39440        static_assert_mantissas_sae!(SAE);
39441        let a = a.as_f32x4();
39442        let b = b.as_f32x4();
39443        let src = src.as_f32x4();
39444        let r = vrndscaless(a, b, src, k, IMM8, SAE);
39445        transmute(r)
39446    }
39447}
39448
39449/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39450/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39451/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39452/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39453/// * [`_MM_FROUND_TO_POS_INF`] : round up
39454/// * [`_MM_FROUND_TO_ZERO`] : truncate
39455/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39456///
39457/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
39459#[inline]
39460#[target_feature(enable = "avx512f")]
39461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39462#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39463#[rustc_legacy_const_generics(3, 4)]
39464pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39465    k: __mmask8,
39466    a: __m128,
39467    b: __m128,
39468) -> __m128 {
39469    unsafe {
39470        static_assert_uimm_bits!(IMM8, 8);
39471        static_assert_mantissas_sae!(SAE);
39472        let a = a.as_f32x4();
39473        let b = b.as_f32x4();
39474        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
39475        transmute(r)
39476    }
39477}
39478
39479/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39480/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39481/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39482/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39483/// * [`_MM_FROUND_TO_POS_INF`] : round up
39484/// * [`_MM_FROUND_TO_ZERO`] : truncate
39485/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39486///
39487/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
39489#[inline]
39490#[target_feature(enable = "avx512f")]
39491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39492#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39493#[rustc_legacy_const_generics(2, 3)]
39494pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39495    unsafe {
39496        static_assert_uimm_bits!(IMM8, 8);
39497        static_assert_mantissas_sae!(SAE);
39498        let a = a.as_f64x2();
39499        let b = b.as_f64x2();
39500        let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
39501        transmute(r)
39502    }
39503}
39504
39505/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39506/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39507/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39508/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39509/// * [`_MM_FROUND_TO_POS_INF`] : round up
39510/// * [`_MM_FROUND_TO_ZERO`] : truncate
39511/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39512///
39513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
39515#[inline]
39516#[target_feature(enable = "avx512f")]
39517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39518#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39519#[rustc_legacy_const_generics(4, 5)]
39520pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39521    src: __m128d,
39522    k: __mmask8,
39523    a: __m128d,
39524    b: __m128d,
39525) -> __m128d {
39526    unsafe {
39527        static_assert_uimm_bits!(IMM8, 8);
39528        static_assert_mantissas_sae!(SAE);
39529        let a = a.as_f64x2();
39530        let b = b.as_f64x2();
39531        let src = src.as_f64x2();
39532        let r = vrndscalesd(a, b, src, k, IMM8, SAE);
39533        transmute(r)
39534    }
39535}
39536
39537/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39538/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39539/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39540/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39541/// * [`_MM_FROUND_TO_POS_INF`] : round up
39542/// * [`_MM_FROUND_TO_ZERO`] : truncate
39543/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39544///
39545/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
39546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
39547#[inline]
39548#[target_feature(enable = "avx512f")]
39549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39550#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39551#[rustc_legacy_const_generics(3, 4)]
39552pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39553    k: __mmask8,
39554    a: __m128d,
39555    b: __m128d,
39556) -> __m128d {
39557    unsafe {
39558        static_assert_uimm_bits!(IMM8, 8);
39559        static_assert_mantissas_sae!(SAE);
39560        let a = a.as_f64x2();
39561        let b = b.as_f64x2();
39562        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
39563        transmute(r)
39564    }
39565}
39566
39567/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39568///
39569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39575///
39576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
39577#[inline]
39578#[target_feature(enable = "avx512f")]
39579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39580#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39581#[rustc_legacy_const_generics(2)]
39582pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39583    unsafe {
39584        static_assert_rounding!(ROUNDING);
39585        let a = a.as_f32x4();
39586        let b = b.as_f32x4();
39587        let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
39588        transmute(r)
39589    }
39590}
39591
39592/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39593///
39594/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39595/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39596/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39597/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39598/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39599/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39600///
39601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
39602#[inline]
39603#[target_feature(enable = "avx512f")]
39604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39605#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39606#[rustc_legacy_const_generics(4)]
39607pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
39608    src: __m128,
39609    k: __mmask8,
39610    a: __m128,
39611    b: __m128,
39612) -> __m128 {
39613    unsafe {
39614        static_assert_rounding!(ROUNDING);
39615        let a = a.as_f32x4();
39616        let b = b.as_f32x4();
39617        let src = src.as_f32x4();
39618        let r = vscalefss(a, b, src, k, ROUNDING);
39619        transmute(r)
39620    }
39621}
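
// Illustrative sketch (not part of the upstream source): the writemask form
// keeps `src`'s lower lane when mask bit 0 is clear. The helper name is
// hypothetical; assumes a nightly toolchain with `stdarch_x86_avx512` and an
// AVX-512F-capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_scalef_round_ss() -> (f32, f32) {
    let src = _mm_set_ss(7.0);
    let a = _mm_set_ss(3.0);
    let b = _mm_set_ss(2.0);
    const RC: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    // k = 1: lower lane is scalef(3.0, 2.0) = 12.0.
    let kept = _mm_mask_scalef_round_ss::<{ RC }>(src, 0b1, a, b);
    // k = 0: lower lane is copied from `src`, i.e. 7.0.
    let masked = _mm_mask_scalef_round_ss::<{ RC }>(src, 0b0, a, b);
    (_mm_cvtss_f32(kept), _mm_cvtss_f32(masked))
}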
39622
39623/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39624///
39625/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39626/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39627/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39628/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39629/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39630/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39631///
39632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
39633#[inline]
39634#[target_feature(enable = "avx512f")]
39635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39636#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39637#[rustc_legacy_const_generics(3)]
39638pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39639    unsafe {
39640        static_assert_rounding!(ROUNDING);
39641        let a = a.as_f32x4();
39642        let b = b.as_f32x4();
39643        let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
39644        transmute(r)
39645    }
39646}
39647
39648/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39649///
39650/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39651/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39652/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39653/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39654/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39655/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39656///
39657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
39658#[inline]
39659#[target_feature(enable = "avx512f")]
39660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39661#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39662#[rustc_legacy_const_generics(2)]
39663pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39664    unsafe {
39665        static_assert_rounding!(ROUNDING);
39666        let a = a.as_f64x2();
39667        let b = b.as_f64x2();
39668        let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
39669        transmute(r)
39670    }
39671}
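
// Illustrative sketch (not part of the upstream source): the double-precision
// scalef computes `a * 2^floor(b)` in the lower lane; `_MM_FROUND_CUR_DIRECTION`
// defers to `MXCSR.RC`. The helper name is hypothetical; assumes a nightly
// toolchain with `stdarch_x86_avx512` and an AVX-512F-capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_scalef_round_sd() -> f64 {
    let a = _mm_set_sd(1.5);
    let b = _mm_set_sd(3.0);
    // Lower lane: 1.5 * 2^3 = 12.0; the upper lane is copied from `a`.
    let r = _mm_scalef_round_sd::<{ _MM_FROUND_CUR_DIRECTION }>(a, b);
    _mm_cvtsd_f64(r)
}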
39672
39673/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39674///
39675/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39676/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39677/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39678/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39679/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39680/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39681///
39682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
39683#[inline]
39684#[target_feature(enable = "avx512f")]
39685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39686#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39687#[rustc_legacy_const_generics(4)]
39688pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
39689    src: __m128d,
39690    k: __mmask8,
39691    a: __m128d,
39692    b: __m128d,
39693) -> __m128d {
39694    unsafe {
        static_assert_rounding!(ROUNDING);
39695        let a = a.as_f64x2();
39696        let b = b.as_f64x2();
39697        let src = src.as_f64x2();
39698        let r = vscalefsd(a, b, src, k, ROUNDING);
39699        transmute(r)
39700    }
39701}
39702
39703/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39704///
39705/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39706/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39707/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39708/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39709/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39710/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39711///
39712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
39713#[inline]
39714#[target_feature(enable = "avx512f")]
39715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39716#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39717#[rustc_legacy_const_generics(3)]
39718pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
39719    k: __mmask8,
39720    a: __m128d,
39721    b: __m128d,
39722) -> __m128d {
39723    unsafe {
39724        static_assert_rounding!(ROUNDING);
39725        let a = a.as_f64x2();
39726        let b = b.as_f64x2();
39727        let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
39728        transmute(r)
39729    }
39730}
39731
39732/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39733///
39734/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39735/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39736/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39737/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39738/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39739/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39740///
39741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
39742#[inline]
39743#[target_feature(enable = "avx512f")]
39744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39745#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39746#[rustc_legacy_const_generics(3)]
39747pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39748    unsafe {
39749        static_assert_rounding!(ROUNDING);
39750        let extracta: f32 = simd_extract!(a, 0);
39751        let extractb: f32 = simd_extract!(b, 0);
39752        let extractc: f32 = simd_extract!(c, 0);
39753        let r = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39754        simd_insert!(a, 0, r)
39755    }
39756}
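
// Illustrative sketch (not part of the upstream source): `_mm_fmadd_round_ss`
// computes `a * b + c` in the lowest lane as a single fused operation, here
// with round-to-nearest and exceptions suppressed. The helper name is
// hypothetical; assumes a nightly toolchain with `stdarch_x86_avx512` and an
// AVX-512F-capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_fmadd_round_ss() -> f32 {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(1.0);
    // Lower lane: 2.0 * 3.0 + 1.0 = 7.0; the upper lanes are copied from `a`.
    let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
    _mm_cvtss_f32(r)
}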
39757
39758/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39759///
39760/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39761/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39762/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39763/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39764/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39765/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39766///
39767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
39768#[inline]
39769#[target_feature(enable = "avx512f")]
39770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39771#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39772#[rustc_legacy_const_generics(4)]
39773pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
39774    a: __m128,
39775    k: __mmask8,
39776    b: __m128,
39777    c: __m128,
39778) -> __m128 {
39779    unsafe {
39780        static_assert_rounding!(ROUNDING);
39781        let mut fmadd: f32 = simd_extract!(a, 0);
39782        if (k & 0b00000001) != 0 {
39783            let extractb: f32 = simd_extract!(b, 0);
39784            let extractc: f32 = simd_extract!(c, 0);
39785            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
39786        }
39787        simd_insert!(a, 0, fmadd)
39788    }
39789}
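
// Illustrative sketch (not part of the upstream source): with the writemask
// form, a cleared mask bit 0 leaves `a`'s lower lane untouched. The helper
// name is hypothetical; assumes a nightly toolchain with `stdarch_x86_avx512`
// and an AVX-512F-capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_fmadd_round_ss() -> (f32, f32) {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(1.0);
    const RC: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    // k = 1: lower lane is 2.0 * 3.0 + 1.0 = 7.0.
    let kept = _mm_mask_fmadd_round_ss::<{ RC }>(a, 0b1, b, c);
    // k = 0: lower lane stays 2.0 (copied from `a`).
    let masked = _mm_mask_fmadd_round_ss::<{ RC }>(a, 0b0, b, c);
    (_mm_cvtss_f32(kept), _mm_cvtss_f32(masked))
}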
39790
39791/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39792///
39793/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39794/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39795/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39796/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39797/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39798/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39799///
39800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
39801#[inline]
39802#[target_feature(enable = "avx512f")]
39803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39804#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39805#[rustc_legacy_const_generics(4)]
39806pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
39807    k: __mmask8,
39808    a: __m128,
39809    b: __m128,
39810    c: __m128,
39811) -> __m128 {
39812    unsafe {
39813        static_assert_rounding!(ROUNDING);
39814        let mut fmadd: f32 = 0.;
39815        if (k & 0b00000001) != 0 {
39816            let extracta: f32 = simd_extract!(a, 0);
39817            let extractb: f32 = simd_extract!(b, 0);
39818            let extractc: f32 = simd_extract!(c, 0);
39819            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39820        }
39821        simd_insert!(a, 0, fmadd)
39822    }
39823}
39824
39825/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
39826///
39827/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39828/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39829/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39830/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39831/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39833///
39834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
39835#[inline]
39836#[target_feature(enable = "avx512f")]
39837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39838#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39839#[rustc_legacy_const_generics(4)]
39840pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
39841    a: __m128,
39842    b: __m128,
39843    c: __m128,
39844    k: __mmask8,
39845) -> __m128 {
39846    unsafe {
39847        static_assert_rounding!(ROUNDING);
39848        let mut fmadd: f32 = simd_extract!(c, 0);
39849        if (k & 0b00000001) != 0 {
39850            let extracta: f32 = simd_extract!(a, 0);
39851            let extractb: f32 = simd_extract!(b, 0);
39852            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
39853        }
39854        simd_insert!(c, 0, fmadd)
39855    }
39856}
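
// Illustrative sketch (not part of the upstream source): the `mask3` form
// writes into `c`'s lanes, so a cleared mask bit 0 keeps `c`'s lower element
// and the upper lanes always come from `c`. The helper name is hypothetical;
// assumes a nightly toolchain with `stdarch_x86_avx512` and an AVX-512F-capable
// CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask3_fmadd_round_ss() -> (f32, f32) {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(1.0);
    const RC: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    // k = 1: lower lane is 2.0 * 3.0 + 1.0 = 7.0.
    let kept = _mm_mask3_fmadd_round_ss::<{ RC }>(a, b, c, 0b1);
    // k = 0: lower lane stays 1.0 (copied from `c`).
    let masked = _mm_mask3_fmadd_round_ss::<{ RC }>(a, b, c, 0b0);
    (_mm_cvtss_f32(kept), _mm_cvtss_f32(masked))
}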
39857
39858/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39859///
39860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39866///
39867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
39868#[inline]
39869#[target_feature(enable = "avx512f")]
39870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39871#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39872#[rustc_legacy_const_generics(3)]
39873pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39874    unsafe {
39875        static_assert_rounding!(ROUNDING);
39876        let extracta: f64 = simd_extract!(a, 0);
39877        let extractb: f64 = simd_extract!(b, 0);
39878        let extractc: f64 = simd_extract!(c, 0);
39879        let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39880        simd_insert!(a, 0, fmadd)
39881    }
39882}
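
// Illustrative sketch (not part of the upstream source): the double-precision
// counterpart; the lower lane is `a * b + c` and the upper lane is copied from
// `a`. The helper name is hypothetical; assumes a nightly toolchain with
// `stdarch_x86_avx512` and an AVX-512F-capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_fmadd_round_sd() -> f64 {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    // Lower lane: 2.0 * 3.0 + 1.0 = 7.0.
    let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
    _mm_cvtsd_f64(r)
}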
39883
39884/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39885///
39886/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39887/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39888/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39889/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39890/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39891/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39892///
39893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
39894#[inline]
39895#[target_feature(enable = "avx512f")]
39896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39897#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39898#[rustc_legacy_const_generics(4)]
39899pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
39900    a: __m128d,
39901    k: __mmask8,
39902    b: __m128d,
39903    c: __m128d,
39904) -> __m128d {
39905    unsafe {
39906        static_assert_rounding!(ROUNDING);
39907        let mut fmadd: f64 = simd_extract!(a, 0);
39908        if (k & 0b00000001) != 0 {
39909            let extractb: f64 = simd_extract!(b, 0);
39910            let extractc: f64 = simd_extract!(c, 0);
39911            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
39912        }
39913        simd_insert!(a, 0, fmadd)
39914    }
39915}
39916
39917/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39918///
39919/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39920/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39921/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39922/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39923/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39924/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39925///
39926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
39927#[inline]
39928#[target_feature(enable = "avx512f")]
39929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39930#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39931#[rustc_legacy_const_generics(4)]
39932pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
39933    k: __mmask8,
39934    a: __m128d,
39935    b: __m128d,
39936    c: __m128d,
39937) -> __m128d {
39938    unsafe {
39939        static_assert_rounding!(ROUNDING);
39940        let mut fmadd: f64 = 0.;
39941        if (k & 0b00000001) != 0 {
39942            let extracta: f64 = simd_extract!(a, 0);
39943            let extractb: f64 = simd_extract!(b, 0);
39944            let extractc: f64 = simd_extract!(c, 0);
39945            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39946        }
39947        simd_insert!(a, 0, fmadd)
39948    }
39949}
39950
39951/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
39952///
39953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39959///
39960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
39961#[inline]
39962#[target_feature(enable = "avx512f")]
39963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39964#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39965#[rustc_legacy_const_generics(4)]
39966pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
39967    a: __m128d,
39968    b: __m128d,
39969    c: __m128d,
39970    k: __mmask8,
39971) -> __m128d {
39972    unsafe {
39973        static_assert_rounding!(ROUNDING);
39974        let mut fmadd: f64 = simd_extract!(c, 0);
39975        if (k & 0b00000001) != 0 {
39976            let extracta: f64 = simd_extract!(a, 0);
39977            let extractb: f64 = simd_extract!(b, 0);
39978            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
39979        }
39980        simd_insert!(c, 0, fmadd)
39981    }
39982}
39983
39984/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39985///
39986/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39987/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39988/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39989/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39990/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39991/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39992///
39993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
39994#[inline]
39995#[target_feature(enable = "avx512f")]
39996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39997#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39998#[rustc_legacy_const_generics(3)]
39999pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40000    unsafe {
40001        static_assert_rounding!(ROUNDING);
40002        let extracta: f32 = simd_extract!(a, 0);
40003        let extractb: f32 = simd_extract!(b, 0);
40004        let extractc: f32 = simd_extract!(c, 0);
40005        let extractc = -extractc;
40006        let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40007        simd_insert!(a, 0, fmsub)
40008    }
40009}
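
// Illustrative sketch (not part of the upstream source): `_mm_fmsub_round_ss`
// computes `a * b - c` in the lowest lane. The helper name is hypothetical;
// assumes a nightly toolchain with `stdarch_x86_avx512` and an AVX-512F-capable
// CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_fmsub_round_ss() -> f32 {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(1.0);
    // Lower lane: 2.0 * 3.0 - 1.0 = 5.0; the upper lanes are copied from `a`.
    let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
    _mm_cvtss_f32(r)
}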
40010
40011/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40012///
40013/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40014/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40015/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40016/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40017/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40018/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40019///
40020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
40021#[inline]
40022#[target_feature(enable = "avx512f")]
40023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40024#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40025#[rustc_legacy_const_generics(4)]
40026pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
40027    a: __m128,
40028    k: __mmask8,
40029    b: __m128,
40030    c: __m128,
40031) -> __m128 {
40032    unsafe {
40033        static_assert_rounding!(ROUNDING);
40034        let mut fmsub: f32 = simd_extract!(a, 0);
40035        if (k & 0b00000001) != 0 {
40036            let extractb: f32 = simd_extract!(b, 0);
40037            let extractc: f32 = simd_extract!(c, 0);
40038            let extractc = -extractc;
40039            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
40040        }
40041        simd_insert!(a, 0, fmsub)
40042    }
40043}
40044
40045/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40046///
40047/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40048/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40049/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40050/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40051/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40052/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40053///
40054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
40055#[inline]
40056#[target_feature(enable = "avx512f")]
40057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40058#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40059#[rustc_legacy_const_generics(4)]
40060pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
40061    k: __mmask8,
40062    a: __m128,
40063    b: __m128,
40064    c: __m128,
40065) -> __m128 {
40066    unsafe {
40067        static_assert_rounding!(ROUNDING);
40068        let mut fmsub: f32 = 0.;
40069        if (k & 0b00000001) != 0 {
40070            let extracta: f32 = simd_extract!(a, 0);
40071            let extractb: f32 = simd_extract!(b, 0);
40072            let extractc: f32 = simd_extract!(c, 0);
40073            let extractc = -extractc;
40074            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40075        }
40076        simd_insert!(a, 0, fmsub)
40077    }
40078}
40079
40080/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40081///
40082/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40083/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40084/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40085/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40086/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40087/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40088///
40089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
40090#[inline]
40091#[target_feature(enable = "avx512f")]
40092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40093#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40094#[rustc_legacy_const_generics(4)]
40095pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
40096    a: __m128,
40097    b: __m128,
40098    c: __m128,
40099    k: __mmask8,
40100) -> __m128 {
40101    unsafe {
40102        static_assert_rounding!(ROUNDING);
40103        let mut fmsub: f32 = simd_extract!(c, 0);
40104        if (k & 0b00000001) != 0 {
40105            let extracta: f32 = simd_extract!(a, 0);
40106            let extractb: f32 = simd_extract!(b, 0);
40107            let extractc = -fmsub;
40108            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40109        }
40110        simd_insert!(c, 0, fmsub)
40111    }
40112}
40113
40114/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40115///
40116/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40117/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40118/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40119/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40120/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40121/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40122///
40123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
40124#[inline]
40125#[target_feature(enable = "avx512f")]
40126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40127#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40128#[rustc_legacy_const_generics(3)]
40129pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40130    unsafe {
40131        static_assert_rounding!(ROUNDING);
40132        let extracta: f64 = simd_extract!(a, 0);
40133        let extractb: f64 = simd_extract!(b, 0);
40134        let extractc: f64 = simd_extract!(c, 0);
40135        let extractc = -extractc;
40136        let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40137        simd_insert!(a, 0, fmsub)
40138    }
40139}
40140
40141/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40142///
40143/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40144/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40145/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40146/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40147/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40148/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40149///
40150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
40151#[inline]
40152#[target_feature(enable = "avx512f")]
40153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40154#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40155#[rustc_legacy_const_generics(4)]
40156pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
40157    a: __m128d,
40158    k: __mmask8,
40159    b: __m128d,
40160    c: __m128d,
40161) -> __m128d {
40162    unsafe {
40163        static_assert_rounding!(ROUNDING);
40164        let mut fmsub: f64 = simd_extract!(a, 0);
40165        if (k & 0b00000001) != 0 {
40166            let extractb: f64 = simd_extract!(b, 0);
40167            let extractc: f64 = simd_extract!(c, 0);
40168            let extractc = -extractc;
40169            fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
40170        }
40171        simd_insert!(a, 0, fmsub)
40172    }
40173}
40174
40175/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40176///
40177/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40178/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40179/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40180/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40181/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40182/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40183///
40184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
40185#[inline]
40186#[target_feature(enable = "avx512f")]
40187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40188#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40189#[rustc_legacy_const_generics(4)]
40190pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
40191    k: __mmask8,
40192    a: __m128d,
40193    b: __m128d,
40194    c: __m128d,
40195) -> __m128d {
40196    unsafe {
40197        static_assert_rounding!(ROUNDING);
40198        let mut fmsub: f64 = 0.;
40199        if (k & 0b00000001) != 0 {
40200            let extracta: f64 = simd_extract!(a, 0);
40201            let extractb: f64 = simd_extract!(b, 0);
40202            let extractc: f64 = simd_extract!(c, 0);
40203            let extractc = -extractc;
40204            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40205        }
40206        simd_insert!(a, 0, fmsub)
40207    }
40208}
40209
40210/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40211///
40212/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40213/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40214/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40215/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40216/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40217/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40218///
40219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
40220#[inline]
40221#[target_feature(enable = "avx512f")]
40222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40223#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40224#[rustc_legacy_const_generics(4)]
40225pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
40226    a: __m128d,
40227    b: __m128d,
40228    c: __m128d,
40229    k: __mmask8,
40230) -> __m128d {
40231    unsafe {
40232        static_assert_rounding!(ROUNDING);
40233        let mut fmsub: f64 = simd_extract!(c, 0);
40234        if (k & 0b00000001) != 0 {
40235            let extracta: f64 = simd_extract!(a, 0);
40236            let extractb: f64 = simd_extract!(b, 0);
40237            let extractc = -fmsub;
40238            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40239        }
40240        simd_insert!(c, 0, fmsub)
40241    }
40242}
40243
40244/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40245///
40246/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40247/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40248/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40249/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40250/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40251/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40252///
40253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
40254#[inline]
40255#[target_feature(enable = "avx512f")]
40256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40257#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40258#[rustc_legacy_const_generics(3)]
40259pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40260    unsafe {
40261        static_assert_rounding!(ROUNDING);
40262        let extracta: f32 = simd_extract!(a, 0);
40263        let extracta = -extracta;
40264        let extractb: f32 = simd_extract!(b, 0);
40265        let extractc: f32 = simd_extract!(c, 0);
40266        let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40267        simd_insert!(a, 0, fnmadd)
40268    }
40269}
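
// Illustrative sketch (not part of the upstream source): `_mm_fnmadd_round_ss`
// computes `-(a * b) + c` in the lowest lane. The helper name is hypothetical;
// assumes a nightly toolchain with `stdarch_x86_avx512` and an AVX-512F-capable
// CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_fnmadd_round_ss() -> f32 {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(1.0);
    // Lower lane: -(2.0 * 3.0) + 1.0 = -5.0; the upper lanes are copied from `a`.
    let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
    _mm_cvtss_f32(r)
}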
40270
40271/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40272///
40273/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40274/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40275/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40276/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40277/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40278/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40279///
40280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
40281#[inline]
40282#[target_feature(enable = "avx512f")]
40283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40284#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40285#[rustc_legacy_const_generics(4)]
40286pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
40287    a: __m128,
40288    k: __mmask8,
40289    b: __m128,
40290    c: __m128,
40291) -> __m128 {
40292    unsafe {
40293        static_assert_rounding!(ROUNDING);
40294        let mut fnmadd: f32 = simd_extract!(a, 0);
40295        if (k & 0b00000001) != 0 {
40296            let extracta = -fnmadd;
40297            let extractb: f32 = simd_extract!(b, 0);
40298            let extractc: f32 = simd_extract!(c, 0);
40299            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40300        }
40301        simd_insert!(a, 0, fnmadd)
40302    }
40303}
40304
40305/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40306///
40307/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40308/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40309/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40310/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40311/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40312/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40313///
40314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
40315#[inline]
40316#[target_feature(enable = "avx512f")]
40317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40318#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40319#[rustc_legacy_const_generics(4)]
40320pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
40321    k: __mmask8,
40322    a: __m128,
40323    b: __m128,
40324    c: __m128,
40325) -> __m128 {
40326    unsafe {
40327        static_assert_rounding!(ROUNDING);
40328        let mut fnmadd: f32 = 0.;
40329        if (k & 0b00000001) != 0 {
40330            let extracta: f32 = simd_extract!(a, 0);
40331            let extracta = -extracta;
40332            let extractb: f32 = simd_extract!(b, 0);
40333            let extractc: f32 = simd_extract!(c, 0);
40334            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40335        }
40336        simd_insert!(a, 0, fnmadd)
40337    }
40338}
40339
40340/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40341///
40342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40348///
40349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
40350#[inline]
40351#[target_feature(enable = "avx512f")]
40352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40353#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40354#[rustc_legacy_const_generics(4)]
40355pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
40356    a: __m128,
40357    b: __m128,
40358    c: __m128,
40359    k: __mmask8,
40360) -> __m128 {
40361    unsafe {
40362        static_assert_rounding!(ROUNDING);
40363        let mut fnmadd: f32 = simd_extract!(c, 0);
40364        if (k & 0b00000001) != 0 {
40365            let extracta: f32 = simd_extract!(a, 0);
40366            let extracta = -extracta;
40367            let extractb: f32 = simd_extract!(b, 0);
40368            fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
40369        }
40370        simd_insert!(c, 0, fnmadd)
40371    }
40372}
40373
40374/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40375///
40376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40382///
40383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
40384#[inline]
40385#[target_feature(enable = "avx512f")]
40386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40387#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40388#[rustc_legacy_const_generics(3)]
40389pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40390    unsafe {
40391        static_assert_rounding!(ROUNDING);
40392        let extracta: f64 = simd_extract!(a, 0);
40393        let extracta = -extracta;
40394        let extractb: f64 = simd_extract!(b, 0);
40395        let extractc: f64 = simd_extract!(c, 0);
40396        let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40397        simd_insert!(a, 0, fnmadd)
40398    }
40399}
40400
40401/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40402///
40403/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40404/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40405/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40406/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40407/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40408/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40409///
40410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
40411#[inline]
40412#[target_feature(enable = "avx512f")]
40413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40414#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40415#[rustc_legacy_const_generics(4)]
40416pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
40417    a: __m128d,
40418    k: __mmask8,
40419    b: __m128d,
40420    c: __m128d,
40421) -> __m128d {
40422    unsafe {
40423        static_assert_rounding!(ROUNDING);
40424        let mut fnmadd: f64 = simd_extract!(a, 0);
40425        if (k & 0b00000001) != 0 {
40426            let extracta = -fnmadd;
40427            let extractb: f64 = simd_extract!(b, 0);
40428            let extractc: f64 = simd_extract!(c, 0);
40429            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40430        }
40431        simd_insert!(a, 0, fnmadd)
40432    }
40433}
40434
40435/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40436///
40437/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40438/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40439/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40440/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40441/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40442/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40443///
40444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
40445#[inline]
40446#[target_feature(enable = "avx512f")]
40447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40448#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40449#[rustc_legacy_const_generics(4)]
40450pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
40451    k: __mmask8,
40452    a: __m128d,
40453    b: __m128d,
40454    c: __m128d,
40455) -> __m128d {
40456    unsafe {
40457        static_assert_rounding!(ROUNDING);
40458        let mut fnmadd: f64 = 0.;
40459        if (k & 0b00000001) != 0 {
40460            let extracta: f64 = simd_extract!(a, 0);
40461            let extracta = -extracta;
40462            let extractb: f64 = simd_extract!(b, 0);
40463            let extractc: f64 = simd_extract!(c, 0);
40464            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40465        }
40466        simd_insert!(a, 0, fnmadd)
40467    }
40468}
40469
40470/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40471///
40472/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40473/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40474/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40475/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40476/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40477/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40478///
40479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
40480#[inline]
40481#[target_feature(enable = "avx512f")]
40482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40483#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40484#[rustc_legacy_const_generics(4)]
40485pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
40486    a: __m128d,
40487    b: __m128d,
40488    c: __m128d,
40489    k: __mmask8,
40490) -> __m128d {
40491    unsafe {
40492        static_assert_rounding!(ROUNDING);
40493        let mut fnmadd: f64 = simd_extract!(c, 0);
40494        if (k & 0b00000001) != 0 {
40495            let extracta: f64 = simd_extract!(a, 0);
40496            let extracta = -extracta;
40497            let extractb: f64 = simd_extract!(b, 0);
40498            fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
40499        }
40500        simd_insert!(c, 0, fnmadd)
40501    }
40502}
40503
40504/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40505///
40506/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40507/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40508/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40509/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40510/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40511/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40512///
40513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
40514#[inline]
40515#[target_feature(enable = "avx512f")]
40516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40517#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40518#[rustc_legacy_const_generics(3)]
40519pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40520    unsafe {
40521        static_assert_rounding!(ROUNDING);
40522        let extracta: f32 = simd_extract!(a, 0);
40523        let extracta = -extracta;
40524        let extractb: f32 = simd_extract!(b, 0);
40525        let extractc: f32 = simd_extract!(c, 0);
40526        let extractc = -extractc;
40527        let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40528        simd_insert!(a, 0, fnmsub)
40529    }
40530}
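
// Illustrative sketch (not part of the upstream source): `_mm_fnmsub_round_ss`
// computes `-(a * b) - c` in the lowest lane. The helper name is hypothetical;
// assumes a nightly toolchain with `stdarch_x86_avx512` and an AVX-512F-capable
// CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_fnmsub_round_ss() -> f32 {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(1.0);
    // Lower lane: -(2.0 * 3.0) - 1.0 = -7.0; the upper lanes are copied from `a`.
    let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
    _mm_cvtss_f32(r)
}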
40531
40532/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40533///
40534/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40535/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40536/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40537/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40538/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40539/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40540///
40541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
40542#[inline]
40543#[target_feature(enable = "avx512f")]
40544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40545#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40546#[rustc_legacy_const_generics(4)]
40547pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
40548    a: __m128,
40549    k: __mmask8,
40550    b: __m128,
40551    c: __m128,
40552) -> __m128 {
40553    unsafe {
40554        static_assert_rounding!(ROUNDING);
40555        let mut fnmsub: f32 = simd_extract!(a, 0);
40556        if (k & 0b00000001) != 0 {
40557            let extracta = -fnmsub;
40558            let extractb: f32 = simd_extract!(b, 0);
40559            let extractc: f32 = simd_extract!(c, 0);
40560            let extractc = -extractc;
40561            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40562        }
40563        simd_insert!(a, 0, fnmsub)
40564    }
40565}
40566
40567/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40568///
40569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40575///
40576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
40577#[inline]
40578#[target_feature(enable = "avx512f")]
40579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40580#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40581#[rustc_legacy_const_generics(4)]
40582pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
40583    k: __mmask8,
40584    a: __m128,
40585    b: __m128,
40586    c: __m128,
40587) -> __m128 {
40588    unsafe {
40589        static_assert_rounding!(ROUNDING);
40590        let mut fnmsub: f32 = 0.;
40591        if (k & 0b00000001) != 0 {
40592            let extracta: f32 = simd_extract!(a, 0);
40593            let extracta = -extracta;
40594            let extractb: f32 = simd_extract!(b, 0);
40595            let extractc: f32 = simd_extract!(c, 0);
40596            let extractc = -extractc;
40597            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40598        }
40599        simd_insert!(a, 0, fnmsub)
40600    }
40601}
40602
40603/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40604///
40605/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40606/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40607/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40608/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40609/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40610/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40611///
40612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
40613#[inline]
40614#[target_feature(enable = "avx512f")]
40615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40616#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40617#[rustc_legacy_const_generics(4)]
40618pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
40619    a: __m128,
40620    b: __m128,
40621    c: __m128,
40622    k: __mmask8,
40623) -> __m128 {
40624    unsafe {
40625        static_assert_rounding!(ROUNDING);
40626        let mut fnmsub: f32 = simd_extract!(c, 0);
40627        if (k & 0b00000001) != 0 {
40628            let extracta: f32 = simd_extract!(a, 0);
40629            let extracta = -extracta;
40630            let extractb: f32 = simd_extract!(b, 0);
40631            let extractc = -fnmsub;
40632            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40633        }
40634        simd_insert!(c, 0, fnmsub)
40635    }
40636}
40637
40638/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40639///
40640/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40641/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40642/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40643/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40644/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40645/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40646///
40647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
40648#[inline]
40649#[target_feature(enable = "avx512f")]
40650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40651#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40652#[rustc_legacy_const_generics(3)]
40653pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40654    unsafe {
40655        static_assert_rounding!(ROUNDING);
40656        let extracta: f64 = simd_extract!(a, 0);
40657        let extracta = -extracta;
40658        let extractb: f64 = simd_extract!(b, 0);
40659        let extractc: f64 = simd_extract!(c, 0);
40660        let extractc = -extractc;
40661        let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40662        simd_insert!(a, 0, fnmsub)
40663    }
40664}
40665
40666/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40667///
40668/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40669/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40670/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40671/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40672/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40673/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40674///
40675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
40676#[inline]
40677#[target_feature(enable = "avx512f")]
40678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40679#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40680#[rustc_legacy_const_generics(4)]
40681pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
40682    a: __m128d,
40683    k: __mmask8,
40684    b: __m128d,
40685    c: __m128d,
40686) -> __m128d {
40687    unsafe {
40688        static_assert_rounding!(ROUNDING);
40689        let mut fnmsub: f64 = simd_extract!(a, 0);
40690        if (k & 0b00000001) != 0 {
40691            let extracta = -fnmsub;
40692            let extractb: f64 = simd_extract!(b, 0);
40693            let extractc: f64 = simd_extract!(c, 0);
40694            let extractc = -extractc;
40695            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40696        }
40697        simd_insert!(a, 0, fnmsub)
40698    }
40699}
40700
40701/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40702///
40703/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40704/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40705/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40706/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40707/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40708/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40709///
40710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
40711#[inline]
40712#[target_feature(enable = "avx512f")]
40713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40714#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40715#[rustc_legacy_const_generics(4)]
40716pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
40717    k: __mmask8,
40718    a: __m128d,
40719    b: __m128d,
40720    c: __m128d,
40721) -> __m128d {
40722    unsafe {
40723        static_assert_rounding!(ROUNDING);
40724        let mut fnmsub: f64 = 0.;
40725        if (k & 0b00000001) != 0 {
40726            let extracta: f64 = simd_extract!(a, 0);
40727            let extracta = -extracta;
40728            let extractb: f64 = simd_extract!(b, 0);
40729            let extractc: f64 = simd_extract!(c, 0);
40730            let extractc = -extractc;
40731            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40732        }
40733        simd_insert!(a, 0, fnmsub)
40734    }
40735}
40736
40737/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40738///
40739/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40740/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40741/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40742/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40743/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40744/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40745///
40746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
40747#[inline]
40748#[target_feature(enable = "avx512f")]
40749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40750#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40751#[rustc_legacy_const_generics(4)]
40752pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
40753    a: __m128d,
40754    b: __m128d,
40755    c: __m128d,
40756    k: __mmask8,
40757) -> __m128d {
40758    unsafe {
40759        static_assert_rounding!(ROUNDING);
40760        let mut fnmsub: f64 = simd_extract!(c, 0);
40761        if (k & 0b00000001) != 0 {
40762            let extracta: f64 = simd_extract!(a, 0);
40763            let extracta = -extracta;
40764            let extractb: f64 = simd_extract!(b, 0);
40765            let extractc = -fnmsub;
40766            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40767        }
40768        simd_insert!(c, 0, fnmsub)
40769    }
40770}
40771
40772/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40773///
40774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
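///
/// # Examples
///
/// A minimal sketch using an all-zero fix-up table, which selects the "keep the lower
/// element of `a`" response for every input class; it assumes an x86_64 nightly toolchain
/// with the `stdarch_x86_avx512` feature enabled and an AVX-512F-capable CPU:
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(10.0);
/// let b = _mm_set_ss(42.0);
/// let c = _mm_setzero_si128(); // every 4-bit token response is 0: keep `a`'s lower element
/// // SAFETY: only valid on a CPU that supports AVX-512F.
/// let r = unsafe { _mm_fixupimm_ss::<0>(a, b, c) };
/// assert_eq!(_mm_cvtss_f32(r), 10.0);
/// ```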
40775#[inline]
40776#[target_feature(enable = "avx512f")]
40777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40778#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40779#[rustc_legacy_const_generics(3)]
40780pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
40781    unsafe {
40782        static_assert_uimm_bits!(IMM8, 8);
40783        let a = a.as_f32x4();
40784        let b = b.as_f32x4();
40785        let c = c.as_i32x4();
40786        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40787        let fixupimm: f32 = simd_extract!(r, 0);
40788        let r = simd_insert!(a, 0, fixupimm);
40789        transmute(r)
40790    }
40791}
40792
40793/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40794///
40795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
40796#[inline]
40797#[target_feature(enable = "avx512f")]
40798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40799#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40800#[rustc_legacy_const_generics(4)]
40801pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
40802    a: __m128,
40803    k: __mmask8,
40804    b: __m128,
40805    c: __m128i,
40806) -> __m128 {
40807    unsafe {
40808        static_assert_uimm_bits!(IMM8, 8);
40809        let a = a.as_f32x4();
40810        let b = b.as_f32x4();
40811        let c = c.as_i32x4();
40812        let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40813        let fixupimm: f32 = simd_extract!(fixupimm, 0);
40814        let r = simd_insert!(a, 0, fixupimm);
40815        transmute(r)
40816    }
40817}
40818
40819/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40820///
40821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
40822#[inline]
40823#[target_feature(enable = "avx512f")]
40824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40825#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40826#[rustc_legacy_const_generics(4)]
40827pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
40828    k: __mmask8,
40829    a: __m128,
40830    b: __m128,
40831    c: __m128i,
40832) -> __m128 {
40833    unsafe {
40834        static_assert_uimm_bits!(IMM8, 8);
40835        let a = a.as_f32x4();
40836        let b = b.as_f32x4();
40837        let c = c.as_i32x4();
40838        let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40839        let fixupimm: f32 = simd_extract!(fixupimm, 0);
40840        let r = simd_insert!(a, 0, fixupimm);
40841        transmute(r)
40842    }
40843}
40844
40845/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40846///
40847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
40848#[inline]
40849#[target_feature(enable = "avx512f")]
40850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40851#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40852#[rustc_legacy_const_generics(3)]
40853pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
40854    unsafe {
40855        static_assert_uimm_bits!(IMM8, 8);
40856        let a = a.as_f64x2();
40857        let b = b.as_f64x2();
40858        let c = c.as_i64x2();
40859        let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40860        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40861        let r = simd_insert!(a, 0, fixupimm);
40862        transmute(r)
40863    }
40864}
40865
40866/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40867///
40868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
40869#[inline]
40870#[target_feature(enable = "avx512f")]
40871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40872#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40873#[rustc_legacy_const_generics(4)]
40874pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
40875    a: __m128d,
40876    k: __mmask8,
40877    b: __m128d,
40878    c: __m128i,
40879) -> __m128d {
40880    unsafe {
40881        static_assert_uimm_bits!(IMM8, 8);
40882        let a = a.as_f64x2();
40883        let b = b.as_f64x2();
40884        let c = c.as_i64x2();
40885        let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40886        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40887        let r = simd_insert!(a, 0, fixupimm);
40888        transmute(r)
40889    }
40890}
40891
40892/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40893///
40894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
40895#[inline]
40896#[target_feature(enable = "avx512f")]
40897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40898#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40899#[rustc_legacy_const_generics(4)]
40900pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
40901    k: __mmask8,
40902    a: __m128d,
40903    b: __m128d,
40904    c: __m128i,
40905) -> __m128d {
40906    unsafe {
40907        static_assert_uimm_bits!(IMM8, 8);
40908        let a = a.as_f64x2();
40909        let b = b.as_f64x2();
40910        let c = c.as_i64x2();
40911        let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40912        let fixupimm: f64 = simd_extract!(fixupimm, 0);
40913        let r = simd_insert!(a, 0, fixupimm);
40914        transmute(r)
40915    }
40916}
40917
40918/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40919/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40920///
40921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
40922#[inline]
40923#[target_feature(enable = "avx512f")]
40924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40925#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40926#[rustc_legacy_const_generics(3, 4)]
40927pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40928    a: __m128,
40929    b: __m128,
40930    c: __m128i,
40931) -> __m128 {
40932    unsafe {
40933        static_assert_uimm_bits!(IMM8, 8);
40934        static_assert_mantissas_sae!(SAE);
40935        let a = a.as_f32x4();
40936        let b = b.as_f32x4();
40937        let c = c.as_i32x4();
40938        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
40939        let fixupimm: f32 = simd_extract!(r, 0);
40940        let r = simd_insert!(a, 0, fixupimm);
40941        transmute(r)
40942    }
40943}
40944
40945/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40946/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40947///
40948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
40949#[inline]
40950#[target_feature(enable = "avx512f")]
40951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40952#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40953#[rustc_legacy_const_generics(4, 5)]
40954pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40955    a: __m128,
40956    k: __mmask8,
40957    b: __m128,
40958    c: __m128i,
40959) -> __m128 {
40960    unsafe {
40961        static_assert_uimm_bits!(IMM8, 8);
40962        static_assert_mantissas_sae!(SAE);
40963        let a = a.as_f32x4();
40964        let b = b.as_f32x4();
40965        let c = c.as_i32x4();
40966        let r = vfixupimmss(a, b, c, IMM8, k, SAE);
40967        let fixupimm: f32 = simd_extract!(r, 0);
40968        let r = simd_insert!(a, 0, fixupimm);
40969        transmute(r)
40970    }
40971}
40972
40973/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40974/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40975///
40976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
40977#[inline]
40978#[target_feature(enable = "avx512f")]
40979#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40980#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40981#[rustc_legacy_const_generics(4, 5)]
40982pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40983    k: __mmask8,
40984    a: __m128,
40985    b: __m128,
40986    c: __m128i,
40987) -> __m128 {
40988    unsafe {
40989        static_assert_uimm_bits!(IMM8, 8);
40990        static_assert_mantissas_sae!(SAE);
40991        let a = a.as_f32x4();
40992        let b = b.as_f32x4();
40993        let c = c.as_i32x4();
40994        let r = vfixupimmssz(a, b, c, IMM8, k, SAE);
40995        let fixupimm: f32 = simd_extract!(r, 0);
40996        let r = simd_insert!(a, 0, fixupimm);
40997        transmute(r)
40998    }
40999}
41000
41001/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41002/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41003///
41004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
41005#[inline]
41006#[target_feature(enable = "avx512f")]
41007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41008#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41009#[rustc_legacy_const_generics(3, 4)]
41010pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41011    a: __m128d,
41012    b: __m128d,
41013    c: __m128i,
41014) -> __m128d {
41015    unsafe {
41016        static_assert_uimm_bits!(IMM8, 8);
41017        static_assert_mantissas_sae!(SAE);
41018        let a = a.as_f64x2();
41019        let b = b.as_f64x2();
41020        let c = c.as_i64x2();
41021        let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
41022        let fixupimm: f64 = simd_extract!(r, 0);
41023        let r = simd_insert!(a, 0, fixupimm);
41024        transmute(r)
41025    }
41026}
41027
41028/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41029/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41030///
41031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
41032#[inline]
41033#[target_feature(enable = "avx512f")]
41034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41035#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41036#[rustc_legacy_const_generics(4, 5)]
41037pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41038    a: __m128d,
41039    k: __mmask8,
41040    b: __m128d,
41041    c: __m128i,
41042) -> __m128d {
41043    unsafe {
41044        static_assert_uimm_bits!(IMM8, 8);
41045        static_assert_mantissas_sae!(SAE);
41046        let a = a.as_f64x2();
41047        let b = b.as_f64x2();
41048        let c = c.as_i64x2();
41049        let r = vfixupimmsd(a, b, c, IMM8, k, SAE);
41050        let fixupimm: f64 = simd_extract!(r, 0);
41051        let r = simd_insert!(a, 0, fixupimm);
41052        transmute(r)
41053    }
41054}
41055
41056/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41057/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41058///
41059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
41060#[inline]
41061#[target_feature(enable = "avx512f")]
41062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41063#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41064#[rustc_legacy_const_generics(4, 5)]
41065pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41066    k: __mmask8,
41067    a: __m128d,
41068    b: __m128d,
41069    c: __m128i,
41070) -> __m128d {
41071    unsafe {
41072        static_assert_uimm_bits!(IMM8, 8);
41073        static_assert_mantissas_sae!(SAE);
41074        let a = a.as_f64x2();
41075        let b = b.as_f64x2();
41076        let c = c.as_i64x2();
41077        let r = vfixupimmsdz(a, b, c, IMM8, k, SAE);
41078        let fixupimm: f64 = simd_extract!(r, 0);
41079        let r = simd_insert!(a, 0, fixupimm);
41080        transmute(r)
41081    }
41082}
41083
41084/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41085///
41086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
41087#[inline]
41088#[target_feature(enable = "avx512f")]
41089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41090#[cfg_attr(test, assert_instr(vcvtss2sd))]
41091pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41092    unsafe {
41093        transmute(vcvtss2sd(
41094            a.as_f64x2(),
41095            b.as_f32x4(),
41096            src.as_f64x2(),
41097            k,
41098            _MM_FROUND_CUR_DIRECTION,
41099        ))
41100    }
41101}
41102
41103/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41104///
41105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
41106#[inline]
41107#[target_feature(enable = "avx512f")]
41108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41109#[cfg_attr(test, assert_instr(vcvtss2sd))]
41110pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41111    unsafe {
41112        transmute(vcvtss2sd(
41113            a.as_f64x2(),
41114            b.as_f32x4(),
41115            f64x2::ZERO,
41116            k,
41117            _MM_FROUND_CUR_DIRECTION,
41118        ))
41119    }
41120}
41121
41122/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41123///
41124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
41125#[inline]
41126#[target_feature(enable = "avx512f")]
41127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41128#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41129pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41130    unsafe {
41131        transmute(vcvtsd2ss(
41132            a.as_f32x4(),
41133            b.as_f64x2(),
41134            src.as_f32x4(),
41135            k,
41136            _MM_FROUND_CUR_DIRECTION,
41137        ))
41138    }
41139}
41140
41141/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41142///
41143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
41144#[inline]
41145#[target_feature(enable = "avx512f")]
41146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41147#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41148pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41149    unsafe {
41150        transmute(vcvtsd2ss(
41151            a.as_f32x4(),
41152            b.as_f64x2(),
41153            f32x4::ZERO,
41154            k,
41155            _MM_FROUND_CUR_DIRECTION,
41156        ))
41157    }
41158}
41159
41160/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41162///
41163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
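///
/// # Examples
///
/// A minimal sketch, assuming an x86_64 nightly toolchain with the `stdarch_x86_avx512`
/// feature enabled and an AVX-512F-capable CPU:
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_pd(9.0, 1.0); // upper element 9.0, lower element 1.0
/// let b = _mm_set_ss(2.5);
/// // SAFETY: only valid on a CPU that supports AVX-512F.
/// let r = unsafe { _mm_cvt_roundss_sd::<_MM_FROUND_NO_EXC>(a, b) };
/// // Lower lane: `b`'s f32 value widened to f64; upper lane is copied from `a` (9.0).
/// assert_eq!(_mm_cvtsd_f64(r), 2.5);
/// ```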
41164#[inline]
41165#[target_feature(enable = "avx512f")]
41166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41167#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41168#[rustc_legacy_const_generics(2)]
41169pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
41170    unsafe {
41171        static_assert_sae!(SAE);
41172        let a = a.as_f64x2();
41173        let b = b.as_f32x4();
41174        let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
41175        transmute(r)
41176    }
41177}
41178
41179/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41180/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41181///
41182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
41183#[inline]
41184#[target_feature(enable = "avx512f")]
41185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41186#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41187#[rustc_legacy_const_generics(4)]
41188pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
41189    src: __m128d,
41190    k: __mmask8,
41191    a: __m128d,
41192    b: __m128,
41193) -> __m128d {
41194    unsafe {
41195        static_assert_sae!(SAE);
41196        let a = a.as_f64x2();
41197        let b = b.as_f32x4();
41198        let src = src.as_f64x2();
41199        let r = vcvtss2sd(a, b, src, k, SAE);
41200        transmute(r)
41201    }
41202}
41203
41204/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41205/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41206///
41207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
41208#[inline]
41209#[target_feature(enable = "avx512f")]
41210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41211#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41212#[rustc_legacy_const_generics(3)]
41213pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41214    unsafe {
41215        static_assert_sae!(SAE);
41216        let a = a.as_f64x2();
41217        let b = b.as_f32x4();
41218        let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
41219        transmute(r)
41220    }
41221}
41222
41223/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41224/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41225/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41226/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41227/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41228/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41229/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41230///
41231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
41232#[inline]
41233#[target_feature(enable = "avx512f")]
41234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41235#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41236#[rustc_legacy_const_generics(2)]
41237pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
41238    unsafe {
41239        static_assert_rounding!(ROUNDING);
41240        let a = a.as_f32x4();
41241        let b = b.as_f64x2();
41242        let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
41243        transmute(r)
41244    }
41245}
41246
41247/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41248/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41249/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41250/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41251/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41252/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41253/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41254///
41255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
41256#[inline]
41257#[target_feature(enable = "avx512f")]
41258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41259#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41260#[rustc_legacy_const_generics(4)]
41261pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
41262    src: __m128,
41263    k: __mmask8,
41264    a: __m128,
41265    b: __m128d,
41266) -> __m128 {
41267    unsafe {
41268        static_assert_rounding!(ROUNDING);
41269        let a = a.as_f32x4();
41270        let b = b.as_f64x2();
41271        let src = src.as_f32x4();
41272        let r = vcvtsd2ss(a, b, src, k, ROUNDING);
41273        transmute(r)
41274    }
41275}
41276
41277/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41278/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41279/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41280/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41281/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41282/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41283/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41284///
41285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
41286#[inline]
41287#[target_feature(enable = "avx512f")]
41288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41289#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41290#[rustc_legacy_const_generics(3)]
41291pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41292    unsafe {
41293        static_assert_rounding!(ROUNDING);
41294        let a = a.as_f32x4();
41295        let b = b.as_f64x2();
41296        let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
41297        transmute(r)
41298    }
41299}
41300
41301/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41302/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41303/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41304/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41305/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41306/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41307/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41308///
41309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
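///
/// # Examples
///
/// A minimal sketch showing how the rounding mode changes the result, assuming an x86_64
/// nightly toolchain with the `stdarch_x86_avx512` feature enabled and an AVX-512F-capable CPU:
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(-1.5);
/// // SAFETY: only valid on a CPU that supports AVX-512F.
/// // Round to nearest (ties to even): -1.5 -> -2.
/// let r = unsafe {
///     _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
/// };
/// assert_eq!(r, -2);
/// // Round toward zero: -1.5 -> -1.
/// let r = unsafe { _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a) };
/// assert_eq!(r, -1);
/// ```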
41310#[inline]
41311#[target_feature(enable = "avx512f")]
41312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41313#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41314#[rustc_legacy_const_generics(1)]
41315pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
41316    unsafe {
41317        static_assert_rounding!(ROUNDING);
41318        let a = a.as_f32x4();
41319        vcvtss2si(a, ROUNDING)
41320    }
41321}
41322
41323/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41324/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41325/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41326/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41327/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41328/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41329/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41330///
41331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
41332#[inline]
41333#[target_feature(enable = "avx512f")]
41334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41335#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41336#[rustc_legacy_const_generics(1)]
41337pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
41338    unsafe {
41339        static_assert_rounding!(ROUNDING);
41340        let a = a.as_f32x4();
41341        vcvtss2si(a, ROUNDING)
41342    }
41343}
41344
41345/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41346/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41347/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41348/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41349/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41350/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41351/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41352///
41353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
41354#[inline]
41355#[target_feature(enable = "avx512f")]
41356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41357#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
41358#[rustc_legacy_const_generics(1)]
41359pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
41360    unsafe {
41361        static_assert_rounding!(ROUNDING);
41362        let a = a.as_f32x4();
41363        vcvtss2usi(a, ROUNDING)
41364    }
41365}
41366
41367/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41368///
41369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
41370#[inline]
41371#[target_feature(enable = "avx512f")]
41372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41373#[cfg_attr(test, assert_instr(vcvtss2si))]
41374pub fn _mm_cvtss_i32(a: __m128) -> i32 {
41375    unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41376}
41377
41378/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41379///
41380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
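///
/// # Examples
///
/// A minimal sketch, assuming an x86_64 nightly toolchain with the `stdarch_x86_avx512`
/// feature enabled, an AVX-512F-capable CPU, and the default `MXCSR` rounding mode
/// (round to nearest):
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(9.7);
/// // SAFETY: only valid on a CPU that supports AVX-512F.
/// let r = unsafe { _mm_cvtss_u32(a) };
/// assert_eq!(r, 10); // 9.7 rounded to nearest under the default MXCSR rounding mode
/// ```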
41381#[inline]
41382#[target_feature(enable = "avx512f")]
41383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41384#[cfg_attr(test, assert_instr(vcvtss2usi))]
41385pub fn _mm_cvtss_u32(a: __m128) -> u32 {
41386    unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41387}
41388
41389/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41390/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41391/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41392/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41393/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41394/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41395/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41396///
41397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
41398#[inline]
41399#[target_feature(enable = "avx512f")]
41400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41401#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41402#[rustc_legacy_const_generics(1)]
41403pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
41404    unsafe {
41405        static_assert_rounding!(ROUNDING);
41406        let a = a.as_f64x2();
41407        vcvtsd2si(a, ROUNDING)
41408    }
41409}
41410
41411/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41412/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41413/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41414/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41415/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41416/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41417/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41418///
41419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
41420#[inline]
41421#[target_feature(enable = "avx512f")]
41422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41423#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41424#[rustc_legacy_const_generics(1)]
41425pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
41426    unsafe {
41427        static_assert_rounding!(ROUNDING);
41428        let a = a.as_f64x2();
41429        vcvtsd2si(a, ROUNDING)
41430    }
41431}
41432
41433/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41434/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41435/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41436/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41437/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41438/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41439/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41440///
41441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
41442#[inline]
41443#[target_feature(enable = "avx512f")]
41444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41445#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
41446#[rustc_legacy_const_generics(1)]
41447pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
41448    unsafe {
41449        static_assert_rounding!(ROUNDING);
41450        let a = a.as_f64x2();
41451        vcvtsd2usi(a, ROUNDING)
41452    }
41453}
41454
41455/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41456///
41457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
41458#[inline]
41459#[target_feature(enable = "avx512f")]
41460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41461#[cfg_attr(test, assert_instr(vcvtsd2si))]
41462pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
41463    unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41464}
41465
41466/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41467///
41468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
41469#[inline]
41470#[target_feature(enable = "avx512f")]
41471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41472#[cfg_attr(test, assert_instr(vcvtsd2usi))]
41473pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
41474    unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41475}
41476
41477/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41479/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41480/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41481/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41482/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41483/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41484/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41485///
41486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
41487#[inline]
41488#[target_feature(enable = "avx512f")]
41489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41490#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41491#[rustc_legacy_const_generics(2)]
41492pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41493    unsafe {
41494        static_assert_rounding!(ROUNDING);
41495        let a = a.as_f32x4();
41496        let r = vcvtsi2ss(a, b, ROUNDING);
41497        transmute(r)
41498    }
41499}
41500
41501/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41503/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41504/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41505/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41506/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41507/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41508/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41509///
41510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
41511#[inline]
41512#[target_feature(enable = "avx512f")]
41513#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41514#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41515#[rustc_legacy_const_generics(2)]
41516pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41517    unsafe {
41518        static_assert_rounding!(ROUNDING);
41519        let a = a.as_f32x4();
41520        let r = vcvtsi2ss(a, b, ROUNDING);
41521        transmute(r)
41522    }
41523}
41524
41525/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41532///
41533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
41534#[inline]
41535#[target_feature(enable = "avx512f")]
41536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41537#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
41538#[rustc_legacy_const_generics(2)]
41539pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
41540    unsafe {
41541        static_assert_rounding!(ROUNDING);
41542        let a = a.as_f32x4();
41543        let r = vcvtusi2ss(a, b, ROUNDING);
41544        transmute(r)
41545    }
41546}
41547
41548/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41549///
41550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
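///
/// # Examples
///
/// A minimal sketch, assuming an x86_64 nightly toolchain with the `stdarch_x86_avx512`
/// feature enabled and an AVX-512F-capable CPU:
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
/// // SAFETY: only valid on a CPU that supports AVX-512F.
/// let r = unsafe { _mm_cvti32_ss(a, 7) };
/// // The lower lane becomes 7.0; the three upper lanes are copied from `a`.
/// assert_eq!(_mm_cvtss_f32(r), 7.0);
/// ```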
41551#[inline]
41552#[target_feature(enable = "avx512f")]
41553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41554#[cfg_attr(test, assert_instr(vcvtsi2ss))]
41555pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
41556    unsafe {
41557        let b = b as f32;
41558        simd_insert!(a, 0, b)
41559    }
41560}
41561
41562/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41563///
41564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
41565#[inline]
41566#[target_feature(enable = "avx512f")]
41567#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41568#[cfg_attr(test, assert_instr(vcvtsi2sd))]
41569pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
41570    unsafe {
41571        let b = b as f64;
41572        simd_insert!(a, 0, b)
41573    }
41574}
41575
41576/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41577/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41578///
41579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
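///
/// # Examples
///
/// A minimal sketch, assuming an x86_64 nightly toolchain with the `stdarch_x86_avx512`
/// feature enabled and an AVX-512F-capable CPU:
///
/// ```no_run
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(-3.9);
/// // SAFETY: only valid on a CPU that supports AVX-512F.
/// // Truncation always rounds toward zero, regardless of MXCSR: -3.9 -> -3.
/// let r = unsafe { _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a) };
/// assert_eq!(r, -3);
/// ```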
41580#[inline]
41581#[target_feature(enable = "avx512f")]
41582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41583#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41584#[rustc_legacy_const_generics(1)]
41585pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
41586    unsafe {
41587        static_assert_sae!(SAE);
41588        let a = a.as_f32x4();
41589        vcvttss2si(a, SAE)
41590    }
41591}
41592
41593/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41594/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41595///
41596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
41597#[inline]
41598#[target_feature(enable = "avx512f")]
41599#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41600#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41601#[rustc_legacy_const_generics(1)]
41602pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
41603    unsafe {
41604        static_assert_sae!(SAE);
41605        let a = a.as_f32x4();
41606        vcvttss2si(a, SAE)
41607    }
41608}
41609
41610/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41612///
41613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
41614#[inline]
41615#[target_feature(enable = "avx512f")]
41616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41617#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
41618#[rustc_legacy_const_generics(1)]
41619pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
41620    unsafe {
41621        static_assert_sae!(SAE);
41622        let a = a.as_f32x4();
41623        vcvttss2usi(a, SAE)
41624    }
41625}
41626
41627/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41628///
41629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
41630#[inline]
41631#[target_feature(enable = "avx512f")]
41632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41633#[cfg_attr(test, assert_instr(vcvttss2si))]
41634pub fn _mm_cvttss_i32(a: __m128) -> i32 {
41635    unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41636}
41637
41638/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41639///
41640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
41641#[inline]
41642#[target_feature(enable = "avx512f")]
41643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41644#[cfg_attr(test, assert_instr(vcvttss2usi))]
41645pub fn _mm_cvttss_u32(a: __m128) -> u32 {
41646    unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41647}
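
// Illustrative sketch (hypothetical helper): the non-rounding variants always truncate
// toward zero; the unsigned form additionally requires the value to be representable
// as a `u32`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn cvttss_sketch() -> (i32, u32) {
    let a = _mm_set_ps(0.0, 0.0, 0.0, 3.7);
    // Both conversions yield 3.
    (_mm_cvttss_i32(a), _mm_cvttss_u32(a))
}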
41648
41649/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41650/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41651///
41652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
41653#[inline]
41654#[target_feature(enable = "avx512f")]
41655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41656#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41657#[rustc_legacy_const_generics(1)]
41658pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
41659    unsafe {
41660        static_assert_sae!(SAE);
41661        let a = a.as_f64x2();
41662        vcvttsd2si(a, SAE)
41663    }
41664}
41665
41666/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41667/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41668///
41669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
41670#[inline]
41671#[target_feature(enable = "avx512f")]
41672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41673#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41674#[rustc_legacy_const_generics(1)]
41675pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
41676    unsafe {
41677        static_assert_sae!(SAE);
41678        let a = a.as_f64x2();
41679        vcvttsd2si(a, SAE)
41680    }
41681}
41682
41683/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41684/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41685///
41686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
41687#[inline]
41688#[target_feature(enable = "avx512f")]
41689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41690#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
41691#[rustc_legacy_const_generics(1)]
41692pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
41693    unsafe {
41694        static_assert_sae!(SAE);
41695        let a = a.as_f64x2();
41696        vcvttsd2usi(a, SAE)
41697    }
41698}
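
// Illustrative sketch (hypothetical helper): the same truncation behavior as the
// single-precision forms above, but reading the low `f64` lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn cvtt_roundsd_u32_sketch() -> u32 {
    let a = _mm_set_pd(0.0, 2.5);
    // Truncating 2.5 yields 2; `_MM_FROUND_NO_EXC` suppresses the precision exception.
    _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a)
}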
41699
41700/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41701///
41702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
41703#[inline]
41704#[target_feature(enable = "avx512f")]
41705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41706#[cfg_attr(test, assert_instr(vcvttsd2si))]
41707pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
41708    unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41709}
41710
41711/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41712///
41713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
41714#[inline]
41715#[target_feature(enable = "avx512f")]
41716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41717#[cfg_attr(test, assert_instr(vcvttsd2usi))]
41718pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
41719    unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41720}
41721
41722/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41723///
41724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
41725#[inline]
41726#[target_feature(enable = "avx512f")]
41727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41728#[cfg_attr(test, assert_instr(vcvtusi2ss))]
41729pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
41730    unsafe {
41731        let b = b as f32;
41732        simd_insert!(a, 0, b)
41733    }
41734}
41735
41736/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41737///
41738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
41739#[inline]
41740#[target_feature(enable = "avx512f")]
41741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41742#[cfg_attr(test, assert_instr(vcvtusi2sd))]
41743pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
41744    unsafe {
41745        let b = b as f64;
41746        simd_insert!(a, 0, b)
41747    }
41748}
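
// Illustrative sketch (hypothetical helper): unsigned sources above `i32::MAX` are
// handled directly, with no sign fix-up required.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn cvtu32_scalar_sketch() -> (__m128, __m128d) {
    let a_ps = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
    let a_pd = _mm_set_pd(2.0, 1.0);
    // 3_000_000_000 is exactly representable in both f32 and f64, so both low lanes
    // become 3000000000.0; the upper lanes are copied from the respective `a`.
    (
        _mm_cvtu32_ss(a_ps, 3_000_000_000),
        _mm_cvtu32_sd(a_pd, 3_000_000_000),
    )
}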
41749
41750/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41751/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41752///
41753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
41754#[inline]
41755#[target_feature(enable = "avx512f")]
41756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41757#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomiss
41758#[rustc_legacy_const_generics(2, 3)]
41759pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
41760    unsafe {
41761        static_assert_uimm_bits!(IMM5, 5);
41762        static_assert_mantissas_sae!(SAE);
41763        let a = a.as_f32x4();
41764        let b = b.as_f32x4();
41765        vcomiss(a, b, IMM5, SAE)
41766    }
41767}
41768
41769/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41771///
41772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
41773#[inline]
41774#[target_feature(enable = "avx512f")]
41775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41776#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomisd
41777#[rustc_legacy_const_generics(2, 3)]
41778pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
41779    unsafe {
41780        static_assert_uimm_bits!(IMM5, 5);
41781        static_assert_mantissas_sae!(SAE);
41782        let a = a.as_f64x2();
41783        let b = b.as_f64x2();
41784        vcomisd(a, b, IMM5, SAE)
41785    }
41786}
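
// Illustrative sketch (hypothetical helper): the comparison predicates from the `avx`
// module (here `_CMP_LT_OS`) select the operation, and the result is 1 when it holds
// for the low lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn comi_round_ss_sketch() -> i32 {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    // 1.0 < 2.0, so this returns 1; `_MM_FROUND_NO_EXC` suppresses exception reporting.
    _mm_comi_round_ss::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b)
}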
41787
41788/// Equal
41789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41790pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
41791/// Less-than
41792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41793pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
41794/// Less-than-or-equal
41795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41796pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
41797/// False
41798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41799pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
41800/// Not-equal
41801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41802pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
41803/// Not less-than
41804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41805pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
41806/// Not less-than-or-equal
41807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41808pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
41809/// True
41810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41811pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
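
// Illustrative sketch (hypothetical helper): these predicates parameterize the integer
// mask-compare intrinsics, e.g. `_mm512_cmp_epi32_mask`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn cmpint_sketch() -> __mmask16 {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    // 1 <= 2 holds in every lane, so all 16 mask bits are set (0xFFFF).
    _mm512_cmp_epi32_mask::<_MM_CMPINT_LE>(a, b)
}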
41812
41813/// interval [1, 2)
41814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41815pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
41816/// interval [0.5, 2)
41817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41818pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
41819/// interval [0.5, 1)
41820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41821pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
41822/// interval [0.75, 1.5)
41823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41824pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
41825
41826/// sign = sign(SRC)
41827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41828pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
41829/// sign = 0
41830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41831pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
41832/// DEST = NaN if sign(SRC) = 1
41833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41834pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
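
// Illustrative sketch (hypothetical helper): the normalization-interval and sign
// constants above are the two const parameters of the `getmant` intrinsics.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getmant_sketch() -> __m512 {
    let a = _mm512_set1_ps(10.0);
    // 10.0 = 1.25 * 2^3, so with the [1, 2) interval and the source sign every lane of
    // the result is 1.25.
    _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a)
}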
41835
41836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41837pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
41838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41839pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
41840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41841pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
41842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41843pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
41844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41845pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
41846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41847pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
41848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41849pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
41850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41851pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
41852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41853pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
41854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41855pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
41856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41857pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
41858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41859pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
41860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41861pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
41862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41863pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
41864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41865pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
41866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41867pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
41868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41869pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
41870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41871pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
41872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41873pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
41874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41875pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
41876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41877pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
41878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41879pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
41880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41881pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
41882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41883pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
41884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41885pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
41886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41887pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
41888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41889pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
41890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41891pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
41892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41893pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
41894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41895pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
41896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41897pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
41898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41899pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
41900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41901pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
41902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41903pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
41904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41905pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
41906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41907pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
41908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41909pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
41910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41911pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
41912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41913pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
41914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41915pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
41916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41917pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
41918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41919pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
41920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41921pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
41922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41923pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
41924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41925pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
41926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41927pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
41928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41929pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
41930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41931pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
41932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41933pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
41934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41935pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
41936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41937pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
41938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41939pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
41940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41941pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
41942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41943pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
41944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41945pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
41946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41947pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
41948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41949pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
41950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41951pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
41952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41953pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
41954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41955pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
41956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41957pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
41958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41959pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
41960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41961pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
41962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41963pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
41964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41965pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
41966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41967pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
41968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41969pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
41970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41971pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
41972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41973pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
41974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41975pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
41976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41977pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
41978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41979pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
41980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41981pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
41982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41983pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
41984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41985pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
41986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41987pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
41988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41989pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
41990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41991pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
41992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41993pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
41994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41995pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
41996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41997pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
41998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41999pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
42000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42001pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
42002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42003pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
42004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42005pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
42006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42007pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
42008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42009pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
42010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42011pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
42012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42013pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
42014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42015pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
42016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42017pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
42018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42019pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
42020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42021pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
42022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42023pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
42024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42025pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
42026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42027pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
42028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42029pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
42030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42031pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
42032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42033pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
42034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42035pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
42036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42037pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
42038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42039pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
42040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42041pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
42042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42043pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
42044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42045pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
42046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42047pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
42048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42049pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
42050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42051pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
42052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42053pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
42054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42055pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
42056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42057pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
42058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42059pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
42060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42061pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
42062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42063pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
42064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42065pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
42066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42067pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
42068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42069pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
42070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42071pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
42072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42073pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
42074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42075pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
42076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42077pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
42078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42079pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
42080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42081pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
42082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42083pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
42084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42085pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
42086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42087pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
42088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42089pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
42090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42091pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
42092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42093pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
42094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42095pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
42096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42097pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
42098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42099pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
42100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42101pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
42102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42103pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
42104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42105pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
42106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42107pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
42108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42109pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
42110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42111pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
42112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42113pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
42114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42115pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
42116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42117pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
42118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42119pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
42120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42121pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
42122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42123pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
42124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42125pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
42126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42127pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
42128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42129pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
42130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42131pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
42132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42133pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
42134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42135pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
42136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42137pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
42138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42139pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
42140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42141pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
42142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42143pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
42144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42145pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
42146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42147pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
42148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42149pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
42150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42151pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
42152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42153pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
42154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42155pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
42156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42157pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
42158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42159pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
42160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42161pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
42162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42163pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
42164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42165pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
42166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42167pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
42168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42169pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
42170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42171pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
42172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42173pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
42174#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42175pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
42176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42177pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
42178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42179pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
42180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42181pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
42182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42183pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
42184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42185pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
42186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42187pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
42188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42189pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
42190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42191pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
42192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42193pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
42194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42195pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
42196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42197pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
42198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42199pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
42200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42201pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
42202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42203pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
42204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42205pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
42206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42207pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
42208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42209pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
42210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42211pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
42212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42213pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
42214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42215pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
42216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42217pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
42218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42219pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
42220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42221pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
42222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42223pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
42224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42225pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
42226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42227pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
42228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42229pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
42230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42231pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
42232#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42233pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
42234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42235pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
42236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42237pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
42238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42239pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
42240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42241pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
42242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42243pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
42244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42245pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
42246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42247pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
42248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42249pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
42250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42251pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
42252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42253pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
42254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42255pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
42256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42257pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
42258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42259pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
42260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42261pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
42262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42263pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
42264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42265pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
42266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42267pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
42268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42269pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
42270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42271pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
42272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42273pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
42274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42275pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
42276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42277pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
42278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42279pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
42280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42281pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
42282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42283pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
42284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42285pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
42286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42287pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
42288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42289pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
42290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42291pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
42292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42293pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
42294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42295pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
42296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42297pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
42298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42299pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
42300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42301pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
42302#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42303pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
42304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42305pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
42306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42307pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
42308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42309pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
42310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42311pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
42312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42313pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
42314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42315pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
42316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42317pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
42318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42319pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
42320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42321pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
42322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42323pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
42324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42325pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
42326#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42327pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
42328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42329pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
42330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42331pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
42332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42333pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
42334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42335pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
42336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42337pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
42338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42339pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
42340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42341pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
42342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42343pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
42344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42345pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
42346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42347pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
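
// Illustrative sketch (hypothetical helper): each `_MM_PERM_*` name lists the source
// element (A = element 0 ... D = element 3) chosen for destination elements 3 down to
// 0, so `_MM_PERM_DCBA` (0xE4) is the identity and `_MM_PERM_ABCD` (0x1B) reverses the
// four 32-bit elements of every 128-bit lane, as used by `_mm512_shuffle_epi32`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn perm_enum_sketch() -> __m512i {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // Result (low to high): 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12.
    _mm512_shuffle_epi32::<_MM_PERM_ABCD>(a)
}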
42348
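// The declarations below bind to LLVM's AVX-512 intrinsics. Throughout this block,
// `src` is the pass-through source for masked forms, `m`/`mask` is the per-element
// write-mask, and `rounding`/`sae` carries the embedded rounding control or
// exception-suppression flag that the public intrinsics validate with
// `static_assert_rounding!`/`static_assert_sae!` before calling in.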
42349#[allow(improper_ctypes)]
42350unsafe extern "C" {
42351    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
42352    fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
42353    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
42354    fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
42355
42356    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
42357    fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
42358    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
42359    fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
42360
42361    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
42362    fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; //from clang
42363    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
42364    fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; //from clang
42365
42366    #[link_name = "llvm.x86.avx512.add.ps.512"]
42367    fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42368    #[link_name = "llvm.x86.avx512.add.pd.512"]
42369    fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42370    #[link_name = "llvm.x86.avx512.sub.ps.512"]
42371    fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42372    #[link_name = "llvm.x86.avx512.sub.pd.512"]
42373    fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42374    #[link_name = "llvm.x86.avx512.mul.ps.512"]
42375    fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42376    #[link_name = "llvm.x86.avx512.mul.pd.512"]
42377    fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42378    #[link_name = "llvm.x86.avx512.div.ps.512"]
42379    fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
42380    #[link_name = "llvm.x86.avx512.div.pd.512"]
42381    fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
42382
42383    #[link_name = "llvm.x86.avx512.max.ps.512"]
42384    fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42385    #[link_name = "llvm.x86.avx512.max.pd.512"]
42386    fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42387    #[link_name = "llvm.x86.avx512.min.ps.512"]
42388    fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
42389    #[link_name = "llvm.x86.avx512.min.pd.512"]
42390    fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
42391
42392    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
42393    fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
42394
42395    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
42396    fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42397    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
42398    fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42399
42400    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
42401    fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
42402    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
42403    fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42404    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
42405    fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42406
42407    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
42408    fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
42409    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
42410    fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
42411    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
42412    fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
42413
42414    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
42415    fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
42416    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
42417    fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
42418    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
42419    fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
42420
42421    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
42422    fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
42423    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
42424    fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
42425    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
42426    fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
42427
42428    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
42429    fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
42430    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
42431    fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
42432    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
42433    fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
42434
42435    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
42436    fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42437    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
42438    fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42439    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
42440    fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42441
42442    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
42443    fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42444    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
42445    fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42446    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
42447    fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42448
42449    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
42450    fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
42451    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
42452    fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
42453    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
42454    fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
42455
42456    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
42457    fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
42458    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
42459    fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
42460    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
42461    fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
42462
42463    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
42464    fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
42465    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
42466    fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
42467    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
42468    fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
42469
42470    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
42471    fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
42472    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
42473    fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
42474    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
42475    fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
42476
42477    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
42478    fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
42479    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
42480    fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
42481    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
42482    fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
42483
42484    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
42485    fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
42486    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
42487    fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
42488    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
42489    fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
42490
42491    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
42492    fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42493    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
42494    fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42495    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
42496    fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42497
42498    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
42499    fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42500    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
42501    fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42502    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
42503    fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42504
42505    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
42506    fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
42507    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
42508    fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
42509    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
42510    fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
42511
42512    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
42513    fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
42514    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
42515    fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
42516    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
42517    fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
42518
42519    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
42520    fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42521
42522    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
42523    fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42524    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
42525    fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42526    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
42527    fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42528
42529    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
42530    fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
42531    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
42532    fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
42533
42534    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
42535    fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42536
42537    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
42538    fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
42539    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
42540    fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
42541    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
42542    fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
42543
42544    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
42545    fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
42546    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
42547    fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
42548
42549    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
42550    fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
42551    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
42552    fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
42553    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
42554    fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;
42555
42556    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
42557    fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
42558
42559    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
42560    fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
42561    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
42562    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
42563    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
42564    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
42565
42566    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
42567    fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
42568    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
42569    fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
42570    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
42571    fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
42572
42573    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
42574    fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
42575    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
42576    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
42577    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
42578    fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
42579
42580    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
42581    fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
42582    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
42583    fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
42584    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
42585    fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
42586
42587    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
42588    fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42589    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
42590    fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42591    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
42592    fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42593
42594    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
42595    fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42596    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
42597    fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42598    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
42599    fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42600    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
42601    fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42602    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
42603    fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42604
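    // Memory variants of the down-conversions: the narrowed elements are stored
    // directly to `mem_addr` for each set mask bit. The `pmovs`/`pmovus` forms
    // saturate as signed/unsigned instead of truncating.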
42605    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
42606    fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42607    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
42608    fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42609    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
42610    fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42611
42612    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
42613    fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42614    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
42615    fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42616    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
42617    fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42618
42619    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
42620    fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42621    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
42622    fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42623    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
42624    fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42625
42626    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
42627    fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42628    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
42629    fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42630    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
42631    fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42632
42633    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
42634    fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42635    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
42636    fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42637    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
42638    fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42639
42640    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
42641    fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
42642    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
42643    fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
42644    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
42645    fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
42646
42647    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
42648    fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42649    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
42650    fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42651    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
42652    fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42653
42654    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
42655    fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42656    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
42657    fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42658    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
42659    fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42660
42661    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
42662    fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42663    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
42664    fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42665    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
42666    fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42667
42668    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
42669    fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42670    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
42671    fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42672    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
42673    fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42674
42675    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
42676    fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42677    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
42678    fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42679    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
42680    fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42681
42682    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
42683    fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42684    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
42685    fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42686    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
42687    fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42688
42689    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
42690    fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42691    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
42692    fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42693    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
42694    fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42695
42696    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
42697    fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42698    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
42699    fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42700    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
42701    fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42702
42703    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
42704    fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
42705    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
42706    fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
42707    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
42708    fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
42709
42710    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
42711    fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42712
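    // Register forms of the saturating down-conversions (`pmovs*` = signed
    // saturation, `pmovus*` = unsigned saturation).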
42713    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
42714    fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
42715    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
42716    fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
42717    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
42718    fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
42719
42720    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
42721    fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
42722    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
42723    fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
42724    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
42725    fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
42726
42727    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
42728    fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
42729    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
42730    fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
42731    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
42732    fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
42733
42734    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
42735    fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
42736    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
42737    fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
42738    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
42739    fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
42740
42741    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
42742    fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
42743    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
42744    fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
42745    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
42746    fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
42747
42748    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
42749    fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
42750    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
42751    fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
42752    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
42753    fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
42754
42755    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
42756    fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
42757    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
42758    fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
42759    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
42760    fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
42761
42762    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
42763    fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
42764    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
42765    fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
42766    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
42767    fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
42768
42769    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
42770    fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
42771    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
42772    fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
42773    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
42774    fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
42775
42776    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
42777    fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
42778    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
42779    fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
42780    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
42781    fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
42782
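    // 512-bit gathers: each selected element is loaded from
    // `slice + offsets[i] * scale`, with unselected lanes taken from `src`.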
42783    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
42784    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
42785    #[link_name = "llvm.x86.avx512.gather.dps.512"]
42786    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
42787    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
42788    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
42789    #[link_name = "llvm.x86.avx512.gather.qps.512"]
42790    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
42791    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
42792    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
42793    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
42794    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
42795    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
42796    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
42797    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
42798    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
42799
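    // 512-bit scatters: each selected element of `src` is stored to
    // `slice + offsets[i] * scale`.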
42800    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
42801    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
42802    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
42803    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
42804    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
42805    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
42806    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
42807    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
42808    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
42809    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
42810
42811    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
42812    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
42813    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
42814    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
42815    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
42816    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
42817
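    // AVX512VL (128-bit and 256-bit) scatter variants; the `siv` forms use
    // 32-bit indices and the `div` forms use 64-bit indices.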
42818    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
42819    fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
42820    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
42821    fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
42822    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
42823    fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
42824    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
42825    fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
42826    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
42827    fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
42828    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
42829    fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
42830    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
42831    fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
42832    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
42833    fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
42834
42835    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
42836    fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
42837    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
42838    fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
42839    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
42840    fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
42841    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
42842    fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
42843    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
42844    fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
42845    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
42846    fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
42847    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
42848    fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
42849    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
42850    fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
42851
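    // AVX512VL (128-bit and 256-bit) gather variants, mirroring the scatter
    // naming above.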
42852    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
42853    fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
42854    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
42855    fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
42856    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
42857    fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
42858    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
42859    fn vgatherdps_128(src: f32x4, slice: *const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
42860    #[link_name = "llvm.x86.avx512.gather3div4.si"]
42861    fn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
42862    #[link_name = "llvm.x86.avx512.gather3div2.di"]
42863    fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
42864    #[link_name = "llvm.x86.avx512.gather3div2.df"]
42865    fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
42866    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
42867    fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
42868
42869    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
42870    fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
42871    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
42872    fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
42873    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
42874    fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
42875    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
42876    fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
42877    #[link_name = "llvm.x86.avx512.gather3div8.si"]
42878    fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
42879    #[link_name = "llvm.x86.avx512.gather3div4.di"]
42880    fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
42881    #[link_name = "llvm.x86.avx512.gather3div4.df"]
42882    fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
42883    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
42884    fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
42885
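    // Floating-point compares: `op` is the comparison predicate (a _CMP_*
    // constant) and the result is returned as a bitmask, zeroed where `m` is 0.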
42886    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
42887    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
42888    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
42889    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
42890
42891    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
42892    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
42893    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
42894    fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
42895    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
42896    fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
42897
42898    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
42899    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
42900    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
42901    fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
42902    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
42903    fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
42904
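    // Rotate each element left (`prol*`) or right (`pror*`) by an immediate
    // count.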
42905    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
42906    fn vprold(a: i32x16, imm8: i32) -> i32x16;
42907    #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
42908    fn vprold256(a: i32x8, imm8: i32) -> i32x8;
42909    #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
42910    fn vprold128(a: i32x4, imm8: i32) -> i32x4;
42911
42912    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
42913    fn vprord(a: i32x16, imm8: i32) -> i32x16;
42914    #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
42915    fn vprord256(a: i32x8, imm8: i32) -> i32x8;
42916    #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
42917    fn vprord128(a: i32x4, imm8: i32) -> i32x4;
42918
42919    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
42920    fn vprolq(a: i64x8, imm8: i32) -> i64x8;
42921    #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
42922    fn vprolq256(a: i64x4, imm8: i32) -> i64x4;
42923    #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
42924    fn vprolq128(a: i64x2, imm8: i32) -> i64x2;
42925
42926    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
42927    fn vprorq(a: i64x8, imm8: i32) -> i64x8;
42928    #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
42929    fn vprorq256(a: i64x4, imm8: i32) -> i64x4;
42930    #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
42931    fn vprorq128(a: i64x2, imm8: i32) -> i64x2;
42932
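    // Variable rotates: each element of `a` is rotated by the count in the
    // corresponding element of `b`.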
42933    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
42934    fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
42935    #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
42936    fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
42937    #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
42938    fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;
42939
42940    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
42941    fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
42942    #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
42943    fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
42944    #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
42945    fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;
42946
42947    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
42948    fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
42949    #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
42950    fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
42951    #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
42952    fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;
42953
42954    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
42955    fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
42956    #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
42957    fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
42958    #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
42959    fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;
42960
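    // Shifts: the `v`-suffixed forms shift each element by a per-element count,
    // while `psll`/`psrl`/`psra` shift all elements by the count held in the
    // low 64 bits of `count`.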
42961    #[link_name = "llvm.x86.avx512.psllv.d.512"]
42962    fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
42963    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
42964    fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
42965    #[link_name = "llvm.x86.avx512.psllv.q.512"]
42966    fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
42967    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
42968    fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
42969
42970    #[link_name = "llvm.x86.avx512.psll.d.512"]
42971    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
42972    #[link_name = "llvm.x86.avx512.psrl.d.512"]
42973    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
42974    #[link_name = "llvm.x86.avx512.psll.q.512"]
42975    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
42976    #[link_name = "llvm.x86.avx512.psrl.q.512"]
42977    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
42978
42979    #[link_name = "llvm.x86.avx512.psra.d.512"]
42980    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
42981
42982    #[link_name = "llvm.x86.avx512.psra.q.512"]
42983    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
42984    #[link_name = "llvm.x86.avx512.psra.q.256"]
42985    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
42986    #[link_name = "llvm.x86.avx512.psra.q.128"]
42987    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
42988
42989    #[link_name = "llvm.x86.avx512.psrav.d.512"]
42990    fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
42991
42992    #[link_name = "llvm.x86.avx512.psrav.q.512"]
42993    fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
42994    #[link_name = "llvm.x86.avx512.psrav.q.256"]
42995    fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
42996    #[link_name = "llvm.x86.avx512.psrav.q.128"]
42997    fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
42998
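    // Permutes: `vpermilvar*` shuffle within each 128-bit lane, `vperm*`
    // permute across the full vector, and `vpermi2*` select from the
    // concatenation of two sources using `idx`.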
42999    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
43000    fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
43001    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
43002    fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
43003
43004    #[link_name = "llvm.x86.avx512.permvar.si.512"]
43005    fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
43006
43007    #[link_name = "llvm.x86.avx512.permvar.di.512"]
43008    fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
43009    #[link_name = "llvm.x86.avx512.permvar.di.256"]
43010    fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
43011
43012    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
43013    fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
43014
43015    #[link_name = "llvm.x86.avx512.permvar.df.512"]
43016    fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
43017    #[link_name = "llvm.x86.avx512.permvar.df.256"]
43018    fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
43019
43020    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
43021    fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
43022    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
43023    fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
43024    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
43025    fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
43026
43027    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
43028    fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
43029    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
43030    fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
43031    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
43032    fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
43033
43034    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
43035    fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
43036    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
43037    fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
43038    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
43039    fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
43040
43041    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
43042    fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
43043    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
43044    fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
43045    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
43046    fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
43047
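    // Compress/expand: `compress` packs the mask-selected elements contiguously
    // into the low lanes (remaining lanes come from `src`), `expand` performs
    // the inverse, and the `compress.store` forms write the packed elements
    // straight to memory.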
43048    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
43049    fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
43050    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
43051    fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
43052    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
43053    fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
43054
43055    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
43056    fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
43057    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
43058    fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
43059    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
43060    fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
43061
43062    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
43063    fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
43064    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
43065    fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
43066    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
43067    fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
43068
43069    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
43070    fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
43071    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
43072    fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
43073    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
43074    fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43075
43076    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
43077    fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
43078    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
43079    fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
43080    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
43081    fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
43082
43083    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
43084    fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
43085    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
43086    fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
43087    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
43088    fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
43089
43090    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
43091    fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
43092    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
43093    fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
43094    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
43095    fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
43096
43097    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
43098    fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
43099    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
43100    fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
43101    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
43102    fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
43103
43104    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
43105    fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
43106    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
43107    fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
43108    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
43109    fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
43110
43111    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
43112    fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
43113    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
43114    fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
43115    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
43116    fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
43117
43118    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
43119    fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
43120    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
43121    fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
43122    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
43123    fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
43124
43125    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
43126    fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
43127    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
43128    fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
43129    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
43130    fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
43131
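    // Masked scalar (`ss`/`sd`) arithmetic: only the lowest element is
    // computed, with `rounding`/`sae` supplying the embedded rounding or
    // suppress-all-exceptions control.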
43132    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
43133    fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43134    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
43135    fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43136    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
43137    fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43138    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
43139    fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43140    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
43141    fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43142    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
43143    fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43144    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
43145    fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43146    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
43147    fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43148    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
43149    fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43150    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
43151    fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43152    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
43153    fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43154    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
43155    fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43156    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
43157    fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
43158    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
43159    fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
43160    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
43161    fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43162    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
43163    fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43164    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
43165    fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
43166    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
43167    fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
43168
43169    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
43170    fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43171    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
43172    fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43173    #[link_name = "llvm.x86.avx512.rcp14.ss"]
43174    fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43175    #[link_name = "llvm.x86.avx512.rcp14.sd"]
43176    fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43177
43178    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
43179    fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
43180    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
43181    fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
43182    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
43183    fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43184    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
43185    fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43186
43187    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
43188    fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
43189    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
43190    fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;
43191
43192    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
43193    fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43194    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
43195    fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43196    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
43197    fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
43198    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
43199    fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
43200
43201    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
43202    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
43203    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
43204    fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43205
43206    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
43207    fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
43208    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
43209    fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
43210
43211    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
43212    fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
43213    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
43214    fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
43215
43216    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
43217    fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
43218
43219    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
43220    fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
43221
43222    #[link_name = "llvm.x86.avx512.cvttss2si"]
43223    fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
43224    #[link_name = "llvm.x86.avx512.cvttss2usi"]
43225    fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
43226
43227    #[link_name = "llvm.x86.avx512.cvttsd2si"]
43228    fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
43229    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
43230    fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;
43231
43232    #[link_name = "llvm.x86.avx512.vcomi.ss"]
43233    fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
43234    #[link_name = "llvm.x86.avx512.vcomi.sd"]
43235    fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
43236
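    // Masked loads: the `loadu` forms are unaligned, the `load` (dqa/aps/apd)
    // forms below require alignment; lanes with a clear mask bit are taken
    // from `a`.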
43237    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
43238    fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43239    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
43240    fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43241    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
43242    fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43243    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
43244    fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43245    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
43246    fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43247    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
43248    fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43249    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
43250    fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43251    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
43252    fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43253    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
43254    fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43255    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
43256    fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43257    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
43258    fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43259    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
43260    fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43261
43262    #[link_name = "llvm.x86.avx512.mask.load.d.128"]
43263    fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43264    #[link_name = "llvm.x86.avx512.mask.load.q.128"]
43265    fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43266    #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
43267    fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43268    #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
43269    fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43270    #[link_name = "llvm.x86.avx512.mask.load.d.256"]
43271    fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43272    #[link_name = "llvm.x86.avx512.mask.load.q.256"]
43273    fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43274    #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
43275    fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43276    #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
43277    fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43278    #[link_name = "llvm.x86.avx512.mask.load.d.512"]
43279    fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43280    #[link_name = "llvm.x86.avx512.mask.load.q.512"]
43281    fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43282    #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
43283    fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43284    #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
43285    fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43286
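    // Masked stores (unaligned `storeu` and aligned `store` forms): only the
    // lanes whose mask bit is set are written to memory.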
43287    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
43288    fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
43289    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
43290    fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
43291    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
43292    fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
43293    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
43294    fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
43295    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
43296    fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
43297    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
43298    fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
43299    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
43300    fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
43301    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
43302    fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
43303    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
43304    fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
43305    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
43306    fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
43307    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
43308    fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
43309    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
43310    fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43311
43312    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
43313    fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
43314    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
43315    fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
43316    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
43317    fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
43318    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
43319    fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
43320    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
43321    fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
43322    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
43323    fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
43324    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
43325    fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
43326    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
43327    fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
43328    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
43329    fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
43330    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
43331    fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
43332    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
43333    fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
43334    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
43335    fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43336
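    // Expand loads: consecutive elements are read from `mem_addr` and placed
    // into the mask-selected lanes of the result, with the remaining lanes
    // taken from `a`.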
43337    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
43338    fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43339    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
43340    fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43341    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
43342    fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43343    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
43344    fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43345    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
43346    fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43347    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
43348    fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43349    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
43350    fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43351    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
43352    fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43353    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
43354    fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43355    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
43356    fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43357    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
43358    fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43359    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
43360    fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43361
43362}
43363
43364#[cfg(test)]
43365mod tests {
43366
43367    use stdarch_test::simd_test;
43368
43369    use crate::core_arch::x86::*;
43370    use crate::hint::black_box;
43371    use crate::mem;
43372
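    // The masked intrinsics are tested with an all-zero mask (the result must
    // equal `src`, or zero for the `maskz` forms) and with a partial or full
    // mask checked against an explicitly constructed expected vector.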
43373    #[simd_test(enable = "avx512f")]
43374    unsafe fn test_mm512_abs_epi32() {
43375        #[rustfmt::skip]
43376        let a = _mm512_setr_epi32(
43377            0, 1, -1, i32::MAX,
43378            i32::MIN, 100, -100, -32,
43379            0, 1, -1, i32::MAX,
43380            i32::MIN, 100, -100, -32,
43381        );
43382        let r = _mm512_abs_epi32(a);
43383        #[rustfmt::skip]
43384        let e = _mm512_setr_epi32(
43385            0, 1, 1, i32::MAX,
43386            i32::MAX.wrapping_add(1), 100, 100, 32,
43387            0, 1, 1, i32::MAX,
43388            i32::MAX.wrapping_add(1), 100, 100, 32,
43389        );
43390        assert_eq_m512i(r, e);
43391    }
43392
43393    #[simd_test(enable = "avx512f")]
43394    unsafe fn test_mm512_mask_abs_epi32() {
43395        #[rustfmt::skip]
43396        let a = _mm512_setr_epi32(
43397            0, 1, -1, i32::MAX,
43398            i32::MIN, 100, -100, -32,
43399            0, 1, -1, i32::MAX,
43400            i32::MIN, 100, -100, -32,
43401        );
43402        let r = _mm512_mask_abs_epi32(a, 0, a);
43403        assert_eq_m512i(r, a);
43404        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
43405        #[rustfmt::skip]
43406        let e = _mm512_setr_epi32(
43407            0, 1, 1, i32::MAX,
43408            i32::MAX.wrapping_add(1), 100, 100, 32,
43409            0, 1, -1, i32::MAX,
43410            i32::MIN, 100, -100, -32,
43411        );
43412        assert_eq_m512i(r, e);
43413    }
43414
43415    #[simd_test(enable = "avx512f")]
43416    unsafe fn test_mm512_maskz_abs_epi32() {
43417        #[rustfmt::skip]
43418        let a = _mm512_setr_epi32(
43419            0, 1, -1, i32::MAX,
43420            i32::MIN, 100, -100, -32,
43421            0, 1, -1, i32::MAX,
43422            i32::MIN, 100, -100, -32,
43423        );
43424        let r = _mm512_maskz_abs_epi32(0, a);
43425        assert_eq_m512i(r, _mm512_setzero_si512());
43426        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
43427        #[rustfmt::skip]
43428        let e = _mm512_setr_epi32(
43429            0, 1, 1, i32::MAX,
43430            i32::MAX.wrapping_add(1), 100, 100, 32,
43431            0, 0, 0, 0,
43432            0, 0, 0, 0,
43433        );
43434        assert_eq_m512i(r, e);
43435    }
43436
43437    #[simd_test(enable = "avx512f,avx512vl")]
43438    unsafe fn test_mm256_mask_abs_epi32() {
43439        #[rustfmt::skip]
43440        let a = _mm256_setr_epi32(
43441            0, 1, -1, i32::MAX,
43442            i32::MIN, 100, -100, -32,
43443        );
43444        let r = _mm256_mask_abs_epi32(a, 0, a);
43445        assert_eq_m256i(r, a);
43446        let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
43447        #[rustfmt::skip]
43448        let e = _mm256_setr_epi32(
43449            0, 1, 1, i32::MAX,
43450            i32::MAX.wrapping_add(1), 100, -100, -32,
43451        );
43452        assert_eq_m256i(r, e);
43453    }
43454
43455    #[simd_test(enable = "avx512f,avx512vl")]
43456    unsafe fn test_mm256_maskz_abs_epi32() {
43457        #[rustfmt::skip]
43458        let a = _mm256_setr_epi32(
43459            0, 1, -1, i32::MAX,
43460            i32::MIN, 100, -100, -32,
43461        );
43462        let r = _mm256_maskz_abs_epi32(0, a);
43463        assert_eq_m256i(r, _mm256_setzero_si256());
43464        let r = _mm256_maskz_abs_epi32(0b00001111, a);
43465        #[rustfmt::skip]
43466        let e = _mm256_setr_epi32(
43467            0, 1, 1, i32::MAX,
43468            0, 0, 0, 0,
43469        );
43470        assert_eq_m256i(r, e);
43471    }
43472
43473    #[simd_test(enable = "avx512f,avx512vl")]
43474    unsafe fn test_mm_mask_abs_epi32() {
43475        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43476        let r = _mm_mask_abs_epi32(a, 0, a);
43477        assert_eq_m128i(r, a);
43478        let r = _mm_mask_abs_epi32(a, 0b00001111, a);
43479        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43480        assert_eq_m128i(r, e);
43481    }
43482
43483    #[simd_test(enable = "avx512f,avx512vl")]
43484    unsafe fn test_mm_maskz_abs_epi32() {
43485        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43486        let r = _mm_maskz_abs_epi32(0, a);
43487        assert_eq_m128i(r, _mm_setzero_si128());
43488        let r = _mm_maskz_abs_epi32(0b00001111, a);
43489        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43490        assert_eq_m128i(r, e);
43491    }
43492
43493    #[simd_test(enable = "avx512f")]
43494    unsafe fn test_mm512_abs_ps() {
43495        #[rustfmt::skip]
43496        let a = _mm512_setr_ps(
43497            0., 1., -1., f32::MAX,
43498            f32::MIN, 100., -100., -32.,
43499            0., 1., -1., f32::MAX,
43500            f32::MIN, 100., -100., -32.,
43501        );
43502        let r = _mm512_abs_ps(a);
43503        #[rustfmt::skip]
43504        let e = _mm512_setr_ps(
43505            0., 1., 1., f32::MAX,
43506            f32::MAX, 100., 100., 32.,
43507            0., 1., 1., f32::MAX,
43508            f32::MAX, 100., 100., 32.,
43509        );
43510        assert_eq_m512(r, e);
43511    }
43512
43513    #[simd_test(enable = "avx512f")]
43514    unsafe fn test_mm512_mask_abs_ps() {
43515        #[rustfmt::skip]
43516        let a = _mm512_setr_ps(
43517            0., 1., -1., f32::MAX,
43518            f32::MIN, 100., -100., -32.,
43519            0., 1., -1., f32::MAX,
43520            f32::MIN, 100., -100., -32.,
43521        );
43522        let r = _mm512_mask_abs_ps(a, 0, a);
43523        assert_eq_m512(r, a);
43524        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
43525        #[rustfmt::skip]
43526        let e = _mm512_setr_ps(
43527            0., 1., 1., f32::MAX,
43528            f32::MAX, 100., 100., 32.,
43529            0., 1., -1., f32::MAX,
43530            f32::MIN, 100., -100., -32.,
43531        );
43532        assert_eq_m512(r, e);
43533    }
43534
43535    #[simd_test(enable = "avx512f")]
43536    unsafe fn test_mm512_mask_mov_epi32() {
43537        let src = _mm512_set1_epi32(1);
43538        let a = _mm512_set1_epi32(2);
43539        let r = _mm512_mask_mov_epi32(src, 0, a);
43540        assert_eq_m512i(r, src);
43541        let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
43542        assert_eq_m512i(r, a);
43543    }
43544
43545    #[simd_test(enable = "avx512f")]
43546    unsafe fn test_mm512_maskz_mov_epi32() {
43547        let a = _mm512_set1_epi32(2);
43548        let r = _mm512_maskz_mov_epi32(0, a);
43549        assert_eq_m512i(r, _mm512_setzero_si512());
43550        let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
43551        assert_eq_m512i(r, a);
43552    }
43553
43554    #[simd_test(enable = "avx512f,avx512vl")]
43555    unsafe fn test_mm256_mask_mov_epi32() {
43556        let src = _mm256_set1_epi32(1);
43557        let a = _mm256_set1_epi32(2);
43558        let r = _mm256_mask_mov_epi32(src, 0, a);
43559        assert_eq_m256i(r, src);
43560        let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
43561        assert_eq_m256i(r, a);
43562    }
43563
43564    #[simd_test(enable = "avx512f,avx512vl")]
43565    unsafe fn test_mm256_maskz_mov_epi32() {
43566        let a = _mm256_set1_epi32(2);
43567        let r = _mm256_maskz_mov_epi32(0, a);
43568        assert_eq_m256i(r, _mm256_setzero_si256());
43569        let r = _mm256_maskz_mov_epi32(0b11111111, a);
43570        assert_eq_m256i(r, a);
43571    }
43572
43573    #[simd_test(enable = "avx512f,avx512vl")]
43574    unsafe fn test_mm_mask_mov_epi32() {
43575        let src = _mm_set1_epi32(1);
43576        let a = _mm_set1_epi32(2);
43577        let r = _mm_mask_mov_epi32(src, 0, a);
43578        assert_eq_m128i(r, src);
43579        let r = _mm_mask_mov_epi32(src, 0b00001111, a);
43580        assert_eq_m128i(r, a);
43581    }
43582
43583    #[simd_test(enable = "avx512f,avx512vl")]
43584    unsafe fn test_mm_maskz_mov_epi32() {
43585        let a = _mm_set1_epi32(2);
43586        let r = _mm_maskz_mov_epi32(0, a);
43587        assert_eq_m128i(r, _mm_setzero_si128());
43588        let r = _mm_maskz_mov_epi32(0b00001111, a);
43589        assert_eq_m128i(r, a);
43590    }
43591
43592    #[simd_test(enable = "avx512f")]
43593    unsafe fn test_mm512_mask_mov_ps() {
43594        let src = _mm512_set1_ps(1.);
43595        let a = _mm512_set1_ps(2.);
43596        let r = _mm512_mask_mov_ps(src, 0, a);
43597        assert_eq_m512(r, src);
43598        let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
43599        assert_eq_m512(r, a);
43600    }
43601
43602    #[simd_test(enable = "avx512f")]
43603    unsafe fn test_mm512_maskz_mov_ps() {
43604        let a = _mm512_set1_ps(2.);
43605        let r = _mm512_maskz_mov_ps(0, a);
43606        assert_eq_m512(r, _mm512_setzero_ps());
43607        let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
43608        assert_eq_m512(r, a);
43609    }
43610
43611    #[simd_test(enable = "avx512f,avx512vl")]
43612    unsafe fn test_mm256_mask_mov_ps() {
43613        let src = _mm256_set1_ps(1.);
43614        let a = _mm256_set1_ps(2.);
43615        let r = _mm256_mask_mov_ps(src, 0, a);
43616        assert_eq_m256(r, src);
43617        let r = _mm256_mask_mov_ps(src, 0b11111111, a);
43618        assert_eq_m256(r, a);
43619    }
43620
43621    #[simd_test(enable = "avx512f,avx512vl")]
43622    unsafe fn test_mm256_maskz_mov_ps() {
43623        let a = _mm256_set1_ps(2.);
43624        let r = _mm256_maskz_mov_ps(0, a);
43625        assert_eq_m256(r, _mm256_setzero_ps());
43626        let r = _mm256_maskz_mov_ps(0b11111111, a);
43627        assert_eq_m256(r, a);
43628    }
43629
43630    #[simd_test(enable = "avx512f,avx512vl")]
43631    unsafe fn test_mm_mask_mov_ps() {
43632        let src = _mm_set1_ps(1.);
43633        let a = _mm_set1_ps(2.);
43634        let r = _mm_mask_mov_ps(src, 0, a);
43635        assert_eq_m128(r, src);
43636        let r = _mm_mask_mov_ps(src, 0b00001111, a);
43637        assert_eq_m128(r, a);
43638    }
43639
43640    #[simd_test(enable = "avx512f,avx512vl")]
43641    unsafe fn test_mm_maskz_mov_ps() {
43642        let a = _mm_set1_ps(2.);
43643        let r = _mm_maskz_mov_ps(0, a);
43644        assert_eq_m128(r, _mm_setzero_ps());
43645        let r = _mm_maskz_mov_ps(0b00001111, a);
43646        assert_eq_m128(r, a);
43647    }
43648
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_add_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_add_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_add_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_mask_add_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_add_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_add_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

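    // The 256-bit and 128-bit variants additionally require AVX512VL; the 128-bit
    // forms only consume the low four bits of the `__mmask8` argument.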
    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_add_epi32() {
        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_mask_add_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi32() {
        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_add_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi32(0b11111111, a, b);
        let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_add_epi32() {
        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
        let b = _mm_set1_epi32(1);
        let r = _mm_mask_add_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_add_epi32() {
        let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_add_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi32(0b00001111, a, b);
        let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
        assert_eq_m128i(r, e);
    }

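    // For the `f32` additions, adding `1.0` to `f32::MAX` or `f32::MIN` leaves the value
    // unchanged, since 1.0 is far smaller than one ULP at that magnitude; the expected
    // vectors below reflect that.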
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_add_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_add_ps(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_add_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_add_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_add_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_maskz_add_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_add_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(1.);
        let r = _mm256_mask_add_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_add_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(1.);
        let r = _mm256_maskz_add_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_add_ps(0b11111111, a, b);
        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_add_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(1.);
        let r = _mm_mask_add_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_add_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_add_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(1.);
        let r = _mm_maskz_add_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_add_ps(0b00001111, a, b);
        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
        assert_eq_m128(r, e);
    }

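    // The `sub` tests mirror the `add` tests: `vpsubd` also wraps, so `i32::MIN - 1`
    // wraps around to `i32::MAX`.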
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_sub_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_sub_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_sub_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_mask_sub_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(